diff --git a/frontend/src/pages/DataAnnotation/Template/TemplateForm.tsx b/frontend/src/pages/DataAnnotation/Template/TemplateForm.tsx index 511c831..073b9d6 100644 --- a/frontend/src/pages/DataAnnotation/Template/TemplateForm.tsx +++ b/frontend/src/pages/DataAnnotation/Template/TemplateForm.tsx @@ -17,6 +17,7 @@ import { updateAnnotationTemplateByIdUsingPut, } from "../annotation.api"; import type { AnnotationTemplate } from "../annotation.model"; +import { DataTypeMap, ClassificationMap, AnnotationTypeMap } from "../annotation.const"; import TagSelector from "./components/TagSelector"; const { TextArea } = Input; @@ -147,7 +148,7 @@ const TemplateForm: React.FC = ({ /> - + = ({ style={{ width: 200 }} > @@ -168,12 +168,10 @@ const TemplateForm: React.FC = ({ rules={[{ required: true, message: "请选择标注类型" }]} style={{ width: 220 }} > - + {Object.entries(AnnotationTypeMap).map(([key, item]) => ( + + ))} @@ -193,12 +191,10 @@ const TemplateForm: React.FC = ({ name="category" style={{ width: 180 }} > - + {Object.entries(ClassificationMap).map(([key, item]) => ( + + ))} diff --git a/frontend/src/pages/DataAnnotation/Template/TemplateList.tsx b/frontend/src/pages/DataAnnotation/Template/TemplateList.tsx index 2a4370b..5278169 100644 --- a/frontend/src/pages/DataAnnotation/Template/TemplateList.tsx +++ b/frontend/src/pages/DataAnnotation/Template/TemplateList.tsx @@ -112,10 +112,17 @@ const TemplateList: React.FC = () => { const getCategoryColor = (category: string) => { const colors: Record = { + "audio-speech": "purple", + "chat": "cyan", "computer-vision": "blue", + "conversational-ai": "magenta", + "generative-ai": "volcano", "nlp": "green", - "audio": "purple", - "quality-control": "orange", + "ranking-scoring": "gold", + "structured-data": "lime", + "time-series": "geekblue", + "video": "orange", + "community": "pink", "custom": "default", }; return colors[category] || "default"; @@ -164,7 +171,7 @@ const TemplateList: React.FC = () => { key: "dataType", width: 120, render: (dataType: string) => ( - {dataType} + {DataTypeMap[dataType as keyof typeof DataTypeMap]?.label || dataType} ), }, { @@ -173,7 +180,7 @@ const TemplateList: React.FC = () => { key: "labelingType", width: 150, render: (labelingType: string) => ( - {labelingType} + {AnnotationTypeMap[labelingType as keyof typeof AnnotationTypeMap]?.label || labelingType} ), }, { @@ -182,7 +189,7 @@ const TemplateList: React.FC = () => { key: "category", width: 150, render: (category: string) => ( - {category} + {ClassificationMap[category as keyof typeof ClassificationMap]?.label || category} ), }, { diff --git a/frontend/src/pages/DataAnnotation/annotation.const.tsx b/frontend/src/pages/DataAnnotation/annotation.const.tsx index a60b055..9cbb7d2 100644 --- a/frontend/src/pages/DataAnnotation/annotation.const.tsx +++ b/frontend/src/pages/DataAnnotation/annotation.const.tsx @@ -84,24 +84,72 @@ export const DataTypeMap = { label: "视频", value: DataType.VIDEO }, + [DataType.PDF]: { + label: "PDF文档", + value: DataType.PDF + }, + [DataType.TIMESERIES]: { + label: "时间序列", + value: DataType.TIMESERIES + }, + [DataType.CHAT]: { + label: "聊天数据", + value: DataType.CHAT + }, + [DataType.HTML]: { + label: "HTML文档", + value: DataType.HTML + }, + [DataType.TABLE]: { + label: "表格数据", + value: DataType.TABLE + }, } export const ClassificationMap = { - [Classification.COMPUTER_VERSION]: { + [Classification.AUDIO_SPEECH]: { + label: "音频/语音处理", + value: Classification.AUDIO_SPEECH + }, + [Classification.CHAT]: { + label: "聊天评估", + value: Classification.CHAT + }, + [Classification.COMPUTER_VISION]: { label: "计算机视觉", - value: Classification.COMPUTER_VERSION + value: Classification.COMPUTER_VISION + }, + [Classification.CONVERSATIONAL_AI]: { + label: "对话AI", + value: Classification.CONVERSATIONAL_AI + }, + [Classification.GENERATIVE_AI]: { + label: "生成式AI", + value: Classification.GENERATIVE_AI }, [Classification.NLP]: { label: "自然语言处理", value: Classification.NLP }, - [Classification.AUDIO]: { - label: "音频", - value: Classification.AUDIO + [Classification.RANKING_SCORING]: { + label: "排名与评分", + value: Classification.RANKING_SCORING }, - [Classification.QUALITY_CONTROL]: { - label: "质量控制", - value: Classification.QUALITY_CONTROL + [Classification.STRUCTURED_DATA]: { + label: "结构化数据解析", + value: Classification.STRUCTURED_DATA + }, + [Classification.TIME_SERIES]: { + label: "时间序列分析", + value: Classification.TIME_SERIES + }, + [Classification.VIDEO]: { + label: "视频处理", + value: Classification.VIDEO + }, + [Classification.COMMUNITY]: { + label: "社区贡献", + value: Classification.COMMUNITY }, [Classification.CUSTOM]: { label: "自定义", @@ -110,22 +158,313 @@ export const ClassificationMap = { } export const AnnotationTypeMap = { - [AnnotationType.CLASSIFICATION]: { - label: "分类", - value: AnnotationType.CLASSIFICATION + // ===== 音频/语音处理 ===== + [AnnotationType.ASR_SEGMENTS]: { + label: "语音识别(分段)", + value: AnnotationType.ASR_SEGMENTS + }, + [AnnotationType.ASR]: { + label: "语音识别", + value: AnnotationType.ASR + }, + [AnnotationType.CONVERSATION_ANALYSIS]: { + label: "对话分析", + value: AnnotationType.CONVERSATION_ANALYSIS + }, + [AnnotationType.INTENT_CLASSIFICATION]: { + label: "意图分类", + value: AnnotationType.INTENT_CLASSIFICATION + }, + [AnnotationType.SIGNAL_QUALITY]: { + label: "信号质量检测", + value: AnnotationType.SIGNAL_QUALITY + }, + [AnnotationType.SOUND_EVENT_DETECTION]: { + label: "声音事件检测", + value: AnnotationType.SOUND_EVENT_DETECTION + }, + [AnnotationType.SPEAKER_SEGMENTATION]: { + label: "说话人分割", + value: AnnotationType.SPEAKER_SEGMENTATION + }, + [AnnotationType.SPEECH_TRANSCRIPTION]: { + label: "语音转录", + value: AnnotationType.SPEECH_TRANSCRIPTION + }, + + // ===== 聊天评估 ===== + [AnnotationType.AGENT_FINE_TUNE]: { + label: "Agent微调(无LLM)", + value: AnnotationType.AGENT_FINE_TUNE + }, + [AnnotationType.AGENT_FINE_TUNE_LLM]: { + label: "Agent微调(有LLM)", + value: AnnotationType.AGENT_FINE_TUNE_LLM + }, + [AnnotationType.RED_TEAMING]: { + label: "红队测试", + value: AnnotationType.RED_TEAMING + }, + [AnnotationType.RLHF_EVALUATION]: { + label: "RLHF对话评估", + value: AnnotationType.RLHF_EVALUATION + }, + [AnnotationType.CHATBOT_EVALUATION]: { + label: "聊天机器人评估", + value: AnnotationType.CHATBOT_EVALUATION + }, + + // ===== 计算机视觉 ===== + [AnnotationType.IMAGE_CAPTIONING]: { + label: "图像描述", + value: AnnotationType.IMAGE_CAPTIONING + }, + [AnnotationType.IMAGE_CLASSIFICATION]: { + label: "图像分类", + value: AnnotationType.IMAGE_CLASSIFICATION + }, + [AnnotationType.INVENTORY_TRACKING]: { + label: "库存追踪", + value: AnnotationType.INVENTORY_TRACKING + }, + [AnnotationType.KEYPOINT_LABELING]: { + label: "关键点标注", + value: AnnotationType.KEYPOINT_LABELING + }, + [AnnotationType.MEDICAL_IMAGE_CLASSIFICATION]: { + label: "医学图像分类", + value: AnnotationType.MEDICAL_IMAGE_CLASSIFICATION + }, + [AnnotationType.MULTIPAGE_DOCUMENT]: { + label: "多页文档标注", + value: AnnotationType.MULTIPAGE_DOCUMENT }, [AnnotationType.OBJECT_DETECTION]: { label: "目标检测", value: AnnotationType.OBJECT_DETECTION }, - [AnnotationType.SEGMENTATION]: { - label: "分割", - value: AnnotationType.SEGMENTATION + [AnnotationType.OCR]: { + label: "OCR识别", + value: AnnotationType.OCR + }, + [AnnotationType.PDF_OCR]: { + label: "PDF OCR标注", + value: AnnotationType.PDF_OCR + }, + [AnnotationType.SEMANTIC_SEGMENTATION_MASK]: { + label: "语义分割(掩码)", + value: AnnotationType.SEMANTIC_SEGMENTATION_MASK + }, + [AnnotationType.SEMANTIC_SEGMENTATION_POLYGON]: { + label: "语义分割(多边形)", + value: AnnotationType.SEMANTIC_SEGMENTATION_POLYGON + }, + [AnnotationType.VISUAL_GENOME]: { + label: "Visual Genome", + value: AnnotationType.VISUAL_GENOME + }, + [AnnotationType.VQA]: { + label: "视觉问答", + value: AnnotationType.VQA + }, + + // ===== 对话AI ===== + [AnnotationType.COREFERENCE_RESOLUTION]: { + label: "共指消解", + value: AnnotationType.COREFERENCE_RESOLUTION + }, + [AnnotationType.SLOT_FILLING]: { + label: "槽填充", + value: AnnotationType.SLOT_FILLING + }, + [AnnotationType.RESPONSE_GENERATION]: { + label: "响应生成", + value: AnnotationType.RESPONSE_GENERATION + }, + [AnnotationType.RESPONSE_SELECTION]: { + label: "响应选择", + value: AnnotationType.RESPONSE_SELECTION + }, + + // ===== 生成式AI ===== + [AnnotationType.CHATBOT_ASSESSMENT]: { + label: "聊天机器人评估", + value: AnnotationType.CHATBOT_ASSESSMENT + }, + [AnnotationType.RLHF_PREFERENCE]: { + label: "RLHF人类偏好", + value: AnnotationType.RLHF_PREFERENCE + }, + [AnnotationType.LLM_RANKING]: { + label: "LLM排名", + value: AnnotationType.LLM_RANKING + }, + [AnnotationType.LLM_GRADING]: { + label: "LLM响应评分", + value: AnnotationType.LLM_GRADING + }, + [AnnotationType.SFT]: { + label: "监督微调", + value: AnnotationType.SFT + }, + [AnnotationType.VISUAL_RANKING]: { + label: "视觉排名", + value: AnnotationType.VISUAL_RANKING + }, + + // ===== 自然语言处理 ===== + [AnnotationType.CONTENT_MODERATION]: { + label: "内容审核", + value: AnnotationType.CONTENT_MODERATION + }, + [AnnotationType.MACHINE_TRANSLATION]: { + label: "机器翻译", + value: AnnotationType.MACHINE_TRANSLATION }, [AnnotationType.NER]: { label: "命名实体识别", value: AnnotationType.NER }, + [AnnotationType.QUESTION_ANSWERING]: { + label: "问答", + value: AnnotationType.QUESTION_ANSWERING + }, + [AnnotationType.RELATION_EXTRACTION]: { + label: "关系抽取", + value: AnnotationType.RELATION_EXTRACTION + }, + [AnnotationType.TAXONOMY]: { + label: "层级分类", + value: AnnotationType.TAXONOMY + }, + [AnnotationType.TEXT_CLASSIFICATION]: { + label: "文本分类", + value: AnnotationType.TEXT_CLASSIFICATION + }, + [AnnotationType.TEXT_SUMMARIZATION]: { + label: "文本摘要", + value: AnnotationType.TEXT_SUMMARIZATION + }, + + // ===== 排名与评分 ===== + [AnnotationType.ASR_HYPOTHESES]: { + label: "ASR假设选择", + value: AnnotationType.ASR_HYPOTHESES + }, + [AnnotationType.IMAGE_RETRIEVAL]: { + label: "图像检索", + value: AnnotationType.IMAGE_RETRIEVAL + }, + [AnnotationType.DOCUMENT_RETRIEVAL]: { + label: "文档检索", + value: AnnotationType.DOCUMENT_RETRIEVAL + }, + [AnnotationType.PAIRWISE_CLASSIFICATION]: { + label: "成对分类", + value: AnnotationType.PAIRWISE_CLASSIFICATION + }, + [AnnotationType.PAIRWISE_REGRESSION]: { + label: "成对回归", + value: AnnotationType.PAIRWISE_REGRESSION + }, + [AnnotationType.SERP_RANKING]: { + label: "搜索排名", + value: AnnotationType.SERP_RANKING + }, + [AnnotationType.TEXT_TO_IMAGE]: { + label: "文本生成图像", + value: AnnotationType.TEXT_TO_IMAGE + }, + + // ===== 结构化数据解析 ===== + [AnnotationType.FREEFORM_METADATA]: { + label: "自由格式元数据", + value: AnnotationType.FREEFORM_METADATA + }, + [AnnotationType.HTML_ENTITY_RECOGNITION]: { + label: "HTML实体识别", + value: AnnotationType.HTML_ENTITY_RECOGNITION + }, + [AnnotationType.PDF_CLASSIFICATION]: { + label: "PDF分类", + value: AnnotationType.PDF_CLASSIFICATION + }, + [AnnotationType.TABULAR_DATA]: { + label: "表格数据标注", + value: AnnotationType.TABULAR_DATA + }, + + // ===== 时间序列分析 ===== + [AnnotationType.ACTIVITY_RECOGNITION]: { + label: "活动识别", + value: AnnotationType.ACTIVITY_RECOGNITION + }, + [AnnotationType.CHANGE_POINT_DETECTION]: { + label: "变点检测", + value: AnnotationType.CHANGE_POINT_DETECTION + }, + [AnnotationType.ANOMALY_DETECTION]: { + label: "异常检测", + value: AnnotationType.ANOMALY_DETECTION + }, + [AnnotationType.TIMESERIES_SIGNAL_QUALITY]: { + label: "时序信号质量", + value: AnnotationType.TIMESERIES_SIGNAL_QUALITY + }, + [AnnotationType.TIMESERIES_FORECASTING]: { + label: "时序预测", + value: AnnotationType.TIMESERIES_FORECASTING + }, + + // ===== 视频处理 ===== + [AnnotationType.VIDEO_CLASSIFICATION]: { + label: "视频分类", + value: AnnotationType.VIDEO_CLASSIFICATION + }, + [AnnotationType.VIDEO_FRAME_CLASSIFICATION]: { + label: "视频帧分类", + value: AnnotationType.VIDEO_FRAME_CLASSIFICATION + }, + [AnnotationType.VIDEO_OBJECT_TRACKING]: { + label: "视频目标追踪", + value: AnnotationType.VIDEO_OBJECT_TRACKING + }, + [AnnotationType.VIDEO_TIMELINE_SEGMENTATION]: { + label: "视频时间线分割", + value: AnnotationType.VIDEO_TIMELINE_SEGMENTATION + }, + + // ===== 社区贡献 ===== + [AnnotationType.MAMMOGRAM_CLASSIFICATION]: { + label: "乳房X光分类", + value: AnnotationType.MAMMOGRAM_CLASSIFICATION + }, + [AnnotationType.HTML_NER_TAGGING]: { + label: "HTML NER标注", + value: AnnotationType.HTML_NER_TAGGING + }, + [AnnotationType.INVOICE_NER_BIO]: { + label: "发票NER(BIO)", + value: AnnotationType.INVOICE_NER_BIO + }, + [AnnotationType.OCR_INVOICE_PRE_NER]: { + label: "OCR发票预NER", + value: AnnotationType.OCR_INVOICE_PRE_NER + }, + [AnnotationType.TWITTER_SENTIMENT]: { + label: "Twitter情感分析", + value: AnnotationType.TWITTER_SENTIMENT + }, + + // ===== 通用/遗留类型 ===== + [AnnotationType.CLASSIFICATION]: { + label: "通用分类", + value: AnnotationType.CLASSIFICATION + }, + [AnnotationType.SEGMENTATION]: { + label: "通用分割", + value: AnnotationType.SEGMENTATION + }, } export const TemplateTypeMap = { diff --git a/frontend/src/pages/DataAnnotation/annotation.model.ts b/frontend/src/pages/DataAnnotation/annotation.model.ts index 3a751d6..e74859a 100644 --- a/frontend/src/pages/DataAnnotation/annotation.model.ts +++ b/frontend/src/pages/DataAnnotation/annotation.model.ts @@ -79,26 +79,223 @@ export interface AnnotationTemplateListResponse { totalPages: number; } +/** + * 数据类型枚举 + * 定义标注任务支持的数据类型 + */ export enum DataType { TEXT = "text", IMAGE = "image", AUDIO = "audio", VIDEO = "video", + PDF = "pdf", + TIMESERIES = "timeseries", + CHAT = "chat", + HTML = "html", + TABLE = "table", } +/** + * 模板分类枚举 + * 基于 Label Studio 模板分类体系 + */ export enum Classification { - COMPUTER_VERSION = "computer-vision", + // 音频/语音处理 + AUDIO_SPEECH = "audio-speech", + // 聊天评估 + CHAT = "chat", + // 计算机视觉 + COMPUTER_VISION = "computer-vision", + // 对话AI + CONVERSATIONAL_AI = "conversational-ai", + // 生成式AI + GENERATIVE_AI = "generative-ai", + // 自然语言处理 NLP = "nlp", - AUDIO = "audio", - QUALITY_CONTROL = "quality-control", - CUSTOM = "custom" + // 排名与评分 + RANKING_SCORING = "ranking-scoring", + // 结构化数据解析 + STRUCTURED_DATA = "structured-data", + // 时间序列分析 + TIME_SERIES = "time-series", + // 视频处理 + VIDEO = "video", + // 社区贡献 + COMMUNITY = "community", + // 自定义 + CUSTOM = "custom", } +/** + * 标注类型枚举 + * 定义各种具体的标注任务类型 + */ export enum AnnotationType { - CLASSIFICATION = "classification", + // ===== 音频/语音处理 ===== + // 自动语音识别(分段) + ASR_SEGMENTS = "asr-segments", + // 自动语音识别 + ASR = "asr", + // 对话分析 + CONVERSATION_ANALYSIS = "conversation-analysis", + // 意图分类 + INTENT_CLASSIFICATION = "intent-classification", + // 信号质量检测 + SIGNAL_QUALITY = "signal-quality", + // 声音事件检测 + SOUND_EVENT_DETECTION = "sound-event-detection", + // 说话人分割 + SPEAKER_SEGMENTATION = "speaker-segmentation", + // 语音转录 + SPEECH_TRANSCRIPTION = "speech-transcription", + + // ===== 聊天评估 ===== + // 无LLM的Agent微调 + AGENT_FINE_TUNE = "agent-fine-tune", + // 有LLM的Agent微调 + AGENT_FINE_TUNE_LLM = "agent-fine-tune-llm", + // 红队测试 + RED_TEAMING = "red-teaming", + // RLHF生产对话评估 + RLHF_EVALUATION = "rlhf-evaluation", + // 聊天机器人评估 + CHATBOT_EVALUATION = "chatbot-evaluation", + + // ===== 计算机视觉 ===== + // 图像描述 + IMAGE_CAPTIONING = "image-captioning", + // 图像分类 + IMAGE_CLASSIFICATION = "image-classification", + // 库存追踪 + INVENTORY_TRACKING = "inventory-tracking", + // 关键点标注 + KEYPOINT_LABELING = "keypoint-labeling", + // 医学图像分类 + MEDICAL_IMAGE_CLASSIFICATION = "medical-image-classification", + // 多页文档标注 + MULTIPAGE_DOCUMENT = "multipage-document", + // 目标检测(边界框) OBJECT_DETECTION = "object-detection", + // OCR识别 + OCR = "ocr", + // PDF OCR标注 + PDF_OCR = "pdf-ocr", + // 语义分割(掩码) + SEMANTIC_SEGMENTATION_MASK = "semantic-segmentation-mask", + // 语义分割(多边形) + SEMANTIC_SEGMENTATION_POLYGON = "semantic-segmentation-polygon", + // Visual Genome + VISUAL_GENOME = "visual-genome", + // 视觉问答 + VQA = "vqa", + + // ===== 对话AI ===== + // 共指消解与实体链接 + COREFERENCE_RESOLUTION = "coreference-resolution", + // 意图分类与槽填充 + SLOT_FILLING = "slot-filling", + // 响应生成 + RESPONSE_GENERATION = "response-generation", + // 响应选择 + RESPONSE_SELECTION = "response-selection", + + // ===== 生成式AI ===== + // 聊天机器人模型评估 + CHATBOT_ASSESSMENT = "chatbot-assessment", + // RLHF人类偏好收集 + RLHF_PREFERENCE = "rlhf-preference", + // LLM排名 + LLM_RANKING = "llm-ranking", + // LLM响应评分 + LLM_GRADING = "llm-grading", + // 监督微调 + SFT = "sft", + // 视觉排名 + VISUAL_RANKING = "visual-ranking", + + // ===== 自然语言处理 ===== + // 内容审核 + CONTENT_MODERATION = "content-moderation", + // 机器翻译 + MACHINE_TRANSLATION = "machine-translation", + // 命名实体识别 + NER = "ner", + // 问答 + QUESTION_ANSWERING = "question-answering", + // 关系抽取 + RELATION_EXTRACTION = "relation-extraction", + // 分类法/层级分类 + TAXONOMY = "taxonomy", + // 文本分类 + TEXT_CLASSIFICATION = "text-classification", + // 文本摘要 + TEXT_SUMMARIZATION = "text-summarization", + + // ===== 排名与评分 ===== + // ASR假设选择 + ASR_HYPOTHESES = "asr-hypotheses", + // 基于内容的图像检索 + IMAGE_RETRIEVAL = "image-retrieval", + // 文档检索 + DOCUMENT_RETRIEVAL = "document-retrieval", + // 成对分类 + PAIRWISE_CLASSIFICATION = "pairwise-classification", + // 成对回归 + PAIRWISE_REGRESSION = "pairwise-regression", + // 搜索页面排名 + SERP_RANKING = "serp-ranking", + // 文本到图像生成 + TEXT_TO_IMAGE = "text-to-image", + + // ===== 结构化数据解析 ===== + // 自由格式元数据 + FREEFORM_METADATA = "freeform-metadata", + // HTML实体识别 + HTML_ENTITY_RECOGNITION = "html-entity-recognition", + // PDF分类 + PDF_CLASSIFICATION = "pdf-classification", + // 表格数据标注 + TABULAR_DATA = "tabular-data", + + // ===== 时间序列分析 ===== + // 活动识别 + ACTIVITY_RECOGNITION = "activity-recognition", + // 变点检测 + CHANGE_POINT_DETECTION = "change-point-detection", + // 异常检测 + ANOMALY_DETECTION = "anomaly-detection", + // 时间序列信号质量 + TIMESERIES_SIGNAL_QUALITY = "timeseries-signal-quality", + // 时间序列预测 + TIMESERIES_FORECASTING = "timeseries-forecasting", + + // ===== 视频处理 ===== + // 视频分类 + VIDEO_CLASSIFICATION = "video-classification", + // 视频帧分类 + VIDEO_FRAME_CLASSIFICATION = "video-frame-classification", + // 视频目标追踪 + VIDEO_OBJECT_TRACKING = "video-object-tracking", + // 视频时间线分割 + VIDEO_TIMELINE_SEGMENTATION = "video-timeline-segmentation", + + // ===== 社区贡献 ===== + // 乳腺癌乳房X光分类 + MAMMOGRAM_CLASSIFICATION = "mammogram-classification", + // HTML NER标注 + HTML_NER_TAGGING = "html-ner-tagging", + // 发票NER标注(BIO格式) + INVOICE_NER_BIO = "invoice-ner-bio", + // OCR发票预NER(BIO格式) + OCR_INVOICE_PRE_NER = "ocr-invoice-pre-ner", + // Twitter/X情感分析 + TWITTER_SENTIMENT = "twitter-sentiment", + + // ===== 通用/遗留类型 ===== + // 通用分类 + CLASSIFICATION = "classification", + // 通用分割 SEGMENTATION = "segmentation", - NER = "ner" } export enum TemplateType {