Files
DataMate/frontend/src/pages/DataAnnotation/annotation.model.ts
Jerry Yan 33cf65c9f8 feat(annotation): 添加分段标注统计和进度跟踪功能
- 新增 SegmentStats 类型定义用于分段统计
- 实现分段标注进度计算和缓存机制
- 添加标注任务状态判断逻辑支持分段模式
- 集成分段统计数据显示到任务列表界面
- 实现分段总数自动计算和验证功能
- 扩展标注状态枚举支持进行中标注状态
- 优化任务选择逻辑基于分段完成状态
- 添加分段统计数据预加载和同步机制
2026-01-31 15:42:04 +08:00

312 lines
7.8 KiB
TypeScript

import type { DatasetType } from "@/pages/DataManagement/dataset.model";
/**
 * Status values an annotation task can carry.
 *
 * Every member is a string enum entry whose value is the lowercase form of
 * its name.
 */
export enum AnnotationTaskStatus {
  ACTIVE = "active",
  INACTIVE = "inactive",
  PROCESSING = "processing",
  COMPLETED = "completed",
  SKIPPED = "skipped",
}
/**
 * Status values describing the annotation result of an item.
 *
 * Each member's string value is identical to its own name.
 */
export enum AnnotationResultStatus {
  ANNOTATED = "ANNOTATED",
  IN_PROGRESS = "IN_PROGRESS",
  NO_ANNOTATION = "NO_ANNOTATION",
  NOT_APPLICABLE = "NOT_APPLICABLE",
}
/**
 * Shape of an annotation task record.
 *
 * NOTE(review): the scale/unit of `progress`, `statistics.accuracy` and
 * `statistics.averageTime` is not visible in this file — confirm against the
 * API that produces these records.
 */
export interface AnnotationTask {
  // Identity and descriptive fields
  id: string;
  name: string;
  description?: string;
  type: DatasetType;
  status: AnnotationTaskStatus;

  // Identifiers of related records (as named)
  datasetId: string;
  labelingProjId: string;
  assignedTo?: string;

  // Counters and progress
  totalDataCount: number;
  annotationCount: number;
  progress: number;
  statistics: {
    accuracy: number;
    averageTime: number;
    reviewCount: number;
  };

  // Timestamps, carried as strings (format not specified here)
  createdAt: string;
  updatedAt: string;
}
// Types related to annotation templates

/**
 * A single label (control) entry of a template configuration.
 *
 * NOTE(review): `fromName` / `toName` presumably mirror the Label Studio
 * control-to-object binding — confirm against the template backend.
 */
export interface LabelDefinition {
  // Required members
  fromName: string;
  toName: string;
  type: string;

  // Optional members
  options?: string[];
  labels?: string[];
  required?: boolean;
  description?: string;
}
/**
 * A single object entry of a template configuration: three required string
 * fields (`name`, `type`, `value`).
 */
export interface ObjectDefinition {
  name: string;
  type: string;
  value: string;
}
/**
 * Structured configuration of an annotation template: its label definitions,
 * its object definitions, and an optional free-form metadata map.
 */
export interface TemplateConfiguration {
  labels: LabelDefinition[];
  objects: ObjectDefinition[];
  // Arbitrary extra data; values must be narrowed before use.
  metadata?: Record<string, unknown>;
}
/**
 * Shape of an annotation template record.
 *
 * NOTE(review): `labelConfig` is presumably the raw (serialized) counterpart
 * of `configuration` — confirm against the template API.
 */
export interface AnnotationTemplate {
  // Identity and descriptive fields
  id: string;
  name: string;
  description?: string;
  version: string;
  builtIn: boolean;

  // Categorisation
  dataType: string;
  labelingType: string;
  category: string;
  style: string;

  // Template definition, structured and/or as a plain string
  configuration?: TemplateConfiguration;
  labelConfig?: string;

  // Timestamps, carried as strings; `updatedAt` may be absent
  createdAt: string;
  updatedAt?: string;
}
/**
 * Paginated list of annotation templates.
 *
 * NOTE(review): whether `page` is zero- or one-based is not visible in this
 * file — confirm against the endpoint.
 */
export interface AnnotationTemplateListResponse {
  // Templates on the current page
  content: AnnotationTemplate[];

  // Paging information
  page: number;
  size: number;
  total: number;
  totalPages: number;
}
/**
 * Data type enumeration.
 *
 * Defines the data types supported by annotation tasks. Each value is the
 * lowercase form of the member name.
 */
export enum DataType {
  TEXT = "text",
  IMAGE = "image",
  AUDIO = "audio",
  VIDEO = "video",
  PDF = "pdf",
  TIMESERIES = "timeseries",
  CHAT = "chat",
  HTML = "html",
  TABLE = "table",
}
/**
 * Template classification enumeration.
 *
 * Based on the Label Studio template classification scheme.
 */
export enum Classification {
  /** Audio / speech processing */
  AUDIO_SPEECH = "audio-speech",

  /** Chat evaluation */
  CHAT = "chat",

  /** Computer vision */
  COMPUTER_VISION = "computer-vision",

  /** Conversational AI */
  CONVERSATIONAL_AI = "conversational-ai",

  /** Generative AI */
  GENERATIVE_AI = "generative-ai",

  /** Natural language processing */
  NLP = "nlp",

  /** Ranking and scoring */
  RANKING_SCORING = "ranking-scoring",

  /** Structured data parsing */
  STRUCTURED_DATA = "structured-data",

  /** Time series analysis */
  TIME_SERIES = "time-series",

  /** Video processing */
  VIDEO = "video",

  /** Community contributions */
  COMMUNITY = "community",

  /** Custom */
  CUSTOM = "custom",
}
/**
 * Annotation type enumeration.
 *
 * Defines the concrete kinds of annotation task. Every value is the
 * kebab-case form of its member name.
 */
export enum AnnotationType {
  // ===== Audio / speech processing =====
  /** Automatic speech recognition (segmented) */
  ASR_SEGMENTS = "asr-segments",
  /** Automatic speech recognition */
  ASR = "asr",
  /** Conversation analysis */
  CONVERSATION_ANALYSIS = "conversation-analysis",
  /** Intent classification */
  INTENT_CLASSIFICATION = "intent-classification",
  /** Signal quality detection */
  SIGNAL_QUALITY = "signal-quality",
  /** Sound event detection */
  SOUND_EVENT_DETECTION = "sound-event-detection",
  /** Speaker segmentation */
  SPEAKER_SEGMENTATION = "speaker-segmentation",
  /** Speech transcription */
  SPEECH_TRANSCRIPTION = "speech-transcription",

  // ===== Chat evaluation =====
  /** Agent fine-tuning without an LLM */
  AGENT_FINE_TUNE = "agent-fine-tune",
  /** Agent fine-tuning with an LLM */
  AGENT_FINE_TUNE_LLM = "agent-fine-tune-llm",
  /** Red teaming */
  RED_TEAMING = "red-teaming",
  /** RLHF evaluation of production conversations */
  RLHF_EVALUATION = "rlhf-evaluation",
  /** Chatbot evaluation */
  CHATBOT_EVALUATION = "chatbot-evaluation",

  // ===== Computer vision =====
  /** Image captioning */
  IMAGE_CAPTIONING = "image-captioning",
  /** Image classification */
  IMAGE_CLASSIFICATION = "image-classification",
  /** Inventory tracking */
  INVENTORY_TRACKING = "inventory-tracking",
  /** Keypoint labeling */
  KEYPOINT_LABELING = "keypoint-labeling",
  /** Medical image classification */
  MEDICAL_IMAGE_CLASSIFICATION = "medical-image-classification",
  /** Multi-page document annotation */
  MULTIPAGE_DOCUMENT = "multipage-document",
  /** Object detection (bounding boxes) */
  OBJECT_DETECTION = "object-detection",
  /** OCR recognition */
  OCR = "ocr",
  /** PDF OCR annotation */
  PDF_OCR = "pdf-ocr",
  /** Semantic segmentation (masks) */
  SEMANTIC_SEGMENTATION_MASK = "semantic-segmentation-mask",
  /** Semantic segmentation (polygons) */
  SEMANTIC_SEGMENTATION_POLYGON = "semantic-segmentation-polygon",
  /** Visual Genome */
  VISUAL_GENOME = "visual-genome",
  /** Visual question answering */
  VQA = "vqa",

  // ===== Conversational AI =====
  /** Coreference resolution and entity linking */
  COREFERENCE_RESOLUTION = "coreference-resolution",
  /** Intent classification and slot filling */
  SLOT_FILLING = "slot-filling",
  /** Response generation */
  RESPONSE_GENERATION = "response-generation",
  /** Response selection */
  RESPONSE_SELECTION = "response-selection",

  // ===== Generative AI =====
  /** Chatbot model assessment */
  CHATBOT_ASSESSMENT = "chatbot-assessment",
  /** RLHF human preference collection */
  RLHF_PREFERENCE = "rlhf-preference",
  /** LLM ranking */
  LLM_RANKING = "llm-ranking",
  /** LLM response grading */
  LLM_GRADING = "llm-grading",
  /** Supervised fine-tuning */
  SFT = "sft",
  /** Visual ranking */
  VISUAL_RANKING = "visual-ranking",

  // ===== Natural language processing =====
  /** Content moderation */
  CONTENT_MODERATION = "content-moderation",
  /** Machine translation */
  MACHINE_TRANSLATION = "machine-translation",
  /** Named entity recognition */
  NER = "ner",
  /** Question answering */
  QUESTION_ANSWERING = "question-answering",
  /** Relation extraction */
  RELATION_EXTRACTION = "relation-extraction",
  /** Taxonomy / hierarchical classification */
  TAXONOMY = "taxonomy",
  /** Text classification */
  TEXT_CLASSIFICATION = "text-classification",
  /** Text summarization */
  TEXT_SUMMARIZATION = "text-summarization",

  // ===== Ranking and scoring =====
  /** ASR hypothesis selection */
  ASR_HYPOTHESES = "asr-hypotheses",
  /** Content-based image retrieval */
  IMAGE_RETRIEVAL = "image-retrieval",
  /** Document retrieval */
  DOCUMENT_RETRIEVAL = "document-retrieval",
  /** Pairwise classification */
  PAIRWISE_CLASSIFICATION = "pairwise-classification",
  /** Pairwise regression */
  PAIRWISE_REGRESSION = "pairwise-regression",
  /** Search results page ranking */
  SERP_RANKING = "serp-ranking",
  /** Text-to-image generation */
  TEXT_TO_IMAGE = "text-to-image",

  // ===== Structured data parsing =====
  /** Free-form metadata */
  FREEFORM_METADATA = "freeform-metadata",
  /** HTML entity recognition */
  HTML_ENTITY_RECOGNITION = "html-entity-recognition",
  /** PDF classification */
  PDF_CLASSIFICATION = "pdf-classification",
  /** Tabular data annotation */
  TABULAR_DATA = "tabular-data",

  // ===== Time series analysis =====
  /** Activity recognition */
  ACTIVITY_RECOGNITION = "activity-recognition",
  /** Change point detection */
  CHANGE_POINT_DETECTION = "change-point-detection",
  /** Anomaly detection */
  ANOMALY_DETECTION = "anomaly-detection",
  /** Time series signal quality */
  TIMESERIES_SIGNAL_QUALITY = "timeseries-signal-quality",
  /** Time series forecasting */
  TIMESERIES_FORECASTING = "timeseries-forecasting",

  // ===== Video processing =====
  /** Video classification */
  VIDEO_CLASSIFICATION = "video-classification",
  /** Video frame classification */
  VIDEO_FRAME_CLASSIFICATION = "video-frame-classification",
  /** Video object tracking */
  VIDEO_OBJECT_TRACKING = "video-object-tracking",
  /** Video timeline segmentation */
  VIDEO_TIMELINE_SEGMENTATION = "video-timeline-segmentation",

  // ===== Community contributions =====
  /** Breast cancer mammogram classification */
  MAMMOGRAM_CLASSIFICATION = "mammogram-classification",
  /** HTML NER tagging */
  HTML_NER_TAGGING = "html-ner-tagging",
  /** Invoice NER tagging (BIO format) */
  INVOICE_NER_BIO = "invoice-ner-bio",
  /** OCR invoice pre-NER (BIO format) */
  OCR_INVOICE_PRE_NER = "ocr-invoice-pre-ner",
  /** Twitter/X sentiment analysis */
  TWITTER_SENTIMENT = "twitter-sentiment",

  // ===== Generic / legacy types =====
  /** Generic classification */
  CLASSIFICATION = "classification",
  /** Generic segmentation */
  SEGMENTATION = "segmentation",
}
/**
 * Template origin selector whose values are the strings "true" and "false".
 *
 * NOTE(review): these presumably mirror the boolean `builtIn` flag of a
 * template in string form (e.g. for a query parameter) — confirm with callers.
 */
export enum TemplateType {
  SYSTEM = "true",
  CUSTOM = "false",
}