You've already forked DataMate
- 新增 SegmentStats 类型定义用于分段统计 - 实现分段标注进度计算和缓存机制 - 添加标注任务状态判断逻辑支持分段模式 - 集成分段统计数据显示到任务列表界面 - 实现分段总数自动计算和验证功能 - 扩展标注状态枚举支持进行中标注状态 - 优化任务选择逻辑基于分段完成状态 - 添加分段统计数据预加载和同步机制
312 lines
7.8 KiB
TypeScript
312 lines
7.8 KiB
TypeScript
import type { DatasetType } from "@/pages/DataManagement/dataset.model";
|
|
|
|
/**
 * String-valued status codes carried by an annotation task
 * (see the `status` field of `AnnotationTask`).
 *
 * The member values are the lowercase wire strings; do not change them
 * without coordinating with whatever produces these values.
 */
export enum AnnotationTaskStatus {
  ACTIVE = "active",
  INACTIVE = "inactive",
  PROCESSING = "processing",
  COMPLETED = "completed",
  SKIPPED = "skipped",
}
|
|
|
|
/**
 * String-valued status codes describing the annotation result of an item.
 *
 * Unlike `AnnotationTaskStatus`, these values are upper-case and equal to
 * their member names.
 */
export enum AnnotationResultStatus {
  ANNOTATED = "ANNOTATED",
  IN_PROGRESS = "IN_PROGRESS",
  NO_ANNOTATION = "NO_ANNOTATION",
  NOT_APPLICABLE = "NOT_APPLICABLE",
}
|
|
|
|
/**
 * Shape of an annotation task record as used by the front end.
 */
export interface AnnotationTask {
  id: string;
  name: string;
  // NOTE(review): presumably the id of the linked labeling project — confirm.
  labelingProjId: string;
  datasetId: string;

  annotationCount: number;

  description?: string;
  assignedTo?: string;
  // NOTE(review): the scale (0-1 vs 0-100) is not visible here — confirm with the producer.
  progress: number;
  statistics: {
    accuracy: number;
    averageTime: number;
    reviewCount: number;
  };
  status: AnnotationTaskStatus;
  totalDataCount: number;
  type: DatasetType;

  // NOTE(review): timestamps are plain strings; the format is not shown here.
  createdAt: string;
  updatedAt: string;
}
|
|
|
|
// Annotation-template related types
|
|
/**
 * One label/control entry inside a template configuration.
 *
 * Only `fromName`, `toName` and `type` are required; the remaining fields
 * are optional.
 */
export interface LabelDefinition {
  // NOTE(review): `fromName` / `toName` look like Label Studio's control-tag
  // name and target object-tag name — confirm against the template parser.
  fromName: string;
  toName: string;
  type: string;
  options?: string[];
  labels?: string[];
  required?: boolean;
  description?: string;
}
|
|
|
|
/**
 * One data-object entry inside a template configuration.
 * All three fields are required strings.
 */
export interface ObjectDefinition {
  name: string;
  type: string;
  // NOTE(review): presumably a reference to the task data field to render — confirm.
  value: string;
}
|
|
|
|
/**
 * Structured configuration of an annotation template: the label definitions,
 * the object definitions, and an optional free-form metadata bag.
 */
export interface TemplateConfiguration {
  labels: LabelDefinition[];
  objects: ObjectDefinition[];
  // Values are `unknown`, so consumers must narrow before use.
  metadata?: Record<string, unknown>;
}
|
|
|
|
/**
 * Shape of an annotation template record as used by the front end.
 */
export interface AnnotationTemplate {
  id: string;
  name: string;
  description?: string;
  dataType: string;
  labelingType: string;
  // Both representations are optional; a template may carry either, both,
  // or neither as far as this type is concerned.
  configuration?: TemplateConfiguration;
  // NOTE(review): presumably the raw labeling-config string — confirm.
  labelConfig?: string;
  style: string;
  category: string;
  builtIn: boolean;
  version: string;
  createdAt: string;
  updatedAt?: string;
}
|
|
|
|
/**
 * Paged list of annotation templates.
 */
export interface AnnotationTemplateListResponse {
  content: AnnotationTemplate[];
  total: number;
  // NOTE(review): whether `page` is 0- or 1-based is not visible here — confirm.
  page: number;
  size: number;
  totalPages: number;
}
|
|
|
|
/**
 * Data type enum.
 * Defines the data types supported by annotation tasks.
 */
export enum DataType {
  TEXT = "text",
  IMAGE = "image",
  AUDIO = "audio",
  VIDEO = "video",
  PDF = "pdf",
  TIMESERIES = "timeseries",
  CHAT = "chat",
  HTML = "html",
  TABLE = "table",
}
|
|
|
|
/**
 * Template classification enum.
 * Based on the Label Studio template classification scheme.
 */
export enum Classification {
  // Audio / speech processing
  AUDIO_SPEECH = "audio-speech",
  // Chat evaluation
  CHAT = "chat",
  // Computer vision
  COMPUTER_VISION = "computer-vision",
  // Conversational AI
  CONVERSATIONAL_AI = "conversational-ai",
  // Generative AI
  GENERATIVE_AI = "generative-ai",
  // Natural language processing
  NLP = "nlp",
  // Ranking and scoring
  RANKING_SCORING = "ranking-scoring",
  // Structured data parsing
  STRUCTURED_DATA = "structured-data",
  // Time series analysis
  TIME_SERIES = "time-series",
  // Video processing
  VIDEO = "video",
  // Community contributed
  COMMUNITY = "community",
  // Custom
  CUSTOM = "custom",
}
|
|
|
|
/**
 * Annotation type enum.
 * Defines the specific kinds of annotation task.
 *
 * Members are grouped by the same categories as `Classification`; every
 * value is a unique kebab-case string.
 */
export enum AnnotationType {
  // ===== Audio / speech processing =====
  // Automatic speech recognition (segmented)
  ASR_SEGMENTS = "asr-segments",
  // Automatic speech recognition
  ASR = "asr",
  // Conversation analysis
  CONVERSATION_ANALYSIS = "conversation-analysis",
  // Intent classification
  INTENT_CLASSIFICATION = "intent-classification",
  // Signal quality detection
  SIGNAL_QUALITY = "signal-quality",
  // Sound event detection
  SOUND_EVENT_DETECTION = "sound-event-detection",
  // Speaker segmentation
  SPEAKER_SEGMENTATION = "speaker-segmentation",
  // Speech transcription
  SPEECH_TRANSCRIPTION = "speech-transcription",

  // ===== Chat evaluation =====
  // Agent fine-tuning without an LLM
  AGENT_FINE_TUNE = "agent-fine-tune",
  // Agent fine-tuning with an LLM
  AGENT_FINE_TUNE_LLM = "agent-fine-tune-llm",
  // Red teaming
  RED_TEAMING = "red-teaming",
  // RLHF production conversation evaluation
  RLHF_EVALUATION = "rlhf-evaluation",
  // Chatbot evaluation
  CHATBOT_EVALUATION = "chatbot-evaluation",

  // ===== Computer vision =====
  // Image captioning
  IMAGE_CAPTIONING = "image-captioning",
  // Image classification
  IMAGE_CLASSIFICATION = "image-classification",
  // Inventory tracking
  INVENTORY_TRACKING = "inventory-tracking",
  // Keypoint labeling
  KEYPOINT_LABELING = "keypoint-labeling",
  // Medical image classification
  MEDICAL_IMAGE_CLASSIFICATION = "medical-image-classification",
  // Multi-page document annotation
  MULTIPAGE_DOCUMENT = "multipage-document",
  // Object detection (bounding boxes)
  OBJECT_DETECTION = "object-detection",
  // OCR recognition
  OCR = "ocr",
  // PDF OCR annotation
  PDF_OCR = "pdf-ocr",
  // Semantic segmentation (masks)
  SEMANTIC_SEGMENTATION_MASK = "semantic-segmentation-mask",
  // Semantic segmentation (polygons)
  SEMANTIC_SEGMENTATION_POLYGON = "semantic-segmentation-polygon",
  // Visual Genome
  VISUAL_GENOME = "visual-genome",
  // Visual question answering
  VQA = "vqa",

  // ===== Conversational AI =====
  // Coreference resolution and entity linking
  COREFERENCE_RESOLUTION = "coreference-resolution",
  // Intent classification and slot filling
  SLOT_FILLING = "slot-filling",
  // Response generation
  RESPONSE_GENERATION = "response-generation",
  // Response selection
  RESPONSE_SELECTION = "response-selection",

  // ===== Generative AI =====
  // Chatbot model assessment
  CHATBOT_ASSESSMENT = "chatbot-assessment",
  // RLHF human preference collection
  RLHF_PREFERENCE = "rlhf-preference",
  // LLM ranking
  LLM_RANKING = "llm-ranking",
  // LLM response grading
  LLM_GRADING = "llm-grading",
  // Supervised fine-tuning
  SFT = "sft",
  // Visual ranking
  VISUAL_RANKING = "visual-ranking",

  // ===== Natural language processing =====
  // Content moderation
  CONTENT_MODERATION = "content-moderation",
  // Machine translation
  MACHINE_TRANSLATION = "machine-translation",
  // Named entity recognition
  NER = "ner",
  // Question answering
  QUESTION_ANSWERING = "question-answering",
  // Relation extraction
  RELATION_EXTRACTION = "relation-extraction",
  // Taxonomy / hierarchical classification
  TAXONOMY = "taxonomy",
  // Text classification
  TEXT_CLASSIFICATION = "text-classification",
  // Text summarization
  TEXT_SUMMARIZATION = "text-summarization",

  // ===== Ranking and scoring =====
  // ASR hypothesis selection
  ASR_HYPOTHESES = "asr-hypotheses",
  // Content-based image retrieval
  IMAGE_RETRIEVAL = "image-retrieval",
  // Document retrieval
  DOCUMENT_RETRIEVAL = "document-retrieval",
  // Pairwise classification
  PAIRWISE_CLASSIFICATION = "pairwise-classification",
  // Pairwise regression
  PAIRWISE_REGRESSION = "pairwise-regression",
  // Search engine results page ranking
  SERP_RANKING = "serp-ranking",
  // Text-to-image generation
  TEXT_TO_IMAGE = "text-to-image",

  // ===== Structured data parsing =====
  // Free-form metadata
  FREEFORM_METADATA = "freeform-metadata",
  // HTML entity recognition
  HTML_ENTITY_RECOGNITION = "html-entity-recognition",
  // PDF classification
  PDF_CLASSIFICATION = "pdf-classification",
  // Tabular data annotation
  TABULAR_DATA = "tabular-data",

  // ===== Time series analysis =====
  // Activity recognition
  ACTIVITY_RECOGNITION = "activity-recognition",
  // Change point detection
  CHANGE_POINT_DETECTION = "change-point-detection",
  // Anomaly detection
  ANOMALY_DETECTION = "anomaly-detection",
  // Time series signal quality
  TIMESERIES_SIGNAL_QUALITY = "timeseries-signal-quality",
  // Time series forecasting
  TIMESERIES_FORECASTING = "timeseries-forecasting",

  // ===== Video processing =====
  // Video classification
  VIDEO_CLASSIFICATION = "video-classification",
  // Video frame classification
  VIDEO_FRAME_CLASSIFICATION = "video-frame-classification",
  // Video object tracking
  VIDEO_OBJECT_TRACKING = "video-object-tracking",
  // Video timeline segmentation
  VIDEO_TIMELINE_SEGMENTATION = "video-timeline-segmentation",

  // ===== Community contributed =====
  // Breast cancer mammogram classification
  MAMMOGRAM_CLASSIFICATION = "mammogram-classification",
  // HTML NER tagging
  HTML_NER_TAGGING = "html-ner-tagging",
  // Invoice NER tagging (BIO format)
  INVOICE_NER_BIO = "invoice-ner-bio",
  // OCR invoice pre-NER (BIO format)
  OCR_INVOICE_PRE_NER = "ocr-invoice-pre-ner",
  // Twitter/X sentiment analysis
  TWITTER_SENTIMENT = "twitter-sentiment",

  // ===== Generic / legacy types =====
  // Generic classification
  CLASSIFICATION = "classification",
  // Generic segmentation
  SEGMENTATION = "segmentation",
}
|
|
|
|
/**
 * Template origin selector.
 *
 * The values are the STRINGS "true" / "false", not booleans.
 * NOTE(review): presumably these mirror a built-in flag sent as a string
 * (e.g. a query parameter) — confirm against the call sites.
 */
export enum TemplateType {
  SYSTEM = "true",
  CUSTOM = "false",
}
|