""" 标注编辑器(Label Studio Editor)接口模型 设计目标: - 单人单份最终标签:每个 project_id + file_id 只维护 1 条最终标注结果 - 完全兼容 Label Studio:标注结果以 annotation 原始 JSON 形式存储与返回 """ from __future__ import annotations from datetime import datetime from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field, ConfigDict class EditorProjectInfo(BaseModel): """编辑器项目元信息""" project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID(t_dm_labeling_projects.id)") dataset_id: str = Field(..., alias="datasetId", description="数据集ID(t_dm_datasets.id)") template_id: Optional[str] = Field(None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)") label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置") supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器") unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因(当 supported=false)") model_config = ConfigDict(populate_by_name=True) class EditorTaskListItem(BaseModel): """编辑器任务列表条目(对应一个数据集文件)""" file_id: str = Field(..., alias="fileId", description="文件ID") file_name: str = Field(..., alias="fileName", description="文件名") file_type: Optional[str] = Field(None, alias="fileType", description="文件类型") has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注") annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间") model_config = ConfigDict(populate_by_name=True) class EditorTaskListResponse(BaseModel): """编辑器任务列表响应""" content: List[EditorTaskListItem] = Field(..., description="任务列表") total_elements: int = Field(..., alias="totalElements", description="总条数") total_pages: int = Field(..., alias="totalPages", description="总页数") page: int = Field(..., description="页码(从0开始)") size: int = Field(..., description="每页大小") model_config = ConfigDict(populate_by_name=True) class SegmentInfo(BaseModel): """段落信息(用于文本分段标注)""" idx: int = Field(..., description="段落索引") text: str = Field(..., description="段落文本") start: int = Field(..., description="在原文中的起始位置") end: int = Field(..., description="在原文中的结束位置") has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注") model_config = ConfigDict(populate_by_name=True) class EditorTaskResponse(BaseModel): """编辑器任务详情(可直接喂给 Label Studio Editor 的 task 对象)""" task: Dict[str, Any] = Field(..., description="Label Studio task 对象") annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间") # 分段相关字段 segmented: bool = Field(False, description="是否启用分段模式") segments: Optional[List[SegmentInfo]] = Field(None, description="段落列表") total_segments: int = Field(0, alias="totalSegments", description="总段落数") current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引") model_config = ConfigDict(populate_by_name=True) class UpsertAnnotationRequest(BaseModel): """保存/覆盖最终标注(Label Studio annotation 原始对象)""" annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象(包含 result 等)") expected_updated_at: Optional[datetime] = Field( None, alias="expectedUpdatedAt", description="乐观锁:若提供则要求与当前记录 updated_at 一致,否则返回 409", ) # 分段保存支持 segment_index: Optional[int] = Field( None, alias="segmentIndex", description="段落索引(分段模式下必填)", ) model_config = ConfigDict(populate_by_name=True) class UpsertAnnotationResponse(BaseModel): """保存/覆盖最终标注响应""" annotation_id: str = Field(..., alias="annotationId", description="标注结果ID(t_dm_annotation_results.id)") updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间") model_config = ConfigDict(populate_by_name=True)