diff --git a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx index c92f125..ac57e7b 100644 --- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx +++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx @@ -9,6 +9,7 @@ import { listEditorTasksUsingGet, upsertEditorAnnotationUsingPut, } from "../annotation.api"; +import { AnnotationResultStatus } from "../annotation.model"; type EditorProjectInfo = { projectId: string; @@ -26,6 +27,7 @@ type EditorTaskListItem = { fileType?: string | null; hasAnnotation: boolean; annotationUpdatedAt?: string | null; + annotationStatus?: AnnotationResultStatus | null; }; type LsfMessage = { @@ -88,6 +90,10 @@ type SwitchDecision = "save" | "discard" | "cancel"; const LSF_IFRAME_SRC = "/lsf/lsf.html"; const TASK_PAGE_START = 0; const TASK_PAGE_SIZE = 200; +const NO_ANNOTATION_LABEL = "无标注/不适用"; +const NO_ANNOTATION_CONFIRM_TITLE = "没有标注任何内容"; +const NO_ANNOTATION_CONFIRM_OK_TEXT = "设为无标注并保存"; +const NO_ANNOTATION_CONFIRM_CANCEL_TEXT = "继续标注"; type NormalizedTaskList = { items: EditorTaskListItem[]; @@ -119,6 +125,24 @@ const resolvePayloadMessage = (payload: unknown) => { const isRecord = (value: unknown): value is Record => !!value && typeof value === "object" && !Array.isArray(value); +const isAnnotationResultEmpty = (annotation?: Record) => { + if (!annotation) return true; + if (!("result" in annotation)) return true; + const result = (annotation as { result?: unknown }).result; + if (!Array.isArray(result)) return false; + return result.length === 0; +}; + +const resolveTaskStatusMeta = (item: EditorTaskListItem) => { + if (!item.hasAnnotation) { + return { text: "未标注", type: "secondary" as const }; + } + if (item.annotationStatus === AnnotationResultStatus.NO_ANNOTATION) { + return { text: NO_ANNOTATION_LABEL, type: "warning" as const }; + } + return { text: "已标注", type: "success" as const }; +}; + const normalizeSnapshotValue = (value: unknown, seen: WeakSet): unknown => { if (!value || typeof value !== "object") return value; const obj = value as object; @@ -247,6 +271,30 @@ export default function LabelStudioTextEditor() { win.postMessage({ type, payload }, origin); }, [origin]); + const confirmNoAnnotation = useCallback(() => { + return new Promise((resolve) => { + let resolved = false; + const settle = (value: boolean) => { + if (resolved) return; + resolved = true; + resolve(value); + }; + modal.confirm({ + title: NO_ANNOTATION_CONFIRM_TITLE, + content: ( +
+ 当前未发现任何标注内容。 + 如确认为无标注/不适用,可继续保存。 +
+ ), + okText: NO_ANNOTATION_CONFIRM_OK_TEXT, + cancelText: NO_ANNOTATION_CONFIRM_CANCEL_TEXT, + onOk: () => settle(true), + onCancel: () => settle(false), + }); + }); + }, [modal]); + const loadProject = useCallback(async () => { setLoadingProject(true); try { @@ -539,11 +587,27 @@ export default function LabelStudioTextEditor() { ? currentSegmentIndex : undefined; + const annotationRecord = annotation as Record; + let resolvedStatus: AnnotationResultStatus; + if (isAnnotationResultEmpty(annotationRecord)) { + const currentStatus = tasks.find((item) => item.fileId === String(fileId))?.annotationStatus; + if (currentStatus === AnnotationResultStatus.NO_ANNOTATION) { + resolvedStatus = AnnotationResultStatus.NO_ANNOTATION; + } else { + const confirmed = await confirmNoAnnotation(); + if (!confirmed) return false; + resolvedStatus = AnnotationResultStatus.NO_ANNOTATION; + } + } else { + resolvedStatus = AnnotationResultStatus.ANNOTATED; + } + setSaving(true); try { const resp = (await upsertEditorAnnotationUsingPut(projectId, String(fileId), { annotation, segmentIndex, + annotationStatus: resolvedStatus, })) as ApiResponse; const updatedAt = resp?.data?.updatedAt; message.success("标注已保存"); @@ -553,6 +617,7 @@ export default function LabelStudioTextEditor() { ? { ...item, hasAnnotation: true, + annotationStatus: resolvedStatus, annotationUpdatedAt: updatedAt || item.annotationUpdatedAt, } : item @@ -586,11 +651,13 @@ export default function LabelStudioTextEditor() { } }, [ advanceAfterSave, + confirmNoAnnotation, currentSegmentIndex, message, projectId, segmented, selectedFileId, + tasks, ]); const requestExportForCheck = useCallback(() => { @@ -1016,37 +1083,37 @@ export default function LabelStudioTextEditor() { size="small" dataSource={tasks} loadMore={loadMoreNode} - renderItem={(item) => ( - setSelectedFileId(item.fileId)} - > -
- - {item.fileName} - -
- - {item.hasAnnotation ? "已标注" : "未标注"} + renderItem={(item) => { + const statusMeta = resolveTaskStatusMeta(item); + return ( + setSelectedFileId(item.fileId)} + > +
+ + {item.fileName} - {item.annotationUpdatedAt && ( - - {item.annotationUpdatedAt} +
+ + {statusMeta.text} - )} + {item.annotationUpdatedAt && ( + + {item.annotationUpdatedAt} + + )} +
-
- - )} + + ); + }} />
{segmented && ( diff --git a/frontend/src/pages/DataAnnotation/annotation.model.ts b/frontend/src/pages/DataAnnotation/annotation.model.ts index f9208c8..3d53c7c 100644 --- a/frontend/src/pages/DataAnnotation/annotation.model.ts +++ b/frontend/src/pages/DataAnnotation/annotation.model.ts @@ -8,6 +8,11 @@ export enum AnnotationTaskStatus { SKIPPED = "skipped", } +export enum AnnotationResultStatus { + ANNOTATED = "ANNOTATED", + NO_ANNOTATION = "NO_ANNOTATION", +} + export interface AnnotationTask { id: string; name: string; @@ -52,7 +57,7 @@ export interface ObjectDefinition { export interface TemplateConfiguration { labels: LabelDefinition[]; objects: ObjectDefinition[]; - metadata?: Record; + metadata?: Record; } export interface AnnotationTemplate { diff --git a/runtime/datamate-python/app/db/models/annotation_management.py b/runtime/datamate-python/app/db/models/annotation_management.py index 38e1058..6454306 100644 --- a/runtime/datamate-python/app/db/models/annotation_management.py +++ b/runtime/datamate-python/app/db/models/annotation_management.py @@ -2,11 +2,15 @@ import uuid from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey, UniqueConstraint, Index -from sqlalchemy.sql import func - -from app.db.session import Base - -class AnnotationTemplate(Base): +from sqlalchemy.sql import func + +from app.db.session import Base + +ANNOTATION_STATUS_ANNOTATED = "ANNOTATED" +ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION" +ANNOTATION_STATUS_VALUES = {ANNOTATION_STATUS_ANNOTATED, ANNOTATION_STATUS_NO_ANNOTATION} + +class AnnotationTemplate(Base): """标注配置模板模型""" __tablename__ = "t_dm_annotation_templates" @@ -84,12 +88,18 @@ class AnnotationResult(Base): __tablename__ = "t_dm_annotation_results" - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") - project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)") - file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)") - annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") + project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)") + file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)") + annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)") + annotation_status = Column( + String(32), + nullable=False, + default=ANNOTATION_STATUS_ANNOTATED, + comment="标注状态: ANNOTATED/NO_ANNOTATION", + ) + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") def __repr__(self): return f"" diff --git a/runtime/datamate-python/app/module/annotation/schema/editor.py b/runtime/datamate-python/app/module/annotation/schema/editor.py index 99c704e..127c6ee 100644 --- a/runtime/datamate-python/app/module/annotation/schema/editor.py +++ b/runtime/datamate-python/app/module/annotation/schema/editor.py @@ -9,10 +9,23 @@ from __future__ import annotations from datetime import datetime +from enum import Enum from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field, ConfigDict +from app.db.models.annotation_management import ( + ANNOTATION_STATUS_ANNOTATED, + ANNOTATION_STATUS_NO_ANNOTATION, +) + + +class AnnotationStatus(str, Enum): + """标注状态枚举""" + + ANNOTATED = ANNOTATION_STATUS_ANNOTATED + NO_ANNOTATION = ANNOTATION_STATUS_NO_ANNOTATION + class EditorProjectInfo(BaseModel): """编辑器项目元信息""" @@ -40,8 +53,13 @@ class EditorTaskListItem(BaseModel): file_type: Optional[str] = Field(None, alias="fileType", description="文件类型") has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注") annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间") + annotation_status: Optional[AnnotationStatus] = Field( + None, + alias="annotationStatus", + description="标注状态", + ) - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, use_enum_values=True) class EditorTaskListResponse(BaseModel): @@ -89,6 +107,11 @@ class UpsertAnnotationRequest(BaseModel): """保存/覆盖最终标注(Label Studio annotation 原始对象)""" annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象(包含 result 等)") + annotation_status: Optional[AnnotationStatus] = Field( + None, + alias="annotationStatus", + description="标注状态(无标注/不适用时传 NO_ANNOTATION)", + ) expected_updated_at: Optional[datetime] = Field( None, alias="expectedUpdatedAt", @@ -101,7 +124,7 @@ class UpsertAnnotationRequest(BaseModel): description="段落索引(分段模式下必填)", ) - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, use_enum_values=True) class UpsertAnnotationResponse(BaseModel): diff --git a/runtime/datamate-python/app/module/annotation/service/editor.py b/runtime/datamate-python/app/module/annotation/service/editor.py index 07824bb..51162dd 100644 --- a/runtime/datamate-python/app/module/annotation/service/editor.py +++ b/runtime/datamate-python/app/module/annotation/service/editor.py @@ -24,6 +24,11 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.config import settings from app.core.logging import get_logger from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile +from app.db.models.annotation_management import ( + ANNOTATION_STATUS_ANNOTATED, + ANNOTATION_STATUS_NO_ANNOTATION, + ANNOTATION_STATUS_VALUES, +) from app.module.annotation.config import LabelStudioTagConfig from app.module.annotation.schema.editor import ( EditorProjectInfo, @@ -348,6 +353,24 @@ class AnnotationEditorService: return ET.tostring(root, encoding="unicode") + @staticmethod + def _has_annotation_result(payload: Optional[Dict[str, Any]]) -> bool: + if not payload or not isinstance(payload, dict): + return False + if payload.get("segmented"): + segments = payload.get("segments", {}) + if not isinstance(segments, dict): + return False + for segment in segments.values(): + if not isinstance(segment, dict): + continue + result = segment.get("result") + if isinstance(result, list) and len(result) > 0: + return True + return False + result = payload.get("result") + return isinstance(result, list) and len(result) > 0 + @classmethod def _build_source_document_filter(cls): file_type_lower = func.lower(DatasetFiles.file_type) @@ -447,7 +470,12 @@ class AnnotationEditorService: else_=0, ) files_result = await self.db.execute( - select(DatasetFiles, AnnotationResult.id, AnnotationResult.updated_at) + select( + DatasetFiles, + AnnotationResult.id, + AnnotationResult.updated_at, + AnnotationResult.annotation_status, + ) .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id) .outerjoin( AnnotationResult, @@ -462,7 +490,7 @@ class AnnotationEditorService: rows = files_result.all() items: List[EditorTaskListItem] = [] - for file_record, annotation_id, annotation_updated_at in rows: + for file_record, annotation_id, annotation_updated_at, annotation_status in rows: fid = str(file_record.id) # type: ignore[arg-type] items.append( EditorTaskListItem( @@ -471,6 +499,7 @@ class AnnotationEditorService: fileType=getattr(file_record, "file_type", None), hasAnnotation=annotation_id is not None, annotationUpdatedAt=annotation_updated_at, + annotationStatus=annotation_status, ) ) @@ -869,12 +898,26 @@ class AnnotationEditorService: annotation_payload["id"] = self._make_ls_annotation_id(project_id, file_id) final_payload = annotation_payload + requested_status = request.annotation_status + if requested_status is not None and requested_status not in ANNOTATION_STATUS_VALUES: + raise HTTPException(status_code=400, detail="annotationStatus 不合法") + + has_result = self._has_annotation_result(final_payload) + if has_result: + final_status = ANNOTATION_STATUS_ANNOTATED + else: + if requested_status == ANNOTATION_STATUS_NO_ANNOTATION: + final_status = ANNOTATION_STATUS_NO_ANNOTATION + else: + raise HTTPException(status_code=400, detail="未发现标注内容,请确认无标注/不适用后再保存") + if existing: if request.expected_updated_at and existing.updated_at: if existing.updated_at != request.expected_updated_at.replace(tzinfo=None): raise HTTPException(status_code=409, detail="标注已被更新,请刷新后重试") existing.annotation = final_payload # type: ignore[assignment] + existing.annotation_status = final_status # type: ignore[assignment] existing.updated_at = now # type: ignore[assignment] await self.db.commit() await self.db.refresh(existing) @@ -892,6 +935,7 @@ class AnnotationEditorService: project_id=project_id, file_id=file_id, annotation=final_payload, + annotation_status=final_status, created_at=now, updated_at=now, ) diff --git a/scripts/db/data-annotation-init.sql b/scripts/db/data-annotation-init.sql index f6c73f7..0c876a3 100644 --- a/scripts/db/data-annotation-init.sql +++ b/scripts/db/data-annotation-init.sql @@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS t_dm_annotation_results ( project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID', file_id VARCHAR(36) NOT NULL COMMENT '文件ID', annotation JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON', + annotation_status VARCHAR(32) NOT NULL DEFAULT 'ANNOTATED' COMMENT '标注状态: ANNOTATED/NO_ANNOTATION', created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', UNIQUE KEY uk_project_file (project_id, file_id),