From d5b75fee0d32921443f48c8d787cb03facb7a1d5 Mon Sep 17 00:00:00 2001
From: Jerry Yan <792602257@qq.com>
Date: Wed, 7 Jan 2026 00:00:16 +0800
Subject: [PATCH] LSF
---
frontend/public/lsf/lsf.html | 287 +++++++++++++++
.../Annotate/LabelStudioTextEditor.tsx | 346 ++++++++++++++++++
.../DataAnnotation/Home/DataAnnotation.tsx | 114 ++----
.../pages/DataAnnotation/annotation.api.ts | 30 +-
frontend/src/routes/routes.ts | 5 +
runtime/datamate-python/app/core/config.py | 6 +
.../datamate-python/app/db/models/__init__.py | 4 +-
.../app/db/models/annotation_management.py | 40 +-
.../module/annotation/interface/__init__.py | 14 +-
.../app/module/annotation/interface/editor.py | 90 +++++
.../app/module/annotation/schema/__init__.py | 17 +-
.../app/module/annotation/schema/editor.py | 83 +++++
.../app/module/annotation/service/editor.py | 295 +++++++++++++++
scripts/db/data-annotation-init.sql | 60 +--
14 files changed, 1267 insertions(+), 124 deletions(-)
create mode 100644 frontend/public/lsf/lsf.html
create mode 100644 frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
create mode 100644 runtime/datamate-python/app/module/annotation/interface/editor.py
create mode 100644 runtime/datamate-python/app/module/annotation/schema/editor.py
create mode 100644 runtime/datamate-python/app/module/annotation/service/editor.py
diff --git a/frontend/public/lsf/lsf.html b/frontend/public/lsf/lsf.html
new file mode 100644
index 0000000..ac70325
--- /dev/null
+++ b/frontend/public/lsf/lsf.html
@@ -0,0 +1,287 @@
+
+
+
+
+
+ DataMate - Label Studio 编辑器
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
new file mode 100644
index 0000000..345bf24
--- /dev/null
+++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
@@ -0,0 +1,346 @@
+import { useEffect, useMemo, useRef, useState } from "react";
+import { App, Button, Card, List, Spin, Typography } from "antd";
+import { LeftOutlined, ReloadOutlined, SaveOutlined } from "@ant-design/icons";
+import { useNavigate, useParams } from "react-router";
+
+import {
+ getEditorProjectInfoUsingGet,
+ getEditorTaskUsingGet,
+ listEditorTasksUsingGet,
+ upsertEditorAnnotationUsingPut,
+} from "../annotation.api";
+
+/** Editor metadata for one labeling project, read from the `data` field of the project-info response. */
+type EditorProjectInfo = {
+ projectId: string;
+ datasetId: string;
+ templateId?: string | null;
+ // Label Studio labeling configuration; the editor is not initialised when this is missing.
+ labelConfig?: string | null;
+ // When false, the component renders the "unsupported data type" screen instead of the editor.
+ supported: boolean;
+ unsupportedReason?: string | null;
+};
+
+/** One entry of the file list shown beside the editor (read from `data.content` of the task-list response). */
+type EditorTaskListItem = {
+ // Used as the selection key and passed to the task-detail request.
+ fileId: string;
+ fileName: string;
+ fileType?: string | null;
+ // Drives the annotated / not-annotated label in the list.
+ hasAnnotation: boolean;
+ // Rendered verbatim in the list when present.
+ annotationUpdatedAt?: string | null;
+};
+
+/** Envelope of the `{ type, payload }` messages exchanged with the editor iframe via postMessage. */
+type LsfMessage = {
+ type?: string;
+ payload?: any;
+};
+
+const LSF_IFRAME_SRC = "/lsf/lsf.html";
+
+export default function LabelStudioTextEditor() {
+ const { projectId = "" } = useParams();
+ const navigate = useNavigate();
+ const { message } = App.useApp();
+
+ const origin = useMemo(() => window.location.origin, []);
+ const iframeRef = useRef(null);
+ const initSeqRef = useRef(0);
+
+ const [loadingProject, setLoadingProject] = useState(true);
+ const [loadingTasks, setLoadingTasks] = useState(false);
+ const [loadingTaskDetail, setLoadingTaskDetail] = useState(false);
+ const [saving, setSaving] = useState(false);
+
+ const [iframeReady, setIframeReady] = useState(false);
+ const [project, setProject] = useState(null);
+ const [tasks, setTasks] = useState([]);
+ const [selectedFileId, setSelectedFileId] = useState("");
+
+ // Post a `{ type, payload }` message to the editor iframe, restricted to our own origin.
+ // Silently does nothing while the iframe window is not available.
+ const postToIframe = (type: string, payload?: any) => {
+   const target = iframeRef.current?.contentWindow;
+   if (target) {
+     target.postMessage({ type, payload }, origin);
+   }
+ };
+
+ // Load the editor metadata of the current project into `project`.
+ // Any failure (request error or a response without `projectId`) shows a toast and
+ // resets `project` to null; `loadingProject` is raised for the whole request.
+ const loadProject = async () => {
+   setLoadingProject(true);
+   try {
+     const resp = (await getEditorProjectInfoUsingGet(projectId)) as any;
+     const info = resp?.data as EditorProjectInfo | undefined;
+     if (info?.projectId) {
+       setProject(info);
+     } else {
+       message.error("获取标注项目信息失败");
+       setProject(null);
+     }
+   } catch (err) {
+     console.error(err);
+     message.error("获取标注项目信息失败");
+     setProject(null);
+   } finally {
+     setLoadingProject(false);
+   }
+ };
+
+ /**
+  * Fetch the project's file list (first page, at most 200 entries) into `tasks`
+  * and select the first file when nothing is selected yet.
+  *
+  * @param silent when true, neither the loading flag nor the error toast is used
+  *   (background refresh, e.g. after a save).
+  */
+ const loadTasks = async (silent = false) => {
+   if (!projectId) return;
+   if (!silent) setLoadingTasks(true);
+   try {
+     const resp = (await listEditorTasksUsingGet(projectId, { page: 0, size: 200 })) as any;
+     const content = (resp?.data?.content || []) as EditorTaskListItem[];
+     const items = Array.isArray(content) ? content : [];
+     setTasks(items);
+     if (items.length > 0) {
+       // Decide with a functional update rather than the `selectedFileId` captured by
+       // this closure: the window "message" listener keeps the closure of the render
+       // that registered it, where the selection is still empty, so reading the
+       // captured value would jump back to the first file after every save.
+       setSelectedFileId((current) => current || items[0].fileId);
+     }
+   } catch (e) {
+     console.error(e);
+     if (!silent) message.error("获取文件列表失败");
+     setTasks([]);
+   } finally {
+     if (!silent) setLoadingTasks(false);
+   }
+ };
+
+ /**
+  * Load the task for `fileId` and hand it to the editor iframe with an `LS_INIT` message.
+  *
+  * Does nothing unless the project is supported, has a label config and the iframe has
+  * reported ready. Each call takes a sequence number; when a newer call has started in
+  * the meantime, this one is abandoned without touching the editor, the loading flag
+  * or showing error toasts.
+  */
+ const initEditorForFile = async (fileId: string) => {
+   if (!project?.supported) return;
+   if (!project?.labelConfig) {
+     message.error("该项目未绑定标注模板,无法加载编辑器");
+     return;
+   }
+   if (!iframeReady) return;
+
+   const seq = ++initSeqRef.current;
+   const isCurrent = () => seq === initSeqRef.current;
+   setLoadingTaskDetail(true);
+
+   try {
+     const resp = (await getEditorTaskUsingGet(projectId, fileId)) as any;
+     // Check for a superseded request first so a stale response never raises a toast.
+     if (!isCurrent()) return;
+
+     const task = resp?.data?.task;
+     if (!task) {
+       message.error("获取任务详情失败");
+       return;
+     }
+
+     postToIframe("LS_INIT", {
+       labelConfig: project.labelConfig,
+       task,
+       user: { id: "datamate" },
+       interfaces: [
+         "panel",
+         "update",
+         "submit",
+         "controls",
+         "side-column",
+         "annotations:menu",
+         "annotations:add-new",
+         "annotations:delete",
+       ],
+       selectedAnnotationIndex: 0,
+       allowCreateEmptyAnnotation: true,
+     });
+   } catch (e) {
+     console.error(e);
+     // Only report the failure for the request the user is still waiting on.
+     if (isCurrent()) message.error("加载编辑器失败");
+   } finally {
+     if (isCurrent()) setLoadingTaskDetail(false);
+   }
+ };
+
+ // Persist an annotation exported by the iframe. The payload must carry both `taskId`
+ // and `annotation`; the task id is sent as the file id of the upsert request.
+ // A successful save triggers a silent refresh of the file list.
+ const saveFromExport = async (payload: any) => {
+   const { taskId, annotation } = payload ?? {};
+   if (!(taskId && annotation)) {
+     message.error("导出标注失败:缺少 taskId/annotation");
+     return;
+   }
+
+   setSaving(true);
+   try {
+     const fileId = String(taskId);
+     await upsertEditorAnnotationUsingPut(projectId, fileId, { annotation });
+     message.success("标注已保存");
+     await loadTasks(true);
+   } catch (err) {
+     console.error(err);
+     message.error("保存失败");
+   } finally {
+     setSaving(false);
+   }
+ };
+
+ // Ask the iframe to export the current annotation; warns when no file is selected.
+ const requestExport = () => {
+   if (selectedFileId) {
+     postToIframe("LS_EXPORT", {});
+     return;
+   }
+   message.warning("请先选择文件");
+ };
+
+ // Whenever the route's project id changes: clear all per-project state (iframe readiness,
+ // project, file list, selection, init sequence counter) and load the new project.
+ useEffect(() => {
+ setIframeReady(false);
+ setProject(null);
+ setTasks([]);
+ setSelectedFileId("");
+ initSeqRef.current = 0;
+
+ if (projectId) loadProject();
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [projectId]);
+
+ // Once a supported project is loaded, fetch its file list.
+ useEffect(() => {
+ if (!project?.supported) return;
+ loadTasks();
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [project?.projectId, project?.supported]);
+
+ // (Re)initialise the editor when the selected file changes or the iframe becomes ready.
+ useEffect(() => {
+ if (!selectedFileId) return;
+ initEditorForFile(selectedFileId);
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [selectedFileId, iframeReady]);
+
+ // Listen for messages posted by the editor iframe and dispatch on their `type`.
+ useEffect(() => {
+   const onMessage = (event: MessageEvent) => {
+     // Only trust messages coming from our own origin.
+     if (event.origin !== origin) return;
+     const msg = event.data || {};
+
+     switch (msg?.type) {
+       case "LS_IFRAME_READY":
+         setIframeReady(true);
+         break;
+       case "LS_EXPORT_RESULT":
+       // Also accept a direct report the iframe may send on submit (if enabled).
+       case "LS_SUBMIT":
+         saveFromExport(msg.payload);
+         break;
+       case "LS_ERROR":
+         message.error(msg.payload?.message || "编辑器发生错误");
+         break;
+       default:
+         // Messages without a type, or with an unknown one, are ignored.
+         break;
+     }
+   };
+
+   window.addEventListener("message", onMessage);
+   return () => window.removeEventListener("message", onMessage);
+ }, [message, origin]);
+
+ if (loadingProject) {
+ return (
+
+
+
+ );
+ }
+
+ if (!project) {
+ return (
+
+
+ 未找到标注项目
+
+
+
+
+
+ );
+ }
+
+ if (!project.supported) {
+ return (
+
+
+ 暂不支持该数据类型
+
+ {project.unsupportedReason || "当前仅支持文本(TEXT)项目的内嵌编辑器。"}
+
+
+
+
+
+
+ );
+ }
+
+ return (
+
+
+
+ } onClick={() => navigate("/data/annotation")}>
+ 返回
+
+
+ 标注(内嵌编辑器)
+
+
+
+ } loading={loadingTasks} onClick={() => loadTasks()}>
+ 刷新文件列表
+
+ }
+ loading={saving}
+ disabled={!iframeReady || !selectedFileId}
+ onClick={requestExport}
+ >
+ 保存
+
+
+
+
+
+
+ (
+ setSelectedFileId(item.fileId)}
+ >
+
+
+ {item.fileName}
+
+ {item.hasAnnotation ? "已标注" : "未标注"}
+
+
+ {item.annotationUpdatedAt && (
+
+ 更新: {item.annotationUpdatedAt}
+
+ )}
+
+
+ )}
+ />
+
+
+
+
+ {loadingTaskDetail && (
+
+
+
+ )}
+
+
+
+
+
+ );
+}
diff --git a/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx b/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
index 8b02b45..0483edd 100644
--- a/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
+++ b/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
@@ -1,15 +1,16 @@
-import { useState, useEffect } from "react";
-import { Card, Button, Table, message, Modal, Tabs, Tag, Progress, Tooltip } from "antd";
-import {
- PlusOutlined,
- EditOutlined,
- DeleteOutlined,
- SyncOutlined,
-} from "@ant-design/icons";
-import { SearchControls } from "@/components/SearchControls";
-import CardView from "@/components/CardView";
-import type { AnnotationTask } from "../annotation.model";
-import useFetchData from "@/hooks/useFetchData";
+import { useState, useEffect } from "react";
+import { Card, Button, Table, message, Modal, Tabs, Tag, Progress, Tooltip } from "antd";
+import {
+ PlusOutlined,
+ EditOutlined,
+ DeleteOutlined,
+ SyncOutlined,
+} from "@ant-design/icons";
+import { useNavigate } from "react-router";
+import { SearchControls } from "@/components/SearchControls";
+import CardView from "@/components/CardView";
+import type { AnnotationTask } from "../annotation.model";
+import useFetchData from "@/hooks/useFetchData";
import {
deleteAnnotationTaskByIdUsingDelete,
queryAnnotationTasksUsingGet,
@@ -39,12 +40,13 @@ const AUTO_MODEL_SIZE_LABELS: Record = {
x: "YOLOv8x (最精确)",
};
-export default function DataAnnotation() {
- // return ;
- const [activeTab, setActiveTab] = useState("tasks");
- const [viewMode, setViewMode] = useState<"list" | "card">("list");
- const [showCreateDialog, setShowCreateDialog] = useState(false);
- const [autoTasks, setAutoTasks] = useState([]);
+export default function DataAnnotation() {
+ // return ;
+ const navigate = useNavigate();
+ const [activeTab, setActiveTab] = useState("tasks");
+ const [viewMode, setViewMode] = useState<"list" | "card">("list");
+ const [showCreateDialog, setShowCreateDialog] = useState(false);
+ const [autoTasks, setAutoTasks] = useState([]);
const {
loading,
@@ -56,9 +58,8 @@ export default function DataAnnotation() {
handleKeywordChange,
} = useFetchData(queryAnnotationTasksUsingGet, mapAnnotationTask, 30000, true, [], 0);
- const [labelStudioBase, setLabelStudioBase] = useState(null);
- const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
- const [selectedRows, setSelectedRows] = useState([]);
+ const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
+ const [selectedRows, setSelectedRows] = useState([]);
// 拉取自动标注任务(供轮询和创建成功后立即刷新复用)
const refreshAutoTasks = async (silent = false) => {
@@ -76,71 +77,24 @@ export default function DataAnnotation() {
}
};
- // prefetch config on mount so clicking annotate is fast and we know whether base URL exists
- // useEffect ensures this runs once
- useEffect(() => {
- let mounted = true;
- (async () => {
- try {
- const baseUrl = `http://${window.location.hostname}:${parseInt(window.location.port) + 1}`;
- if (mounted) setLabelStudioBase(baseUrl);
- } catch (e) {
- if (mounted) setLabelStudioBase(null);
- }
- })();
- return () => {
- mounted = false;
- };
- }, []);
-
- // 自动标注任务轮询(用于在同一表格中展示处理进度)
- useEffect(() => {
- refreshAutoTasks();
- const timer = setInterval(() => refreshAutoTasks(true), 3000);
+ // 自动标注任务轮询(用于在同一表格中展示处理进度)
+ useEffect(() => {
+ refreshAutoTasks();
+ const timer = setInterval(() => refreshAutoTasks(true), 3000);
return () => {
clearInterval(timer);
};
}, []);
- const handleAnnotate = (task: AnnotationTask) => {
- // Open Label Studio project page in a new tab
- (async () => {
- try {
- // prefer using labeling project id already present on the task
- // `mapAnnotationTask` normalizes upstream fields into `labelingProjId`/`projId`,
- // so prefer those and fall back to the task id if necessary.
- let labelingProjId = (task as any).labelingProjId || (task as any).projId || undefined;
-
- // no fallback external mapping lookup; rely on normalized fields from mapAnnotationTask
-
- // use prefetched base if available
- const base = labelStudioBase;
-
- // no debug logging in production
-
- if (labelingProjId) {
- // only open external Label Studio when we have a configured base url
- if (base) {
- const target = `${base}/projects/${labelingProjId}/data`;
- window.open(target, "_blank");
- } else {
- // no external Label Studio URL configured — do not perform internal redirect in this version
- message.error("无法跳转到 Label Studio:未配置 Label Studio 基础 URL");
- return;
- }
- } else {
- // no labeling project id available — do not attempt internal redirect in this version
- message.error("无法跳转到 Label Studio:该映射未绑定标注项目");
- return;
- }
- } catch (error) {
- // on error, surface a user-friendly message instead of redirecting
- message.error("无法跳转到 Label Studio:发生错误,请检查配置或控制台日志");
- return;
- }
- })();
- };
+ // Open the embedded editor for a task; the task id is used as the `:projectId` route param.
+ const handleAnnotate = (task: AnnotationTask) => {
+   const projectId = (task as any)?.id;
+   if (projectId) {
+     navigate(`/data/annotation/annotate/${projectId}`);
+     return;
+   }
+   message.error("无法进入标注:缺少标注项目ID");
+ };
const handleDelete = (task: AnnotationTask) => {
Modal.confirm({
diff --git a/frontend/src/pages/DataAnnotation/annotation.api.ts b/frontend/src/pages/DataAnnotation/annotation.api.ts
index b989cc4..8e9d4ca 100644
--- a/frontend/src/pages/DataAnnotation/annotation.api.ts
+++ b/frontend/src/pages/DataAnnotation/annotation.api.ts
@@ -62,6 +62,30 @@ export function getAutoAnnotationTaskStatusUsingGet(taskId: string) {
return get(`/api/annotation/auto/${taskId}/status`);
}
-export function downloadAutoAnnotationResultUsingGet(taskId: string) {
- return download(`/api/annotation/auto/${taskId}/download`);
-}
+export function downloadAutoAnnotationResultUsingGet(taskId: string) {
+ return download(`/api/annotation/auto/${taskId}/download`);
+}
+
+// =====================
+// Label Studio Editor(内嵌版)
+// =====================
+
+/**
+ * GET the editor metadata of a labeling project.
+ * NOTE(review): `projectId` is interpolated into the path unencoded — assumes URL-safe ids; confirm.
+ */
+export function getEditorProjectInfoUsingGet(projectId: string) {
+ return get(`/api/annotation/editor/projects/${projectId}`);
+}
+
+/**
+ * GET the editor task (file) list of a labeling project.
+ * @param params optional query parameters forwarded to `get` (the editor page sends `page` and `size`).
+ */
+export function listEditorTasksUsingGet(projectId: string, params?: any) {
+ return get(`/api/annotation/editor/projects/${projectId}/tasks`, params);
+}
+
+/** GET the editor task detail for one file of a labeling project. */
+export function getEditorTaskUsingGet(projectId: string, fileId: string) {
+ return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}`);
+}
+
+/**
+ * PUT (create or overwrite) the annotation of one file in a labeling project.
+ * @param data request body forwarded to `put`; the editor page sends `{ annotation }`.
+ */
+export function upsertEditorAnnotationUsingPut(
+ projectId: string,
+ fileId: string,
+ data: any
+) {
+ return put(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`, data);
+}
diff --git a/frontend/src/routes/routes.ts b/frontend/src/routes/routes.ts
index a0c23ed..303a153 100644
--- a/frontend/src/routes/routes.ts
+++ b/frontend/src/routes/routes.ts
@@ -16,6 +16,7 @@ import CleansingTemplateCreate from "@/pages/DataCleansing/Create/CreateTemplate
import DataAnnotation from "@/pages/DataAnnotation/Home/DataAnnotation";
import AnnotationTaskCreate from "@/pages/DataAnnotation/Create/CreateTask";
+import LabelStudioTextEditor from "@/pages/DataAnnotation/Annotate/LabelStudioTextEditor";
import DataSynthesisPage from "@/pages/SynthesisTask/DataSynthesis";
import InstructionTemplateCreate from "@/pages/SynthesisTask/CreateTemplate";
@@ -145,6 +146,10 @@ const router = createBrowserRouter([
path: "create-task",
Component: AnnotationTaskCreate,
},
+ {
+ path: "annotate/:projectId",
+ Component: LabelStudioTextEditor,
+ },
],
},
{
diff --git a/runtime/datamate-python/app/core/config.py b/runtime/datamate-python/app/core/config.py
index 50e9e2e..10adb00 100644
--- a/runtime/datamate-python/app/core/config.py
+++ b/runtime/datamate-python/app/core/config.py
@@ -62,5 +62,11 @@ class Settings(BaseSettings):
# DataMate
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
+ # DataMate Backend (Java) - 用于通过“下载/预览接口”读取文件内容
+ datamate_backend_base_url: str = "http://datamate-backend:8080/api"
+
+ # 标注编辑器(Label Studio Editor)相关
+ editor_max_text_bytes: int = 2 * 1024 * 1024 # 2MB,避免一次加载超大文本卡死前端
+
# 全局设置实例
settings = Settings()
diff --git a/runtime/datamate-python/app/db/models/__init__.py b/runtime/datamate-python/app/db/models/__init__.py
index 2b83de2..431d0ca 100644
--- a/runtime/datamate-python/app/db/models/__init__.py
+++ b/runtime/datamate-python/app/db/models/__init__.py
@@ -13,7 +13,8 @@ from .user_management import (
from .annotation_management import (
AnnotationTemplate,
- LabelingProject
+ LabelingProject,
+ AnnotationResult
)
from .data_evaluation import (
@@ -30,6 +31,7 @@ __all__ = [
"User",
"AnnotationTemplate",
"LabelingProject",
+ "AnnotationResult",
"EvaluationTask",
"EvaluationItem",
]
diff --git a/runtime/datamate-python/app/db/models/annotation_management.py b/runtime/datamate-python/app/db/models/annotation_management.py
index 81589f0..4003dbb 100644
--- a/runtime/datamate-python/app/db/models/annotation_management.py
+++ b/runtime/datamate-python/app/db/models/annotation_management.py
@@ -33,10 +33,10 @@ class AnnotationTemplate(Base):
"""检查是否已被软删除"""
return self.deleted_at is not None
-class LabelingProject(Base):
- """标注项目模型"""
-
- __tablename__ = "t_dm_labeling_projects"
+class LabelingProject(Base):
+ """标注项目模型"""
+
+ __tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
@@ -53,13 +53,29 @@ class LabelingProject(Base):
return f""
@property
- def is_deleted(self) -> bool:
- """检查是否已被软删除"""
- return self.deleted_at is not None
-
-
-class AutoAnnotationTask(Base):
- """自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
+ def is_deleted(self) -> bool:
+ """检查是否已被软删除"""
+ return self.deleted_at is not None
+
+
+# ORM mapping for table t_dm_annotation_results: one stored annotation JSON per row,
+# keyed by a generated UUID and tagged with the project and file it belongs to.
+class AnnotationResult(Base):
+ """Annotation result model (single annotator, single final label set; raw Label Studio annotation JSON)."""
+
+ __tablename__ = "t_dm_annotation_results"
+
+ id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+ # NOTE(review): no unique constraint on (project_id, file_id) is declared here —
+ # presumably enforced by the SQL init script; confirm.
+ project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
+ file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
+ annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
+ created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+ # Refreshed on every UPDATE through `onupdate`.
+ updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+
+ def __repr__(self):
+ # NOTE(review): the f-string is empty as shown — its content may have been lost; confirm.
+ return f""
+
+
+class AutoAnnotationTask(Base):
+ """自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
__tablename__ = "t_dm_auto_annotation_tasks"
@@ -92,4 +108,4 @@ class AutoAnnotationTask(Base):
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
- return self.deleted_at is not None
\ No newline at end of file
+ return self.deleted_at is not None
diff --git a/runtime/datamate-python/app/module/annotation/interface/__init__.py b/runtime/datamate-python/app/module/annotation/interface/__init__.py
index 2aea889..297ef62 100644
--- a/runtime/datamate-python/app/module/annotation/interface/__init__.py
+++ b/runtime/datamate-python/app/module/annotation/interface/__init__.py
@@ -2,9 +2,10 @@ from fastapi import APIRouter
from .config import router as about_router
from .project import router as project_router
-from .task import router as task_router
-from .template import router as template_router
-from .auto import router as auto_router
+from .task import router as task_router
+from .template import router as template_router
+from .auto import router as auto_router
+from .editor import router as editor_router
router = APIRouter(
prefix="/annotation",
@@ -13,6 +14,7 @@ router = APIRouter(
router.include_router(about_router)
router.include_router(project_router)
-router.include_router(task_router)
-router.include_router(template_router)
-router.include_router(auto_router)
\ No newline at end of file
+router.include_router(task_router)
+router.include_router(template_router)
+router.include_router(auto_router)
+router.include_router(editor_router)
diff --git a/runtime/datamate-python/app/module/annotation/interface/editor.py b/runtime/datamate-python/app/module/annotation/interface/editor.py
new file mode 100644
index 0000000..507d211
--- /dev/null
+++ b/runtime/datamate-python/app/module/annotation/interface/editor.py
@@ -0,0 +1,90 @@
+"""
+Label Studio Editor(前端嵌入式)接口
+
+说明:
+- 不依赖 Label Studio Server;仅复用其“编辑器”前端库
+- DataMate 负责提供 tasks/annotations 数据与保存能力
+- 当前为 TEXT POC:只支持 dataset_type=TEXT 的项目
+"""
+
+from __future__ import annotations
+
+from fastapi import APIRouter, Depends, Query, Path
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.logging import get_logger
+from app.db.session import get_db
+from app.module.annotation.schema.editor import (
+ EditorProjectInfo,
+ EditorTaskListResponse,
+ EditorTaskResponse,
+ UpsertAnnotationRequest,
+ UpsertAnnotationResponse,
+)
+from app.module.annotation.service.editor import AnnotationEditorService
+from app.module.shared.schema import StandardResponse
+
+logger = get_logger(__name__)
+
+router = APIRouter(
+ prefix="/editor",
+ tags=["annotation/editor"],
+)
+
+
+# GET /editor/projects/{project_id}: editor metadata of one labeling project,
+# wrapped in the standard response envelope.
+@router.get(
+ "/projects/{project_id}",
+ response_model=StandardResponse[EditorProjectInfo],
+)
+async def get_editor_project_info(
+ project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
+ db: AsyncSession = Depends(get_db),
+):
+ # A service instance is created per request around the injected session.
+ service = AnnotationEditorService(db)
+ info = await service.get_project_info(project_id)
+ return StandardResponse(code=200, message="success", data=info)
+
+
+# GET /editor/projects/{project_id}/tasks: paginated task list of a project.
+# `page` is zero-based (>= 0); `size` defaults to 50 and is limited to 1..200.
+@router.get(
+ "/projects/{project_id}/tasks",
+ response_model=StandardResponse[EditorTaskListResponse],
+)
+async def list_editor_tasks(
+ project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
+ page: int = Query(0, ge=0, description="页码(从0开始)"),
+ size: int = Query(50, ge=1, le=200, description="每页大小"),
+ db: AsyncSession = Depends(get_db),
+):
+ service = AnnotationEditorService(db)
+ result = await service.list_tasks(project_id, page=page, size=size)
+ return StandardResponse(code=200, message="success", data=result)
+
+
+# GET /editor/projects/{project_id}/tasks/{file_id}: task detail for one file of a project.
+@router.get(
+ "/projects/{project_id}/tasks/{file_id}",
+ response_model=StandardResponse[EditorTaskResponse],
+)
+async def get_editor_task(
+ project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
+ file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
+ db: AsyncSession = Depends(get_db),
+):
+ service = AnnotationEditorService(db)
+ task = await service.get_task(project_id, file_id)
+ return StandardResponse(code=200, message="success", data=task)
+
+
+# PUT /editor/projects/{project_id}/tasks/{file_id}/annotation: save the annotation
+# carried in the request body for one file of a project.
+@router.put(
+ "/projects/{project_id}/tasks/{file_id}/annotation",
+ response_model=StandardResponse[UpsertAnnotationResponse],
+)
+async def upsert_editor_annotation(
+ request: UpsertAnnotationRequest,
+ project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
+ file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
+ db: AsyncSession = Depends(get_db),
+):
+ service = AnnotationEditorService(db)
+ result = await service.upsert_annotation(project_id, file_id, request)
+ return StandardResponse(code=200, message="success", data=result)
+
diff --git a/runtime/datamate-python/app/module/annotation/schema/__init__.py b/runtime/datamate-python/app/module/annotation/schema/__init__.py
index 6fb8707..3e9c02b 100644
--- a/runtime/datamate-python/app/module/annotation/schema/__init__.py
+++ b/runtime/datamate-python/app/module/annotation/schema/__init__.py
@@ -30,6 +30,15 @@ from .mapping import (
DeleteDatasetResponse,
)
+from .editor import (
+ EditorProjectInfo,
+ EditorTaskListItem,
+ EditorTaskListResponse,
+ EditorTaskResponse,
+ UpsertAnnotationRequest,
+ UpsertAnnotationResponse,
+)
+
# Rebuild model to resolve forward references
DatasetMappingResponse.model_rebuild()
@@ -51,4 +60,10 @@ __all__ = [
"UpdateAnnotationTemplateRequest",
"AnnotationTemplateResponse",
"AnnotationTemplateListResponse",
-]
\ No newline at end of file
+ "EditorProjectInfo",
+ "EditorTaskListItem",
+ "EditorTaskListResponse",
+ "EditorTaskResponse",
+ "UpsertAnnotationRequest",
+ "UpsertAnnotationResponse",
+]
diff --git a/runtime/datamate-python/app/module/annotation/schema/editor.py b/runtime/datamate-python/app/module/annotation/schema/editor.py
new file mode 100644
index 0000000..7a13085
--- /dev/null
+++ b/runtime/datamate-python/app/module/annotation/schema/editor.py
@@ -0,0 +1,83 @@
+"""
+标注编辑器(Label Studio Editor)接口模型
+
+设计目标:
+- 单人单份最终标签:每个 project_id + file_id 只维护 1 条最终标注结果
+- 完全兼容 Label Studio:标注结果以 annotation 原始 JSON 形式存储与返回
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field, ConfigDict
+
+
+# Editor project metadata. Fields are declared in snake_case with camelCase aliases;
+# `populate_by_name=True` lets the model be populated by either the field name or the alias.
+class EditorProjectInfo(BaseModel):
+ """编辑器项目元信息"""
+
+ project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID(t_dm_labeling_projects.id)")
+ dataset_id: str = Field(..., alias="datasetId", description="数据集ID(t_dm_datasets.id)")
+ template_id: Optional[str] = Field(None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)")
+ # Label Studio XML labeling configuration; optional.
+ label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置")
+ # Whether the embedded editor supports this project's data type.
+ supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器")
+ # Explanation to show when `supported` is false.
+ unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因(当 supported=false)")
+
+ model_config = ConfigDict(populate_by_name=True)
+
+
+# One entry of the editor task list; per the docstring, each entry corresponds to a dataset file.
+class EditorTaskListItem(BaseModel):
+ """编辑器任务列表条目(对应一个数据集文件)"""
+
+ file_id: str = Field(..., alias="fileId", description="文件ID")
+ file_name: str = Field(..., alias="fileName", description="文件名")
+ file_type: Optional[str] = Field(None, alias="fileType", description="文件类型")
+ # True when a final annotation already exists for the file.
+ has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注")
+ # Last update time of that annotation; optional.
+ annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
+
+ model_config = ConfigDict(populate_by_name=True)
+
+
+# Paginated task list: the page content plus totals and the requested page/size.
+class EditorTaskListResponse(BaseModel):
+ """编辑器任务列表响应"""
+
+ content: List[EditorTaskListItem] = Field(..., description="任务列表")
+ total_elements: int = Field(..., alias="totalElements", description="总条数")
+ total_pages: int = Field(..., alias="totalPages", description="总页数")
+ # Zero-based page index.
+ page: int = Field(..., description="页码(从0开始)")
+ size: int = Field(..., description="每页大小")
+
+ model_config = ConfigDict(populate_by_name=True)
+
+
+# Task detail: a free-form Label Studio task object that, per the docstring, can be
+# fed directly to the Label Studio editor, plus the annotation's last update time.
+class EditorTaskResponse(BaseModel):
+ """编辑器任务详情(可直接喂给 Label Studio Editor 的 task 对象)"""
+
+ task: Dict[str, Any] = Field(..., description="Label Studio task 对象")
+ annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
+
+ model_config = ConfigDict(populate_by_name=True)
+
+
+# Request body for saving (creating or overwriting) the final annotation of a file.
+class UpsertAnnotationRequest(BaseModel):
+ """保存/覆盖最终标注(Label Studio annotation 原始对象)"""
+
+ # Raw Label Studio annotation object (contains `result` etc. per the field description).
+ annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象(包含 result 等)")
+ # Optional optimistic-lock token: per the field description, a mismatch with the stored
+ # updated_at yields HTTP 409. NOTE(review): enforcement lives in the service — confirm.
+ expected_updated_at: Optional[datetime] = Field(
+ None,
+ alias="expectedUpdatedAt",
+ description="乐观锁:若提供则要求与当前记录 updated_at 一致,否则返回 409",
+ )
+
+ model_config = ConfigDict(populate_by_name=True)
+
+
+# Result of a save: the id of the stored annotation row and its update time.
+class UpsertAnnotationResponse(BaseModel):
+ """保存/覆盖最终标注响应"""
+
+ annotation_id: str = Field(..., alias="annotationId", description="标注结果ID(t_dm_annotation_results.id)")
+ updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间")
+
+ model_config = ConfigDict(populate_by_name=True)
+
diff --git a/runtime/datamate-python/app/module/annotation/service/editor.py b/runtime/datamate-python/app/module/annotation/service/editor.py
new file mode 100644
index 0000000..e779789
--- /dev/null
+++ b/runtime/datamate-python/app/module/annotation/service/editor.py
@@ -0,0 +1,295 @@
+"""
+标注编辑器(Label Studio Editor)服务
+
+职责:
+- 解析 DataMate 标注项目(t_dm_labeling_projects)
+- 以“文件下载/预览接口”读取文本内容,构造 Label Studio task
+- 以原始 annotation JSON 形式 upsert 最终标注结果(单人单份)
+"""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+
+import httpx
+from fastapi import HTTPException
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.config import settings
+from app.core.logging import get_logger
+from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
+from app.module.annotation.schema.editor import (
+ EditorProjectInfo,
+ EditorTaskListItem,
+ EditorTaskListResponse,
+ EditorTaskResponse,
+ UpsertAnnotationRequest,
+ UpsertAnnotationResponse,
+)
+from app.module.annotation.service.template import AnnotationTemplateService
+
+logger = get_logger(__name__)
+
+
+class AnnotationEditorService:
+ """Label Studio Editor 集成服务(TEXT POC 版)"""
+
+    def __init__(self, db: AsyncSession):
+        """Bind the service to a database session and create its template service.
+
+        Args:
+            db: Async SQLAlchemy session used by every query this service issues.
+        """
+        self.template_service = AnnotationTemplateService()
+        self.db = db
+
+    async def _get_project_or_404(self, project_id: str) -> LabelingProject:
+        """Load a labeling project that has not been soft-deleted, or fail with 404.
+
+        Args:
+            project_id: Primary key of the labeling project.
+
+        Returns:
+            The matching ``LabelingProject`` row.
+
+        Raises:
+            HTTPException: 404 when no row with ``deleted_at IS NULL`` matches.
+        """
+        stmt = select(LabelingProject).where(
+            LabelingProject.id == project_id,
+            LabelingProject.deleted_at.is_(None),
+        )
+        found = (await self.db.execute(stmt)).scalar_one_or_none()
+        if found:
+            return found
+        raise HTTPException(status_code=404, detail=f"标注项目不存在: {project_id}")
+
+    async def _get_dataset_type(self, dataset_id: str) -> Optional[str]:
+        """Return the ``dataset_type`` column of a dataset, or None when no row matches.
+
+        Args:
+            dataset_id: Primary key of the dataset.
+        """
+        query = select(Dataset.dataset_type).where(Dataset.id == dataset_id)
+        rows = await self.db.execute(query)
+        return rows.scalar_one_or_none()
+
+    async def _get_label_config(self, template_id: Optional[str]) -> Optional[str]:
+        """Look up the ``label_config`` of an annotation template.
+
+        Args:
+            template_id: Template ID; a falsy value short-circuits to None
+                without querying the template service.
+
+        Returns:
+            The template's ``label_config`` attribute, or None when the ID is
+            falsy, the template is not found, or the attribute is missing.
+        """
+        if template_id:
+            tpl = await self.template_service.get_template(self.db, template_id)
+            if tpl:
+                return getattr(tpl, "label_config", None)
+        return None
+
+    async def get_project_info(self, project_id: str) -> EditorProjectInfo:
+        """Describe a labeling project for the embedded editor.
+
+        The project is reported as supported only when its dataset type,
+        compared case-insensitively, is ``TEXT``; otherwise a human-readable
+        reason is attached.
+
+        Args:
+            project_id: Primary key of the labeling project.
+
+        Returns:
+            ``EditorProjectInfo`` with IDs, the template label config and the
+            support flag/reason.
+
+        Raises:
+            HTTPException: 404 when the project does not exist.
+        """
+        project = await self._get_project_or_404(project_id)
+        dataset_type = await self._get_dataset_type(project.dataset_id)
+        label_config = await self._get_label_config(project.template_id)
+
+        is_text = (dataset_type or "").upper() == "TEXT"
+        reason = (
+            None
+            if is_text
+            else f"当前仅支持 TEXT,项目数据类型为: {dataset_type or 'UNKNOWN'}"
+        )
+
+        return EditorProjectInfo(
+            projectId=project.id,
+            datasetId=project.dataset_id,
+            templateId=project.template_id,
+            labelConfig=label_config,
+            supported=is_text,
+            unsupportedReason=reason,
+        )
+
+    async def list_tasks(self, project_id: str, page: int = 0, size: int = 50) -> EditorTaskListResponse:
+        """List the files of a project's dataset as editor tasks, one page at a time.
+
+        Args:
+            project_id: Primary key of the labeling project.
+            page: Zero-based page index.
+            size: Page size. ``0`` yields an empty page with ``totalPages == 0``.
+
+        Returns:
+            ``EditorTaskListResponse`` holding the page of files, each flagged
+            with whether an annotation row exists for it in this project.
+
+        Raises:
+            HTTPException: 400 when ``page`` or ``size`` is negative; 404 when
+                the project does not exist.
+        """
+        # Negative values would otherwise reach the database as a negative
+        # OFFSET/LIMIT and surface as an opaque server error.
+        if page < 0 or size < 0:
+            raise HTTPException(status_code=400, detail="分页参数非法: page 和 size 不能为负数")
+
+        project = await self._get_project_or_404(project_id)
+        in_dataset = DatasetFiles.dataset_id == project.dataset_id
+
+        count_result = await self.db.execute(
+            select(func.count()).select_from(DatasetFiles).where(in_dataset)
+        )
+        total = int(count_result.scalar() or 0)
+
+        files_result = await self.db.execute(
+            select(DatasetFiles)
+            .where(in_dataset)
+            # created_at alone is not unique (bulk imports share timestamps);
+            # the id tie-breaker keeps page boundaries stable across requests.
+            .order_by(DatasetFiles.created_at.desc(), DatasetFiles.id.desc())
+            .offset(page * size)
+            .limit(size)
+        )
+        files = files_result.scalars().all()
+
+        file_ids = [str(f.id) for f in files]  # type: ignore[arg-type]
+        annotated_ids = set()
+        updated_map: Dict[str, datetime] = {}
+        if file_ids:
+            ann_result = await self.db.execute(
+                select(AnnotationResult.file_id, AnnotationResult.updated_at).where(
+                    AnnotationResult.project_id == project_id,
+                    AnnotationResult.file_id.in_(file_ids),
+                )
+            )
+            for file_id, updated_at in ann_result.all():
+                if not file_id:
+                    continue
+                # Existence of the row decides hasAnnotation, even if its
+                # updated_at happens to be NULL.
+                annotated_ids.add(str(file_id))
+                if updated_at:
+                    updated_map[str(file_id)] = updated_at
+
+        items: List[EditorTaskListItem] = []
+        for f in files:
+            fid = str(f.id)  # type: ignore[arg-type]
+            items.append(
+                EditorTaskListItem(
+                    fileId=fid,
+                    fileName=str(getattr(f, "file_name", "")),
+                    fileType=getattr(f, "file_type", None),
+                    hasAnnotation=fid in annotated_ids,
+                    annotationUpdatedAt=updated_map.get(fid),
+                )
+            )
+
+        # Ceiling division; size == 0 would divide by zero, so report 0 pages.
+        total_pages = (total + size - 1) // size if size > 0 else 0
+        return EditorTaskListResponse(
+            content=items,
+            totalElements=total,
+            totalPages=total_pages,
+            page=page,
+            size=size,
+        )
+
+    async def _fetch_text_content_via_download_api(self, dataset_id: str, file_id: str) -> str:
+        """Download a dataset file through the backend download endpoint and decode it as text.
+
+        The response is streamed, so a file larger than
+        ``settings.editor_max_text_bytes`` is rejected as soon as the limit is
+        crossed instead of after the whole body has been buffered in memory.
+
+        Args:
+            dataset_id: Dataset the file belongs to.
+            file_id: File to download.
+
+        Returns:
+            The file content decoded as UTF-8; undecodable bytes become the
+            replacement character.
+
+        Raises:
+            HTTPException: 413 when the file exceeds the configured limit;
+                502 when the download endpoint returns an error status or the
+                call fails for any other reason.
+        """
+        base = settings.datamate_backend_base_url.rstrip("/")
+        url = f"{base}/data-management/datasets/{dataset_id}/files/{file_id}/download"
+        limit = settings.editor_max_text_bytes
+
+        def too_large() -> HTTPException:
+            return HTTPException(
+                status_code=413,
+                detail=f"文本文件过大,限制 {limit} 字节",
+            )
+
+        try:
+            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+                async with client.stream("GET", url) as resp:
+                    resp.raise_for_status()
+
+                    # Cheap early exit when the server declares the size up front.
+                    declared = resp.headers.get("content-length")
+                    if declared:
+                        try:
+                            declared_size: Optional[int] = int(declared)
+                        except ValueError:
+                            # Malformed content-length: fall back to counting bytes.
+                            declared_size = None
+                        if declared_size is not None and declared_size > limit:
+                            raise too_large()
+
+                    # Count bytes as they arrive; stop once past the limit.
+                    buf = bytearray()
+                    async for chunk in resp.aiter_bytes():
+                        buf.extend(chunk)
+                        if len(buf) > limit:
+                            raise too_large()
+
+            # TEXT POC: decode as UTF-8 and substitute undecodable bytes.
+            return buf.decode("utf-8", errors="replace")
+
+        except HTTPException:
+            # Our own 413 must not be rewritten into a 502 below.
+            raise
+        except httpx.HTTPStatusError as e:
+            logger.error(f"读取文本失败: dataset={dataset_id}, file={file_id}, http={e.response.status_code}")
+            raise HTTPException(status_code=502, detail="读取文本失败(下载接口返回错误)") from e
+        except Exception as e:
+            logger.error(f"读取文本失败: dataset={dataset_id}, file={file_id}, err={e}")
+            raise HTTPException(status_code=502, detail="读取文本失败(下载接口调用异常)") from e
+
+    async def get_task(self, project_id: str, file_id: str) -> EditorTaskResponse:
+        """Build the Label Studio task object for one file of a TEXT project.
+
+        Args:
+            project_id: Primary key of the labeling project.
+            file_id: Dataset file to open in the editor.
+
+        Returns:
+            ``EditorTaskResponse`` whose task carries the file text and, when a
+            stored annotation exists, that annotation as the only entry of
+            ``annotations``.
+
+        Raises:
+            HTTPException: 404 when the project is missing or the file is not
+                part of the project's dataset; 400 when the dataset type is
+                not TEXT; errors raised while downloading the text propagate.
+        """
+        project = await self._get_project_or_404(project_id)
+
+        # The embedded editor only handles TEXT datasets.
+        kind = await self._get_dataset_type(project.dataset_id)
+        if (kind or "").upper() != "TEXT":
+            raise HTTPException(status_code=400, detail="当前仅支持 TEXT 项目的内嵌编辑器")
+
+        file_stmt = select(DatasetFiles).where(
+            DatasetFiles.id == file_id,
+            DatasetFiles.dataset_id == project.dataset_id,
+        )
+        file_record = (await self.db.execute(file_stmt)).scalar_one_or_none()
+        if not file_record:
+            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+
+        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+
+        ann_stmt = select(AnnotationResult).where(
+            AnnotationResult.project_id == project_id,
+            AnnotationResult.file_id == file_id,
+        )
+        stored = (await self.db.execute(ann_stmt)).scalar_one_or_none()
+
+        # The stored annotation is returned untouched (raw Label Studio object).
+        annotations = [stored.annotation] if stored else []
+        annotation_updated_at = stored.updated_at if stored else None
+
+        task: Dict[str, Any] = {
+            "id": file_id,
+            "data": {
+                "text": text_content,
+                "file_id": file_id,
+                "dataset_id": project.dataset_id,
+                "file_name": getattr(file_record, "file_name", ""),
+            },
+            "annotations": annotations,
+        }
+
+        return EditorTaskResponse(
+            task=task,
+            annotationUpdatedAt=annotation_updated_at,
+        )
+
+    async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
+        """Create or overwrite the single stored annotation for a (project, file) pair.
+
+        Timestamps are written as naive UTC. When ``expected_updated_at`` is
+        supplied it is converted to naive UTC before being compared with the
+        stored value, so a client sending an offset-aware timestamp is not
+        rejected (or accepted) by accident.
+
+        Args:
+            project_id: Primary key of the labeling project.
+            file_id: Dataset file the annotation belongs to.
+            request: Annotation payload plus the optional optimistic-lock token.
+
+        Returns:
+            ``UpsertAnnotationResponse`` with the record ID and its updated_at.
+
+        Raises:
+            HTTPException: 404 when the project is missing or the file is not
+                part of its dataset; 400 when ``annotation.result`` is not a
+                list; 409 when the optimistic-lock token does not match or a
+                concurrent request created the record first.
+        """
+        # Function-scope imports: these names are not imported at module level.
+        from datetime import timezone
+
+        from sqlalchemy.exc import IntegrityError
+
+        project = await self._get_project_or_404(project_id)
+
+        # Verify that the file belongs to the project's dataset.
+        file_check = await self.db.execute(
+            select(DatasetFiles.id).where(
+                DatasetFiles.id == file_id,
+                DatasetFiles.dataset_id == project.dataset_id,
+            )
+        )
+        if not file_check.scalar_one_or_none():
+            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+
+        # Shallow copy so the request object is not mutated when the id is set.
+        annotation_payload = dict(request.annotation or {})
+        if not isinstance(annotation_payload.get("result"), list):
+            raise HTTPException(status_code=400, detail="annotation.result 必须为数组")
+
+        existing_result = await self.db.execute(
+            select(AnnotationResult).where(
+                AnnotationResult.project_id == project_id,
+                AnnotationResult.file_id == file_id,
+            )
+        )
+        existing = existing_result.scalar_one_or_none()
+
+        # Naive UTC, same value datetime.utcnow() produced, without the deprecated call.
+        now = datetime.now(timezone.utc).replace(tzinfo=None)
+
+        if existing:
+            if request.expected_updated_at and existing.updated_at:
+                expected = self._to_naive_utc(request.expected_updated_at)
+                current = self._to_naive_utc(existing.updated_at)
+                if current != expected:
+                    raise HTTPException(status_code=409, detail="标注已被更新,请刷新后重试")
+
+            # Pin annotation.id to the record ID so it stays stable across saves.
+            annotation_payload["id"] = existing.id
+            existing.annotation = annotation_payload  # type: ignore[assignment]
+            existing.updated_at = now  # type: ignore[assignment]
+            await self.db.commit()
+            await self.db.refresh(existing)
+
+            return UpsertAnnotationResponse(
+                annotationId=existing.id,
+                updatedAt=existing.updated_at or now,
+            )
+
+        new_id = str(uuid.uuid4())
+        annotation_payload["id"] = new_id
+        record = AnnotationResult(
+            id=new_id,
+            project_id=project_id,
+            file_id=file_id,
+            annotation=annotation_payload,
+            created_at=now,
+            updated_at=now,
+        )
+        self.db.add(record)
+        try:
+            await self.db.commit()
+        except IntegrityError as exc:
+            # Another request inserted the (project_id, file_id) row between our
+            # SELECT and this INSERT; report a conflict instead of a 500.
+            await self.db.rollback()
+            raise HTTPException(status_code=409, detail="标注已被更新,请刷新后重试") from exc
+        await self.db.refresh(record)
+
+        return UpsertAnnotationResponse(
+            annotationId=record.id,
+            updatedAt=record.updated_at or now,
+        )
+
+    @staticmethod
+    def _to_naive_utc(value: datetime) -> datetime:
+        """Return ``value`` as a naive datetime in UTC; naive input is returned unchanged."""
+        from datetime import timezone
+
+        if value.tzinfo is None or value.utcoffset() is None:
+            return value
+        return value.astimezone(timezone.utc).replace(tzinfo=None)
+
diff --git a/scripts/db/data-annotation-init.sql b/scripts/db/data-annotation-init.sql
index e367674..55be49c 100644
--- a/scripts/db/data-annotation-init.sql
+++ b/scripts/db/data-annotation-init.sql
@@ -20,27 +20,45 @@ CREATE TABLE t_dm_annotation_templates (
INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';
-CREATE TABLE t_dm_labeling_projects (
- id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
- dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
- name VARCHAR(100) NOT NULL COMMENT '项目名称',
- labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
- template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
- configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
- progress JSON COMMENT '项目进度信息',
- created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
- FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
- INDEX idx_dataset_id (dataset_id),
- INDEX idx_template_id (template_id),
- INDEX idx_labeling_project_id (labeling_project_id)
-) COMMENT='标注项目表';
-
--- 自动标注任务表
-CREATE TABLE t_dm_auto_annotation_tasks (
- id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
- name VARCHAR(255) NOT NULL COMMENT '任务名称',
+-- Labeling project table: one row per annotation project, bound to a dataset.
+-- template_id is set to NULL (not cascaded) when the referenced template row is
+-- deleted; deleted_at marks soft-deleted rows.
+CREATE TABLE t_dm_labeling_projects (
+ id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
+ dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
+ name VARCHAR(100) NOT NULL COMMENT '项目名称',
+ labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
+ template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
+ configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
+ progress JSON COMMENT '项目进度信息',
+ created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+ deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
+ FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
+ INDEX idx_dataset_id (dataset_id),
+ INDEX idx_template_id (template_id),
+ INDEX idx_labeling_project_id (labeling_project_id)
+) COMMENT='标注项目表';
+
+-- Annotation results table (one final annotation per project/file, stored as the
+-- raw Label Studio annotation JSON).
+-- Design notes:
+--   1) project_id reuses t_dm_labeling_projects.id (DataMate internal labeling project ID)
+--   2) file_id reuses t_dm_dataset_files.id (dataset file ID)
+--   3) annotation stores the Label Studio annotation object (including result etc.) as-is
+--   4) no standalone index on project_id: uk_project_file already has project_id as
+--      its leading column, so lookups by project_id alone can use that index
+CREATE TABLE IF NOT EXISTS t_dm_annotation_results (
+    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
+    project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID(t_dm_labeling_projects.id)',
+    file_id VARCHAR(36) NOT NULL COMMENT '文件ID(t_dm_dataset_files.id)',
+    annotation JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON(单人单份最终结果)',
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    UNIQUE KEY uk_project_file (project_id, file_id),
+    INDEX idx_file_id (file_id),
+    INDEX idx_updated_at (updated_at)
+) COMMENT='标注结果表(Label Studio兼容,单人单份)';
+
+-- 自动标注任务表
+CREATE TABLE t_dm_auto_annotation_tasks (
+ id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
+ name VARCHAR(255) NOT NULL COMMENT '任务名称',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
dataset_name VARCHAR(255) COMMENT '数据集名称(冗余字段,方便查询)',
config JSON NOT NULL COMMENT '任务配置(模型规模、置信度等)',