This commit is contained in:
2026-01-07 00:00:16 +08:00
parent 7d4dcb756b
commit d5b75fee0d
14 changed files with 1267 additions and 124 deletions

View File

@@ -0,0 +1,287 @@
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DataMate - Label Studio 编辑器</title>
<style>
html,
body {
height: 100%;
margin: 0;
}
#label-studio {
height: 100vh;
}
</style>
<!--
说明:
- 本页面作为 iframe 运行,用于隔离 Label Studio Frontend(避免与 DataMate React/Antd 依赖冲突)。
- 当前使用 CDN 加载 LSF 产物;如需离线部署,可改为本地静态资源。
- 与父页面通过 postMessage 通信,约定消息类型:
- Parent -> Iframe: LS_INIT / LS_EXPORT / LS_RESET / LS_PING
- Iframe -> Parent: LS_IFRAME_READY / LS_READY / LS_EXPORT_RESULT / LS_RESET_DONE / LS_PONG / LS_ERROR / LS_SUBMIT
-->
<link
rel="stylesheet"
href="https://unpkg.com/label-studio-frontend@1.7.1/dist/lsf/css/main.css"
/>
<script src="https://unpkg.com/label-studio-frontend@1.7.1/dist/lsf/js/main.js"></script>
</head>
<body>
<div id="label-studio"></div>
<script>
(function () {
const ORIGIN = window.location.origin;
let lsInstance = null;
let currentTask = null;
/**
 * Sends a `{ type, payload }` message to the embedding (parent) window.
 * The target origin is this page's own origin, so only a same-origin parent receives it.
 *
 * @param {string} type    Message type, e.g. "LS_READY".
 * @param {*}      payload Message body.
 */
function postToParent(type, payload) {
  const envelope = { type, payload };
  window.parent.postMessage(envelope, ORIGIN);
}
/**
 * Tears down the current editor: calls destroy() on the instance when available,
 * forgets the instance and the current task, and empties the mount element.
 */
function destroyLabelStudio() {
  const instance = lsInstance;
  try {
    const canDestroy = Boolean(instance) && typeof instance.destroy === "function";
    if (canDestroy) {
      instance.destroy();
    }
  } catch (_) {
    // Best effort: a failing destroy() must not prevent the state reset below.
  }

  lsInstance = null;
  currentTask = null;

  const mountPoint = document.getElementById("label-studio");
  if (mountPoint) {
    mountPoint.innerHTML = "";
  }
}
/**
 * Locates the annotation store on a Label Studio object.
 * Checks `ls.annotationStore` first, then `ls.store.annotationStore`.
 *
 * @param {object|null|undefined} ls
 * @returns {object|null} The store, or null when neither location holds one.
 */
function pickAnnotationStore(ls) {
  const direct = ls?.annotationStore;
  if (direct) {
    return direct;
  }
  const nested = ls?.store?.annotationStore;
  return nested || null;
}
/**
 * Builds the user object handed to Label Studio.
 *
 * - Non-object input is replaced by an anonymous user.
 * - `id` is the first truthy value of id / userId / username / email, else "anonymous".
 * - `pk` is kept when truthy; otherwise it is derived from the id with a 31-based
 *   32-bit string hash (absolute value, never 0).
 * - `firstName` falls back to `name`, then to the id as a string.
 *
 * @param {*} rawUser
 * @returns {object} A copy of the user with id, pk and firstName filled in.
 */
function normalizeUser(rawUser) {
  const source = rawUser && typeof rawUser === "object" ? rawUser : { id: "anonymous" };
  const identity = source.id || source.userId || source.username || source.email || "anonymous";
  const identityText = String(identity);

  let numericKey = source.pk;
  if (!numericKey) {
    const hash = Array.from({ length: identityText.length }, (_, i) => identityText.charCodeAt(i)).reduce(
      (acc, code) => (acc * 31 + code) | 0,
      0,
    );
    numericKey = Math.abs(hash) || 1;
  }

  const displayName = source.firstName || source.name || identityText;
  return { ...source, id: identity, pk: numericKey, firstName: displayName };
}
/**
 * Produces the task object handed to Label Studio.
 *
 * `annotations` and `predictions` are each taken from `extra` when it holds an array
 * under that key, otherwise from the task itself, otherwise default to `[]`.
 *
 * @param {*} task
 * @param {object} [extra] Optional source of overriding annotations / predictions.
 * @returns {object|null} A shallow copy of the task, or null when `task` is not an object.
 */
function normalizeTask(task, extra) {
  if (!task || typeof task !== "object") {
    return null;
  }

  const resolveList = (key) => {
    if (Array.isArray(extra?.[key])) return extra[key];
    if (Array.isArray(task[key])) return task[key];
    return [];
  };

  return {
    ...task,
    annotations: resolveList("annotations"),
    predictions: resolveList("predictions"),
  };
}
/**
 * Makes sure the annotation store has a selected annotation.
 *
 * Selection order:
 *  1. the annotation whose id equals `prefer.id` (compared as strings);
 *  2. the annotation at position `prefer.index`;
 *  3. the first existing annotation;
 *  4. when the store has no annotations, a newly added user-generated one.
 *
 * Nothing happens when the store is missing or does not expose `selectAnnotation`.
 *
 * @param {object|null|undefined} store Annotation store.
 * @param {{id?: *, index?: *}} [prefer] Preferred annotation to select.
 */
function ensureSelectedAnnotation(store, prefer) {
  // Every branch below needs selectAnnotation, so bail out once up front.
  if (!store || typeof store.selectAnnotation !== "function") return;

  const annotations = Array.isArray(store.annotations) ? store.annotations : [];

  if (prefer) {
    // Skip the id lookup when no id was requested: String(undefined) would otherwise
    // match any annotation that itself has no id.
    if (prefer.id != null) {
      const wantedId = String(prefer.id);
      const byId = annotations.find((a) => a != null && String(a.id) === wantedId);
      if (byId) {
        store.selectAnnotation(byId.id);
        return;
      }
    }

    // Only whole numbers are valid positions; a fractional index would read an
    // undefined element and throw.
    const idx = prefer.index;
    if (Number.isInteger(idx) && idx >= 0 && idx < annotations.length) {
      store.selectAnnotation(annotations[idx].id);
      return;
    }
  }

  if (annotations.length > 0) {
    store.selectAnnotation(annotations[0].id);
    return;
  }

  if (typeof store.addAnnotation === "function") {
    const ann = store.addAnnotation({ userGenerate: true });
    if (ann && ann.id) store.selectAnnotation(ann.id);
  }
}
/**
 * Serializes the currently selected annotation into the payload sent to the parent page.
 *
 * The annotation is `store.selected`, else `store.selectedAnnotation`, else the first
 * entry of `store.annotations`. It is serialized with `serializeAnnotation()` when
 * available, otherwise `serialize()`, otherwise its raw `result` is used.
 *
 * @returns {{taskId: *, annotation: object}} `taskId` is the current task's id (or null);
 *   `annotation` always carries id, task, created_at and updated_at.
 * @throws {Error} When the editor is not initialised or the annotation store is unreachable.
 */
function exportSelectedAnnotation() {
  if (!lsInstance) {
    throw new Error("LabelStudio 未初始化");
  }
  const store = pickAnnotationStore(lsInstance);
  if (!store) {
    throw new Error("无法访问 annotationStore");
  }

  const selected =
    store.selected ||
    store.selectedAnnotation ||
    (Array.isArray(store.annotations) && store.annotations.length ? store.annotations[0] : null);

  let serialized = null;
  if (selected && typeof selected.serializeAnnotation === "function") {
    serialized = selected.serializeAnnotation();
  } else if (selected && typeof selected.serialize === "function") {
    serialized = selected.serialize();
  }

  let annotationPayload;
  if (Array.isArray(serialized)) {
    // Serializer returned the bare result list.
    annotationPayload = { id: selected?.id || "draft", result: serialized };
  } else if (serialized && typeof serialized === "object") {
    // Spread first: the id chosen here (selected.id preferred) must not be
    // overwritten by an `id` key inside the serialized object.
    annotationPayload = { ...serialized, id: selected?.id || serialized.id || "draft" };
  } else {
    annotationPayload = { id: selected?.id || "draft", result: (selected && selected.result) || [] };
  }

  // Fill the minimal Label Studio Server style fields; a single timestamp keeps
  // created_at and updated_at identical for a brand-new annotation.
  const taskId = currentTask?.id || null;
  const now = new Date().toISOString();
  if (!annotationPayload.task) annotationPayload.task = taskId;
  if (!annotationPayload.created_at) annotationPayload.created_at = now;
  annotationPayload.updated_at = now;

  return {
    taskId,
    annotation: annotationPayload,
  };
}
/**
 * (Re)creates the Label Studio editor inside #label-studio from an LS_INIT payload.
 *
 * Any previous instance is destroyed first. Once the editor has loaded, an annotation
 * is selected (by `selectedAnnotationId`, then `selectedAnnotationIndex`, then the first
 * one) and LS_READY is posted to the parent. Both the submit and the update actions of
 * the editor export the selected annotation and post it as LS_SUBMIT.
 *
 * @param {object} payload
 * @param {string} payload.labelConfig  Label Studio XML config (required).
 * @param {object} payload.task         Task object (required).
 * @param {string[]} [payload.interfaces] Editor interfaces; a default set is used when omitted.
 * @param {object} [payload.user]
 * @param {*} [payload.selectedAnnotationId]
 * @param {number} [payload.selectedAnnotationIndex]
 * @param {boolean} [payload.allowCreateEmptyAnnotation=true] When false, no empty
 *   annotation is created for a task that has none.
 * @throws {Error} When the library is not loaded or required parameters are missing/invalid.
 */
function initLabelStudio(payload) {
  if (!window.LabelStudio) {
    throw new Error("LabelStudio 未加载(请检查静态资源/网络)");
  }
  if (!payload || !payload.labelConfig || !payload.task) {
    throw new Error("初始化参数缺失:labelConfig/task");
  }

  destroyLabelStudio();

  const interfaces =
    payload.interfaces ||
    [
      "panel",
      "update",
      "controls",
      "side-column",
      "annotations",
      "infobar",
      "instruction",
    ];

  const task = normalizeTask(payload.task, payload);
  if (!task) {
    throw new Error("task 参数非法");
  }
  currentTask = task;

  const user = normalizeUser(payload.user);
  const allowCreate = payload.allowCreateEmptyAnnotation !== false;

  // Shared by submit and update so both editor buttons trigger a save in the parent page.
  const reportAnnotation = function () {
    try {
      const raw = exportSelectedAnnotation();
      postToParent("LS_SUBMIT", raw);
    } catch (e) {
      postToParent("LS_ERROR", { message: e?.message || String(e) });
    }
  };

  lsInstance = new window.LabelStudio("label-studio", {
    config: payload.labelConfig,
    interfaces,
    user,
    task,
    onLabelStudioLoad: function (LS) {
      try {
        const store = pickAnnotationStore(LS);
        const hasAnnotations = Array.isArray(store?.annotations) && store.annotations.length > 0;

        // ensureSelectedAnnotation creates an annotation when none exist, so it is only
        // called for an empty store when creation is allowed.
        if (hasAnnotations || allowCreate) {
          ensureSelectedAnnotation(store, {
            id: payload.selectedAnnotationId,
            index: payload.selectedAnnotationIndex,
          });
        }

        // Fallback: still nothing selected, so create an editable empty annotation.
        if (allowCreate) {
          const selected = store?.selected || store?.selectedAnnotation || null;
          if (
            !selected &&
            typeof store?.addAnnotation === "function" &&
            typeof store?.selectAnnotation === "function"
          ) {
            const ann = store.addAnnotation({ userGenerate: true });
            if (ann?.id) store.selectAnnotation(ann.id);
          }
        }
      } catch (e) {
        // Selection is best effort; the editor is still usable, so only log the problem.
        console.warn("[lsf] failed to select the initial annotation", e);
      }
      postToParent("LS_READY", { taskId: task?.id || null });
    },
    onSubmitAnnotation: reportAnnotation,
    // The "update" interface fires this callback for annotations that already exist.
    onUpdateAnnotation: reportAnnotation,
  });
}
// Command dispatcher for messages from the parent page. Messages from another origin
// or without a `type` are ignored; any error raised by a command is reported as LS_ERROR.
window.addEventListener("message", (event) => {
  if (event.origin !== ORIGIN) return;

  const { type, payload } = event.data || {};
  if (!type) return;

  try {
    switch (type) {
      case "LS_INIT":
        initLabelStudio(payload || {});
        break;
      case "LS_EXPORT":
        postToParent("LS_EXPORT_RESULT", exportSelectedAnnotation());
        break;
      case "LS_RESET":
        destroyLabelStudio();
        postToParent("LS_RESET_DONE", {});
        break;
      case "LS_PING":
        postToParent("LS_PONG", {});
        break;
      default:
        // Unknown message types are ignored.
        break;
    }
  } catch (e) {
    postToParent("LS_ERROR", { message: e?.message || String(e) });
  }
});
postToParent("LS_IFRAME_READY", {});
})();
</script>
</body>
</html>

View File

@@ -0,0 +1,346 @@
import { useEffect, useMemo, useRef, useState } from "react";
import { App, Button, Card, List, Spin, Typography } from "antd";
import { LeftOutlined, ReloadOutlined, SaveOutlined } from "@ant-design/icons";
import { useNavigate, useParams } from "react-router";
import {
getEditorProjectInfoUsingGet,
getEditorTaskUsingGet,
listEditorTasksUsingGet,
upsertEditorAnnotationUsingPut,
} from "../annotation.api";
type EditorProjectInfo = {
projectId: string;
datasetId: string;
templateId?: string | null;
labelConfig?: string | null;
supported: boolean;
unsupportedReason?: string | null;
};
type EditorTaskListItem = {
fileId: string;
fileName: string;
fileType?: string | null;
hasAnnotation: boolean;
annotationUpdatedAt?: string | null;
};
type LsfMessage = {
type?: string;
payload?: any;
};
const LSF_IFRAME_SRC = "/lsf/lsf.html";
export default function LabelStudioTextEditor() {
const { projectId = "" } = useParams();
const navigate = useNavigate();
const { message } = App.useApp();
const origin = useMemo(() => window.location.origin, []);
const iframeRef = useRef<HTMLIFrameElement | null>(null);
const initSeqRef = useRef(0);
const [loadingProject, setLoadingProject] = useState(true);
const [loadingTasks, setLoadingTasks] = useState(false);
const [loadingTaskDetail, setLoadingTaskDetail] = useState(false);
const [saving, setSaving] = useState(false);
const [iframeReady, setIframeReady] = useState(false);
const [project, setProject] = useState<EditorProjectInfo | null>(null);
const [tasks, setTasks] = useState<EditorTaskListItem[]>([]);
const [selectedFileId, setSelectedFileId] = useState<string>("");
// Posts a `{ type, payload }` message to the editor iframe, restricted to this page's
// origin. Does nothing while the iframe window is not available yet.
const postToIframe = (type: string, payload?: any) => {
  const target = iframeRef.current?.contentWindow;
  if (target) {
    target.postMessage({ type, payload }, origin);
  }
};
// Fetches the editor metadata of the current project into `project`.
// A failed request or a response without a projectId shows an error toast and
// stores null; the loading flag is cleared in every case.
const loadProject = async () => {
  const reportFailure = () => {
    message.error("获取标注项目信息失败");
    setProject(null);
  };

  setLoadingProject(true);
  try {
    const resp = (await getEditorProjectInfoUsingGet(projectId)) as any;
    const info = resp?.data as EditorProjectInfo | undefined;
    if (info?.projectId) {
      setProject(info);
    } else {
      reportFailure();
    }
  } catch (err) {
    console.error(err);
    reportFailure();
  } finally {
    setLoadingProject(false);
  }
};
// Loads the first page (up to 200 entries) of the project's files into `tasks`.
//
// `silent` suppresses the list spinner and the error toast (used for the background
// refresh after a save). When no file is selected yet, the first file becomes selected.
const loadTasks = async (silent = false) => {
  if (!projectId) return;
  if (!silent) setLoadingTasks(true);
  try {
    const resp = (await listEditorTasksUsingGet(projectId, { page: 0, size: 200 })) as any;
    const content = resp?.data?.content;
    const items: EditorTaskListItem[] = Array.isArray(content) ? content : [];
    setTasks(items);
    if (items.length > 0) {
      // Functional update: this function is also reached through the window "message"
      // handler, whose closure can hold an outdated `selectedFileId`. Reading the state
      // from the updater keeps the user's current selection after a save instead of
      // jumping back to the first file.
      setSelectedFileId((current) => current || items[0].fileId);
    }
  } catch (e) {
    console.error(e);
    if (!silent) message.error("获取文件列表失败");
    setTasks([]);
  } finally {
    if (!silent) setLoadingTasks(false);
  }
};
// Fetches the task for `fileId` and asks the iframe to (re)initialise the editor with it.
//
// Does nothing unless the project is supported, has a label config and the iframe has
// reported ready. Each call takes a sequence number; when a newer call has started
// meanwhile, the older one is dropped without touching the UI (no toast, no LS_INIT,
// no spinner change).
const initEditorForFile = async (fileId: string) => {
  if (!project?.supported) return;
  if (!project?.labelConfig) {
    message.error("该项目未绑定标注模板,无法加载编辑器");
    return;
  }
  if (!iframeReady) return;

  const seq = ++initSeqRef.current;
  const isCurrent = () => seq === initSeqRef.current;

  setLoadingTaskDetail(true);
  try {
    const resp = (await getEditorTaskUsingGet(projectId, fileId)) as any;
    // Check staleness before anything user-visible, including the error toast below.
    if (!isCurrent()) return;

    const task = resp?.data?.task;
    if (!task) {
      message.error("获取任务详情失败");
      return;
    }

    postToIframe("LS_INIT", {
      labelConfig: project.labelConfig,
      task,
      user: { id: "datamate" },
      interfaces: [
        "panel",
        "update",
        "submit",
        "controls",
        "side-column",
        "annotations:menu",
        "annotations:add-new",
        "annotations:delete",
      ],
      selectedAnnotationIndex: 0,
      allowCreateEmptyAnnotation: true,
    });
  } catch (e) {
    console.error(e);
    // A failure of a superseded request is logged but not shown to the user.
    if (isCurrent()) message.error("加载编辑器失败");
  } finally {
    if (isCurrent()) setLoadingTaskDetail(false);
  }
};
// Persists an annotation exported by the iframe (`{ taskId, annotation }`) and then
// silently refreshes the file list. A payload lacking either field is rejected with
// an error toast.
const saveFromExport = async (payload: any) => {
  const { taskId, annotation } = payload ?? {};
  const isComplete = Boolean(taskId) && Boolean(annotation);
  if (!isComplete) {
    message.error("导出标注失败:缺少 taskId/annotation");
    return;
  }

  setSaving(true);
  try {
    const fileId = String(taskId);
    await upsertEditorAnnotationUsingPut(projectId, fileId, { annotation });
    message.success("标注已保存");
    await loadTasks(true);
  } catch (err) {
    console.error(err);
    message.error("保存失败");
  } finally {
    setSaving(false);
  }
};
// Asks the iframe to export the selected annotation; warns when no file is selected.
const requestExport = () => {
  if (selectedFileId) {
    postToIframe("LS_EXPORT", {});
    return;
  }
  message.warning("请先选择文件");
};
// Whenever the route's projectId changes: reset the per-project state (iframe readiness,
// project, file list, selection, init sequence counter) and load the new project.
useEffect(() => {
setIframeReady(false);
setProject(null);
setTasks([]);
setSelectedFileId("");
initSeqRef.current = 0;
if (projectId) loadProject();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [projectId]);
// Load the file list once a supported project has been loaded.
useEffect(() => {
if (!project?.supported) return;
loadTasks();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [project?.projectId, project?.supported]);
// (Re)initialise the editor when the selected file changes or the iframe reports ready.
// initEditorForFile itself returns early while the iframe is not ready.
useEffect(() => {
if (!selectedFileId) return;
initEditorForFile(selectedFileId);
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [selectedFileId, iframeReady]);
// Listens for messages posted by the editor iframe (same origin only):
//   LS_IFRAME_READY            -> mark the iframe as ready
//   LS_EXPORT_RESULT/LS_SUBMIT -> save the exported annotation
//   LS_ERROR                   -> show the reported error
useEffect(() => {
  const handler = (event: MessageEvent<LsfMessage>) => {
    if (event.origin !== origin) return;
    const msg = event.data || {};
    if (!msg?.type) return;

    switch (msg.type) {
      case "LS_IFRAME_READY":
        setIframeReady(true);
        return;
      case "LS_EXPORT_RESULT":
        saveFromExport(msg.payload);
        return;
      // Also accept annotations the iframe reports by itself on submit (if enabled).
      case "LS_SUBMIT":
        saveFromExport(msg.payload);
        return;
      case "LS_ERROR":
        message.error(msg.payload?.message || "编辑器发生错误");
        return;
      default:
        return;
    }
  };

  window.addEventListener("message", handler);
  return () => window.removeEventListener("message", handler);
  // projectId is listed so the handler is re-registered with a saveFromExport that
  // targets the current project after the route parameter changes.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [message, origin, projectId]);
// Project metadata is still loading: render only a centered spinner.
if (loadingProject) {
return (
<div className="h-full flex items-center justify-center">
<Spin />
</div>
);
}
// No project available (loading failed or returned no projectId): render a card
// with a button that navigates back to the annotation list.
if (!project) {
return (
<div className="h-full flex items-center justify-center">
<Card>
<Typography.Text></Typography.Text>
<div className="mt-4 flex justify-end">
<Button onClick={() => navigate("/data/annotation")}></Button>
</div>
</Card>
</div>
);
}
// Project type is not supported by the embedded editor: show the reason reported by
// the backend (or a default text) and a button back to the annotation list.
if (!project.supported) {
return (
<div className="h-full flex items-center justify-center">
<Card style={{ maxWidth: 640 }}>
<Typography.Title level={4}></Typography.Title>
<Typography.Paragraph type="secondary">
{project.unsupportedReason || "当前仅支持文本(TEXT)项目的内嵌编辑器。"}
</Typography.Paragraph>
<div className="flex justify-end gap-2">
<Button onClick={() => navigate("/data/annotation")}></Button>
</div>
</Card>
</div>
);
}
// Main layout: a toolbar (back / reload list / save), the file list on the left and the
// editor iframe on the right. Save stays disabled until the iframe is ready and a file
// is selected; a spinner overlay covers the iframe while a task is being loaded.
return (
<div className="h-full flex flex-col gap-3">
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<Button icon={<LeftOutlined />} onClick={() => navigate("/data/annotation")}>
</Button>
<Typography.Title level={4} style={{ margin: 0 }}>
</Typography.Title>
</div>
<div className="flex items-center gap-2">
<Button icon={<ReloadOutlined />} loading={loadingTasks} onClick={() => loadTasks()}>
</Button>
<Button
type="primary"
icon={<SaveOutlined />}
loading={saving}
disabled={!iframeReady || !selectedFileId}
onClick={requestExport}
>
</Button>
</div>
</div>
<div className="flex gap-3 flex-1 min-h-0">
<Card title="文件" style={{ width: 320 }} bodyStyle={{ padding: 0, height: "100%", overflow: "auto" }}>
<List
loading={loadingTasks}
dataSource={tasks}
renderItem={(item) => (
<List.Item
key={item.fileId}
style={{
cursor: "pointer",
background: item.fileId === selectedFileId ? "#f0f5ff" : undefined,
paddingLeft: 12,
paddingRight: 12,
}}
onClick={() => setSelectedFileId(item.fileId)}
>
<div className="flex flex-col w-full">
<div className="flex items-center justify-between gap-2">
<Typography.Text ellipsis>{item.fileName}</Typography.Text>
<Typography.Text type="secondary" style={{ whiteSpace: "nowrap" }}>
{item.hasAnnotation ? "已标注" : "未标注"}
</Typography.Text>
</div>
{item.annotationUpdatedAt && (
<Typography.Text type="secondary" style={{ fontSize: 12 }}>
: {item.annotationUpdatedAt}
</Typography.Text>
)}
</div>
</List.Item>
)}
/>
</Card>
<Card title="编辑器" className="flex-1" bodyStyle={{ padding: 0, height: "100%", overflow: "hidden" }}>
<div className="relative h-full">
{loadingTaskDetail && (
<div className="absolute inset-0 z-10 flex items-center justify-center bg-white/70">
<Spin />
</div>
)}
<iframe
ref={iframeRef}
title="Label Studio Frontend"
src={LSF_IFRAME_SRC}
className="w-full h-full border-0"
/>
</div>
</Card>
</div>
</div>
);
}

View File

@@ -1,15 +1,16 @@
import { useState, useEffect } from "react";
import { Card, Button, Table, message, Modal, Tabs, Tag, Progress, Tooltip } from "antd";
import {
PlusOutlined,
EditOutlined,
DeleteOutlined,
SyncOutlined,
} from "@ant-design/icons";
import { SearchControls } from "@/components/SearchControls";
import CardView from "@/components/CardView";
import type { AnnotationTask } from "../annotation.model";
import useFetchData from "@/hooks/useFetchData";
import { useState, useEffect } from "react";
import { Card, Button, Table, message, Modal, Tabs, Tag, Progress, Tooltip } from "antd";
import {
PlusOutlined,
EditOutlined,
DeleteOutlined,
SyncOutlined,
} from "@ant-design/icons";
import { useNavigate } from "react-router";
import { SearchControls } from "@/components/SearchControls";
import CardView from "@/components/CardView";
import type { AnnotationTask } from "../annotation.model";
import useFetchData from "@/hooks/useFetchData";
import {
deleteAnnotationTaskByIdUsingDelete,
queryAnnotationTasksUsingGet,
@@ -39,12 +40,13 @@ const AUTO_MODEL_SIZE_LABELS: Record<string, string> = {
x: "YOLOv8x (最精确)",
};
export default function DataAnnotation() {
// return <DevelopmentInProgress showTime="2025.10.30" />;
const [activeTab, setActiveTab] = useState("tasks");
const [viewMode, setViewMode] = useState<"list" | "card">("list");
const [showCreateDialog, setShowCreateDialog] = useState(false);
const [autoTasks, setAutoTasks] = useState<any[]>([]);
export default function DataAnnotation() {
// return <DevelopmentInProgress showTime="2025.10.30" />;
const navigate = useNavigate();
const [activeTab, setActiveTab] = useState("tasks");
const [viewMode, setViewMode] = useState<"list" | "card">("list");
const [showCreateDialog, setShowCreateDialog] = useState(false);
const [autoTasks, setAutoTasks] = useState<any[]>([]);
const {
loading,
@@ -56,9 +58,8 @@ export default function DataAnnotation() {
handleKeywordChange,
} = useFetchData(queryAnnotationTasksUsingGet, mapAnnotationTask, 30000, true, [], 0);
const [labelStudioBase, setLabelStudioBase] = useState<string | null>(null);
const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
const [selectedRows, setSelectedRows] = useState<any[]>([]);
const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
const [selectedRows, setSelectedRows] = useState<any[]>([]);
// 拉取自动标注任务(供轮询和创建成功后立即刷新复用)
const refreshAutoTasks = async (silent = false) => {
@@ -76,71 +77,24 @@ export default function DataAnnotation() {
}
};
// prefetch config on mount so clicking annotate is fast and we know whether base URL exists
// useEffect ensures this runs once
useEffect(() => {
let mounted = true;
(async () => {
try {
const baseUrl = `http://${window.location.hostname}:${parseInt(window.location.port) + 1}`;
if (mounted) setLabelStudioBase(baseUrl);
} catch (e) {
if (mounted) setLabelStudioBase(null);
}
})();
return () => {
mounted = false;
};
}, []);
// 自动标注任务轮询(用于在同一表格中展示处理进度)
useEffect(() => {
refreshAutoTasks();
const timer = setInterval(() => refreshAutoTasks(true), 3000);
// 自动标注任务轮询(用于在同一表格中展示处理进度)
useEffect(() => {
refreshAutoTasks();
const timer = setInterval(() => refreshAutoTasks(true), 3000);
return () => {
clearInterval(timer);
};
}, []);
const handleAnnotate = (task: AnnotationTask) => {
// Open Label Studio project page in a new tab
(async () => {
try {
// prefer using labeling project id already present on the task
// `mapAnnotationTask` normalizes upstream fields into `labelingProjId`/`projId`,
// so prefer those and fall back to the task id if necessary.
let labelingProjId = (task as any).labelingProjId || (task as any).projId || undefined;
// no fallback external mapping lookup; rely on normalized fields from mapAnnotationTask
// use prefetched base if available
const base = labelStudioBase;
// no debug logging in production
if (labelingProjId) {
// only open external Label Studio when we have a configured base url
if (base) {
const target = `${base}/projects/${labelingProjId}/data`;
window.open(target, "_blank");
} else {
// no external Label Studio URL configured — do not perform internal redirect in this version
message.error("无法跳转到 Label Studio:未配置 Label Studio 基础 URL");
return;
}
} else {
// no labeling project id available — do not attempt internal redirect in this version
message.error("无法跳转到 Label Studio:该映射未绑定标注项目");
return;
}
} catch (error) {
// on error, surface a user-friendly message instead of redirecting
message.error("无法跳转到 Label Studio:发生错误,请检查配置或控制台日志");
return;
}
})();
};
const handleAnnotate = (task: AnnotationTask) => {
const projectId = (task as any)?.id;
if (!projectId) {
message.error("无法进入标注:缺少标注项目ID");
return;
}
navigate(`/data/annotation/annotate/${projectId}`);
};
const handleDelete = (task: AnnotationTask) => {
Modal.confirm({

View File

@@ -62,6 +62,30 @@ export function getAutoAnnotationTaskStatusUsingGet(taskId: string) {
return get(`/api/annotation/auto/${taskId}/status`);
}
export function downloadAutoAnnotationResultUsingGet(taskId: string) {
return download(`/api/annotation/auto/${taskId}/download`);
}
export function downloadAutoAnnotationResultUsingGet(taskId: string) {
return download(`/api/annotation/auto/${taskId}/download`);
}
// =====================
// Label Studio Editor(内嵌版)
// =====================
/** GET the embedded-editor metadata of one annotation project. */
export function getEditorProjectInfoUsingGet(projectId: string) {
  const url = `/api/annotation/editor/projects/${projectId}`;
  return get(url);
}
/** GET the editor task (file) list of a project; `params` is passed through to `get`. */
export function listEditorTasksUsingGet(projectId: string, params?: any) {
  const url = `/api/annotation/editor/projects/${projectId}/tasks`;
  return get(url, params);
}
/** GET the editor task of one file in a project. */
export function getEditorTaskUsingGet(projectId: string, fileId: string) {
  const url = `/api/annotation/editor/projects/${projectId}/tasks/${fileId}`;
  return get(url);
}
/** PUT (create or overwrite) the annotation of one file in a project. */
export function upsertEditorAnnotationUsingPut(
  projectId: string,
  fileId: string,
  data: any
) {
  const url = `/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`;
  return put(url, data);
}

View File

@@ -16,6 +16,7 @@ import CleansingTemplateCreate from "@/pages/DataCleansing/Create/CreateTemplate
import DataAnnotation from "@/pages/DataAnnotation/Home/DataAnnotation";
import AnnotationTaskCreate from "@/pages/DataAnnotation/Create/CreateTask";
import LabelStudioTextEditor from "@/pages/DataAnnotation/Annotate/LabelStudioTextEditor";
import DataSynthesisPage from "@/pages/SynthesisTask/DataSynthesis";
import InstructionTemplateCreate from "@/pages/SynthesisTask/CreateTemplate";
@@ -145,6 +146,10 @@ const router = createBrowserRouter([
path: "create-task",
Component: AnnotationTaskCreate,
},
{
path: "annotate/:projectId",
Component: LabelStudioTextEditor,
},
],
},
{

View File

@@ -62,5 +62,11 @@ class Settings(BaseSettings):
# DataMate
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
# DataMate Backend (Java) - 用于通过“下载/预览接口”读取文件内容
datamate_backend_base_url: str = "http://datamate-backend:8080/api"
# 标注编辑器(Label Studio Editor)相关
editor_max_text_bytes: int = 2 * 1024 * 1024 # 2MB,避免一次加载超大文本卡死前端
# 全局设置实例
settings = Settings()

View File

@@ -13,7 +13,8 @@ from .user_management import (
from .annotation_management import (
AnnotationTemplate,
LabelingProject
LabelingProject,
AnnotationResult
)
from .data_evaluation import (
@@ -30,6 +31,7 @@ __all__ = [
"User",
"AnnotationTemplate",
"LabelingProject",
"AnnotationResult",
"EvaluationTask",
"EvaluationItem",
]

View File

@@ -33,10 +33,10 @@ class AnnotationTemplate(Base):
"""检查是否已被软删除"""
return self.deleted_at is not None
class LabelingProject(Base):
"""标注项目模型"""
__tablename__ = "t_dm_labeling_projects"
class LabelingProject(Base):
"""标注项目模型"""
__tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
@@ -53,13 +53,29 @@ class LabelingProject(Base):
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
class AutoAnnotationTask(Base):
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
class AnnotationResult(Base):
"""Annotation result model (one final label set per annotator; raw Label Studio annotation JSON)."""
__tablename__ = "t_dm_annotation_results"
# Primary key: a UUID string generated on insert.
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
# Ids of the owning labeling project and of the annotated dataset file.
# NOTE(review): no foreign key or unique constraint on (project_id, file_id) is declared
# here; if "one result per project + file" must hold, confirm it is enforced elsewhere.
project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
# The annotation itself, stored as JSON.
annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
# Timestamps default to the database's current timestamp; updated_at also refreshes on update.
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
class AutoAnnotationTask(Base):
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
__tablename__ = "t_dm_auto_annotation_tasks"
@@ -92,4 +108,4 @@ class AutoAnnotationTask(Base):
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
return self.deleted_at is not None

View File

@@ -2,9 +2,10 @@ from fastapi import APIRouter
from .config import router as about_router
from .project import router as project_router
from .task import router as task_router
from .template import router as template_router
from .auto import router as auto_router
from .task import router as task_router
from .template import router as template_router
from .auto import router as auto_router
from .editor import router as editor_router
router = APIRouter(
prefix="/annotation",
@@ -13,6 +14,7 @@ router = APIRouter(
router.include_router(about_router)
router.include_router(project_router)
router.include_router(task_router)
router.include_router(template_router)
router.include_router(auto_router)
router.include_router(task_router)
router.include_router(template_router)
router.include_router(auto_router)
router.include_router(editor_router)

View File

@@ -0,0 +1,90 @@
"""
Label Studio Editor(前端嵌入式)接口
说明:
- 不依赖 Label Studio Server;仅复用其“编辑器”前端库
- DataMate 负责提供 tasks/annotations 数据与保存能力
- 当前为 TEXT POC:只支持 dataset_type=TEXT 的项目
"""
from __future__ import annotations
from fastapi import APIRouter, Depends, Query, Path
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.session import get_db
from app.module.annotation.schema.editor import (
EditorProjectInfo,
EditorTaskListResponse,
EditorTaskResponse,
UpsertAnnotationRequest,
UpsertAnnotationResponse,
)
from app.module.annotation.service.editor import AnnotationEditorService
from app.module.shared.schema import StandardResponse
logger = get_logger(__name__)
router = APIRouter(
prefix="/editor",
tags=["annotation/editor"],
)
# GET /projects/{project_id}: editor metadata of one labeling project,
# wrapped in the standard success envelope.
@router.get(
    "/projects/{project_id}",
    response_model=StandardResponse[EditorProjectInfo],
)
async def get_editor_project_info(
    project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
    db: AsyncSession = Depends(get_db),
):
    project_info = await AnnotationEditorService(db).get_project_info(project_id)
    return StandardResponse(code=200, message="success", data=project_info)
# GET /projects/{project_id}/tasks: one page of editor tasks for a project.
# `page` is zero-based; `size` is limited to 1..200 by the query validation.
@router.get(
    "/projects/{project_id}/tasks",
    response_model=StandardResponse[EditorTaskListResponse],
)
async def list_editor_tasks(
    project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
    page: int = Query(0, ge=0, description="页码(从0开始)"),
    size: int = Query(50, ge=1, le=200, description="每页大小"),
    db: AsyncSession = Depends(get_db),
):
    task_page = await AnnotationEditorService(db).list_tasks(project_id, page=page, size=size)
    return StandardResponse(code=200, message="success", data=task_page)
# GET /projects/{project_id}/tasks/{file_id}: the editor task of a single file.
@router.get(
    "/projects/{project_id}/tasks/{file_id}",
    response_model=StandardResponse[EditorTaskResponse],
)
async def get_editor_task(
    project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
    file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
    db: AsyncSession = Depends(get_db),
):
    editor_task = await AnnotationEditorService(db).get_task(project_id, file_id)
    return StandardResponse(code=200, message="success", data=editor_task)
# PUT /projects/{project_id}/tasks/{file_id}/annotation: create or overwrite the
# annotation of a file from the request body.
@router.put(
    "/projects/{project_id}/tasks/{file_id}/annotation",
    response_model=StandardResponse[UpsertAnnotationResponse],
)
async def upsert_editor_annotation(
    request: UpsertAnnotationRequest,
    project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
    file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
    db: AsyncSession = Depends(get_db),
):
    saved = await AnnotationEditorService(db).upsert_annotation(project_id, file_id, request)
    return StandardResponse(code=200, message="success", data=saved)

View File

@@ -30,6 +30,15 @@ from .mapping import (
DeleteDatasetResponse,
)
from .editor import (
EditorProjectInfo,
EditorTaskListItem,
EditorTaskListResponse,
EditorTaskResponse,
UpsertAnnotationRequest,
UpsertAnnotationResponse,
)
# Rebuild model to resolve forward references
DatasetMappingResponse.model_rebuild()
@@ -51,4 +60,10 @@ __all__ = [
"UpdateAnnotationTemplateRequest",
"AnnotationTemplateResponse",
"AnnotationTemplateListResponse",
]
"EditorProjectInfo",
"EditorTaskListItem",
"EditorTaskListResponse",
"EditorTaskResponse",
"UpsertAnnotationRequest",
"UpsertAnnotationResponse",
]

View File

@@ -0,0 +1,83 @@
"""
标注编辑器(Label Studio Editor)接口模型
设计目标:
- 单人单份最终标签:每个 project_id + file_id 只维护 1 条最终标注结果
- 完全兼容 Label Studio:标注结果以 annotation 原始 JSON 形式存储与返回
"""
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field, ConfigDict
# Editor metadata of a labeling project. Fields use camelCase aliases;
# populate_by_name also allows construction by the snake_case field names.
# The class docstring is kept verbatim because pydantic publishes it as the
# model's schema description.
class EditorProjectInfo(BaseModel):
"""编辑器项目元信息"""
project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID(t_dm_labeling_projects.id)")
dataset_id: str = Field(..., alias="datasetId", description="数据集ID(t_dm_datasets.id)")
template_id: Optional[str] = Field(None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)")
# Label Studio XML config; optional.
label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置")
# Whether the embedded editor supports this project; the reason field is optional.
supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器")
unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因(当 supported=false)")
model_config = ConfigDict(populate_by_name=True)
# One entry of the editor task list; per its docstring it corresponds to one dataset file.
# Fields use camelCase aliases; populate_by_name also allows the snake_case names.
# The class docstring is kept verbatim because pydantic publishes it as the
# model's schema description.
class EditorTaskListItem(BaseModel):
"""编辑器任务列表条目(对应一个数据集文件)"""
file_id: str = Field(..., alias="fileId", description="文件ID")
file_name: str = Field(..., alias="fileName", description="文件名")
file_type: Optional[str] = Field(None, alias="fileType", description="文件类型")
# Annotation status of the file and, optionally, when its annotation was last updated.
has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注")
annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
model_config = ConfigDict(populate_by_name=True)
# Paged editor task list: the items of the page plus paging totals.
# The class docstring is kept verbatim because pydantic publishes it as the
# model's schema description.
class EditorTaskListResponse(BaseModel):
"""编辑器任务列表响应"""
content: List[EditorTaskListItem] = Field(..., description="任务列表")
total_elements: int = Field(..., alias="totalElements", description="总条数")
total_pages: int = Field(..., alias="totalPages", description="总页数")
# Per the field description, `page` is zero-based.
page: int = Field(..., description="页码(从0开始)")
size: int = Field(..., description="每页大小")
model_config = ConfigDict(populate_by_name=True)
# Editor task detail: a free-form task dictionary plus the optional time the
# annotation was last updated.
# The class docstring is kept verbatim because pydantic publishes it as the
# model's schema description.
class EditorTaskResponse(BaseModel):
"""编辑器任务详情(可直接喂给 Label Studio Editor 的 task 对象)"""
task: Dict[str, Any] = Field(..., description="Label Studio task 对象")
annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
model_config = ConfigDict(populate_by_name=True)
# Request body for saving (creating or overwriting) an annotation.
# The class docstring is kept verbatim because pydantic publishes it as the
# model's schema description.
class UpsertAnnotationRequest(BaseModel):
"""保存/覆盖最终标注(Label Studio annotation 原始对象)"""
# Free-form annotation dictionary.
annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象(包含 result 等)")
# Optional optimistic-lock token: per its description, a mismatch with the stored
# updated_at should yield HTTP 409. NOTE(review): the enforcement is not visible in
# this model — confirm in the service.
expected_updated_at: Optional[datetime] = Field(
None,
alias="expectedUpdatedAt",
description="乐观锁:若提供则要求与当前记录 updated_at 一致,否则返回 409",
)
model_config = ConfigDict(populate_by_name=True)
# Response of the save operation: the id of the stored annotation result and its
# update time.
# The class docstring is kept verbatim because pydantic publishes it as the
# model's schema description.
class UpsertAnnotationResponse(BaseModel):
"""保存/覆盖最终标注响应"""
annotation_id: str = Field(..., alias="annotationId", description="标注结果ID(t_dm_annotation_results.id)")
updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间")
model_config = ConfigDict(populate_by_name=True)

View File

@@ -0,0 +1,295 @@
"""
标注编辑器(Label Studio Editor)服务
职责:
- 解析 DataMate 标注项目(t_dm_labeling_projects)
- 以“文件下载/预览接口”读取文本内容,构造 Label Studio task
- 以原始 annotation JSON 形式 upsert 最终标注结果(单人单份)
"""
from __future__ import annotations

import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

import httpx
from fastapi import HTTPException
from sqlalchemy import func, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import settings
from app.core.logging import get_logger
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
from app.module.annotation.schema.editor import (
    EditorProjectInfo,
    EditorTaskListItem,
    EditorTaskListResponse,
    EditorTaskResponse,
    UpsertAnnotationRequest,
    UpsertAnnotationResponse,
)
from app.module.annotation.service.template import AnnotationTemplateService
logger = get_logger(__name__)
class AnnotationEditorService:
    """Label Studio Editor integration service (TEXT proof-of-concept).

    Resolves DataMate labeling projects, builds Label Studio task objects from
    dataset files fetched through the backend download endpoint, and upserts
    one raw Label Studio annotation per (project, file) pair.
    """

    def __init__(self, db: AsyncSession):
        """Bind the service to a database session.

        Args:
            db: Async SQLAlchemy session used for every query and commit.
        """
        self.db = db
        self.template_service = AnnotationTemplateService()

    @staticmethod
    def _utcnow_naive() -> datetime:
        """Return the current UTC time as a naive datetime.

        Equivalent to the deprecated ``datetime.utcnow()``.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _to_naive_utc(value: datetime) -> datetime:
        """Return ``value`` as a naive datetime expressed in UTC.

        Naive inputs are returned unchanged. Aware inputs are converted to UTC
        before the offset is dropped, so ``08:00+08:00`` becomes ``00:00``
        instead of being mistaken for ``08:00``.
        """
        if value.tzinfo is None:
            return value
        return value.astimezone(timezone.utc).replace(tzinfo=None)

    async def _get_project_or_404(self, project_id: str) -> LabelingProject:
        """Load a labeling project that has not been soft-deleted.

        Raises:
            HTTPException: 404 when no matching project exists.
        """
        result = await self.db.execute(
            select(LabelingProject).where(
                LabelingProject.id == project_id,
                LabelingProject.deleted_at.is_(None),
            )
        )
        project = result.scalar_one_or_none()
        if not project:
            raise HTTPException(status_code=404, detail=f"标注项目不存在: {project_id}")
        return project

    async def _get_dataset_type(self, dataset_id: str) -> Optional[str]:
        """Return the dataset's ``dataset_type``, or None when the dataset is missing."""
        result = await self.db.execute(
            select(Dataset.dataset_type).where(Dataset.id == dataset_id)
        )
        return result.scalar_one_or_none()

    async def _get_label_config(self, template_id: Optional[str]) -> Optional[str]:
        """Return the template's ``label_config``, or None when it cannot be resolved."""
        if not template_id:
            return None
        template = await self.template_service.get_template(self.db, template_id)
        return getattr(template, "label_config", None) if template else None

    async def get_project_info(self, project_id: str) -> EditorProjectInfo:
        """Describe a project for the embedded editor.

        Only datasets whose type is ``TEXT`` (case-insensitive) are reported as
        supported; otherwise ``unsupportedReason`` explains why.

        Raises:
            HTTPException: 404 when the project does not exist.
        """
        project = await self._get_project_or_404(project_id)
        dataset_type = await self._get_dataset_type(project.dataset_id)
        supported = (dataset_type or "").upper() == "TEXT"
        unsupported_reason = None
        if not supported:
            unsupported_reason = f"当前仅支持 TEXT,项目数据类型为: {dataset_type or 'UNKNOWN'}"
        label_config = await self._get_label_config(project.template_id)
        return EditorProjectInfo(
            projectId=project.id,
            datasetId=project.dataset_id,
            templateId=project.template_id,
            labelConfig=label_config,
            supported=supported,
            unsupportedReason=unsupported_reason,
        )

    async def list_tasks(self, project_id: str, page: int = 0, size: int = 50) -> EditorTaskListResponse:
        """List the project's dataset files as editor tasks, one page at a time.

        Args:
            project_id: DataMate labeling project ID.
            page: Zero-based page index; must not be negative.
            size: Page size; must not be negative.

        Returns:
            The requested page, with each item flagged by whether an annotation
            row exists for it in this project.

        Raises:
            HTTPException: 400 for negative paging arguments, 404 when the
                project does not exist.
        """
        # Reject values that would otherwise reach the database as a negative
        # OFFSET/LIMIT and surface as an opaque server error.
        if page < 0 or size < 0:
            raise HTTPException(status_code=400, detail="分页参数非法: page 与 size 不能为负数")

        project = await self._get_project_or_404(project_id)

        count_result = await self.db.execute(
            select(func.count()).select_from(DatasetFiles).where(
                DatasetFiles.dataset_id == project.dataset_id
            )
        )
        total = int(count_result.scalar() or 0)

        files_result = await self.db.execute(
            select(DatasetFiles)
            .where(DatasetFiles.dataset_id == project.dataset_id)
            # The id tie-breaker keeps paging stable when several files share
            # the same created_at value.
            .order_by(DatasetFiles.created_at.desc(), DatasetFiles.id.asc())
            .offset(page * size)
            .limit(size)
        )
        files = files_result.scalars().all()
        file_ids = [str(f.id) for f in files]  # type: ignore[arg-type]

        # file_id -> updated_at for every annotation row found. Presence of the
        # key means "has annotation", even if updated_at itself is NULL.
        updated_map: Dict[str, Optional[datetime]] = {}
        if file_ids:
            ann_result = await self.db.execute(
                select(AnnotationResult.file_id, AnnotationResult.updated_at).where(
                    AnnotationResult.project_id == project_id,
                    AnnotationResult.file_id.in_(file_ids),
                )
            )
            for file_id, updated_at in ann_result.all():
                if file_id:
                    updated_map[str(file_id)] = updated_at

        items: List[EditorTaskListItem] = []
        for f in files:
            fid = str(f.id)  # type: ignore[arg-type]
            items.append(
                EditorTaskListItem(
                    fileId=fid,
                    fileName=str(getattr(f, "file_name", "")),
                    fileType=getattr(f, "file_type", None),
                    hasAnnotation=fid in updated_map,
                    annotationUpdatedAt=updated_map.get(fid),
                )
            )

        total_pages = (total + size - 1) // size if size > 0 else 0
        return EditorTaskListResponse(
            content=items,
            totalElements=total,
            totalPages=total_pages,
            page=page,
            size=size,
        )

    async def _fetch_text_content_via_download_api(self, dataset_id: str, file_id: str) -> str:
        """Download a file through the backend download endpoint and decode it as text.

        The body is streamed and the transfer is aborted once it exceeds
        ``settings.editor_max_text_bytes``, so an oversized file is never fully
        buffered in memory.

        Returns:
            The content decoded as UTF-8; undecodable bytes are replaced.

        Raises:
            HTTPException: 413 when the file exceeds the size limit, 502 when
                the download endpoint fails or cannot be reached.
        """
        base = settings.datamate_backend_base_url.rstrip("/")
        url = f"{base}/data-management/datasets/{dataset_id}/files/{file_id}/download"
        max_bytes = settings.editor_max_text_bytes

        def too_large() -> HTTPException:
            return HTTPException(
                status_code=413,
                detail=f"文本文件过大,限制 {settings.editor_max_text_bytes} 字节",
            )

        try:
            buffer = bytearray()
            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
                async with client.stream("GET", url) as resp:
                    resp.raise_for_status()

                    # Fast path: trust a well-formed Content-Length to reject
                    # early; a malformed one falls through to the running cap.
                    content_length = resp.headers.get("content-length")
                    if content_length:
                        try:
                            declared_size: Optional[int] = int(content_length)
                        except ValueError:
                            declared_size = None
                        if declared_size is not None and declared_size > max_bytes:
                            raise too_large()

                    async for chunk in resp.aiter_bytes():
                        buffer.extend(chunk)
                        if len(buffer) > max_bytes:
                            raise too_large()

            # TEXT POC: decode as UTF-8 and substitute undecodable bytes.
            return bytes(buffer).decode("utf-8", errors="replace")
        except HTTPException:
            raise
        except httpx.HTTPStatusError as e:
            logger.error(f"读取文本失败: dataset={dataset_id}, file={file_id}, http={e.response.status_code}")
            raise HTTPException(status_code=502, detail="读取文本失败(下载接口返回错误)") from e
        except Exception as e:
            logger.error(f"读取文本失败: dataset={dataset_id}, file={file_id}, err={e}")
            raise HTTPException(status_code=502, detail="读取文本失败(下载接口调用异常)") from e

    async def get_task(self, project_id: str, file_id: str) -> EditorTaskResponse:
        """Build the Label Studio task for one file, including any stored annotation.

        Raises:
            HTTPException: 404 when the project or file is missing (or the file
                belongs to another dataset), 400 when the dataset is not TEXT,
                413/502 when the file content cannot be loaded.
        """
        project = await self._get_project_or_404(project_id)

        # Only TEXT datasets are supported by the embedded editor.
        dataset_type = await self._get_dataset_type(project.dataset_id)
        if (dataset_type or "").upper() != "TEXT":
            raise HTTPException(status_code=400, detail="当前仅支持 TEXT 项目的内嵌编辑器")

        file_result = await self.db.execute(
            select(DatasetFiles).where(
                DatasetFiles.id == file_id,
                DatasetFiles.dataset_id == project.dataset_id,
            )
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")

        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)

        ann_result = await self.db.execute(
            select(AnnotationResult).where(
                AnnotationResult.project_id == project_id,
                AnnotationResult.file_id == file_id,
            )
        )
        ann = ann_result.scalar_one_or_none()

        task: Dict[str, Any] = {
            "id": file_id,
            "data": {
                "text": text_content,
                "file_id": file_id,
                "dataset_id": project.dataset_id,
                "file_name": getattr(file_record, "file_name", ""),
            },
            "annotations": [],
        }

        annotation_updated_at = None
        if ann:
            annotation_updated_at = ann.updated_at
            # The stored annotation is returned as-is (raw Label Studio object).
            task["annotations"] = [ann.annotation]

        return EditorTaskResponse(
            task=task,
            annotationUpdatedAt=annotation_updated_at,
        )

    async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
        """Create or overwrite the single annotation stored for (project, file).

        Args:
            project_id: DataMate labeling project ID.
            file_id: Dataset file ID; must belong to the project's dataset.
            request: Raw annotation plus an optional optimistic-lock timestamp.

        Returns:
            The record ID and its ``updated_at`` after the write.

        Raises:
            HTTPException: 404 when the project or file is missing, 400 when
                ``annotation.result`` is not a list, 409 when the optimistic
                lock fails or a concurrent request created the record first.
        """
        project = await self._get_project_or_404(project_id)

        # Verify the file belongs to the project's dataset.
        file_check = await self.db.execute(
            select(DatasetFiles.id).where(
                DatasetFiles.id == file_id,
                DatasetFiles.dataset_id == project.dataset_id,
            )
        )
        if not file_check.scalar_one_or_none():
            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")

        annotation_payload = dict(request.annotation or {})
        result = annotation_payload.get("result")
        if not isinstance(result, list):
            raise HTTPException(status_code=400, detail="annotation.result 必须为数组")

        existing_result = await self.db.execute(
            select(AnnotationResult).where(
                AnnotationResult.project_id == project_id,
                AnnotationResult.file_id == file_id,
            )
        )
        existing = existing_result.scalar_one_or_none()

        # Timestamps are written as naive UTC by this service.
        now = self._utcnow_naive()

        if existing:
            if request.expected_updated_at and existing.updated_at:
                # Compare both sides as naive UTC so an offset-bearing client
                # timestamp is converted rather than merely stripped.
                stored = self._to_naive_utc(existing.updated_at)
                expected = self._to_naive_utc(request.expected_updated_at)
                if stored != expected:
                    raise HTTPException(status_code=409, detail="标注已被更新,请刷新后重试")

            # Pin annotation.id to the record ID so it stays stable across saves.
            annotation_payload["id"] = existing.id
            existing.annotation = annotation_payload  # type: ignore[assignment]
            existing.updated_at = now  # type: ignore[assignment]
            await self.db.commit()
            await self.db.refresh(existing)
            return UpsertAnnotationResponse(
                annotationId=existing.id,
                updatedAt=existing.updated_at or now,
            )

        new_id = str(uuid.uuid4())
        annotation_payload["id"] = new_id
        record = AnnotationResult(
            id=new_id,
            project_id=project_id,
            file_id=file_id,
            annotation=annotation_payload,
            created_at=now,
            updated_at=now,
        )
        self.db.add(record)
        try:
            await self.db.commit()
        except IntegrityError as exc:
            # A concurrent request inserted the (project_id, file_id) row
            # between our SELECT and this INSERT; report it as a conflict
            # instead of letting the constraint violation become a 500.
            await self.db.rollback()
            raise HTTPException(status_code=409, detail="标注已被更新,请刷新后重试") from exc
        await self.db.refresh(record)
        return UpsertAnnotationResponse(
            annotationId=record.id,
            updatedAt=record.updated_at or now,
        )

View File

@@ -20,27 +20,45 @@ CREATE TABLE t_dm_annotation_templates (
INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';
-- Labeling project table.
-- Each row links a dataset (dataset_id) to a Label Studio project
-- (labeling_project_id) and, optionally, to an annotation template.
-- Rows are soft-deleted by setting deleted_at.
-- Deleting the referenced template sets template_id to NULL (ON DELETE SET NULL).
CREATE TABLE t_dm_labeling_projects (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
name VARCHAR(100) NOT NULL COMMENT '项目名称',
labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
progress JSON COMMENT '项目进度信息',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
INDEX idx_dataset_id (dataset_id),
INDEX idx_template_id (template_id),
INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';
-- 自动标注任务表
CREATE TABLE t_dm_auto_annotation_tasks (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
name VARCHAR(255) NOT NULL COMMENT '任务名称',
-- Labeling project table.
-- Each row links a dataset (dataset_id) to a Label Studio project
-- (labeling_project_id) and, optionally, to an annotation template.
-- Rows are soft-deleted by setting deleted_at.
-- Deleting the referenced template sets template_id to NULL (ON DELETE SET NULL).
CREATE TABLE t_dm_labeling_projects (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
name VARCHAR(100) NOT NULL COMMENT '项目名称',
labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
progress JSON COMMENT '项目进度信息',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
INDEX idx_dataset_id (dataset_id),
INDEX idx_template_id (template_id),
INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';
-- Annotation results table: one final annotation per (project, file), stored
-- as the raw Label Studio annotation JSON.
-- Design notes:
--   1) project_id reuses t_dm_labeling_projects.id (DataMate's internal labeling project ID).
--   2) file_id reuses t_dm_dataset_files.id (dataset file ID).
--   3) annotation holds the Label Studio annotation object (including result)
--      without flattening or conversion.
CREATE TABLE IF NOT EXISTS t_dm_annotation_results (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID(t_dm_labeling_projects.id)',
    file_id VARCHAR(36) NOT NULL COMMENT '文件ID(t_dm_dataset_files.id)',
    annotation JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON(单人单份最终结果)',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    -- Enforces "one annotation per file per project". Its leftmost column also
    -- serves lookups by project_id alone, so no separate project_id index is
    -- declared.
    UNIQUE KEY uk_project_file (project_id, file_id),
    INDEX idx_file_id (file_id),
    INDEX idx_updated_at (updated_at)
) COMMENT='标注结果表(Label Studio兼容,单人单份)';
-- 自动标注任务表
CREATE TABLE t_dm_auto_annotation_tasks (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
name VARCHAR(255) NOT NULL COMMENT '任务名称',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
dataset_name VARCHAR(255) COMMENT '数据集名称(冗余字段,方便查询)',
config JSON NOT NULL COMMENT '任务配置(模型规模、置信度等)',