You've already forked DataMate
feat(annotation): 添加标注数据导出功能
- 新增导出对话框组件,支持多种格式选择 - 实现 JSON、JSONL、CSV、COCO、YOLO 五种导出格式 - 添加导出统计信息显示,包括总文件数和已标注数 - 集成前端导出按钮和后端 API 接口 - 支持仅导出已标注数据和包含原始数据选项 - 实现文件下载和命名功能
This commit is contained in:
@@ -1,16 +1,17 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { Card, Button, Table, message, Modal, Tabs, Tag, Progress, Tooltip } from "antd";
|
||||
import {
|
||||
PlusOutlined,
|
||||
EditOutlined,
|
||||
DeleteOutlined,
|
||||
SyncOutlined,
|
||||
} from "@ant-design/icons";
|
||||
import { useNavigate } from "react-router";
|
||||
import { SearchControls } from "@/components/SearchControls";
|
||||
import CardView from "@/components/CardView";
|
||||
import type { AnnotationTask } from "../annotation.model";
|
||||
import useFetchData from "@/hooks/useFetchData";
|
||||
import { useState, useEffect } from "react";
|
||||
import { Card, Button, Table, message, Modal, Tabs, Tag, Progress, Tooltip } from "antd";
|
||||
import {
|
||||
PlusOutlined,
|
||||
EditOutlined,
|
||||
DeleteOutlined,
|
||||
SyncOutlined,
|
||||
DownloadOutlined,
|
||||
} from "@ant-design/icons";
|
||||
import { useNavigate } from "react-router";
|
||||
import { SearchControls } from "@/components/SearchControls";
|
||||
import CardView from "@/components/CardView";
|
||||
import type { AnnotationTask } from "../annotation.model";
|
||||
import useFetchData from "@/hooks/useFetchData";
|
||||
import {
|
||||
deleteAnnotationTaskByIdUsingDelete,
|
||||
queryAnnotationTasksUsingGet,
|
||||
@@ -20,6 +21,7 @@ import {
|
||||
} from "../annotation.api";
|
||||
import { mapAnnotationTask } from "../annotation.const";
|
||||
import CreateAnnotationTask from "../Create/components/CreateAnnotationTaskDialog";
|
||||
import ExportAnnotationDialog from "./ExportAnnotationDialog";
|
||||
import { ColumnType } from "antd/es/table";
|
||||
import { TemplateList } from "../Template";
|
||||
// Note: DevelopmentInProgress intentionally not used here
|
||||
@@ -40,13 +42,14 @@ const AUTO_MODEL_SIZE_LABELS: Record<string, string> = {
|
||||
x: "YOLOv8x (最精确)",
|
||||
};
|
||||
|
||||
export default function DataAnnotation() {
|
||||
// return <DevelopmentInProgress showTime="2025.10.30" />;
|
||||
const navigate = useNavigate();
|
||||
const [activeTab, setActiveTab] = useState("tasks");
|
||||
const [viewMode, setViewMode] = useState<"list" | "card">("list");
|
||||
const [showCreateDialog, setShowCreateDialog] = useState(false);
|
||||
const [autoTasks, setAutoTasks] = useState<any[]>([]);
|
||||
export default function DataAnnotation() {
|
||||
// return <DevelopmentInProgress showTime="2025.10.30" />;
|
||||
const navigate = useNavigate();
|
||||
const [activeTab, setActiveTab] = useState("tasks");
|
||||
const [viewMode, setViewMode] = useState<"list" | "card">("list");
|
||||
const [showCreateDialog, setShowCreateDialog] = useState(false);
|
||||
const [exportTask, setExportTask] = useState<AnnotationTask | null>(null);
|
||||
const [autoTasks, setAutoTasks] = useState<any[]>([]);
|
||||
|
||||
const {
|
||||
loading,
|
||||
@@ -58,8 +61,8 @@ export default function DataAnnotation() {
|
||||
handleKeywordChange,
|
||||
} = useFetchData(queryAnnotationTasksUsingGet, mapAnnotationTask, 30000, true, [], 0);
|
||||
|
||||
const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
|
||||
const [selectedRows, setSelectedRows] = useState<any[]>([]);
|
||||
const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
|
||||
const [selectedRows, setSelectedRows] = useState<any[]>([]);
|
||||
|
||||
// 拉取自动标注任务(供轮询和创建成功后立即刷新复用)
|
||||
const refreshAutoTasks = async (silent = false) => {
|
||||
@@ -77,24 +80,28 @@ export default function DataAnnotation() {
|
||||
}
|
||||
};
|
||||
|
||||
// 自动标注任务轮询(用于在同一表格中展示处理进度)
|
||||
useEffect(() => {
|
||||
refreshAutoTasks();
|
||||
const timer = setInterval(() => refreshAutoTasks(true), 3000);
|
||||
// 自动标注任务轮询(用于在同一表格中展示处理进度)
|
||||
useEffect(() => {
|
||||
refreshAutoTasks();
|
||||
const timer = setInterval(() => refreshAutoTasks(true), 3000);
|
||||
|
||||
return () => {
|
||||
clearInterval(timer);
|
||||
};
|
||||
}, []);
|
||||
|
||||
const handleAnnotate = (task: AnnotationTask) => {
|
||||
const projectId = (task as any)?.id;
|
||||
if (!projectId) {
|
||||
message.error("无法进入标注:缺少标注项目ID");
|
||||
return;
|
||||
}
|
||||
navigate(`/data/annotation/annotate/${projectId}`);
|
||||
};
|
||||
const handleAnnotate = (task: AnnotationTask) => {
|
||||
const projectId = (task as any)?.id;
|
||||
if (!projectId) {
|
||||
message.error("无法进入标注:缺少标注项目ID");
|
||||
return;
|
||||
}
|
||||
navigate(`/data/annotation/annotate/${projectId}`);
|
||||
};
|
||||
|
||||
const handleExport = (task: AnnotationTask) => {
|
||||
setExportTask(task);
|
||||
};
|
||||
|
||||
const handleDelete = (task: AnnotationTask) => {
|
||||
Modal.confirm({
|
||||
@@ -257,6 +264,12 @@ export default function DataAnnotation() {
|
||||
),
|
||||
onClick: handleAnnotate,
|
||||
},
|
||||
{
|
||||
key: "export",
|
||||
label: "导出",
|
||||
icon: <DownloadOutlined className="w-4 h-4" style={{ color: "#1890ff" }} />,
|
||||
onClick: handleExport,
|
||||
},
|
||||
{
|
||||
key: "sync",
|
||||
label: "同步",
|
||||
@@ -552,6 +565,13 @@ export default function DataAnnotation() {
|
||||
}
|
||||
}}
|
||||
/>
|
||||
|
||||
<ExportAnnotationDialog
|
||||
open={!!exportTask}
|
||||
projectId={exportTask?.id || ""}
|
||||
projectName={exportTask?.name || ""}
|
||||
onClose={() => setExportTask(null)}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
|
||||
@@ -0,0 +1,219 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import {
|
||||
Modal,
|
||||
Form,
|
||||
Select,
|
||||
Checkbox,
|
||||
Spin,
|
||||
Statistic,
|
||||
Row,
|
||||
Col,
|
||||
message,
|
||||
Alert,
|
||||
} from "antd";
|
||||
import { FileTextOutlined, CheckCircleOutlined } from "@ant-design/icons";
|
||||
import {
|
||||
getExportStatsUsingGet,
|
||||
downloadAnnotationsUsingGet,
|
||||
ExportFormat,
|
||||
} from "../annotation.api";
|
||||
|
||||
/** Props accepted by the annotation export dialog. */
interface ExportAnnotationDialogProps {
  /** Whether the modal is currently shown. */
  open: boolean;
  /** ID of the labeling project whose annotations are exported. */
  projectId: string;
  /** Project name; used to build the downloaded file's name. */
  projectName: string;
  /** Called when the dialog is cancelled or an export has finished. */
  onClose: () => void;
}
|
||||
|
||||
/**
 * Export formats offered in the format selector, in display order.
 * Each entry carries the short label, the value sent to the backend and a
 * one-line description rendered under the label.
 */
const FORMAT_OPTIONS: { label: string; value: ExportFormat; description: string }[] = [
  { label: "JSON", value: "json", description: "Label Studio 原生格式,包含完整标注结构" },
  { label: "JSON Lines", value: "jsonl", description: "每行一条记录,适合大数据处理" },
  { label: "CSV", value: "csv", description: "表格格式,可用 Excel 打开" },
  { label: "COCO", value: "coco", description: "目标检测通用格式,适用于图像标注" },
  { label: "YOLO", value: "yolo", description: "YOLO 格式(ZIP),适用于目标检测训练" },
];
|
||||
|
||||
/**
 * Modal dialog that exports the annotations of one labeling project.
 *
 * On open it loads export statistics (total / annotated file counts) for
 * `projectId`, lets the user pick a format plus two options, then downloads
 * the file produced by the backend and closes itself via `onClose`.
 */
export default function ExportAnnotationDialog({
  open,
  projectId,
  projectName,
  onClose,
}: ExportAnnotationDialogProps) {
  const [form] = Form.useForm();
  const [loading, setLoading] = useState(false);
  const [exporting, setExporting] = useState(false);
  const [stats, setStats] = useState<{
    totalFiles: number;
    annotatedFiles: number;
  } | null>(null);

  // Load export statistics whenever the dialog is opened for a project.
  useEffect(() => {
    if (!open || !projectId) {
      return;
    }

    // Guards against a slow response for a previous project overwriting the
    // state of the project the dialog is currently showing.
    let cancelled = false;

    // Drop stats from an earlier project so they are never displayed (or left
    // behind after a failed request) for the wrong project.
    setStats(null);
    setLoading(true);

    getExportStatsUsingGet(projectId)
      .then((res: any) => {
        if (cancelled) {
          return;
        }
        const data = res?.data || res;
        // NOTE(review): the backend response model declares snake_case fields
        // without aliases; accept both spellings until the wire format is
        // confirmed.
        setStats({
          totalFiles: data?.totalFiles ?? data?.total_files ?? 0,
          annotatedFiles: data?.annotatedFiles ?? data?.annotated_files ?? 0,
        });
      })
      .catch((err) => {
        if (cancelled) {
          return;
        }
        console.error("Failed to get export stats:", err);
        message.error("获取导出统计失败");
      })
      .finally(() => {
        if (!cancelled) {
          setLoading(false);
        }
      });

    return () => {
      cancelled = true;
      setLoading(false);
    };
  }, [open, projectId]);

  // Reset the form to its defaults each time the dialog opens.
  useEffect(() => {
    if (open) {
      form.setFieldsValue({
        format: "json",
        onlyAnnotated: true,
        includeData: false,
      });
    }
  }, [open, form]);

  /** Validate the form, request the export and trigger a browser download. */
  const handleExport = async () => {
    let values;
    try {
      values = await form.validateFields();
    } catch {
      // The form already renders field-level validation messages; this is not
      // an export failure, so no error toast is shown.
      return;
    }

    setExporting(true);
    try {
      const blob = await downloadAnnotationsUsingGet(
        projectId,
        values.format,
        values.onlyAnnotated,
        values.includeData
      );

      // File extension per export format (COCO is JSON, YOLO is a ZIP archive).
      const formatExt: Record<ExportFormat, string> = {
        json: "json",
        jsonl: "jsonl",
        csv: "csv",
        coco: "json",
        yolo: "zip",
      };
      const ext = formatExt[values.format as ExportFormat] || "json";
      const filename = `${projectName}_annotations.${ext}`;

      // Trigger the download through a temporary anchor element.
      const url = window.URL.createObjectURL(blob as Blob);
      const a = document.createElement("a");
      a.href = url;
      a.download = filename;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
      // Revoke on the next tick so the browser has started reading the blob
      // before its URL is invalidated.
      window.setTimeout(() => window.URL.revokeObjectURL(url), 0);

      message.success("导出成功");
      onClose();
    } catch (err: unknown) {
      console.error("Export failed:", err);
      const detail = (err as { message?: string } | null | undefined)?.message;
      message.error(detail || "导出失败,请稍后重试");
    } finally {
      setExporting(false);
    }
  };

  return (
    <Modal
      title="导出标注数据"
      open={open}
      onCancel={onClose}
      onOk={handleExport}
      okText="导出"
      cancelText="取消"
      confirmLoading={exporting}
      width={520}
    >
      <Spin spinning={loading}>
        {/* Statistics */}
        <div className="mb-4 p-4 bg-gray-50 rounded-lg">
          <Row gutter={16}>
            <Col span={12}>
              <Statistic
                title="总文件数"
                value={stats?.totalFiles || 0}
                prefix={<FileTextOutlined />}
              />
            </Col>
            <Col span={12}>
              <Statistic
                title="已标注文件数"
                value={stats?.annotatedFiles || 0}
                prefix={<CheckCircleOutlined style={{ color: "#52c41a" }} />}
              />
            </Col>
          </Row>
        </div>

        {/* Export options */}
        <Form form={form} layout="vertical">
          <Form.Item
            name="format"
            label="导出格式"
            rules={[{ required: true, message: "请选择导出格式" }]}
          >
            <Select
              options={FORMAT_OPTIONS.map((opt) => ({
                label: (
                  <div>
                    <div className="font-medium">{opt.label}</div>
                    <div className="text-xs text-gray-400">{opt.description}</div>
                  </div>
                ),
                value: opt.value,
              }))}
              optionLabelProp="label"
            />
          </Form.Item>

          <Form.Item name="onlyAnnotated" valuePropName="checked">
            <Checkbox>仅导出已标注数据</Checkbox>
          </Form.Item>

          <Form.Item name="includeData" valuePropName="checked">
            <Checkbox>包含原始数据内容</Checkbox>
          </Form.Item>
        </Form>

        {/* Warn when the loaded stats report nothing annotated yet */}
        {stats && stats.annotatedFiles === 0 && (
          <Alert
            type="warning"
            message="当前项目暂无已标注数据"
            description="请先完成标注后再导出"
            showIcon
          />
        )}
      </Spin>
    </Modal>
  );
}
|
||||
@@ -1,5 +1,8 @@
|
||||
import { get, post, put, del, download } from "@/utils/request";
|
||||
|
||||
// 导出格式类型
|
||||
export type ExportFormat = "json" | "jsonl" | "csv" | "coco" | "yolo";
|
||||
|
||||
// 标注任务管理相关接口
|
||||
export function queryAnnotationTasksUsingGet(params?: any) {
|
||||
return get("/api/annotation/project", params);
|
||||
@@ -62,30 +65,60 @@ export function getAutoAnnotationTaskStatusUsingGet(taskId: string) {
|
||||
return get(`/api/annotation/auto/${taskId}/status`);
|
||||
}
|
||||
|
||||
export function downloadAutoAnnotationResultUsingGet(taskId: string) {
|
||||
return download(`/api/annotation/auto/${taskId}/download`);
|
||||
}
|
||||
|
||||
// =====================
|
||||
// Label Studio Editor(内嵌版)
|
||||
// =====================
|
||||
|
||||
export function getEditorProjectInfoUsingGet(projectId: string) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}`);
|
||||
}
|
||||
|
||||
export function listEditorTasksUsingGet(projectId: string, params?: any) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks`, params);
|
||||
}
|
||||
|
||||
export function getEditorTaskUsingGet(projectId: string, fileId: string) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}`);
|
||||
}
|
||||
|
||||
export function upsertEditorAnnotationUsingPut(
|
||||
projectId: string,
|
||||
fileId: string,
|
||||
data: any
|
||||
) {
|
||||
return put(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`, data);
|
||||
}
|
||||
export function downloadAutoAnnotationResultUsingGet(taskId: string) {
|
||||
return download(`/api/annotation/auto/${taskId}/download`);
|
||||
}
|
||||
|
||||
// =====================
|
||||
// Label Studio Editor(内嵌版)
|
||||
// =====================
|
||||
|
||||
export function getEditorProjectInfoUsingGet(projectId: string) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}`);
|
||||
}
|
||||
|
||||
export function listEditorTasksUsingGet(projectId: string, params?: any) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks`, params);
|
||||
}
|
||||
|
||||
export function getEditorTaskUsingGet(projectId: string, fileId: string) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}`);
|
||||
}
|
||||
|
||||
export function upsertEditorAnnotationUsingPut(
|
||||
projectId: string,
|
||||
fileId: string,
|
||||
data: any
|
||||
) {
|
||||
return put(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`, data);
|
||||
}
|
||||
|
||||
// =====================
|
||||
// 标注数据导出
|
||||
// =====================
|
||||
|
||||
/**
 * Shape of the export statistics payload as consumed by the frontend.
 *
 * NOTE(review): the backend Pydantic model added in the same change declares
 * snake_case field names without aliases — confirm that the serialized
 * response really uses these camelCase keys.
 */
export interface ExportStatsResponse {
  /** ID of the labeling project. */
  projectId: string;
  /** Display name of the labeling project. */
  projectName: string;
  /** Number of files in the project's dataset. */
  totalFiles: number;
  /** Number of files reported as annotated. */
  annotatedFiles: number;
  /** Export format reported by the backend. */
  exportFormat: string;
}
|
||||
|
||||
/**
 * Request export statistics for a labeling project.
 *
 * @param projectId - ID of the labeling project.
 * @returns Whatever the shared `get` request helper returns for this URL.
 */
export function getExportStatsUsingGet(projectId: string) {
  const statsUrl = `/api/annotation/export/projects/${projectId}/stats`;
  return get(statsUrl);
}
|
||||
|
||||
/**
 * Download the exported annotations of a labeling project.
 *
 * @param projectId - ID of the labeling project.
 * @param format - Export format; defaults to `"json"`.
 * @param onlyAnnotated - Sent as `only_annotated`; defaults to `true`.
 * @param includeData - Sent as `include_data`; defaults to `false`.
 * @returns Whatever the shared `download` request helper returns.
 */
export function downloadAnnotationsUsingGet(
  projectId: string,
  format: ExportFormat = "json",
  onlyAnnotated: boolean = true,
  includeData: boolean = false
) {
  // Parameters are appended in the same order the backend lists them.
  const query = new URLSearchParams();
  query.set("format", format);
  query.set("only_annotated", String(onlyAnnotated));
  query.set("include_data", String(includeData));

  const endpoint = `/api/annotation/export/projects/${projectId}/download`;
  return download(`${endpoint}?${query.toString()}`);
}
|
||||
|
||||
@@ -2,10 +2,11 @@ from fastapi import APIRouter
|
||||
|
||||
from .config import router as about_router
|
||||
from .project import router as project_router
|
||||
from .task import router as task_router
|
||||
from .template import router as template_router
|
||||
from .auto import router as auto_router
|
||||
from .editor import router as editor_router
|
||||
from .task import router as task_router
|
||||
from .template import router as template_router
|
||||
from .auto import router as auto_router
|
||||
from .editor import router as editor_router
|
||||
from .export import router as export_router
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/annotation",
|
||||
@@ -14,7 +15,8 @@ router = APIRouter(
|
||||
|
||||
router.include_router(about_router)
|
||||
router.include_router(project_router)
|
||||
router.include_router(task_router)
|
||||
router.include_router(template_router)
|
||||
router.include_router(auto_router)
|
||||
router.include_router(editor_router)
|
||||
router.include_router(task_router)
|
||||
router.include_router(template_router)
|
||||
router.include_router(auto_router)
|
||||
router.include_router(editor_router)
|
||||
router.include_router(export_router)
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
标注数据导出接口
|
||||
|
||||
提供标注数据的批量导出功能,支持多种格式:
|
||||
- JSON: Label Studio 原生格式
|
||||
- JSONL: JSON Lines 格式
|
||||
- CSV: 表格格式
|
||||
- COCO: 目标检测格式
|
||||
- YOLO: YOLO 格式
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, Path, Query
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.session import get_db
|
||||
from app.module.shared.schema import StandardResponse
|
||||
|
||||
from ..schema.export import (
|
||||
ExportAnnotationsRequest,
|
||||
ExportAnnotationsResponse,
|
||||
ExportFormat,
|
||||
)
|
||||
from ..service.export import AnnotationExportService
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Sub-router for the export endpoints; every route below is served under
# the "/export" prefix and tagged "annotation/export".
router = APIRouter(prefix="/export", tags=["annotation/export"])
|
||||
|
||||
|
||||
@router.get(
    "/projects/{project_id}/stats",
    response_model=StandardResponse[ExportAnnotationsResponse],
)
async def get_export_stats(
    project_id: str = Path(..., description="标注项目ID"),
    db: AsyncSession = Depends(get_db),
):
    """Return export statistics (total and annotated file counts) for a project.

    Delegates to ``AnnotationExportService.get_export_stats`` and wraps the
    result in a ``StandardResponse`` with code 200.
    """
    export_stats = await AnnotationExportService(db).get_export_stats(project_id)
    return StandardResponse(code=200, message="success", data=export_stats)
|
||||
|
||||
|
||||
def _build_content_disposition(filename: str) -> str:
    """Build a ``Content-Disposition: attachment`` value safe for any filename.

    HTTP header values must be latin-1 encodable, and the exported filename is
    derived from the project name, which may contain non-ASCII characters,
    quotes or control characters. The header therefore carries:

    - ``filename="..."``: an ASCII-only fallback in which every unsafe
      character is replaced by ``_``;
    - ``filename*=UTF-8''...``: the exact name, percent-encoded (RFC 5987).
    """
    from urllib.parse import quote

    ascii_name = "".join(
        ch if (ch.isascii() and ch.isprintable() and ch not in '"\\') else "_"
        for ch in filename
    )
    encoded_name = quote(filename, safe="")
    return "attachment; filename=\"{}\"; filename*=UTF-8''{}".format(
        ascii_name, encoded_name
    )


@router.get("/projects/{project_id}/download")
async def download_annotations(
    project_id: str = Path(..., description="标注项目ID"),
    format: ExportFormat = Query(default=ExportFormat.JSON, description="导出格式"),
    include_data: bool = Query(default=False, description="是否包含原始数据"),
    only_annotated: bool = Query(default=True, description="是否只导出已标注数据"),
    db: AsyncSession = Depends(get_db),
):
    """
    Download the annotations of a labeling project as a file attachment.

    Supported formats:
    - json: Label Studio native JSON format
    - jsonl: JSON Lines (one record per line)
    - csv: CSV table
    - coco: COCO object-detection format
    - yolo: YOLO format (ZIP archive)
    """
    service = AnnotationExportService(db)

    request = ExportAnnotationsRequest(
        format=format,
        include_data=include_data,
        only_annotated=only_annotated,
    )

    content, filename, content_type = await service.export_annotations(
        project_id=project_id,
        request=request,
    )

    return Response(
        content=content,
        media_type=content_type,
        headers={
            # The filename embeds the project name, so it must be encoded
            # rather than interpolated verbatim into the header.
            "Content-Disposition": _build_content_disposition(filename),
            "Content-Length": str(len(content)),
        },
    )
|
||||
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
标注数据导出相关 Schema
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ExportFormat(str, Enum):
    """Export formats understood by the annotation export endpoints."""

    # Label Studio native JSON format
    JSON = "json"
    # JSON Lines (one record per line)
    JSONL = "jsonl"
    # CSV table
    CSV = "csv"
    # COCO object-detection format
    COCO = "coco"
    # YOLO format
    YOLO = "yolo"
|
||||
|
||||
|
||||
class ExportAnnotationsRequest(BaseModel):
    """Options controlling an annotation export."""

    # Enum members are stored as their plain string values on the model.
    model_config = {"use_enum_values": True}

    format: ExportFormat = Field(default=ExportFormat.JSON, description="导出格式")
    # NOTE(review): this default is True while the download endpoint's query
    # parameter defaults to False — confirm which default is intended.
    include_data: bool = Field(default=True, description="是否包含原始数据(如文本内容)")
    only_annotated: bool = Field(default=True, description="是否只导出已标注的数据")
|
||||
|
||||
|
||||
class ExportAnnotationsResponse(BaseModel):
    """Export statistics returned for preview purposes."""

    # NOTE(review): populate_by_name is enabled but no field declares an
    # alias, so field names are the only accepted and emitted keys — confirm
    # this matches the camelCase keys the frontend interface declares.
    model_config = {"populate_by_name": True}

    project_id: str = Field(..., description="项目ID")
    project_name: str = Field(..., description="项目名称")
    total_files: int = Field(..., description="总文件数")
    annotated_files: int = Field(..., description="已标注文件数")
    export_format: str = Field(..., description="导出格式")
|
||||
|
||||
|
||||
class AnnotationExportItem(BaseModel):
    """One exported record: a dataset file together with its annotations."""

    # Identity of the dataset file.
    file_id: str = Field(..., description="文件ID")
    file_name: str = Field(..., description="文件名")

    # Optional raw data, and the annotation payloads (empty when unannotated).
    data: Optional[Dict[str, Any]] = Field(default=None, description="原始数据")
    annotations: List[Dict[str, Any]] = Field(default_factory=list, description="标注结果")

    # Timestamps; both default to None.
    created_at: Optional[datetime] = Field(default=None, description="创建时间")
    updated_at: Optional[datetime] = Field(default=None, description="更新时间")
|
||||
|
||||
|
||||
class COCOExportFormat(BaseModel):
    """Top-level structure of a COCO export document.

    Every section defaults to an empty container so a document can be built
    up incrementally.
    """

    # Dataset-level metadata.
    info: Dict[str, Any] = Field(default_factory=dict)
    licenses: List[Dict[str, Any]] = Field(default_factory=list)

    # One entry per image, annotation and category.
    images: List[Dict[str, Any]] = Field(default_factory=list)
    annotations: List[Dict[str, Any]] = Field(default_factory=list)
    categories: List[Dict[str, Any]] = Field(default_factory=list)
|
||||
420
runtime/datamate-python/app/module/annotation/service/export.py
Normal file
420
runtime/datamate-python/app/module/annotation/service/export.py
Normal file
@@ -0,0 +1,420 @@
|
||||
"""
|
||||
标注数据导出服务
|
||||
|
||||
支持的导出格式:
|
||||
- JSON: Label Studio 原生 JSON 格式
|
||||
- JSONL: JSON Lines 格式(每行一条记录)
|
||||
- CSV: CSV 表格格式
|
||||
- COCO: COCO 目标检测格式(适用于图像标注)
|
||||
- YOLO: YOLO 格式(适用于图像标注)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
|
||||
|
||||
from ..schema.export import (
|
||||
AnnotationExportItem,
|
||||
COCOExportFormat,
|
||||
ExportAnnotationsRequest,
|
||||
ExportAnnotationsResponse,
|
||||
ExportFormat,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class AnnotationExportService:
    """Export the annotation results of a labeling project.

    Supported output formats: JSON, JSON Lines, CSV, COCO and YOLO (ZIP).
    Every ``_export_*`` method returns ``(content_bytes, filename, content_type)``.
    """

    def __init__(self, db: AsyncSession):
        # Async database session used for all queries.
        self.db = db

    async def get_export_stats(self, project_id: str) -> ExportAnnotationsResponse:
        """Return total and annotated file counts for a project.

        Raises:
            HTTPException: 404 when the project does not exist or is deleted.
        """
        project = await self._get_project_or_404(project_id)

        # Total number of files in the project's dataset.
        total_result = await self.db.execute(
            select(func.count()).select_from(DatasetFiles).where(
                DatasetFiles.dataset_id == project.dataset_id
            )
        )
        total_files = int(total_result.scalar() or 0)

        # Number of annotated files: count distinct files, not annotation
        # rows, so several results for one file are not counted repeatedly.
        annotated_result = await self.db.execute(
            select(func.count(AnnotationResult.file_id.distinct())).where(
                AnnotationResult.project_id == project_id
            )
        )
        annotated_files = int(annotated_result.scalar() or 0)

        return ExportAnnotationsResponse(
            project_id=project_id,
            project_name=project.name,
            total_files=total_files,
            annotated_files=annotated_files,
            export_format="json",
        )

    async def export_annotations(
        self,
        project_id: str,
        request: ExportAnnotationsRequest,
    ) -> Tuple[bytes, str, str]:
        """Export a project's annotations in the requested format.

        Returns:
            ``(file content bytes, filename, content type)``.

        Raises:
            HTTPException: 404 when the project does not exist, 400 when the
                format has no exporter.
        """
        project = await self._get_project_or_404(project_id)

        items = await self._fetch_annotation_data(
            project_id=project_id,
            dataset_id=project.dataset_id,
            only_annotated=request.only_annotated,
            include_data=request.include_data,
        )

        # The request model stores enum values as plain strings.
        format_type = (
            ExportFormat(request.format)
            if isinstance(request.format, str)
            else request.format
        )

        exporters = {
            ExportFormat.JSON: self._export_json,
            ExportFormat.JSONL: self._export_jsonl,
            ExportFormat.CSV: self._export_csv,
            ExportFormat.COCO: self._export_coco,
            ExportFormat.YOLO: self._export_yolo,
        }
        exporter = exporters.get(format_type)
        if exporter is None:
            raise HTTPException(status_code=400, detail=f"不支持的导出格式: {request.format}")
        return exporter(items, project.name)

    async def _get_project_or_404(self, project_id: str) -> LabelingProject:
        """Load a non-deleted labeling project or raise a 404 ``HTTPException``."""
        result = await self.db.execute(
            select(LabelingProject).where(
                LabelingProject.id == project_id,
                LabelingProject.deleted_at.is_(None),
            )
        )
        project = result.scalar_one_or_none()
        if not project:
            raise HTTPException(status_code=404, detail=f"标注项目不存在: {project_id}")
        return project

    @staticmethod
    def _build_item(file: Any, ann: Any, include_data: bool) -> AnnotationExportItem:
        """Build one export item from a dataset file and its optional annotation row."""
        annotation_data = (ann.annotation or {}) if ann is not None else {}
        return AnnotationExportItem(
            file_id=str(file.id),
            file_name=str(getattr(file, "file_name", "")),
            # Placeholder only: the raw content (e.g. text) is not loaded here.
            data={"text": ""} if include_data else None,
            annotations=[annotation_data] if annotation_data else [],
            created_at=ann.created_at if ann is not None else None,
            updated_at=ann.updated_at if ann is not None else None,
        )

    async def _fetch_annotation_data(
        self,
        project_id: str,
        dataset_id: str,
        only_annotated: bool = True,
        include_data: bool = False,
    ) -> List[AnnotationExportItem]:
        """Collect export items for a project.

        With ``only_annotated`` only files that have an annotation result are
        returned (most recently updated first); otherwise every file of the
        dataset is returned, annotated or not.
        """
        if only_annotated:
            result = await self.db.execute(
                select(AnnotationResult, DatasetFiles)
                .join(DatasetFiles, AnnotationResult.file_id == DatasetFiles.id)
                .where(AnnotationResult.project_id == project_id)
                .order_by(AnnotationResult.updated_at.desc())
            )
            return [
                self._build_item(file, ann, include_data)
                for ann, file in result.all()
            ]

        # All files of the dataset, including those without annotations.
        files_result = await self.db.execute(
            select(DatasetFiles).where(DatasetFiles.dataset_id == dataset_id)
        )
        files = files_result.scalars().all()

        # Existing annotations keyed by file ID.
        ann_result = await self.db.execute(
            select(AnnotationResult).where(AnnotationResult.project_id == project_id)
        )
        annotations = {str(a.file_id): a for a in ann_result.scalars().all()}

        return [
            self._build_item(file, annotations.get(str(file.id)), include_data)
            for file in files
        ]

    @staticmethod
    def _item_to_record(item: AnnotationExportItem) -> Dict[str, Any]:
        """Convert an export item into the JSON-serializable record used by JSON/JSONL."""
        return {
            "file_id": item.file_id,
            "file_name": item.file_name,
            "data": item.data,
            "annotations": item.annotations,
            "created_at": item.created_at.isoformat() if item.created_at else None,
            "updated_at": item.updated_at.isoformat() if item.updated_at else None,
        }

    def _export_json(
        self, items: List[AnnotationExportItem], project_name: str
    ) -> Tuple[bytes, str, str]:
        """Export as a single JSON document with a small header and all records."""
        export_data = {
            "project_name": project_name,
            "export_time": datetime.utcnow().isoformat() + "Z",
            "total_items": len(items),
            "annotations": [self._item_to_record(item) for item in items],
        }

        content = json.dumps(export_data, ensure_ascii=False, indent=2).encode("utf-8")
        filename = f"{project_name}_annotations.json"
        return content, filename, "application/json"

    def _export_jsonl(
        self, items: List[AnnotationExportItem], project_name: str
    ) -> Tuple[bytes, str, str]:
        """Export as JSON Lines: one record per line."""
        lines = [
            json.dumps(self._item_to_record(item), ensure_ascii=False)
            for item in items
        ]

        content = "\n".join(lines).encode("utf-8")
        filename = f"{project_name}_annotations.jsonl"
        return content, filename, "application/x-ndjson"

    @staticmethod
    def _collect_labels(annotations: List[Dict[str, Any]]) -> List[str]:
        """Flatten the label-like values of all results into a list of strings.

        For each result the first present key of ``choices``, ``text``,
        ``labels`` is used. List values are expanded and every value is
        converted to ``str`` so the caller can always join them.
        """
        labels: List[str] = []
        for ann in annotations:
            for r in ann.get("result", []) or []:
                value = r.get("value", {}) or {}
                # NOTE(review): "text" takes precedence over "labels", so a
                # value holding both yields the text — confirm this is intended.
                for key in ("choices", "text", "labels"):
                    if key in value:
                        raw = value[key]
                        break
                else:
                    continue
                if isinstance(raw, (list, tuple)):
                    labels.extend(str(v) for v in raw)
                elif raw is not None:
                    labels.append(str(raw))
        return labels

    def _export_csv(
        self, items: List[AnnotationExportItem], project_name: str
    ) -> Tuple[bytes, str, str]:
        """Export as CSV with one row per item.

        The ``labels`` column holds the extracted label values joined by ``|``;
        ``annotation_result`` holds the full annotations as JSON.
        """
        output = io.StringIO()

        fieldnames = [
            "file_id",
            "file_name",
            "annotation_result",
            "labels",
            "created_at",
            "updated_at",
        ]

        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()

        for item in items:
            writer.writerow({
                "file_id": item.file_id,
                "file_name": item.file_name,
                "annotation_result": json.dumps(item.annotations, ensure_ascii=False),
                "labels": "|".join(self._collect_labels(item.annotations)),
                "created_at": item.created_at.isoformat() if item.created_at else "",
                "updated_at": item.updated_at.isoformat() if item.updated_at else "",
            })

        # A UTF-8 BOM lets Excel detect the encoding.
        content = output.getvalue().encode("utf-8-sig")
        filename = f"{project_name}_annotations.csv"
        return content, filename, "text/csv"

    def _export_coco(
        self, items: List[AnnotationExportItem], project_name: str
    ) -> Tuple[bytes, str, str]:
        """Export ``rectanglelabels`` results as a COCO document.

        Label Studio stores box coordinates as percentages of the image size.
        When a result carries ``original_width`` / ``original_height`` the box
        is converted to pixels and the image size is recorded. Otherwise the
        percentage values are written unchanged and the image size stays 0.
        """
        now = datetime.utcnow()
        images: List[Dict[str, Any]] = []
        coco_annotations: List[Dict[str, Any]] = []
        categories: List[Dict[str, Any]] = []
        category_map: Dict[str, int] = {}
        annotation_id = 1

        for image_id, item in enumerate(items, start=1):
            image = {
                "id": image_id,
                "file_name": item.file_name,
                "width": 0,
                "height": 0,
            }
            images.append(image)

            for ann in item.annotations:
                for r in ann.get("result", []):
                    if r.get("type") != "rectanglelabels":
                        continue

                    value = r.get("value", {})
                    x = value.get("x", 0)
                    y = value.get("y", 0)
                    width = value.get("width", 0)
                    height = value.get("height", 0)

                    img_w = r.get("original_width") or 0
                    img_h = r.get("original_height") or 0
                    if img_w and img_h:
                        # Percent -> pixels, as COCO expects absolute boxes.
                        image["width"], image["height"] = img_w, img_h
                        x = x * img_w / 100.0
                        y = y * img_h / 100.0
                        width = width * img_w / 100.0
                        height = height * img_h / 100.0

                    for label in value.get("rectanglelabels", []):
                        if label not in category_map:
                            category_map[label] = len(category_map) + 1
                            categories.append({
                                "id": category_map[label],
                                "name": label,
                                "supercategory": "",
                            })

                        coco_annotations.append({
                            "id": annotation_id,
                            "image_id": image_id,
                            "category_id": category_map[label],
                            "bbox": [x, y, width, height],
                            "area": width * height,
                            "iscrowd": 0,
                        })
                        annotation_id += 1

        # Key order mirrors the COCOExportFormat schema fields.
        coco_document = {
            "info": {
                "description": f"Exported from DataMate project: {project_name}",
                "version": "1.0",
                "year": now.year,
                "date_created": now.isoformat(),
            },
            "licenses": [{"id": 1, "name": "Unknown", "url": ""}],
            "images": images,
            "annotations": coco_annotations,
            "categories": categories,
        }

        content = json.dumps(coco_document, ensure_ascii=False, indent=2).encode("utf-8")
        filename = f"{project_name}_coco.json"
        return content, filename, "application/json"

    def _export_yolo(
        self, items: List[AnnotationExportItem], project_name: str
    ) -> Tuple[bytes, str, str]:
        """Export ``rectanglelabels`` results as a YOLO ZIP archive.

        The archive contains ``classes.txt`` (sorted class names, one per
        line) and ``labels/<file base name>.txt`` with one line per box:
        ``class_id x_center y_center width height``, all normalized to 0-1.
        Items without any box produce no label file.
        """
        category_set: set = set()
        # Label file name -> list of (label, formatted coordinates). Labels
        # are kept apart from the coordinates so names containing spaces
        # resolve to the right class id.
        boxes_by_file: Dict[str, List[Tuple[str, str]]] = {}

        for item in items:
            boxes: List[Tuple[str, str]] = []

            for ann in item.annotations:
                for r in ann.get("result", []):
                    if r.get("type") != "rectanglelabels":
                        continue

                    value = r.get("value", {})
                    # Percent -> 0-1 fraction; top-left corner -> box center.
                    x = value.get("x", 0) / 100
                    y = value.get("y", 0) / 100
                    w = value.get("width", 0) / 100
                    h = value.get("height", 0) / 100
                    x_center = x + w / 2
                    y_center = y + h / 2
                    coords = f"{x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"

                    for label in value.get("rectanglelabels", []):
                        category_set.add(label)
                        boxes.append((label, coords))

            if boxes:
                base_name = os.path.splitext(item.file_name)[0]
                boxes_by_file[f"{base_name}.txt"] = boxes

        # Class ids are the positions in the sorted class list.
        categories = sorted(category_set)
        category_map = {cat: idx for idx, cat in enumerate(categories)}

        # Build the archive in memory; no temporary file to clean up or leak.
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.writestr("classes.txt", "\n".join(categories))
            for label_file, boxes in boxes_by_file.items():
                body = "\n".join(
                    f"{category_map[label]} {coords}" for label, coords in boxes
                )
                zf.writestr(f"labels/{label_file}", body)

        content = buffer.getvalue()
        filename = f"{project_name}_yolo.zip"
        return content, filename, "application/zip"
|
||||
Reference in New Issue
Block a user