You've already forked DataMate
问题1 - 并发控制缺失: - 在 _ensure_knowledge_set 方法中添加数据库行锁(with_for_update) - 修改 _update_project_config 方法,使用行锁保护配置更新 问题3 - 数据清理机制缺失: - 添加 _cleanup_knowledge_set_for_project 方法,项目删除时清理知识集 - 添加 _cleanup_knowledge_item_for_file 方法,文件删除时清理知识条目 - 在 delete_mapping 接口中调用清理方法 问题4 - 文件操作事务问题: - 修改 uploadKnowledgeItems,添加事务失败后的文件清理逻辑 - 修改 deleteKnowledgeItem,删除记录前先删除关联文件 - 新增 deleteKnowledgeItemFile 辅助方法 问题5 - COCO导出格式问题: - 添加 _get_image_dimensions 方法读取图片实际宽高 - 将百分比坐标转换为像素坐标 - 在 AnnotationExportItem 中添加 file_path 字段 涉及文件: - knowledge_sync.py - project.py - KnowledgeItemApplicationService.java - export.py - export schema.py
70 lines
2.4 KiB
Python
70 lines
2.4 KiB
Python
"""
|
|
标注数据导出相关 Schema
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class ExportFormat(str, Enum):
    """Output formats available when exporting annotations.

    Members also subclass ``str``, so each one compares equal to its
    lowercase value (for example ``ExportFormat.COCO == "coco"``).
    """

    # Label Studio native JSON format
    JSON = "json"
    # JSON Lines format (one record per line)
    JSONL = "jsonl"
    # CSV tabular format
    CSV = "csv"
    # COCO object-detection format
    COCO = "coco"
    # YOLO format
    YOLO = "yolo"
|
|
|
|
|
|
class ExportAnnotationsRequest(BaseModel):
    """Request payload selecting how annotation data is exported.

    ``use_enum_values`` makes pydantic store the plain string value of
    ``ExportFormat`` in ``format`` once the field has been validated.
    """

    model_config = {"use_enum_values": True}

    # pydantic skips validation of defaults unless told otherwise, so
    # without ``validate_default`` an omitted ``format`` would remain an
    # ``ExportFormat`` member while a supplied one becomes a plain string.
    # Validating the default makes the attribute type consistent.
    format: ExportFormat = Field(
        default=ExportFormat.JSON,
        validate_default=True,
        description="导出格式",
    )
    include_data: bool = Field(
        default=True, description="是否包含原始数据(如文本内容)"
    )
    only_annotated: bool = Field(default=True, description="是否只导出已标注的数据")
|
|
|
|
|
|
class ExportAnnotationsResponse(BaseModel):
    """Response summarising an annotation export (for preview/statistics)."""

    model_config = dict(populate_by_name=True)

    # All fields are required: none of them declares a default.
    project_id: str = Field(description="项目ID")
    project_name: str = Field(description="项目名称")
    total_files: int = Field(description="总文件数")
    annotated_files: int = Field(description="已标注文件数")
    export_format: str = Field(description="导出格式")
|
|
|
|
|
|
class AnnotationExportItem(BaseModel):
    """A single exported record: one file and its annotation results."""

    # Required identification of the file.
    file_id: str = Field(description="文件ID")
    file_name: str = Field(description="文件名")

    # Optional fields fall back to None when omitted.
    file_path: Optional[str] = Field(None, description="文件路径")
    data: Optional[Dict[str, Any]] = Field(None, description="原始数据")

    # default_factory builds a fresh list for every instance.
    annotations: List[Dict[str, Any]] = Field(
        description="标注结果",
        default_factory=list,
    )

    created_at: Optional[datetime] = Field(None, description="创建时间")
    updated_at: Optional[datetime] = Field(None, description="更新时间")
|
|
|
|
|
|
class COCOExportFormat(BaseModel):
    """Structure of a COCO-format export.

    Every section has a ``default_factory``, so a model created without
    arguments starts with an empty ``info`` dict and empty lists.
    """

    # Mapping section
    info: Dict[str, Any] = Field(default_factory=dict)

    # List sections, each holding free-form dictionaries
    licenses: List[Dict[str, Any]] = Field(default_factory=list)
    images: List[Dict[str, Any]] = Field(default_factory=list)
    annotations: List[Dict[str, Any]] = Field(default_factory=list)
    categories: List[Dict[str, Any]] = Field(default_factory=list)
|