From 3c3ca130b39ba98fe6368a4412a7be41007d35e6 Mon Sep 17 00:00:00 2001
From: Jerry Yan <792602257@qq.com>
Date: Fri, 30 Jan 2026 17:35:22 +0800
Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E6=B7=BB=E5=8A=A0=E6=96=87?=
 =?UTF-8?q?=E6=9C=AC=E6=96=87=E4=BB=B6=E5=86=85=E5=AE=B9=E8=AF=BB=E5=8F=96?=
 =?UTF-8?q?=E5=92=8C=E5=A4=9A=E7=B1=BB=E5=9E=8B=E6=A0=87=E7=AD=BE=E5=AF=BC?=
 =?UTF-8?q?=E5=87=BA=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 新增异步函数 _read_file_content 用于安全读取文本文件内容
- 实现在导出时包含文本文件的实际内容数据
- 扩展 CSV 导出格式支持多种标注类型标签提取
- 添加对矩形标签、多边形标签、画笔标签等多种标注类型的支持
- 更新 COCO 格式导出的文档字符串，说明图片宽高与 bbox 坐标转换的注意事项
---
 .../app/module/annotation/service/export.py   | 64 +++++++++++++++++--
 1 file changed, 60 insertions(+), 4 deletions(-)

diff --git a/runtime/datamate-python/app/module/annotation/service/export.py b/runtime/datamate-python/app/module/annotation/service/export.py
index ac27716..6a6ff1e 100644
--- a/runtime/datamate-python/app/module/annotation/service/export.py
+++ b/runtime/datamate-python/app/module/annotation/service/export.py
@@ -27,6 +27,32 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.logging import get_logger
 from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
 
+
+async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
+    """Read a text file as UTF-8 without blocking the event loop.
+
+    Args:
+        file_path: Path of the file to read.
+        max_size: Largest file size in bytes that is read (default 10 MiB).
+
+    Returns:
+        The text (undecodable bytes replaced), "[File too large: N bytes]" when
+        the file exceeds max_size, or None when it cannot be read.
+    """
+    import asyncio  # imported locally; only needed to reach the loop's executor
+
+    def _read_sync() -> Optional[str]:
+        try:  # best effort: missing file, directory or bad path gives None
+            file_size = os.path.getsize(file_path)
+            if file_size > max_size:
+                return f"[File too large: {file_size} bytes]"
+            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+                return f.read()
+        except (OSError, TypeError, ValueError):
+            return None
+    # Blocking disk I/O runs on the default executor, not the event loop.
+    return await asyncio.get_running_loop().run_in_executor(None, _read_sync)
+
 from ..schema.export import (
     AnnotationExportItem,
     COCOExportFormat,
@@ -147,11 +173,17 @@ class AnnotationExportService:
 
             for ann, file in rows:
                 annotation_data = ann.annotation or {}
+                # 获取文件内容（如果是文本文件且用户要求包含数据）
+                file_content = None
+                if include_data:
+                    file_path = getattr(file, "file_path", "")
+                    file_content = await _read_file_content(file_path)
+                
                 items.append(
                     AnnotationExportItem(
                         file_id=str(file.id),
                         file_name=str(getattr(file, "file_name", "")),
-                        data={"text": ""} if include_data else None,  # TEXT 类型数据需要单独获取
+                        data={"text": file_content} if include_data else None,
                         annotations=[annotation_data] if annotation_data else [],
                         created_at=ann.created_at,
                         updated_at=ann.updated_at,
@@ -177,12 +209,18 @@ class AnnotationExportService:
                 file_id = str(file.id)
                 ann = annotations.get(file_id)
                 annotation_data = ann.annotation if ann else {}
+                
+                # 获取文件内容（如果是文本文件且用户要求包含数据）
+                file_content = None
+                if include_data:
+                    file_path = getattr(file, "file_path", "")
+                    file_content = await _read_file_content(file_path)
 
                 items.append(
                     AnnotationExportItem(
                         file_id=file_id,
                         file_name=str(getattr(file, "file_name", "")),
-                        data={"text": ""} if include_data else None,
+                        data={"text": file_content} if include_data else None,
                         annotations=[annotation_data] if annotation_data else [],
                         created_at=ann.created_at if ann else None,
                         updated_at=ann.updated_at if ann else None,
@@ -256,12 +294,14 @@ class AnnotationExportService:
         writer.writeheader()
 
         for item in items:
-            # 提取标签信息
+            # 提取标签信息（支持多种标注类型）
             labels = []
             for ann in item.annotations:
                 results = ann.get("result", [])
                 for r in results:
                     value = r.get("value", {})
+                    label_type = r.get("type", "")
+                    
                     # 提取不同类型的标签值
                     if "choices" in value:
                         labels.extend(value["choices"])
@@ -269,6 +309,18 @@ class AnnotationExportService:
                         labels.append(value["text"])
                     elif "labels" in value:
                         labels.extend(value["labels"])
+                    elif "rectanglelabels" in value:
+                        labels.extend(value["rectanglelabels"])
+                    elif "polygonlabels" in value:
+                        labels.extend(value["polygonlabels"])
+                    elif "brushlabels" in value:
+                        labels.extend(value["brushlabels"])
+                    elif "hypertextlabels" in value:
+                        labels.extend(value["hypertextlabels"])
+                    elif "timeserieslabels" in value:
+                        labels.extend(value["timeserieslabels"])
+                    elif "transcription" in value:
+                        labels.append(value["transcription"])
 
             writer.writerow({
                 "file_id": item.file_id,
@@ -286,7 +338,11 @@ class AnnotationExportService:
     def _export_coco(
         self, items: List[AnnotationExportItem], project_name: str
     ) -> Tuple[bytes, str, str]:
-        """导出为 COCO 格式（适用于目标检测标注）"""
+        """导出为 COCO 格式（适用于目标检测标注）
+        
+        注意：当前实现中图片宽高被设置为0，因为需要读取实际图片文件获取尺寸。
+        bbox 坐标使用 Label Studio 的百分比值（0-100），使用时需要转换为像素坐标。
+        """
         coco_format = COCOExportFormat(
             info={
                 "description": f"Exported from DataMate project: {project_name}",