From 3c3ca130b39ba98fe6368a4412a7be41007d35e6 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Fri, 30 Jan 2026 17:35:22 +0800 Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E6=B7=BB=E5=8A=A0=E6=96=87?= =?UTF-8?q?=E6=9C=AC=E6=96=87=E4=BB=B6=E5=86=85=E5=AE=B9=E8=AF=BB=E5=8F=96?= =?UTF-8?q?=E5=92=8C=E5=A4=9A=E7=B1=BB=E5=9E=8B=E6=A0=87=E7=AD=BE=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增异步函数 _read_file_content 用于安全读取文本文件内容 - 实现在导出时包含文本文件的实际内容数据 - 扩展 CSV 导出格式支持多种标注类型标签提取 - 添加对矩形标签、多边形标签、画笔标签等多种标注类型的支持 - 更新 COCO 格式导出文档说明bbox坐标转换注意事项 --- .../app/module/annotation/service/export.py | 64 +++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/runtime/datamate-python/app/module/annotation/service/export.py b/runtime/datamate-python/app/module/annotation/service/export.py index ac27716..6a6ff1e 100644 --- a/runtime/datamate-python/app/module/annotation/service/export.py +++ b/runtime/datamate-python/app/module/annotation/service/export.py @@ -27,6 +27,32 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.logging import get_logger from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject + +async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]: + """读取文件内容,仅适用于文本文件 + + Args: + file_path: 文件路径 + max_size: 最大读取字节数(默认10MB) + + Returns: + 文件内容字符串,如果读取失败返回 None + """ + try: + # 检查文件是否存在且大小在限制内 + if not os.path.exists(file_path): + return None + + file_size = os.path.getsize(file_path) + if file_size > max_size: + return f"[File too large: {file_size} bytes]" + + # 尝试以文本方式读取 + with open(file_path, 'r', encoding='utf-8', errors='replace') as f: + return f.read() + except Exception: + return None + from ..schema.export import ( AnnotationExportItem, COCOExportFormat, @@ -147,11 +173,17 @@ class AnnotationExportService: for ann, file in rows: annotation_data = ann.annotation or {} + # 获取文件内容(如果是文本文件且用户要求包含数据) + file_content = None + if include_data: + file_path = getattr(file, "file_path", "") + file_content = await _read_file_content(file_path) + items.append( AnnotationExportItem( file_id=str(file.id), file_name=str(getattr(file, "file_name", "")), - data={"text": ""} if include_data else None, # TEXT 类型数据需要单独获取 + data={"text": file_content} if include_data else None, annotations=[annotation_data] if annotation_data else [], created_at=ann.created_at, updated_at=ann.updated_at, @@ -177,12 +209,18 @@ class AnnotationExportService: file_id = str(file.id) ann = annotations.get(file_id) annotation_data = ann.annotation if ann else {} + + # 获取文件内容(如果是文本文件且用户要求包含数据) + file_content = None + if include_data: + file_path = getattr(file, "file_path", "") + file_content = await _read_file_content(file_path) items.append( AnnotationExportItem( file_id=file_id, file_name=str(getattr(file, "file_name", "")), - data={"text": ""} if include_data else None, + data={"text": file_content} if include_data else None, annotations=[annotation_data] if annotation_data else [], created_at=ann.created_at if ann else None, updated_at=ann.updated_at if ann else None, @@ -256,12 +294,14 @@ class AnnotationExportService: writer.writeheader() for item in items: - # 提取标签信息 + # 提取标签信息(支持多种标注类型) labels = [] for ann in item.annotations: results = ann.get("result", []) for r in results: value = r.get("value", {}) + label_type = r.get("type", "") + # 提取不同类型的标签值 if "choices" in value: labels.extend(value["choices"]) @@ -269,6 +309,18 @@ class AnnotationExportService: labels.append(value["text"]) elif "labels" in value: labels.extend(value["labels"]) + elif "rectanglelabels" in value: + labels.extend(value["rectanglelabels"]) + elif "polygonlabels" in value: + labels.extend(value["polygonlabels"]) + elif "brushlabels" in value: + labels.extend(value["brushlabels"]) + elif "hypertextlabels" in value: + labels.extend(value["hypertextlabels"]) + elif "timeserieslabels" in value: + labels.extend(value["timeserieslabels"]) + elif "transcription" in value: + labels.append(value["transcription"]) writer.writerow({ "file_id": item.file_id, @@ -286,7 +338,11 @@ class AnnotationExportService: def _export_coco( self, items: List[AnnotationExportItem], project_name: str ) -> Tuple[bytes, str, str]: - """导出为 COCO 格式(适用于目标检测标注)""" + """导出为 COCO 格式(适用于目标检测标注) + + 注意:当前实现中图片宽高被设置为0,因为需要读取实际图片文件获取尺寸。 + bbox 坐标使用 Label Studio 的百分比值(0-100),使用时需要转换为像素坐标。 + """ coco_format = COCOExportFormat( info={ "description": f"Exported from DataMate project: {project_name}",