# DataMate/runtime/datamate-python/app/module/annotation/service/sync.py

from typing import Optional, List, Dict, Any, Tuple, Set
from app.module.dataset import DatasetManagementService
from sqlalchemy import update, select
from app.db.models import DatasetFiles

from app.core.logging import get_logger
from app.core.config import settings
from app.exception import NoDatasetInfoFoundError

from ..client import LabelStudioClient
from ..schema import (
    SyncDatasetResponse,
    DatasetMappingResponse,
    SyncAnnotationsResponse
)
from ..service.mapping import DatasetMappingService

logger = get_logger(__name__)

class SyncService:
    """数据同步服务"""

    def __init__(
        self,
        dm_client: DatasetManagementService,
        ls_client: LabelStudioClient,
        mapping_service: DatasetMappingService
    ):
        self.dm_client = dm_client
        self.ls_client = ls_client
        self.mapping_service = mapping_service

    def _determine_data_type(self, file_type: str) -> str:
        """根据文件类型确定数据类型"""
        file_type_lower = file_type.lower()

        type_mapping = {
            'image': ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'svg', 'webp'],
            'audio': ['mp3', 'wav', 'flac', 'aac', 'ogg'],
            'video': ['mp4', 'avi', 'mov', 'wmv', 'flv', 'webm'],
            'text': ['txt', 'doc', 'docx', 'pdf'],
            'wsi': ['svs', 'tiff', 'ndpi', 'mrxs', 'sdpc'],
            'ct': ['dcm', 'dicom', 'nii', 'nii.gz']
        }

        for data_type, extensions in type_mapping.items():
            if any(ext in file_type_lower for ext in extensions):
                return data_type

        return 'image'  # 默认为图像类型

    def _build_task_data(self, file_info: Any, dataset_id: str) -> dict:
        """Build the Label Studio task payload for one DM file.

        Args:
            file_info: File record exposing ``fileType``, ``filePath``, ``id``
                and ``originalName``.
            dataset_id: ID of the dataset the file belongs to.

        Returns:
            ``{"data": {...}}`` where the first key is the detected data type
            mapped to the Label Studio-visible path, followed by bookkeeping
            fields (``file_path``, ``file_id``, ``original_name``,
            ``dataset_id``).
        """
        media_key = self._determine_data_type(file_info.fileType)

        # Swap the DM path prefix for the prefix Label Studio is configured with.
        relative_path = file_info.filePath.removeprefix(settings.dm_file_path_prefix)
        ls_path = settings.label_studio_file_path_prefix + relative_path

        payload = {f"{media_key}": ls_path}
        payload["file_path"] = file_info.filePath
        payload["file_id"] = file_info.id
        payload["original_name"] = file_info.originalName
        payload["dataset_id"] = dataset_id
        return {"data": payload}

    async def _create_tasks_with_fallback(
        self,
        project_id: str,
        tasks: List[dict]
    ) -> int:
        """Create tasks in one batch call, retrying one by one if the batch fails.

        Args:
            project_id: Label Studio project that receives the tasks.
            tasks: Task payloads; each has a ``"data"`` entry and optionally ``"meta"``.

        Returns:
            Number of tasks reported as created.
        """
        if not tasks:
            return 0

        # Fast path: a truthy batch result is treated as "all tasks created".
        if await self.ls_client.create_tasks_batch(project_id, tasks):
            logger.debug(f"Successfully created {len(tasks)} tasks in batch")
            return len(tasks)

        # Slow path: submit every task separately and count the truthy results.
        logger.warning(f"Batch creation failed, falling back to single creation")
        succeeded = 0
        for payload in tasks:
            outcome = await self.ls_client.create_task(
                project_id,
                payload["data"],
                payload.get("meta")
            )
            succeeded += 1 if outcome else 0

        logger.debug(f"Successfully created {succeeded}/{len(tasks)} tasks individually")
        return succeeded

    async def get_existing_dm_file_mapping(self, project_id: str) -> Dict[str, int]:
        """
        Build a map from DM file ID to Label Studio task ID for one project.

        Args:
            project_id: Label Studio project ID.

        Returns:
            Dict of ``str(file_id) -> task_id`` for every task whose data
            carries a ``file_id``. An empty dict is returned when the task
            fetch yields nothing or raises.
        """
        try:
            fetched = await self.ls_client.get_project_tasks(
                project_id=project_id,
                page=None,
                page_size=getattr(settings, 'ls_task_page_size', 1000)
            )

            if not fetched:
                logger.warning(f"Failed to fetch tasks for project {project_id}")
                return {}

            project_tasks = fetched.get("tasks", [])
            logger.info(f"Successfully fetched {len(project_tasks)} tasks")

            # Later tasks win if two tasks reference the same file_id.
            file_to_task = {}
            for entry in project_tasks:
                dm_file_id = entry.get('data', {}).get('file_id')
                if dm_file_id is None:
                    continue
                file_to_task[str(dm_file_id)] = entry.get('id')

            logger.info(f"Found {len(file_to_task)} existing task mappings")
            return file_to_task

        except Exception as e:
            logger.error(f"Error while fetching existing tasks: {e}")
            return {}

    async def _fetch_dm_files_paginated(
        self,
        dataset_id: str,
        batch_size: int,
        existing_file_ids: Set[str],
        project_id: str
    ) -> Tuple[Set[str], int]:
        """
        Walk the DM file listing page by page and create tasks for unseen files.

        Args:
            dataset_id: DM dataset whose files are listed.
            batch_size: Page size requested from the DM client.
            existing_file_ids: File IDs that already have a Label Studio task.
            project_id: Label Studio project that receives new tasks.

        Returns:
            (set of every file ID seen in DM, number of tasks created)
        """
        seen_ids = set()
        created_total = 0
        page = 0

        while True:
            files_response = await self.dm_client.get_dataset_files(
                dataset_id,
                page=page,
                size=batch_size,
            )

            # A missing response or an empty page ends the walk.
            if not files_response or not files_response.content:
                logger.info(f"No more files on page {page + 1}")
                break

            logger.info(f"Processing page {page + 1}, {len(files_response.content)} files")

            # Record every ID, but only build payloads for files without a task.
            pending = []
            for record in files_response.content:
                record_id = str(record.id)
                seen_ids.add(record_id)
                if record_id in existing_file_ids:
                    continue
                pending.append(self._build_task_data(record, dataset_id))

            logger.info(f"Page {page + 1}: {len(pending)} new files, {len(files_response.content) - len(pending)} existing")

            if pending:
                created_total += await self._create_tasks_with_fallback(project_id, pending)

            # Pages are zero-based; stop after the last one.
            if page >= files_response.totalPages - 1:
                break
            page += 1

        return seen_ids, created_total

    async def _delete_orphaned_tasks(
        self,
        existing_dm_file_mapping: Dict[str, int],
        current_file_ids: Set[str]
    ) -> int:
        """Delete Label Studio tasks whose DM file is no longer present.

        Args:
            existing_dm_file_mapping: ``file_id -> task_id`` for tasks currently in Label Studio.
            current_file_ids: File IDs currently present in DM.

        Returns:
            The ``"successful"`` count reported by the batch delete (0 if absent
            or if there was nothing to delete).
        """
        # File IDs that have a task but were not seen in DM.
        stale_file_ids = set(existing_dm_file_mapping.keys()) - current_file_ids

        if stale_file_ids:
            stale_task_ids = [existing_dm_file_mapping[stale_id] for stale_id in stale_file_ids]
            logger.info(f"Deleting {len(stale_task_ids)} orphaned tasks")

            outcome = await self.ls_client.delete_tasks_batch(stale_task_ids)
            removed = outcome.get("successful", 0)

            logger.info(f"Successfully deleted {removed} tasks")
            return removed

        logger.info("No tasks to delete")
        return 0

    async def sync_dataset_files(
        self,
        mapping_id: str,
        batch_size: int = 50
    ) -> SyncDatasetResponse:
        """
        Sync dataset files to Label Studio (legacy endpoint, delegates to sync_files).

        Args:
            mapping_id: Mapping ID.
            batch_size: Batch size used when paging through DM files.

        Returns:
            Sync result response; failures are reported via ``status="error"``
            rather than raised.
        """
        logger.info(f"Start syncing dataset files by mapping: {mapping_id}")

        mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
        if not mapping:
            logger.error(f"Dataset mapping not found: {mapping_id}")
            return SyncDatasetResponse(
                id="",
                status="error",
                synced_files=0,
                total_files=0,
                message=f"Dataset mapping not found: {mapping_id}"
            )

        try:
            # The actual work happens in sync_files; this method only wraps the result.
            stats = await self.sync_files(mapping, batch_size)
            created, deleted, total = stats["created"], stats["deleted"], stats["total"]

            logger.info(f"Sync completed: created={created}, deleted={deleted}, total={total}")

            return SyncDatasetResponse(
                id=mapping.id,
                status="success",
                synced_files=created,
                total_files=total,
                message=f"Sync completed: created {created} files, deleted {deleted} tasks"
            )
        except Exception as e:
            logger.error(f"Error while syncing dataset: {e}")
            return SyncDatasetResponse(
                id=mapping.id,
                status="error",
                synced_files=0,
                total_files=0,
                message=f"Sync failed: {str(e)}"
            )

    async def sync_dataset(
        self,
        mapping_id: str,
        batch_size: int = 50,
        file_priority: int = 0,
        annotation_priority: int = 0
    ) -> SyncDatasetResponse:
        """
        Sync a dataset's files (and, eventually, annotations) with Label Studio.

        Args:
            mapping_id: Mapping ID.
            batch_size: Batch size used when paging through DM files.
            file_priority: File sync priority (0: dataset first, 1: annotation first).
                Currently not read by this method.
            annotation_priority: Annotation sync priority (0: dataset first,
                1: annotation first). Currently not read by this method.

        Returns:
            Sync result response; failures are reported via ``status="error"``
            rather than raised.
        """
        logger.info(f"Start syncing dataset by mapping: {mapping_id}")

        # Bail out early when the mapping is unknown.
        mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
        if not mapping:
            logger.error(f"Dataset mapping not found: {mapping_id}")
            return SyncDatasetResponse(
                id="",
                status="error",
                synced_files=0,
                total_files=0,
                message=f"Dataset mapping not found: {mapping_id}"
            )

        try:
            file_stats = await self.sync_files(mapping, batch_size)

            # TODO: sync annotations
            # annotation_result = await self.sync_annotations(mapping, batch_size, annotation_priority)

            created, deleted, total = file_stats["created"], file_stats["deleted"], file_stats["total"]
            logger.info(f"Sync completed: created={created}, deleted={deleted}, total={total}")

            return SyncDatasetResponse(
                id=mapping.id,
                status="success",
                synced_files=created,
                total_files=total,
                message=f"Sync completed: created {created} files, deleted {deleted} tasks"
            )
        except Exception as e:
            logger.error(f"Error while syncing dataset: {e}")
            return SyncDatasetResponse(
                id=mapping.id,
                status="error",
                synced_files=0,
                total_files=0,
                message=f"Sync failed: {str(e)}"
            )

    async def sync_files(
        self,
        mapping: DatasetMappingResponse,
        batch_size: int
    ) -> Dict[str, int]:
        """
        Reconcile the files of a DM dataset with the tasks of its Label Studio project.

        New DM files get a task; tasks whose file is no longer in DM are deleted.

        Args:
            mapping: Dataset mapping (dataset ID and labeling project ID).
            batch_size: Page size used when listing DM files.

        Returns:
            Sync statistics: {"created": int, "deleted": int, "total": int}

        Raises:
            NoDatasetInfoFoundError: If the DM client returns no dataset info.
        """
        logger.info(f"Syncing files for dataset {mapping.dataset_id} to project {mapping.labeling_project_id}")

        dataset_info = await self.dm_client.get_dataset(mapping.dataset_id)
        if not dataset_info:
            raise NoDatasetInfoFoundError(mapping.dataset_id)

        # "total" is the file count reported by DM, not a count of processed files.
        dm_file_total = dataset_info.fileCount
        logger.info(f"Total files in DM dataset: {dm_file_total}")

        # Snapshot of what Label Studio already holds, keyed by DM file ID.
        task_by_file = await self.get_existing_dm_file_mapping(mapping.labeling_project_id)
        known_file_ids = set(task_by_file.keys())
        logger.info(f"{len(known_file_ids)} tasks already exist in Label Studio")

        # Step 1: create tasks for files that are new in DM.
        dm_file_ids, n_created = await self._fetch_dm_files_paginated(
            mapping.dataset_id,
            batch_size,
            known_file_ids,
            mapping.labeling_project_id
        )

        # Step 2: drop tasks whose file disappeared from DM.
        n_deleted = await self._delete_orphaned_tasks(task_by_file, dm_file_ids)

        logger.info(f"File sync completed: total={dm_file_total}, created={n_created}, deleted={n_deleted}")

        summary = {"created": n_created}
        summary["deleted"] = n_deleted
        summary["total"] = dm_file_total
        return summary

    async def sync_annotations(
        self,
        mapping: DatasetMappingResponse,
        batch_size: int,
        priority: int
    ) -> Dict[str, int]:
        """
        Placeholder for annotation sync between DM and Label Studio.

        Args:
            mapping: Dataset mapping information.
            batch_size: Batch size (not used yet).
            priority: Annotation sync priority (0: dataset first, 1: annotation first).

        Returns:
            Sync statistics: {"synced_to_dm": int, "synced_to_ls": int};
            both are always 0 for now.
        """
        logger.info(f"Syncing annotations for dataset {mapping.dataset_id} (priority={priority})")

        # TODO: implement the annotation sync logic
        # 1. Fetch annotation results from DM
        # 2. Fetch annotation results from Label Studio
        # 3. Merge the results according to priority
        # 4. Write the differences back to DM and LS

        logger.info("Annotation sync not yet implemented")
        stats = {}
        stats["synced_to_dm"] = 0
        stats["synced_to_ls"] = 0
        return stats

    def _simplify_annotation_result(self, annotation: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], str]:
        """
        将Label Studio标注结果简化为指定格式

        Args:
            annotation: Label Studio原始标注数据

        Returns:
            Tuple of (简化后的标注结果列表, 标注更新时间ISO字符串)
        """
        simplified = []

        # 获取result字段（包含实际的标注数据）
        results = annotation.get("result", [])

        # 获取标注的更新时间，优先使用updated_at，否则使用created_at
        updated_at = annotation.get("updated_at") or annotation.get("created_at", "")

        for result_item in results:
            simplified_item = {
                "from_name": result_item.get("from_name", ""),
                "to_name": result_item.get("to_name", ""),
                "type": result_item.get("type", ""),
                "values": result_item.get("value", {})
            }
            simplified.append(simplified_item)

        return simplified, updated_at

    def _compare_timestamps(self, ts1: str, ts2: str) -> int:
        """
        比较两个ISO格式时间戳

        Args:
            ts1: 第一个时间戳
            ts2: 第二个时间戳

        Returns:
            1 如果 ts1 > ts2
            -1 如果 ts1 < ts2
            0 如果相等或无法比较
        """
        try:
            from dateutil import parser
            from datetime import timezone

            dt1 = parser.parse(ts1)
            dt2 = parser.parse(ts2)

            # Convert both to UTC timezone-aware if needed
            if dt1.tzinfo is None:
                dt1 = dt1.replace(tzinfo=timezone.utc)
            if dt2.tzinfo is None:
                dt2 = dt2.replace(tzinfo=timezone.utc)

            if dt1 > dt2:
                return 1
            elif dt1 < dt2:
                return -1
            else:
                return 0
        except Exception as e:
            logger.warning(f"Failed to compare timestamps {ts1} and {ts2}: {e}")
            return 0

    def _should_overwrite_dm(self, ls_updated_at: str, dm_tags_updated_at: Optional[str], overwrite: bool) -> bool:
        """
        判断是否应该用Label Studio的标注覆盖DataMate的标注

        Args:
            ls_updated_at: Label Studio标注的更新时间
            dm_tags_updated_at: DataMate中标注的更新时间（从tags_updated_at字段）
            overwrite: 是否允许覆盖

        Returns:
            True 如果应该覆盖，False 如果不应该覆盖
        """
        # 如果不允许覆盖，直接返回False
        if not overwrite:
            return False

        # 如果DataMate没有标注时间戳，允许覆盖
        if not dm_tags_updated_at:
            return True

        # 如果Label Studio的标注更新，允许覆盖
        return self._compare_timestamps(ls_updated_at, dm_tags_updated_at) > 0

    def _should_overwrite_ls(self, dm_tags_updated_at: Optional[str], ls_updated_at: str, overwrite_ls: bool) -> bool:
        """
        判断是否应该用DataMate的标注覆盖Label Studio的标注

        Args:
            dm_tags_updated_at: DataMate中标注的更新时间（从tags_updated_at字段）
            ls_updated_at: Label Studio标注的更新时间
            overwrite_ls: 是否允许覆盖Label Studio

        Returns:
            True 如果应该覆盖，False 如果不应该覆盖
        """
        # 如果不允许覆盖，直接返回False
        if not overwrite_ls:
            return False

        # 如果DataMate没有标注时间戳，不应该覆盖Label Studio
        if not dm_tags_updated_at:
            return False

        # 如果Label Studio没有标注，应该覆盖
        if not ls_updated_at:
            return True

        # 如果DataMate的标注更新，允许覆盖
        return self._compare_timestamps(dm_tags_updated_at, ls_updated_at) > 0

    async def sync_annotations_from_ls_to_dm(
        self,
        mapping: DatasetMappingResponse,
        batch_size: int = 50,
        overwrite: bool = True
    ) -> SyncAnnotationsResponse:
        """
        Sync annotations from Label Studio into the DataMate dataset.

        All tasks of the mapped project are fetched, then handled in slices of
        ``batch_size``. For each task that carries a ``file_id`` and has
        annotations, the latest annotation is simplified and, if
        ``_should_overwrite_dm`` allows it, written to the matching
        ``DatasetFiles`` row (``tags`` and ``tags_updated_at``) with one commit
        per file. A failure on one file is counted and rolled back without
        stopping the run.

        Args:
            mapping: Dataset mapping information.
            batch_size: Number of tasks handled per logged batch.
            overwrite: Whether DataMate annotations may be overwritten
                (subject to timestamp comparison).

        Returns:
            Sync result response. ``status`` is "success" when nothing failed,
            "partial" when some files failed but at least one was synced, and
            "error" otherwise (including an unexpected exception, in which case
            the counters collected so far are still reported).
        """
        logger.info(f"Syncing annotations from LS to DM: dataset={mapping.dataset_id}, project={mapping.labeling_project_id}")

        synced_count = 0
        skipped_count = 0
        failed_count = 0
        conflicts_resolved = 0

        try:
            # Fetch every task of the Label Studio project
            ls_tasks_result = await self.ls_client.get_project_tasks(
                mapping.labeling_project_id,
                page=None
            )

            if not ls_tasks_result:
                # Only the first 10 characters of the token are written to the log
                token_display = settings.label_studio_user_token[:10] + "..." if settings.label_studio_user_token else "None"
                error_msg = f"Failed to fetch tasks from Label Studio project {mapping.labeling_project_id}. Please check:\n" \
                           f"1. Label Studio is running at {settings.label_studio_base_url}\n" \
                           f"2. Project ID {mapping.labeling_project_id} exists\n" \
                           f"3. API token is valid: {token_display}"
                logger.error(error_msg)
                return SyncAnnotationsResponse(
                    id=mapping.id,
                    status="error",
                    synced_to_dm=0,
                    synced_to_ls=0,
                    skipped=0,
                    failed=0,
                    conflicts_resolved=0,
                    message=f"Failed to connect to Label Studio at {settings.label_studio_base_url}"
                )

            all_tasks = ls_tasks_result.get("tasks", [])
            logger.info(f"Found {len(all_tasks)} tasks in Label Studio project")

            # An empty project is reported as a successful no-op
            if len(all_tasks) == 0:
                logger.warning(f"No tasks found in Label Studio project {mapping.labeling_project_id}")
                return SyncAnnotationsResponse(
                    id=mapping.id,
                    status="success",
                    synced_to_dm=0,
                    synced_to_ls=0,
                    skipped=0,
                    failed=0,
                    conflicts_resolved=0,
                    message="No tasks found in Label Studio project"
                )

            # Process the tasks in batches
            for i in range(0, len(all_tasks), batch_size):
                batch_tasks = all_tasks[i:i + batch_size]
                logger.info(f"Processing batch {i // batch_size + 1}, {len(batch_tasks)} tasks")

                for task in batch_tasks:
                    task_id = task.get("id")
                    file_id = task.get("data", {}).get("file_id")

                    # Tasks without a file_id cannot be matched to a DM file
                    if not file_id:
                        logger.warning(f"Task {task_id} has no file_id, skipping")
                        skipped_count += 1
                        continue

                    # Fetch the annotations of this task
                    annotations = await self.ls_client.get_task_annotations(task_id)

                    if not annotations:
                        logger.debug(f"No annotations for task {task_id}, skipping")
                        skipped_count += 1
                        continue

                    # Simplify the annotation result (latest annotation only);
                    # "latest" is decided by comparing the timestamp strings
                    latest_annotation = max(annotations, key=lambda a: a.get("updated_at") or a.get("created_at", ""))
                    simplified_annotations, ls_updated_at = self._simplify_annotation_result(latest_annotation)

                    if not simplified_annotations:
                        logger.debug(f"Task {task_id} has no valid annotation results")
                        skipped_count += 1
                        continue

                    # Update the tags field in the database
                    try:
                        # Check that the file exists in this dataset and whether it already has annotations
                        result = await self.dm_client.db.execute(
                            select(DatasetFiles).where(
                                DatasetFiles.id == file_id,
                                DatasetFiles.dataset_id == mapping.dataset_id
                            )
                        )
                        file_record = result.scalar_one_or_none()

                        if not file_record:
                            logger.warning(f"File {file_id} not found in dataset {mapping.dataset_id}")
                            failed_count += 1
                            continue

                        # Decide whether the DataMate annotation may be overwritten
                        # (using the file-level tags_updated_at)
                        dm_tags_updated_at: Optional[str] = None
                        if file_record.tags_updated_at:  # type: ignore
                            dm_tags_updated_at = file_record.tags_updated_at.isoformat()  # type: ignore

                        if not self._should_overwrite_dm(ls_updated_at, dm_tags_updated_at, overwrite):
                            logger.debug(f"File {file_id}: DataMate has newer or equal annotations, skipping (overwrite={overwrite})")
                            skipped_count += 1
                            continue

                        # Count a resolved conflict when the file already had tags
                        # and the Label Studio annotation carries a timestamp
                        if file_record.tags and ls_updated_at:  # type: ignore
                            conflicts_resolved += 1
                            logger.debug(f"File {file_id}: Resolved conflict, Label Studio annotation is newer")

                        # Update tags and tags_updated_at; a trailing 'Z' is rewritten
                        # to '+00:00' before parsing with datetime.fromisoformat
                        from datetime import datetime
                        tags_updated_datetime = datetime.fromisoformat(ls_updated_at.replace('Z', '+00:00'))

                        await self.dm_client.db.execute(
                            update(DatasetFiles)
                            .where(DatasetFiles.id == file_id)
                            .values(
                                tags=simplified_annotations,
                                tags_updated_at=tags_updated_datetime
                            )
                        )
                        # Commit per file so one failure does not undo earlier files
                        await self.dm_client.db.commit()

                        synced_count += 1
                        logger.debug(f"Synced annotations for file {file_id}: {len(simplified_annotations)} results")

                    except Exception as e:
                        # Count the failure, roll back this file's work, and move on
                        logger.error(f"Failed to update annotations for file {file_id}: {e}")
                        failed_count += 1
                        await self.dm_client.db.rollback()

            logger.info(f"Annotation sync completed: synced={synced_count}, skipped={skipped_count}, failed={failed_count}, conflicts_resolved={conflicts_resolved}")

            # success: no failures; partial: failures but something synced; error: failures only
            status = "success" if failed_count == 0 else ("partial" if synced_count > 0 else "error")

            return SyncAnnotationsResponse(
                id=mapping.id,
                status=status,
                synced_to_dm=synced_count,
                synced_to_ls=0,
                skipped=skipped_count,
                failed=failed_count,
                conflicts_resolved=conflicts_resolved,
                message=f"Synced {synced_count} annotations from Label Studio to dataset. Skipped: {skipped_count}, Failed: {failed_count}, Conflicts resolved: {conflicts_resolved}"
            )

        except Exception as e:
            # Unexpected failure outside the per-file handler: report the counters gathered so far
            logger.error(f"Error while syncing annotations from LS to DM: {e}")
            return SyncAnnotationsResponse(
                id=mapping.id,
                status="error",
                synced_to_dm=synced_count,
                synced_to_ls=0,
                skipped=skipped_count,
                failed=failed_count,
                conflicts_resolved=conflicts_resolved,
                message=f"Sync failed: {str(e)}"
            )

    async def sync_annotations_from_dm_to_ls(
        self,
        mapping: DatasetMappingResponse,
        batch_size: int = 50,
        overwrite_ls: bool = True
    ) -> SyncAnnotationsResponse:
        """
        Sync annotations from the DataMate dataset into Label Studio.

        The file-ID -> task-ID map of the project is loaded first, then the DM
        files are paged through. For each file that has a task and non-empty
        ``tags``, the tags are converted to Label Studio result items and, if
        ``_should_overwrite_ls`` allows it, written either by updating the
        task's latest annotation or by creating a new one. A failure on one
        file is counted without stopping the run.

        Args:
            mapping: Dataset mapping information.
            batch_size: Page size used when listing DM files.
            overwrite_ls: Whether Label Studio annotations may be overwritten
                (subject to timestamp comparison).

        Returns:
            Sync result response. ``status`` is "success" when nothing failed,
            "partial" when some files failed but at least one was synced, and
            "error" otherwise (including an empty task map or an unexpected
            exception).
        """
        logger.info(f"Syncing annotations from DM to LS: dataset={mapping.dataset_id}, project={mapping.labeling_project_id}")

        synced_count = 0
        skipped_count = 0
        failed_count = 0
        conflicts_resolved = 0

        try:
            # Load the file-ID -> task-ID map from Label Studio
            dm_file_to_task_mapping = await self.get_existing_dm_file_mapping(mapping.labeling_project_id)

            # An empty map is reported as an error
            if not dm_file_to_task_mapping:
                logger.warning(f"No task mapping found for project {mapping.labeling_project_id}")
                return SyncAnnotationsResponse(
                    id=mapping.id,
                    status="error",
                    synced_to_dm=0,
                    synced_to_ls=0,
                    skipped=0,
                    failed=0,
                    conflicts_resolved=0,
                    message="No tasks found in Label Studio project"
                )

            logger.info(f"Found {len(dm_file_to_task_mapping)} task mappings")

            # Page through the files in DataMate
            page = 0
            # NOTE: incremented per file below but not read anywhere in this method
            processed_count = 0

            while True:
                files_response = await self.dm_client.get_dataset_files(
                    mapping.dataset_id,
                    page=page,
                    size=batch_size,
                )

                if not files_response or not files_response.content:
                    logger.info(f"No more files on page {page + 1}")
                    break

                logger.info(f"Processing page {page + 1}, {len(files_response.content)} files")

                for file_info in files_response.content:
                    file_id = str(file_info.id)
                    processed_count += 1

                    # Check whether this file has a corresponding task in Label Studio
                    task_id = dm_file_to_task_mapping.get(file_id)
                    if not task_id:
                        logger.debug(f"File {file_id} has no corresponding task in Label Studio, skipping")
                        skipped_count += 1
                        continue

                    # Read the annotations stored in DataMate
                    dm_tags: List[Dict[str, Any]] = file_info.tags if file_info.tags else []  # type: ignore

                    if not dm_tags:
                        logger.debug(f"File {file_id} has no annotations in DataMate, skipping")
                        skipped_count += 1
                        continue

                    # Read the update time of the DataMate annotations
                    dm_tags_updated_at: Optional[str] = None
                    if file_info.tags_updated_at:  # type: ignore
                        dm_tags_updated_at = file_info.tags_updated_at.isoformat()  # type: ignore

                    try:
                        # Fetch the annotations that already exist for this task in Label Studio
                        ls_annotations = await self.ls_client.get_task_annotations(task_id)

                        # Determine the update time of the latest Label Studio annotation;
                        # "latest" is decided by comparing the timestamp strings
                        ls_updated_at = ""
                        if ls_annotations:
                            latest_ls_annotation = max(
                                ls_annotations,
                                key=lambda a: a.get("updated_at") or a.get("created_at", "")
                            )
                            ls_updated_at = latest_ls_annotation.get("updated_at") or latest_ls_annotation.get("created_at", "")

                        # Decide whether the Label Studio annotation may be overwritten
                        if not self._should_overwrite_ls(dm_tags_updated_at, ls_updated_at, overwrite_ls):
                            logger.debug(f"Task {task_id}: Label Studio has newer or equal annotations, skipping (overwrite_ls={overwrite_ls})")
                            skipped_count += 1
                            continue

                        # Both sides have annotations: count it as a resolved conflict
                        if ls_annotations and dm_tags:
                            conflicts_resolved += 1
                            logger.debug(f"Task {task_id}: Resolved conflict, DataMate annotation is newer")

                        # Convert the DataMate tags to Label Studio result items
                        # (the stored "values" key maps back to Label Studio's "value")
                        ls_result = []
                        for tag in dm_tags:
                            ls_result_item = {
                                "from_name": tag.get("from_name", ""),
                                "to_name": tag.get("to_name", ""),
                                "type": tag.get("type", ""),
                                "value": tag.get("values", {})
                            }
                            ls_result.append(ls_result_item)

                        # Update the existing annotation if there is one; otherwise create a new one
                        if ls_annotations:
                            # Update the latest annotation (latest_ls_annotation is bound
                            # above whenever ls_annotations is non-empty)
                            latest_annotation_id = latest_ls_annotation.get("id")
                            if not latest_annotation_id:
                                logger.error(f"Task {task_id} has no annotation ID")
                                failed_count += 1
                                continue

                            update_result = await self.ls_client.update_annotation(
                                int(latest_annotation_id),
                                ls_result
                            )
                            if update_result:
                                synced_count += 1
                                logger.debug(f"Updated annotation for task {task_id}")
                            else:
                                failed_count += 1
                                logger.error(f"Failed to update annotation for task {task_id}")
                        else:
                            # Create a new annotation
                            create_result = await self.ls_client.create_annotation(
                                task_id,
                                ls_result
                            )
                            if create_result:
                                synced_count += 1
                                logger.debug(f"Created annotation for task {task_id}")
                            else:
                                failed_count += 1
                                logger.error(f"Failed to create annotation for task {task_id}")

                    except Exception as e:
                        # Count the failure and continue with the next file
                        logger.error(f"Failed to sync annotations for file {file_id} (task {task_id}): {e}")
                        failed_count += 1

                # Check whether more pages remain (pages are zero-based)
                if page >= files_response.totalPages - 1:
                    break
                page += 1

            logger.info(f"Annotation sync completed: synced={synced_count}, skipped={skipped_count}, failed={failed_count}, conflicts_resolved={conflicts_resolved}")

            # success: no failures; partial: failures but something synced; error: failures only
            status = "success" if failed_count == 0 else ("partial" if synced_count > 0 else "error")

            return SyncAnnotationsResponse(
                id=mapping.id,
                status=status,
                synced_to_dm=0,
                synced_to_ls=synced_count,
                skipped=skipped_count,
                failed=failed_count,
                conflicts_resolved=conflicts_resolved,
                message=f"Synced {synced_count} annotations from DataMate to Label Studio. Skipped: {skipped_count}, Failed: {failed_count}, Conflicts resolved: {conflicts_resolved}"
            )

        except Exception as e:
            # Unexpected failure outside the per-file handler: report the counters gathered so far
            logger.error(f"Error while syncing annotations from DM to LS: {e}")
            return SyncAnnotationsResponse(
                id=mapping.id,
                status="error",
                synced_to_dm=0,
                synced_to_ls=synced_count,
                skipped=skipped_count,
                failed=failed_count,
                conflicts_resolved=conflicts_resolved,
                message=f"Sync failed: {str(e)}"
            )

    async def sync_annotations_bidirectional(
        self,
        mapping: DatasetMappingResponse,
        batch_size: int = 50,
        overwrite: bool = True,
        overwrite_ls: bool = True
    ) -> SyncAnnotationsResponse:
        """
        Synchronize annotations in both directions.

        Runs the Label Studio -> DataMate sync first and the DataMate ->
        Label Studio sync second, then merges the two results into a single
        response.

        Args:
            mapping: Dataset mapping describing the dataset/project pair.
            batch_size: Batch size forwarded to both one-way syncs.
            overwrite: Whether annotations in DataMate may be overwritten.
            overwrite_ls: Whether annotations in Label Studio may be overwritten.

        Returns:
            Combined sync response. ``status`` is ``"error"`` when both
            directions reported an error, ``"partial"`` when only one
            direction reported an error or any item failed, and
            ``"success"`` otherwise. Unexpected exceptions are caught and
            reported as an ``"error"`` response with zeroed counters.
        """
        logger.info(f"Bidirectional annotation sync: dataset={mapping.dataset_id}, project={mapping.labeling_project_id}")

        try:
            # Step 1: Label Studio -> DataMate
            ls_to_dm_result = await self.sync_annotations_from_ls_to_dm(
                mapping,
                batch_size,
                overwrite
            )

            # Step 2: DataMate -> Label Studio
            dm_to_ls_result = await self.sync_annotations_from_dm_to_ls(
                mapping,
                batch_size,
                overwrite_ls
            )

            # Merge the per-direction counters
            total_synced_to_dm = ls_to_dm_result.synced_to_dm
            total_synced_to_ls = dm_to_ls_result.synced_to_ls
            total_skipped = ls_to_dm_result.skipped + dm_to_ls_result.skipped
            total_failed = ls_to_dm_result.failed + dm_to_ls_result.failed
            total_conflicts = ls_to_dm_result.conflicts_resolved + dm_to_ls_result.conflicts_resolved

            # Derive the overall status. A one-way sync can report "error"
            # with failed == 0 (it aborted before processing any item), so
            # the failure counter alone is not enough: a single errored
            # direction must downgrade the overall status to "partial"
            # rather than being reported as "success".
            direction_statuses = (ls_to_dm_result.status, dm_to_ls_result.status)
            if all(direction == "error" for direction in direction_statuses):
                status = "error"
            elif total_failed > 0 or "error" in direction_statuses:
                status = "partial"
            else:
                status = "success"

            logger.info(f"Bidirectional sync completed: to_dm={total_synced_to_dm}, to_ls={total_synced_to_ls}, skipped={total_skipped}, failed={total_failed}, conflicts={total_conflicts}")

            return SyncAnnotationsResponse(
                id=mapping.id,
                status=status,
                synced_to_dm=total_synced_to_dm,
                synced_to_ls=total_synced_to_ls,
                skipped=total_skipped,
                failed=total_failed,
                conflicts_resolved=total_conflicts,
                message=f"Bidirectional sync completed: {total_synced_to_dm} to DataMate, {total_synced_to_ls} to Label Studio. Skipped: {total_skipped}, Failed: {total_failed}, Conflicts resolved: {total_conflicts}"
            )

        except Exception as e:
            # Boundary handler: report the failure to the caller as a
            # response object instead of propagating the exception.
            logger.error(f"Error during bidirectional sync: {e}")
            return SyncAnnotationsResponse(
                id=mapping.id,
                status="error",
                synced_to_dm=0,
                synced_to_ls=0,
                skipped=0,
                failed=0,
                conflicts_resolved=0,
                message=f"Bidirectional sync failed: {str(e)}"
            )

    async def get_sync_status(
        self,
        dataset_id: str
    ) -> Optional[Dict[str, Any]]:
        """
        Report the sync status of a dataset against its labeling project.

        Args:
            dataset_id: Identifier used to look up the dataset mapping and
                the dataset itself.

        Returns:
            ``None`` when no mapping is found for ``dataset_id``; otherwise a
            dict with the mapping id, the dataset id, the labeling project
            id, the dataset's file count, the project's task count, and the
            ratio of tasks to files (0 when it cannot be computed).
        """
        mapping = await self.mapping_service.get_mapping_by_source_uuid(dataset_id)
        if not mapping:
            return None

        # Dataset details from the dataset management side
        dataset = await self.dm_client.get_dataset(dataset_id)

        # Task listing for the mapped labeling project
        tasks = await self.ls_client.get_project_tasks(mapping.labeling_project_id)

        file_total = dataset.fileCount if dataset else 0
        task_total = tasks.get("count", 0) if tasks else 0

        # The ratio is only meaningful with a positive file count and a
        # non-empty task payload; otherwise fall back to 0.
        if dataset and dataset.fileCount > 0 and tasks:
            ratio = task_total / file_total
        else:
            ratio = 0

        status: Dict[str, Any] = {}
        status["id"] = mapping.id
        status["dataset_id"] = dataset_id
        status["labeling_project_id"] = mapping.labeling_project_id
        status["dm_total_files"] = file_total
        status["ls_total_tasks"] = task_total
        status["sync_ratio"] = ratio
        return status