refactor: Reorganize datamate-python (#34)

refactor: Reorganize datamate-python (previously label-studio-adapter) into a DDD-style structure.
This commit is contained in:
Jason Wang
2025-10-30 01:32:59 +08:00
committed by GitHub
parent 0614157c0b
commit 2f7341dc1f
79 changed files with 1077 additions and 1577 deletions

View File

@@ -0,0 +1,50 @@
services:
app:
stdin_open: true
tty: true
image: heartexlabs/label-studio:latest
restart: unless-stopped
user: root
expose:
- "8000"
ports:
- "8000:8000"
depends_on:
- db
environment:
- DJANGO_DB=default
- POSTGRE_NAME=postgres
- POSTGRE_USER=postgres
- POSTGRE_PASSWORD=
- POSTGRE_PORT=5432
- POSTGRE_HOST=db
- LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
- LOCAL_FILES_SERVING_ENABLED=true
- LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local
- USE_USERNAME_FOR_LOGIN=true
- LABEL_STUDIO_USERNAME=admin@huawei.com
- LABEL_STUDIO_PASSWORD=admin1234
- LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true
- LABEL_STUDIO_USER_TOKEN=abc123abc123
- LOG_LEVEL=INFO
volumes:
- label-studio-data:/label-studio/data:rw
- dataset_volume:/label-studio/local:rw
command: label-studio-uwsgi
db:
image: pgautoupgrade/pgautoupgrade:13-alpine
hostname: db
restart: unless-stopped
environment:
- POSTGRES_HOST_AUTH_METHOD=trust
- POSTGRES_USER=postgres
volumes:
- label-studio-db:/var/lib/postgresql/data
volumes:
label-studio-data:
label-studio-db:
dataset_volume:
name: datamate-dataset-volume
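As a quick smoke test of this stack, the legacy token configured above can be exercised against the Label Studio REST API. A minimal sketch, assuming the compose file is running on localhost with the values shown (both are deployment-specific):

```python
import httpx

# Values taken from the compose file above; adjust for your deployment.
BASE_URL = "http://localhost:8000"
TOKEN = "abc123abc123"  # LABEL_STUDIO_USER_TOKEN

resp = httpx.get(
    f"{BASE_URL}/api/projects",
    headers={"Authorization": f"Token {TOKEN}"},  # legacy token auth
    timeout=10.0,
)
resp.raise_for_status()
print(resp.json())  # project listing; empty on a fresh instance
```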

View File

@@ -66,37 +66,6 @@ MYSQL_USER=label_studio_user
MYSQL_PASSWORD=user_password
MYSQL_DATABASE=label_studio_adapter
# =========================
# Label Studio database configuration (PostgreSQL)
# =========================
# Only required when starting Label Studio via docker-compose.label-studio.yml
POSTGRES_HOST=label-studio-db
POSTGRES_PORT=5432
POSTGRES_USER=labelstudio
POSTGRES_PASSWORD=labelstudio@4321
POSTGRES_DATABASE=labelstudio
# =========================
# SQLite database configuration (fallback option)
# =========================
# Priority 3: SQLite is used when neither MySQL nor PostgreSQL is configured
SQLITE_PATH=./data/labelstudio_adapter.db
# =========================
# Optional: specify the database URL directly
# =========================
# If set, this overrides the MySQL/PostgreSQL/SQLite settings above
# DATABASE_URL=postgresql+asyncpg://user:password@host:port/database
# =========================
# Security configuration
# =========================
# Secret key (be sure to change it in production)
SECRET_KEY=your-secret-key-change-this-in-production
# Token expiry time (minutes)
ACCESS_TOKEN_EXPIRE_MINUTES=30
# =========================
# CORS configuration
# =========================
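The priority scheme above (explicit `DATABASE_URL`, then MySQL, then PostgreSQL, then the SQLite fallback) could be resolved roughly as in this sketch; the helper name is illustrative and the driver prefixes are assumptions, except the asyncpg one, which matches the commented `DATABASE_URL` example:

```python
import os

def resolve_database_url() -> str:
    # Sketch of the documented priority: DATABASE_URL overrides everything,
    # then MySQL, then PostgreSQL, then the SQLite fallback.
    if url := os.getenv("DATABASE_URL"):
        return url
    if os.getenv("MYSQL_HOST"):
        return (
            f"mysql+aiomysql://{os.getenv('MYSQL_USER')}:{os.getenv('MYSQL_PASSWORD')}"
            f"@{os.getenv('MYSQL_HOST')}:{os.getenv('MYSQL_PORT', '3306')}/{os.getenv('MYSQL_DATABASE')}"
        )
    if os.getenv("POSTGRES_HOST"):
        return (
            f"postgresql+asyncpg://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}"
            f"@{os.getenv('POSTGRES_HOST')}:{os.getenv('POSTGRES_PORT', '5432')}/{os.getenv('POSTGRES_DATABASE')}"
        )
    return f"sqlite+aiosqlite:///{os.getenv('SQLITE_PATH', './data/labelstudio_adapter.db')}"
```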

View File

@@ -1 +0,0 @@
# app/__init__.py

View File

@@ -1,19 +0,0 @@
"""
API routing module
Centralizes the organization of all API routes
"""
from fastapi import APIRouter
from .system import router as system_router
from .project import project_router
# Create the main API router
api_router = APIRouter()
# Register sub-routers on the main router
api_router.include_router(system_router, tags=["System"])
api_router.include_router(project_router, prefix="/project", tags=["Project"])
# Export the router for use in main.py
__all__ = ["api_router"]

View File

@@ -1,11 +0,0 @@
"""
API routes for labeling projects
"""
from fastapi import APIRouter
project_router = APIRouter()
from . import create
from . import sync
from . import list
from . import delete

View File

@@ -1,130 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.infrastructure import DatamateClient, LabelStudioClient
from app.schemas.dataset_mapping import (
DatasetMappingCreateRequest,
DatasetMappingCreateResponse,
)
from app.schemas import StandardResponse
from app.core.logging import get_logger
from app.core.config import settings
from . import project_router
logger = get_logger(__name__)
@project_router.post("/create", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
async def create_dataset_mapping(
request: DatasetMappingCreateRequest,
db: AsyncSession = Depends(get_db)
):
"""
Create a dataset mapping
Based on the specified dataset in the DM service, create the corresponding dataset in Label Studio,
record the association in the database, and return the Label Studio dataset ID.
Note: one dataset can have multiple labeling projects.
"""
try:
dm_client = DatamateClient(db)
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
logger.info(f"Create dataset mapping request: {request.dataset_id}")
# Fetch dataset info from the DM service
dataset_info = await dm_client.get_dataset(request.dataset_id)
if not dataset_info:
raise HTTPException(
status_code=404,
detail=f"Dataset not found in DM service: {request.dataset_id}"
)
# Determine the data type (based on the dataset type)
data_type = "image" # default value
if dataset_info.type and dataset_info.type.code:
type_code = dataset_info.type.code.lower()
if "audio" in type_code:
data_type = "audio"
elif "video" in type_code:
data_type = "video"
elif "text" in type_code:
data_type = "text"
project_name = f"{dataset_info.name}"
# Create the project in Label Studio
project_data = await ls_client.create_project(
title=project_name,
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
data_type=data_type
)
if not project_data:
raise HTTPException(
status_code=500,
detail="Fail to create Label Studio project."
)
project_id = project_data["id"]
# Configure local storage: dataset/<id>
local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}"
storage_result = await ls_client.create_local_storage(
project_id=project_id,
path=local_storage_path,
title="Dataset_BLOB",
use_blob_urls=True,
description=f"Local storage for dataset {dataset_info.name}"
)
# Configure local storage: upload
local_storage_path = f"{settings.label_studio_local_storage_upload_base_path}"
storage_result = await ls_client.create_local_storage(
project_id=project_id,
path=local_storage_path,
title="Upload_BLOB",
use_blob_urls=True,
description=f"Local storage for dataset {dataset_info.name}"
)
if not storage_result:
# Local storage configuration failed; log a warning but do not abort
logger.warning(f"Failed to configure local storage for project {project_id}")
else:
logger.info(f"Local storage configured for project {project_id}: {local_storage_path}")
# Create the mapping record, including the project name
mapping = await service.create_mapping(
request,
str(project_id),
project_name
)
logger.debug(
f"Dataset mapping created: {mapping.mapping_id} -> S {mapping.dataset_id} <> L {mapping.labelling_project_id}"
)
response_data = DatasetMappingCreateResponse(
mapping_id=mapping.mapping_id,
labelling_project_id=mapping.labelling_project_id,
labelling_project_name=mapping.labelling_project_name or project_name,
message="Dataset mapping created successfully"
)
return StandardResponse(
code=201,
message="success",
data=response_data
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error while creating dataset mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
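A hypothetical client call against the endpoint above as it behaved before the reorganization; host, port, and the dataset UUID are placeholders, and the body sketch assumes `dataset_id` is the only required field of `DatasetMappingCreateRequest`:

```python
import httpx

# Placeholder host/port and dataset_id; adjust to your deployment.
resp = httpx.post(
    "http://localhost:8000/api/project/create",
    json={"dataset_id": "3f2b1c9e-1234-5678-9abc-def012345678"},
)
print(resp.status_code)  # 201 on success
body = resp.json()       # StandardResponse envelope: code/message/data
print(body["data"]["labelling_project_id"])
```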

View File

@@ -1,106 +0,0 @@
from fastapi import Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.infrastructure import DatamateClient, LabelStudioClient
from app.schemas.dataset_mapping import DeleteDatasetResponse
from app.schemas import StandardResponse
from app.core.logging import get_logger
from app.core.config import settings
from . import project_router
logger = get_logger(__name__)
@project_router.delete("/mappings", response_model=StandardResponse[DeleteDatasetResponse])
async def delete_mapping(
m: Optional[str] = Query(None, description="Mapping UUID"),
proj: Optional[str] = Query(None, description="Label Studio project ID"),
db: AsyncSession = Depends(get_db)
):
"""
Delete a mapping and its corresponding Label Studio project
The mapping to delete can be specified in any of the following ways:
- m: mapping UUID
- proj: Label Studio project ID
- both (m takes precedence)
This operation will:
1. Delete the project in Label Studio
2. Soft-delete the mapping record in the database
"""
try:
# At least one parameter is required
if not m and not proj:
raise HTTPException(
status_code=400,
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
)
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
# Prefer lookup by mapping UUID
if m:
logger.debug(f"Deleting by mapping UUID: {m}")
mapping = await service.get_mapping_by_uuid(m)
# If m is not provided, look up by proj
elif proj:
logger.debug(f"Deleting by project ID: {proj}")
mapping = await service.get_mapping_by_labelling_project_id(proj)
else:
mapping = None
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping either not found or not specified."
)
mapping_id = mapping.mapping_id
labelling_project_id = mapping.labelling_project_id
labelling_project_name = mapping.labelling_project_name
logger.debug(f"Found mapping: {mapping_id}, Label Studio project ID: {labelling_project_id}")
# 1. Delete the Label Studio project
try:
delete_success = await ls_client.delete_project(int(labelling_project_id))
if delete_success:
logger.debug(f"Successfully deleted Label Studio project: {labelling_project_id}")
else:
logger.warning(f"Failed to delete Label Studio project or project not found: {labelling_project_id}")
except Exception as e:
logger.error(f"Error deleting Label Studio project: {e}")
# Continue: delete the mapping record even if the Label Studio project deletion failed
# 2. Soft-delete the mapping record
soft_delete_success = await service.soft_delete_mapping(mapping_id)
if not soft_delete_success:
raise HTTPException(
status_code=500,
detail="Failed to delete mapping record"
)
logger.info(f"Successfully deleted mapping: {mapping_id}, Label Studio project: {labelling_project_id}")
return StandardResponse(
code=200,
message="success",
data=DeleteDatasetResponse(
mapping_id=mapping_id,
status="success",
message=f"Successfully deleted mapping and Label Studio project '{labelling_project_name}'"
)
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error deleting mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
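And the corresponding delete, again with placeholder values; `m` wins when both query parameters are supplied:

```python
import httpx

# Delete by Label Studio project ID; passing m=<mapping-uuid> instead
# (or as well) makes the lookup go by mapping UUID first.
resp = httpx.delete(
    "http://localhost:8000/api/project/mappings",
    params={"proj": "42"},
)
print(resp.json()["data"]["status"])  # "success" once the mapping is gone
```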

View File

@@ -1,152 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List
import math
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.schemas.dataset_mapping import DatasetMappingResponse
from app.schemas.common import StandardResponse, PaginatedData
from app.core.logging import get_logger
from . import project_router
logger = get_logger(__name__)
@project_router.get("/mappings/list", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def list_mappings(
page: int = Query(1, ge=1, description="Page number (1-based)"),
page_size: int = Query(20, ge=1, le=100, description="Records per page"),
db: AsyncSession = Depends(get_db)
):
"""
List all mappings (paginated)
Returns all active dataset mappings (those not soft-deleted), with pagination support
"""
try:
service = DatasetMappingService(db)
# Compute skip
skip = (page - 1) * page_size
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
# Fetch records and total count
mappings, total = await service.get_all_mappings_with_count(
skip=skip,
limit=page_size
)
# Compute total pages
total_pages = math.ceil(total / page_size) if total > 0 else 0
# Build the paginated response
paginated_data = PaginatedData(
page=page,
size=page_size,
total_elements=total,
total_pages=total_pages,
content=mappings
)
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
return StandardResponse(
code=200,
message="success",
data=paginated_data
)
except Exception as e:
logger.error(f"Error listing mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@project_router.get("/mappings/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse])
async def get_mapping(
mapping_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Look up a single mapping by UUID
"""
try:
service = DatasetMappingService(db)
logger.info(f"Get mapping: {mapping_id}")
mapping = await service.get_mapping_by_uuid(mapping_id)
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping not found: {mapping_id}"
)
logger.info(f"Found mapping: {mapping.mapping_id}")
return StandardResponse(
code=200,
message="success",
data=mapping
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@project_router.get("/mappings/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def get_mappings_by_source(
dataset_id: str,
page: int = Query(1, ge=1, description="Page number (1-based)"),
page_size: int = Query(20, ge=1, le=100, description="Records per page"),
db: AsyncSession = Depends(get_db)
):
"""
List all mappings for a source dataset ID (paginated)
Returns all labeling projects created from this dataset (excluding deleted ones), with pagination support
"""
try:
service = DatasetMappingService(db)
# Compute skip
skip = (page - 1) * page_size
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}")
# Fetch records and total count
mappings, total = await service.get_mappings_by_source_with_count(
dataset_id=dataset_id,
skip=skip,
limit=page_size
)
# Compute total pages
total_pages = math.ceil(total / page_size) if total > 0 else 0
# Build the paginated response
paginated_data = PaginatedData(
page=page,
size=page_size,
total_elements=total,
total_pages=total_pages,
content=mappings
)
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
return StandardResponse(
code=200,
message="success",
data=paginated_data
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
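The pagination arithmetic shared by both list endpoints is the standard offset/limit mapping; a minimal standalone sketch:

```python
import math

def to_offset(page: int, page_size: int) -> int:
    # page is 1-based, so page 1 starts at offset 0
    return (page - 1) * page_size

def total_pages(total: int, page_size: int) -> int:
    # 0 pages for an empty result set, ceil(total / page_size) otherwise
    return math.ceil(total / page_size) if total > 0 else 0

assert to_offset(page=3, page_size=20) == 40
assert total_pages(total=41, page_size=20) == 3
```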

View File

@@ -1 +1,7 @@
# app/core/__init__.py
# app/core/__init__.py
"""
Core module
"""

View File

@@ -1,5 +1,5 @@
from pydantic_settings import BaseSettings
from typing import Optional
from typing import Optional, List
import os
from pathlib import Path
@@ -24,9 +24,9 @@ class Settings(BaseSettings):
port: int = 8000
# CORS configuration
allowed_origins: list = ["*"]
allowed_methods: list = ["*"]
allowed_headers: list = ["*"]
allowed_origins: List[str] = ["*"]
allowed_methods: List[str] = ["*"]
allowed_headers: List[str] = ["*"]
# MySQL database configuration (priority 1)
mysql_host: Optional[str] = None
@@ -49,11 +49,7 @@ class Settings(BaseSettings):
database_url: Optional[str] = None
# Logging configuration
log_level: str = "INFO"
# Security configuration
secret_key: str = "your-secret-key-change-this-in-production"
access_token_expire_minutes: int = 30
log_level: str = "DEBUG"
# =========================
# Label Studio service configuration
@@ -63,8 +59,7 @@ class Settings(BaseSettings):
label_studio_password: Optional[str] = None # Label Studio password (used for login)
label_studio_user_token: Optional[str] = None # Legacy Token
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files/dataset" # Base path for local storage inside the Label Studio container
label_studio_local_storage_upload_base_path: str = "/label-studio/local_files/upload" # Base path for local storage inside the Label Studio container
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files" # Base path for local storage inside the Label Studio container
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Path prefix for Label Studio's local file serving
ls_task_page_size: int = 1000
@@ -73,7 +68,7 @@ class Settings(BaseSettings):
# =========================
# Data Management service configuration
# =========================
dm_file_path_prefix: str = "/" # DM storage folder prefix
dm_file_path_prefix: str = "/dataset" # DM storage folder prefix
@property
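For readers unfamiliar with pydantic-settings, fields on a `BaseSettings` subclass like the one above are overridable by environment variables of the same (case-insensitive) name; a minimal sketch with illustrative field names:

```python
import os
from pydantic_settings import BaseSettings

class DemoSettings(BaseSettings):
    # Each field can be overridden by an environment variable of the
    # same case-insensitive name, e.g. LOG_LEVEL=DEBUG.
    log_level: str = "INFO"
    ls_task_page_size: int = 1000

os.environ["LOG_LEVEL"] = "DEBUG"
print(DemoSettings().log_level)  # "DEBUG"
```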

View File

@@ -11,7 +11,7 @@ def setup_logging():
log_dir.mkdir(exist_ok=True)
# Configure the log format
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
log_format = "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s"
date_format = "%Y-%m-%d %H:%M:%S"
# Create handlers
@@ -44,9 +44,10 @@ def setup_logging():
root_logger.addHandler(error_handler)
# Set third-party library log levels (reduce verbose logging)
logging.getLogger("uvicorn").setLevel(logging.WARNING)
logging.getLogger("uvicorn").setLevel(logging.ERROR)
logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR) # Hide SQL query logs
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("httpcore").setLevel(logging.ERROR)
def get_logger(name: str) -> logging.Logger:
"""获取指定名称的日志器"""

View File

@@ -1 +0,0 @@
# app/db/__init__.py

View File

@@ -0,0 +1,28 @@
from .dataset_management import (
Dataset,
DatasetTag,
DatasetFiles,
DatasetStatistics,
Tag
)
from .user_management import (
User
)
from .annotation_management import (
AnnotationTemplate,
LabelingProject
)
__all__ = [
"Dataset",
"DatasetTag",
"DatasetFiles",
"DatasetStatistics",
"Tag",
"User",
"AnnotationTemplate",
"LabelingProject",
]

View File

@@ -0,0 +1,51 @@
"""
Tables of Annotation Management Module
"""
import uuid
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date
from sqlalchemy.sql import func
from app.db.session import Base
class AnnotationTemplate(Base):
"""标注模板模型"""
__tablename__ = "t_dm_annotation_templates"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
name = Column(String(32), nullable=False, comment="模板名称")
description = Column(String(255), nullable=True, comment="模板描述")
configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<AnnotationTemplate(id={self.id}, name={self.name})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
class LabelingProject(Base):
"""标注工程表"""
__tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
name = Column(String(32), nullable=False, comment="项目名称")
labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
configuration = Column(JSON, nullable=True, comment="标签配置")
progress = Column(JSON, nullable=True, comment="标注进度统计")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<LabelingProject(id={self.id}, dataset_id={self.dataset_id}, name={self.name})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
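Soft deletion here is purely convention: rows stay in the table and `deleted_at` is stamped. Queries therefore have to filter explicitly, along these lines (a sketch against the models above, not code from this commit):

```python
from sqlalchemy import select, update
from sqlalchemy.sql import func

# Fetch only rows that have not been soft-deleted.
active_projects = select(LabelingProject).where(LabelingProject.deleted_at.is_(None))

# "Deleting" a project just stamps deleted_at; the row is kept.
soft_delete = (
    update(LabelingProject)
    .where(LabelingProject.id == "some-uuid")  # placeholder ID
    .values(deleted_at=func.current_timestamp())
)
```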

View File

@@ -0,0 +1,113 @@
"""
Tables of Dataset Management Module
"""
import uuid
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date
from sqlalchemy.sql import func
from app.db.session import Base
class Dataset(Base):
"""数据集模型(支持医学影像、文本、问答等多种类型)"""
__tablename__ = "t_dm_datasets"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(255), nullable=False, comment="数据集名称")
description = Column(Text, nullable=True, comment="数据集描述")
dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER")
category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等")
path = Column(String(500), nullable=True, comment="数据存储路径")
format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等")
schema_info = Column(JSON, nullable=True, comment="数据结构信息")
size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)")
file_count = Column(BigInteger, default=0, comment="文件数量")
record_count = Column(BigInteger, default=0, comment="记录数量")
retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)")
tags = Column(JSON, nullable=True, comment="标签列表")
dataset_metadata = Column("metadata", JSON, nullable=True, comment="元数据信息")
status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED")
is_public = Column(Boolean, default=False, comment="是否公开")
is_featured = Column(Boolean, default=False, comment="是否推荐")
version = Column(BigInteger, nullable=False, default=0, comment="版本号")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<Dataset(id={self.id}, name={self.name}, type={self.dataset_type})>"
class DatasetTag(Base):
"""数据集标签关联模型"""
__tablename__ = "t_dm_dataset_tags"
dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)")
tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
def __repr__(self):
return f"<DatasetTag(dataset_id={self.dataset_id}, tag_id={self.tag_id})>"
class DatasetFiles(Base):
"""DM数据集文件模型"""
__tablename__ = "t_dm_dataset_files"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)")
file_name = Column(String(255), nullable=False, comment="文件名")
file_path = Column(String(1000), nullable=False, comment="文件路径")
file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等")
file_size = Column(BigInteger, default=0, comment="文件大小(字节)")
check_sum = Column(String(64), nullable=True, comment="文件校验和")
tags = Column(JSON, nullable=True, comment="文件标签信息")
dataset_filemetadata = Column("metadata", JSON, nullable=True, comment="文件元数据")
status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING")
upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间")
last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<DatasetFiles(id={self.id}, dataset_id={self.dataset_id}, file_name={self.file_name})>"
class DatasetStatistics(Base):
"""数据集统计信息模型"""
__tablename__ = "t_dm_dataset_statistics"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)")
stat_date = Column(Date, nullable=False, comment="统计日期")
total_files = Column(BigInteger, default=0, comment="总文件数")
total_size = Column(BigInteger, default=0, comment="总大小(字节)")
processed_files = Column(BigInteger, default=0, comment="已处理文件数")
error_files = Column(BigInteger, default=0, comment="错误文件数")
download_count = Column(BigInteger, default=0, comment="下载次数")
view_count = Column(BigInteger, default=0, comment="查看次数")
quality_metrics = Column(JSON, nullable=True, comment="质量指标")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<DatasetStatistics(id={self.id}, dataset_id={self.dataset_id}, date={self.stat_date})>"
class Tag(Base):
"""标签集合模型"""
__tablename__ = "t_dm_tags"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(100), nullable=False, unique=True, comment="标签名称")
description = Column(Text, nullable=True, comment="标签描述")
category = Column(String(50), nullable=True, comment="标签分类")
color = Column(String(7), nullable=True, comment="标签颜色(十六进制)")
usage_count = Column(BigInteger, default=0, comment="使用次数")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<Tag(id={self.id}, name={self.name}, category={self.category})>"

View File

@@ -1,6 +1,11 @@
"""
Tables of User Management Module
"""
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
from app.db.session import Base
class User(Base):
"""用户模型"""

View File

@@ -8,8 +8,7 @@ logger = get_logger(__name__)
# Fetch database configuration info
db_info = settings.get_database_info()
logger.info(f"Using database: {db_info['type']}")
logger.info(f"Connection URL: {db_info['url']}")
logger.info(f"Using database: {db_info['type']} || Connection URL: {db_info['url']}")
# Create the database engine
engine = create_async_engine(

View File

@@ -0,0 +1,97 @@
"""
Global custom exception class definitions
"""
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from fastapi import FastAPI, Request, HTTPException, status
from .core.logging import setup_logging, get_logger
logger = get_logger(__name__)
# Custom exception handler: StarletteHTTPException (including 404, etc.)
async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException):
"""Convert Starlette's HTTPException into the standard response format"""
return JSONResponse(
status_code=exc.status_code,
content={
"code": exc.status_code,
"message": "error",
"data": {
"detail": exc.detail
}
}
)
# Custom exception handler: FastAPI HTTPException
async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
"""Convert FastAPI's HTTPException into the standard response format"""
return JSONResponse(
status_code=exc.status_code,
content={
"code": exc.status_code,
"message": "error",
"data": {
"detail": exc.detail
}
}
)
# Custom exception handler: RequestValidationError
async def validation_exception_handler(request: Request, exc: RequestValidationError):
"""Convert request validation errors into the standard response format"""
return JSONResponse(
status_code=422,
content={
"code": 422,
"message": "error",
"data": {
"detail": "Validation error",
"errors": exc.errors()
}
}
)
# Custom exception handler: uncaught exceptions
async def general_exception_handler(request: Request, exc: Exception):
"""Convert uncaught exceptions into the standard response format"""
logger.error(f"Unhandled exception: {exc}", exc_info=True)
return JSONResponse(
status_code=500,
content={
"code": 500,
"message": "error",
"data": {
"detail": "Internal server error"
}
}
)
class LabelStudioAdapterException(Exception):
"""Base exception class for the Label Studio Adapter"""
pass
class DatasetMappingNotFoundError(LabelStudioAdapterException):
"""Dataset mapping not found"""
def __init__(self, mapping_id: str):
self.mapping_id = mapping_id
super().__init__(f"Dataset mapping not found: {mapping_id}")
class NoDatasetInfoFoundError(LabelStudioAdapterException):
"""Dataset info could not be retrieved"""
def __init__(self, dataset_uuid: str):
self.dataset_uuid = dataset_uuid
super().__init__(f"Failed to get dataset info: {dataset_uuid}")
class LabelStudioClientError(LabelStudioAdapterException):
"""Label Studio client error"""
pass
class DMServiceClientError(LabelStudioAdapterException):
"""DM service client error"""
pass
class SyncServiceError(LabelStudioAdapterException):
"""Sync service error"""
pass
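These exception classes are plain Python and carry no HTTP status; a short sketch of how a service layer might raise them and a caller might catch them (illustrative only, not code from this commit):

```python
def lookup_mapping(mapping_id: str, store: dict):
    # Raise the domain-specific error when the record is absent.
    if mapping_id not in store:
        raise DatasetMappingNotFoundError(mapping_id)
    return store[mapping_id]

try:
    lookup_mapping("missing-id", store={})
except LabelStudioAdapterException as exc:
    # The base class catches any adapter-specific error.
    print(exc)  # Dataset mapping not found: missing-id
```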

View File

@@ -1,31 +0,0 @@
"""
Custom exception class definitions
"""
class LabelStudioAdapterException(Exception):
"""Base exception class for the Label Studio Adapter"""
pass
class DatasetMappingNotFoundError(LabelStudioAdapterException):
"""Dataset mapping not found"""
def __init__(self, mapping_id: str):
self.mapping_id = mapping_id
super().__init__(f"Dataset mapping not found: {mapping_id}")
class NoDatasetInfoFoundError(LabelStudioAdapterException):
"""Dataset info could not be retrieved"""
def __init__(self, dataset_uuid: str):
self.dataset_uuid = dataset_uuid
super().__init__(f"Failed to get dataset info: {dataset_uuid}")
class LabelStudioClientError(LabelStudioAdapterException):
"""Label Studio client error"""
pass
class DMServiceClientError(LabelStudioAdapterException):
"""DM service client error"""
pass
class SyncServiceError(LabelStudioAdapterException):
"""Sync service error"""
pass

View File

@@ -1,6 +0,0 @@
# app/clients/__init__.py
from .label_studio import Client as LabelStudioClient
from .datamate import Client as DatamateClient
__all__ = ["LabelStudioClient", "DatamateClient"]

View File

@@ -1,16 +1,23 @@
from fastapi import FastAPI, Request, HTTPException, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from starlette.exceptions import HTTPException as StarletteHTTPException
from contextlib import asynccontextmanager
from typing import Dict, Any
from sqlalchemy import text
from .core.config import settings
from .core.logging import setup_logging, get_logger
from .infrastructure import LabelStudioClient
from .api import api_router
from .schemas import StandardResponse
from .db.session import engine, AsyncSessionLocal
from .module.shared.schema import StandardResponse
from .module import router
from .exception import (
starlette_http_exception_handler,
fastapi_http_exception_handler,
validation_exception_handler,
general_exception_handler
)
# Set up logging
setup_logging()
@@ -21,23 +28,21 @@ async def lifespan(app: FastAPI):
"""应用程序生命周期管理"""
# 启动时初始化
logger.info("Starting Label Studio Adapter...")
# 初始化 Label Studio 客户端,使用 HTTP REST API + Token 认证
ls_client = LabelStudioClient(
base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token
)
logger.info("Label Studio Adapter started")
logger.info("DataMate Python Backend starting...")
# Validate the database connection
try:
async with AsyncSessionLocal() as session:
await session.execute(text("SELECT 1"))
logger.info("Database connection validated successfully.")
except Exception as e:
logger.error(f"Database connection validation failed: {e}")
logger.debug(f"Connection details: {settings.computed_database_url}")
raise
yield
# Cleanup at shutdown
logger.info("Shutting down Label Studio Adapter...")
# Client cleanup is handled by the client manager
logger.info("Label Studio Adapter stopped")
logger.info("DataMate Python Backend shutting down ...")
# Create the FastAPI application
app = FastAPI(
@@ -57,70 +62,16 @@ app.add_middleware(
allow_headers=settings.allowed_headers,
)
# Custom exception handler: StarletteHTTPException (including 404, etc.)
@app.exception_handler(StarletteHTTPException)
async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException):
"""Convert Starlette's HTTPException into the standard response format"""
return JSONResponse(
status_code=exc.status_code,
content={
"code": exc.status_code,
"message": "error",
"data": {
"detail": exc.detail
}
}
)
# Custom exception handler: FastAPI HTTPException
@app.exception_handler(HTTPException)
async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
"""Convert FastAPI's HTTPException into the standard response format"""
return JSONResponse(
status_code=exc.status_code,
content={
"code": exc.status_code,
"message": "error",
"data": {
"detail": exc.detail
}
}
)
# Custom exception handler: RequestValidationError
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
"""Convert request validation errors into the standard response format"""
return JSONResponse(
status_code=422,
content={
"code": 422,
"message": "error",
"data": {
"detail": "Validation error",
"errors": exc.errors()
}
}
)
# Custom exception handler: uncaught exceptions
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
"""Convert uncaught exceptions into the standard response format"""
logger.error(f"Unhandled exception: {exc}", exc_info=True)
return JSONResponse(
status_code=500,
content={
"code": 500,
"message": "error",
"data": {
"detail": "Internal server error"
}
}
)
# Register routes
app.include_router(api_router, prefix="/api")
app.include_router(router)
logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes])
# Register global exception handlers
app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore
app.add_exception_handler(HTTPException, fastapi_http_exception_handler) # type: ignore
app.add_exception_handler(RequestValidationError, validation_exception_handler) # type: ignore
app.add_exception_handler(Exception, general_exception_handler)
# Test endpoint: verify exception handling
@app.get("/test-404", include_in_schema=False)

View File

@@ -1,138 +0,0 @@
# DataMate Data Model Structure
This document lists all Python data models created from the SQL files in `scripts/db`.
## Model Organization
```
app/models/
├── __init__.py # Main module export file
├── dm/ # Data Management module
│ ├── __init__.py
│ ├── annotation_template.py # Annotation templates
│ ├── labeling_project.py # Labeling projects
│ ├── dataset.py # Datasets
│ ├── dataset_files.py # Dataset files
│ ├── dataset_statistics.py # Dataset statistics
│ ├── dataset_tag.py # Dataset-tag associations
│ ├── tag.py # Tags
│ └── user.py # Users
├── cleaning/ # Data Cleaning module
│ ├── __init__.py
│ ├── clean_template.py # Cleaning templates
│ ├── clean_task.py # Cleaning tasks
│ ├── operator_instance.py # Operator instances
│ └── clean_result.py # Cleaning results
├── collection/ # Data Collection module
│ ├── __init__.py
│ ├── task_execution.py # Task execution details
│ ├── collection_task.py # Data collection tasks
│ ├── task_log.py # Task execution logs
│ └── datax_template.py # DataX template configurations
├── common/ # Common module
│ ├── __init__.py
│ └── chunk_upload_request.py # Chunked file upload requests
└── operator/ # Operator module
├── __init__.py
├── operator.py # Operators
├── operator_category.py # Operator categories
└── operator_category_relation.py # Operator-category associations
```
## Module Details
### 1. Data Management (DM) Module
Corresponding SQL: `data-management-init.sql` and `data-annotation-init.sql`
#### Model List:
- **AnnotationTemplate** (`t_dm_annotation_templates`) - Annotation templates
- **LabelingProject** (`t_dm_labeling_projects`) - Labeling projects
- **Dataset** (`t_dm_datasets`) - Datasets (supporting medical imaging, text, QA, and other types)
- **DatasetFiles** (`t_dm_dataset_files`) - Dataset files
- **DatasetStatistics** (`t_dm_dataset_statistics`) - Dataset statistics
- **Tag** (`t_dm_tags`) - Tags
- **DatasetTag** (`t_dm_dataset_tags`) - Dataset-tag associations
- **User** (`users`) - Users
### 2. Data Cleaning Module
Corresponding SQL: `data-cleaning-init.sql`
#### Model List:
- **CleanTemplate** (`t_clean_template`) - Cleaning templates
- **CleanTask** (`t_clean_task`) - Cleaning tasks
- **OperatorInstance** (`t_operator_instance`) - Operator instances
- **CleanResult** (`t_clean_result`) - Cleaning results
### 3. Data Collection (DC) Module
Corresponding SQL: `data-collection-init.sql`
#### Model List:
- **TaskExecution** (`t_dc_task_executions`) - Task execution details
- **CollectionTask** (`t_dc_collection_tasks`) - Data collection tasks
- **TaskLog** (`t_dc_task_log`) - Task execution logs
- **DataxTemplate** (`t_dc_datax_templates`) - DataX template configurations
### 4. Common Module
Corresponding SQL: `data-common-init.sql`
#### Model List:
- **ChunkUploadRequest** (`t_chunk_upload_request`) - Chunked file upload requests
### 5. Operator Module
Corresponding SQL: `data-operator-init.sql`
#### Model List:
- **Operator** (`t_operator`) - Operators
- **OperatorCategory** (`t_operator_category`) - Operator categories
- **OperatorCategoryRelation** (`t_operator_category_relation`) - Operator-category associations
## Usage
```python
# Import all models
from app.models import (
# DM module
AnnotationTemplate,
LabelingProject,
Dataset,
DatasetFiles,
DatasetStatistics,
DatasetTag,
Tag,
User,
# Cleaning module
CleanTemplate,
CleanTask,
OperatorInstance,
CleanResult,
# Collection module
TaskExecution,
CollectionTask,
TaskLog,
DataxTemplate,
# Common module
ChunkUploadRequest,
# Operator module
Operator,
OperatorCategory,
OperatorCategoryRelation
)
# Or import per module
from app.models.dm import Dataset, DatasetFiles
from app.models.collection import CollectionTask
from app.models.operator import Operator
```
## Notes
1. **UUID primary keys**: Most tables use UUIDs (String(36)) as primary keys
2. **Timestamps**: `TIMESTAMP` columns are used, configured to update automatically
3. **Soft deletion**: Some models (e.g. AnnotationTemplate, LabelingProject) support soft deletion via a `deleted_at` column and an `is_deleted` property
4. **JSON columns**: Configuration, metadata, and similar information are stored as JSON
5. **Field consistency**: All model fields follow the SQL definitions exactly, keeping them fully consistent with the database schema
## Change Log
- 2025-10-25: Created all data models from the SQL files in `scripts/db`
- Updated the existing `annotation_template.py`, `labeling_project.py`, and `dataset_files.py` to match the SQL definitions

View File

@@ -1,69 +0,0 @@
# app/models/__init__.py
# Data Management (DM) module
from .dm import (
AnnotationTemplate,
LabelingProject,
Dataset,
DatasetFiles,
DatasetStatistics,
DatasetTag,
Tag,
User
)
# Data Cleaning module
from .cleaning import (
CleanTemplate,
CleanTask,
OperatorInstance,
CleanResult
)
# Data Collection (DC) module
from .collection import (
TaskExecution,
CollectionTask,
TaskLog,
DataxTemplate
)
# Common module
from .common import (
ChunkUploadRequest
)
# Operator module
from .operator import (
Operator,
OperatorCategory,
OperatorCategoryRelation
)
__all__ = [
# DM module
"AnnotationTemplate",
"LabelingProject",
"Dataset",
"DatasetFiles",
"DatasetStatistics",
"DatasetTag",
"Tag",
"User",
# Cleaning module
"CleanTemplate",
"CleanTask",
"OperatorInstance",
"CleanResult",
# Collection module
"TaskExecution",
"CollectionTask",
"TaskLog",
"DataxTemplate",
# Common module
"ChunkUploadRequest",
# Operator module
"Operator",
"OperatorCategory",
"OperatorCategoryRelation"
]

View File

@@ -1,13 +0,0 @@
# app/models/cleaning/__init__.py
from .clean_template import CleanTemplate
from .clean_task import CleanTask
from .operator_instance import OperatorInstance
from .clean_result import CleanResult
__all__ = [
"CleanTemplate",
"CleanTask",
"OperatorInstance",
"CleanResult"
]

View File

@@ -1,22 +0,0 @@
from sqlalchemy import Column, String, BigInteger, Text
from app.db.database import Base
class CleanResult(Base):
"""清洗结果模型"""
__tablename__ = "t_clean_result"
instance_id = Column(String(64), primary_key=True, comment="实例ID")
src_file_id = Column(String(64), nullable=True, comment="源文件ID")
dest_file_id = Column(String(64), primary_key=True, comment="目标文件ID")
src_name = Column(String(256), nullable=True, comment="源文件名")
dest_name = Column(String(256), nullable=True, comment="目标文件名")
src_type = Column(String(256), nullable=True, comment="源文件类型")
dest_type = Column(String(256), nullable=True, comment="目标文件类型")
src_size = Column(BigInteger, nullable=True, comment="源文件大小")
dest_size = Column(BigInteger, nullable=True, comment="目标文件大小")
status = Column(String(256), nullable=True, comment="处理状态")
result = Column(Text, nullable=True, comment="处理结果")
def __repr__(self):
return f"<CleanResult(instance_id={self.instance_id}, dest_file_id={self.dest_file_id}, status={self.status})>"

View File

@@ -1,27 +0,0 @@
from sqlalchemy import Column, String, BigInteger, Integer, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CleanTask(Base):
"""清洗任务模型"""
__tablename__ = "t_clean_task"
id = Column(String(64), primary_key=True, comment="任务ID")
name = Column(String(64), nullable=True, comment="任务名称")
description = Column(String(256), nullable=True, comment="任务描述")
status = Column(String(256), nullable=True, comment="任务状态")
src_dataset_id = Column(String(64), nullable=True, comment="源数据集ID")
src_dataset_name = Column(String(64), nullable=True, comment="源数据集名称")
dest_dataset_id = Column(String(64), nullable=True, comment="目标数据集ID")
dest_dataset_name = Column(String(64), nullable=True, comment="目标数据集名称")
before_size = Column(BigInteger, nullable=True, comment="清洗前大小")
after_size = Column(BigInteger, nullable=True, comment="清洗后大小")
file_count = Column(Integer, nullable=True, comment="文件数量")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
finished_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
created_by = Column(String(256), nullable=True, comment="创建者")
def __repr__(self):
return f"<CleanTask(id={self.id}, name={self.name}, status={self.status})>"

View File

@@ -1,18 +0,0 @@
from sqlalchemy import Column, String, Text, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CleanTemplate(Base):
"""清洗模板模型"""
__tablename__ = "t_clean_template"
id = Column(String(64), primary_key=True, unique=True, comment="模板ID")
name = Column(String(64), nullable=True, comment="模板名称")
description = Column(String(256), nullable=True, comment="模板描述")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(256), nullable=True, comment="创建者")
def __repr__(self):
return f"<CleanTemplate(id={self.id}, name={self.name})>"

View File

@@ -1,15 +0,0 @@
from sqlalchemy import Column, String, Integer, Text
from app.db.database import Base
class OperatorInstance(Base):
"""算子实例模型"""
__tablename__ = "t_operator_instance"
instance_id = Column(String(256), primary_key=True, comment="实例ID")
operator_id = Column(String(256), primary_key=True, comment="算子ID")
op_index = Column(Integer, primary_key=True, comment="算子索引")
settings_override = Column(Text, nullable=True, comment="配置覆盖")
def __repr__(self):
return f"<OperatorInstance(instance_id={self.instance_id}, operator_id={self.operator_id}, index={self.op_index})>"

View File

@@ -1,13 +0,0 @@
# app/models/collection/__init__.py
from .task_execution import TaskExecution
from .collection_task import CollectionTask
from .task_log import TaskLog
from .datax_template import DataxTemplate
__all__ = [
"TaskExecution",
"CollectionTask",
"TaskLog",
"DataxTemplate"
]

View File

@@ -1,28 +0,0 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CollectionTask(Base):
"""数据归集任务模型"""
__tablename__ = "t_dc_collection_tasks"
id = Column(String(36), primary_key=True, comment="任务ID(UUID)")
name = Column(String(255), nullable=False, comment="任务名称")
description = Column(Text, nullable=True, comment="任务描述")
sync_mode = Column(String(20), default='ONCE', comment="同步模式:ONCE/SCHEDULED")
config = Column(Text, nullable=False, comment="归集配置(DataX配置),包含源端和目标端配置信息")
schedule_expression = Column(String(255), nullable=True, comment="Cron调度表达式")
status = Column(String(20), default='DRAFT', comment="任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED")
retry_count = Column(Integer, default=3, comment="重试次数")
timeout_seconds = Column(Integer, default=3600, comment="超时时间(秒)")
max_records = Column(BigInteger, nullable=True, comment="最大处理记录数")
sort_field = Column(String(100), nullable=True, comment="增量字段")
last_execution_id = Column(String(36), nullable=True, comment="最后执行ID(UUID)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<CollectionTask(id={self.id}, name={self.name}, status={self.status})>"

View File

@@ -1,23 +0,0 @@
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class DataxTemplate(Base):
"""DataX模板配置模型"""
__tablename__ = "t_dc_datax_templates"
id = Column(String(36), primary_key=True, comment="模板ID(UUID)")
name = Column(String(255), nullable=False, unique=True, comment="模板名称")
source_type = Column(String(50), nullable=False, comment="源数据源类型")
target_type = Column(String(50), nullable=False, comment="目标数据源类型")
template_content = Column(Text, nullable=False, comment="模板内容")
description = Column(Text, nullable=True, comment="模板描述")
version = Column(String(20), default='1.0.0', comment="版本号")
is_system = Column(Boolean, default=False, comment="是否系统模板")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
def __repr__(self):
return f"<DataxTemplate(id={self.id}, name={self.name}, source={self.source_type}, target={self.target_type})>"

View File

@@ -1,34 +0,0 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, DECIMAL, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class TaskExecution(Base):
"""任务执行明细模型"""
__tablename__ = "t_dc_task_executions"
id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
task_id = Column(String(36), nullable=False, comment="任务ID")
task_name = Column(String(255), nullable=False, comment="任务名称")
status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
progress = Column(DECIMAL(5, 2), default=0.00, comment="进度百分比")
records_total = Column(BigInteger, default=0, comment="总记录数")
records_processed = Column(BigInteger, default=0, comment="已处理记录数")
records_success = Column(BigInteger, default=0, comment="成功记录数")
records_failed = Column(BigInteger, default=0, comment="失败记录数")
throughput = Column(DECIMAL(10, 2), default=0.00, comment="吞吐量(条/秒)")
data_size_bytes = Column(BigInteger, default=0, comment="数据量(字节)")
started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
duration_seconds = Column(Integer, default=0, comment="执行时长(秒)")
config = Column(JSON, nullable=True, comment="执行配置")
error_message = Column(Text, nullable=True, comment="错误信息")
datax_job_id = Column(Text, nullable=True, comment="datax任务ID")
result = Column(Text, nullable=True, comment="执行结果")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<TaskExecution(id={self.id}, task_id={self.task_id}, status={self.status})>"

View File

@@ -1,26 +0,0 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class TaskLog(Base):
"""任务执行记录模型"""
__tablename__ = "t_dc_task_log"
id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
task_id = Column(String(36), nullable=False, comment="任务ID")
task_name = Column(String(255), nullable=False, comment="任务名称")
sync_mode = Column(String(20), default='FULL', comment="同步模式:FULL/INCREMENTAL")
status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
start_time = Column(TIMESTAMP, nullable=True, comment="开始时间")
end_time = Column(TIMESTAMP, nullable=True, comment="结束时间")
duration = Column(BigInteger, nullable=True, comment="执行时长(毫秒)")
process_id = Column(String(50), nullable=True, comment="进程ID")
log_path = Column(String(500), nullable=True, comment="日志文件路径")
error_msg = Column(Text, nullable=True, comment="错误信息")
result = Column(Text, nullable=True, comment="执行结果")
retry_times = Column(Integer, default=0, comment="重试次数")
create_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
def __repr__(self):
return f"<TaskLog(id={self.id}, task_id={self.task_id}, status={self.status})>"

View File

@@ -1,7 +0,0 @@
# app/models/common/__init__.py
from .chunk_upload_request import ChunkUploadRequest
__all__ = [
"ChunkUploadRequest"
]

View File

@@ -1,19 +0,0 @@
from sqlalchemy import Column, String, Integer, Text, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class ChunkUploadRequest(Base):
"""文件切片上传请求模型"""
__tablename__ = "t_chunk_upload_request"
id = Column(String(36), primary_key=True, comment="UUID")
total_file_num = Column(Integer, nullable=True, comment="总文件数")
uploaded_file_num = Column(Integer, nullable=True, comment="已上传文件数")
upload_path = Column(String(256), nullable=True, comment="文件路径")
timeout = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传请求超时时间")
service_id = Column(String(64), nullable=True, comment="上传请求所属服务:DATA-MANAGEMENT(数据管理)")
check_info = Column(Text, nullable=True, comment="业务信息")
def __repr__(self):
return f"<ChunkUploadRequest(id={self.id}, service_id={self.service_id}, progress={self.uploaded_file_num}/{self.total_file_num})>"

View File

@@ -1,21 +0,0 @@
# app/models/dm/__init__.py
from .annotation_template import AnnotationTemplate
from .labeling_project import LabelingProject
from .dataset import Dataset
from .dataset_files import DatasetFiles
from .dataset_statistics import DatasetStatistics
from .dataset_tag import DatasetTag
from .tag import Tag
from .user import User
__all__ = [
"AnnotationTemplate",
"LabelingProject",
"Dataset",
"DatasetFiles",
"DatasetStatistics",
"DatasetTag",
"Tag",
"User"
]

View File

@@ -1,24 +0,0 @@
from sqlalchemy import Column, String, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class AnnotationTemplate(Base):
"""标注模板模型"""
__tablename__ = "t_dm_annotation_templates"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
name = Column(String(32), nullable=False, comment="模板名称")
description = Column(String(255), nullable=True, comment="模板描述")
configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<AnnotationTemplate(id={self.id}, name={self.name})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None

View File

@@ -1,35 +0,0 @@
from sqlalchemy import Column, String, Text, BigInteger, Integer, Boolean, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class Dataset(Base):
"""数据集模型(支持医学影像、文本、问答等多种类型)"""
__tablename__ = "t_dm_datasets"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(255), nullable=False, comment="数据集名称")
description = Column(Text, nullable=True, comment="数据集描述")
dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER")
category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等")
path = Column(String(500), nullable=True, comment="数据存储路径")
format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等")
schema_info = Column(JSON, nullable=True, comment="数据结构信息")
size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)")
file_count = Column(BigInteger, default=0, comment="文件数量")
record_count = Column(BigInteger, default=0, comment="记录数量")
retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)")
tags = Column(JSON, nullable=True, comment="标签列表")
metadata = Column(JSON, nullable=True, comment="元数据信息")
status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED")
is_public = Column(Boolean, default=False, comment="是否公开")
is_featured = Column(Boolean, default=False, comment="是否推荐")
version = Column(BigInteger, nullable=False, default=0, comment="版本号")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<Dataset(id={self.id}, name={self.name}, type={self.dataset_type})>"

View File

@@ -1,27 +0,0 @@
from sqlalchemy import Column, String, JSON, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class DatasetFiles(Base):
"""DM数据集文件模型"""
__tablename__ = "t_dm_dataset_files"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)")
file_name = Column(String(255), nullable=False, comment="文件名")
file_path = Column(String(1000), nullable=False, comment="文件路径")
file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等")
file_size = Column(BigInteger, default=0, comment="文件大小(字节)")
check_sum = Column(String(64), nullable=True, comment="文件校验和")
tags = Column(JSON, nullable=True, comment="文件标签信息")
metadata = Column(JSON, nullable=True, comment="文件元数据")
status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING")
upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间")
last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<DatasetFiles(id={self.id}, dataset_id={self.dataset_id}, file_name={self.file_name})>"

View File

@@ -1,25 +0,0 @@
from sqlalchemy import Column, String, Date, BigInteger, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class DatasetStatistics(Base):
"""数据集统计信息模型"""
__tablename__ = "t_dm_dataset_statistics"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)")
stat_date = Column(Date, nullable=False, comment="统计日期")
total_files = Column(BigInteger, default=0, comment="总文件数")
total_size = Column(BigInteger, default=0, comment="总大小(字节)")
processed_files = Column(BigInteger, default=0, comment="已处理文件数")
error_files = Column(BigInteger, default=0, comment="错误文件数")
download_count = Column(BigInteger, default=0, comment="下载次数")
view_count = Column(BigInteger, default=0, comment="查看次数")
quality_metrics = Column(JSON, nullable=True, comment="质量指标")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<DatasetStatistics(id={self.id}, dataset_id={self.dataset_id}, date={self.stat_date})>"

View File

@@ -1,15 +0,0 @@
from sqlalchemy import Column, String, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class DatasetTag(Base):
"""数据集标签关联模型"""
__tablename__ = "t_dm_dataset_tags"
dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)")
tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
def __repr__(self):
return f"<DatasetTag(dataset_id={self.dataset_id}, tag_id={self.tag_id})>"

View File

@@ -1,26 +0,0 @@
from sqlalchemy import Column, String, Integer, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class LabelingProject(Base):
"""DM标注项目模型(原 DatasetMapping)"""
__tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
name = Column(String(32), nullable=False, comment="项目名称")
labeling_project_id = Column(Integer, nullable=False, comment="Label Studio项目ID")
configuration = Column(JSON, nullable=True, comment="标签配置")
progress = Column(JSON, nullable=True, comment="标注进度统计")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<LabelingProject(id={self.id}, dataset_id={self.dataset_id}, name={self.name})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None

View File

@@ -1,21 +0,0 @@
from sqlalchemy import Column, String, Text, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class Tag(Base):
"""标签模型"""
__tablename__ = "t_dm_tags"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(100), nullable=False, unique=True, comment="标签名称")
description = Column(Text, nullable=True, comment="标签描述")
category = Column(String(50), nullable=True, comment="标签分类")
color = Column(String(7), nullable=True, comment="标签颜色(十六进制)")
usage_count = Column(BigInteger, default=0, comment="使用次数")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<Tag(id={self.id}, name={self.name}, category={self.category})>"

View File

@@ -1,11 +0,0 @@
# app/models/operator/__init__.py
from .operator import Operator
from .operator_category import OperatorCategory
from .operator_category_relation import OperatorCategoryRelation
__all__ = [
"Operator",
"OperatorCategory",
"OperatorCategoryRelation"
]

View File

@@ -1,24 +0,0 @@
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class Operator(Base):
"""算子模型"""
__tablename__ = "t_operator"
id = Column(String(64), primary_key=True, comment="算子ID")
name = Column(String(64), nullable=True, comment="算子名称")
description = Column(String(256), nullable=True, comment="算子描述")
version = Column(String(256), nullable=True, comment="版本")
inputs = Column(String(256), nullable=True, comment="输入类型")
outputs = Column(String(256), nullable=True, comment="输出类型")
runtime = Column(Text, nullable=True, comment="运行时信息")
settings = Column(Text, nullable=True, comment="配置信息")
file_name = Column(Text, nullable=True, comment="文件名")
is_star = Column(Boolean, nullable=True, comment="是否收藏")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<Operator(id={self.id}, name={self.name}, version={self.version})>"

View File

@@ -1,15 +0,0 @@
from sqlalchemy import Column, String, Integer
from app.db.database import Base
class OperatorCategory(Base):
"""算子分类模型"""
__tablename__ = "t_operator_category"
id = Column(Integer, primary_key=True, autoincrement=True, comment="分类ID")
name = Column(String(64), nullable=True, comment="分类名称")
type = Column(String(64), nullable=True, comment="分类类型")
parent_id = Column(Integer, nullable=True, comment="父分类ID")
def __repr__(self):
return f"<OperatorCategory(id={self.id}, name={self.name}, type={self.type})>"

View File

@@ -1,13 +0,0 @@
from sqlalchemy import Column, String, Integer
from app.db.database import Base
class OperatorCategoryRelation(Base):
"""算子分类关联模型"""
__tablename__ = "t_operator_category_relation"
category_id = Column(Integer, primary_key=True, comment="分类ID")
operator_id = Column(String(64), primary_key=True, comment="算子ID")
def __repr__(self):
return f"<OperatorCategoryRelation(category_id={self.category_id}, operator_id={self.operator_id})>"

View File

@@ -0,0 +1,11 @@
from fastapi import APIRouter
from .annotation.interface import router as annotation_router
router = APIRouter(
prefix="/api"
)
router.include_router(annotation_router)
__all__ = ["router"]

View File

@@ -0,0 +1,3 @@
from .labelstudio import LabelStudioClient
__all__ = ["LabelStudioClient"]

View File

@@ -0,0 +1,3 @@
from .client import Client as LabelStudioClient
__all__ = ["LabelStudioClient"]

View File

@@ -1,10 +1,10 @@
import httpx
from typing import Optional, Dict, Any, List
import json
from app.core.config import settings
from app.core.logging import get_logger
from app.schemas.label_studio import (
from .schema import (
LabelStudioProject,
LabelStudioCreateProjectRequest,
LabelStudioCreateTaskRequest
@@ -88,7 +88,7 @@ class Client:
}
)
logger.info(f"Label Studio client initialized: {self.base_url}")
logger.debug(f"Label Studio client initialized: {self.base_url}")
def get_label_config_by_type(self, data_type: str) -> str:
"""根据数据类型获取标注配置"""
@@ -103,7 +103,7 @@ class Client:
) -> Optional[Dict[str, Any]]:
"""创建Label Studio项目"""
try:
logger.info(f"Creating Label Studio project: {title}")
logger.debug(f"Creating Label Studio project: {title}")
if not label_config:
label_config = self.get_label_config_by_type(data_type)
@@ -123,7 +123,7 @@ class Client:
if not project_id:
raise Exception("Label Studio response does not contain project ID")
logger.info(f"Project created successfully, ID: {project_id}")
logger.debug(f"Project created successfully, ID: {project_id}")
return project
except httpx.HTTPStatusError as e:
@@ -142,7 +142,7 @@ class Client:
) -> Optional[Dict[str, Any]]:
"""批量导入任务到Label Studio项目"""
try:
logger.info(f"Importing {len(tasks)} tasks into project {project_id}")
logger.debug(f"Importing {len(tasks)} tasks into project {project_id}")
response = await self.client.post(
f"/api/projects/{project_id}/import",
@@ -157,7 +157,7 @@ class Client:
result = response.json()
task_count = result.get("task_count", len(tasks))
logger.info(f"Tasks imported successfully: {task_count}")
logger.debug(f"Tasks imported successfully: {task_count}")
return result
except httpx.HTTPStatusError as e:
@@ -236,11 +236,12 @@ class Client:
# If page is given, fetch just that single page of tasks
if page is not None:
logger.info(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})")
logger.debug(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})")
response = await self.client.get(
f"/api/projects/{pid}/tasks",
f"/api/tasks",
params={
"project": pid,
"page": page,
"page_size": page_size
}
@@ -259,48 +260,27 @@ class Client:
}
# If page is not given, fetch all tasks
logger.info(f"Start fetching all tasks for project {pid} (page_size={page_size})")
logger.debug(f"(page) not specified, fetching all tasks.")
all_tasks = []
current_page = 1
response = await self.client.get(
f"/api/tasks",
params={
"project": pid
}
)
response.raise_for_status()
while True:
try:
response = await self.client.get(
f"/api/projects/{pid}/tasks",
params={
"page": current_page,
"page_size": page_size
}
)
response.raise_for_status()
result = response.json()
tasks = result.get("tasks", [])
if not tasks:
logger.debug(f"No more tasks on page {current_page}")
break
all_tasks.extend(tasks)
logger.debug(f"Fetched page {current_page}, {len(tasks)} tasks")
# Check whether more pages remain
total = result.get("total", 0)
if len(all_tasks) >= total:
break
current_page += 1
except httpx.HTTPStatusError as e:
if e.response.status_code == 404:
# Past the last page; stop paginating
logger.debug(f"Reached last page (page {current_page})")
break
else:
raise
result = response.json()
tasks = result.get("tasks", [])
logger.info(f"Fetched all tasks for project {pid}, total {len(all_tasks)}")
if not tasks:
logger.debug(f"No tasks found for this project.")
all_tasks.extend(tasks)
logger.debug(f"Fetched {len(tasks)} tasks.")
# Return all tasks, without pagination metadata
return {
"count": len(all_tasks),
@@ -321,12 +301,12 @@ class Client:
) -> bool:
"""删除单个任务"""
try:
logger.info(f"Deleting task: {task_id}")
logger.debug(f"Deleting task: {task_id}")
response = await self.client.delete(f"/api/tasks/{task_id}")
response.raise_for_status()
logger.info(f"Task deleted: {task_id}")
logger.debug(f"Task deleted: {task_id}")
return True
except httpx.HTTPStatusError as e:
@@ -342,7 +322,7 @@ class Client:
) -> Dict[str, int]:
"""批量删除任务"""
try:
logger.info(f"Deleting {len(task_ids)} tasks in batch")
logger.debug(f"Deleting {len(task_ids)} tasks in batch")
successful_deletions = 0
failed_deletions = 0
@@ -353,7 +333,7 @@ class Client:
else:
failed_deletions += 1
logger.info(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}")
logger.debug(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}")
return {
"successful": successful_deletions,
@@ -372,7 +352,7 @@ class Client:
async def get_project(self, project_id: int) -> Optional[Dict[str, Any]]:
"""获取项目信息"""
try:
logger.info(f"Fetching project info: {project_id}")
logger.debug(f"Fetching project info: {project_id}")
response = await self.client.get(f"/api/projects/{project_id}")
response.raise_for_status()
@@ -389,12 +369,12 @@ class Client:
async def delete_project(self, project_id: int) -> bool:
"""删除项目"""
try:
logger.info(f"Deleting project: {project_id}")
logger.debug(f"Deleting project: {project_id}")
response = await self.client.delete(f"/api/projects/{project_id}")
response.raise_for_status()
logger.info(f"Project deleted: {project_id}")
logger.debug(f"Project deleted: {project_id}")
return True
except httpx.HTTPStatusError as e:
@@ -427,7 +407,7 @@ class Client:
The created storage configuration; None on failure.
"""
try:
logger.info(f"Creating local storage for project {project_id}: {path}")
logger.debug(f"Creating local storage for project {project_id}: {path}")
storage_data = {
"project": project_id,
@@ -450,7 +430,7 @@ class Client:
storage = response.json()
storage_id = storage.get("id")
logger.info(f"Local storage created successfully, ID: {storage_id}")
logger.debug(f"Local storage created successfully, ID: {storage_id}")
return storage
except httpx.HTTPStatusError as e:
@@ -464,6 +444,6 @@ class Client:
"""关闭客户端连接"""
try:
await self.client.aclose()
logger.info("Label Studio client closed")
logger.debug("Label Studio client closed")
except Exception as e:
logger.error(f"Error while closing Label Studio client: {e}")

View File

@@ -1,7 +1,9 @@
from pydantic import Field
from typing import Dict, Any, Optional, List
from typing import Dict, Any, Optional
from datetime import datetime
from .common import BaseResponseModel
from app.module.shared.schema import BaseResponseModel
class LabelStudioProject(BaseResponseModel):
"""Label Studio项目模型"""

View File

@@ -0,0 +1,12 @@
from fastapi import APIRouter
from .project import router as project_router
from .task import router as task_router
router = APIRouter(
prefix="/annotation",
tags=["annotation"]
)
router.include_router(project_router)
router.include_router(task_router)

View File

@@ -0,0 +1,353 @@
from typing import Optional
import math
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.module.shared.schema import StandardResponse, PaginatedData
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from app.core.config import settings
from ..client import LabelStudioClient
from ..service.mapping import DatasetMappingService
from ..schema import (
DatasetMappingCreateRequest,
DatasetMappingCreateResponse,
DeleteDatasetResponse,
DatasetMappingResponse,
)
router = APIRouter(
prefix="/project",
tags=["annotation/project"]
)
logger = get_logger(__name__)
@router.post("/", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
async def create_mapping(
request: DatasetMappingCreateRequest,
db: AsyncSession = Depends(get_db)
):
"""
创建数据集映射
根据指定的DM程序中的数据集,创建Label Studio中的数据集,
在数据库中记录这一关联关系,返回Label Studio数据集的ID
注意:一个数据集可以创建多个标注项目
"""
try:
dm_client = DatasetManagementService(db)
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
logger.info(f"Create dataset mapping request: {request.dataset_id}")
# Fetch dataset info from the DM service
dataset_info = await dm_client.get_dataset(request.dataset_id)
if not dataset_info:
raise HTTPException(
status_code=404,
detail=f"Dataset not found in DM service: {request.dataset_id}"
)
# Determine the data type from the dataset type
data_type = "image"  # default
if dataset_info.type and dataset_info.type.code:
type_code = dataset_info.type.code.lower()
if "audio" in type_code:
data_type = "audio"
elif "video" in type_code:
data_type = "video"
elif "text" in type_code:
data_type = "text"
project_name = f"{dataset_info.name}"
# Create the project in Label Studio
project_data = await ls_client.create_project(
title=project_name,
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
data_type=data_type
)
if not project_data:
raise HTTPException(
status_code=500,
detail="Fail to create Label Studio project."
)
project_id = project_data["id"]
# Configure local storage: dataset/<id>
local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}"
storage_result = await ls_client.create_local_storage(
project_id=project_id,
path=local_storage_path,
title="Dataset_BLOB",
use_blob_urls=True,
description=f"Local storage for dataset {dataset_info.name}"
)
if not storage_result:
# Local storage setup failed; log a warning but do not abort the flow
logger.warning(f"Failed to configure local storage for project {project_id}")
else:
logger.info(f"Local storage configured for project {project_id}: {local_storage_path}")
# Create the mapping record, including the project name
mapping = await service.create_mapping(
request,
str(project_id),
project_name
)
response_data = DatasetMappingCreateResponse(
id=mapping.id,
labeling_project_id=str(mapping.labeling_project_id),
labeling_project_name=mapping.name or project_name,
message="Dataset mapping created successfully"
)
return StandardResponse(
code=201,
message="success",
data=response_data
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error while creating dataset mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def list_mappings(
page: int = Query(1, ge=1, description="页码(从1开始)"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
db: AsyncSession = Depends(get_db)
):
"""
查询所有映射关系(分页)
返回所有有效的数据集映射关系(未被软删除的),支持分页查询
"""
try:
service = DatasetMappingService(db)
# Compute the skip offset
skip = (page - 1) * page_size
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
# Fetch the page of data plus the total count
mappings, total = await service.get_all_mappings_with_count(
skip=skip,
limit=page_size
)
# Compute total pages
total_pages = math.ceil(total / page_size) if total > 0 else 0
# Build the paginated response
paginated_data = PaginatedData(
page=page,
size=page_size,
total_elements=total,
total_pages=total_pages,
content=mappings
)
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
return StandardResponse(
code=200,
message="success",
data=paginated_data
)
except Exception as e:
logger.error(f"Error listing mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse])
async def get_mapping(
mapping_id: str,
db: AsyncSession = Depends(get_db)
):
"""
根据 UUID 查询单个映射关系
"""
try:
service = DatasetMappingService(db)
logger.info(f"Get mapping: {mapping_id}")
mapping = await service.get_mapping_by_uuid(mapping_id)
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping not found: {mapping_id}"
)
logger.info(f"Found mapping: {mapping.id}")
return StandardResponse(
code=200,
message="success",
data=mapping
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def get_mappings_by_source(
dataset_id: str,
page: int = Query(1, ge=1, description="页码(从1开始)"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
db: AsyncSession = Depends(get_db)
):
"""
根据源数据集 ID 查询所有映射关系(分页)
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询
"""
try:
service = DatasetMappingService(db)
# Compute the skip offset
skip = (page - 1) * page_size
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}")
# Fetch the page of data plus the total count
mappings, total = await service.get_mappings_by_source_with_count(
dataset_id=dataset_id,
skip=skip,
limit=page_size
)
# Compute total pages
total_pages = math.ceil(total / page_size) if total > 0 else 0
# Build the paginated response
paginated_data = PaginatedData(
page=page,
size=page_size,
total_elements=total,
total_pages=total_pages,
content=mappings
)
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
return StandardResponse(
code=200,
message="success",
data=paginated_data
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error getting mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("/", response_model=StandardResponse[DeleteDatasetResponse])
async def delete_mapping(
m: Optional[str] = Query(None, description="映射UUID"),
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
db: AsyncSession = Depends(get_db)
):
"""
删除映射关系和对应的 Label Studio 项目
可以通过以下任一方式指定要删除的映射:
- m: 映射UUID
- proj: Label Studio项目ID
- 两者都提供(优先使用 m)
此操作会:
1. 删除 Label Studio 中的项目
2. 软删除数据库中的映射记录
"""
try:
# At least one parameter is required
if not m and not proj:
raise HTTPException(
status_code=400,
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
)
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
# Prefer lookup by mapping UUID
if m:
logger.debug(f"Deleting by mapping UUID: {m}")
mapping = await service.get_mapping_by_uuid(m)
# Fall back to lookup by project ID when m is absent
elif proj:
logger.debug(f"Deleting by project ID: {proj}")
mapping = await service.get_mapping_by_labeling_project_id(proj)
else:
mapping = None
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping either not found or not specified."
)
id = mapping.id
labeling_project_id = mapping.labeling_project_id
labeling_project_name = mapping.name
logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}")
# 1. Delete the Label Studio project
try:
delete_success = await ls_client.delete_project(int(labeling_project_id))
if delete_success:
logger.debug(f"Successfully deleted Label Studio project: {labeling_project_id}")
else:
logger.warning(f"Failed to delete Label Studio project or project not found: {labeling_project_id}")
except Exception as e:
logger.error(f"Error deleting Label Studio project: {e}")
# Keep going: the mapping record must still be removed even if deleting the Label Studio project failed
# 2. Soft-delete the mapping record
soft_delete_success = await service.soft_delete_mapping(id)
if not soft_delete_success:
raise HTTPException(
status_code=500,
detail="Failed to delete mapping record"
)
logger.info(f"Successfully deleted mapping: {id}, Label Studio project: {labeling_project_id}")
return StandardResponse(
code=200,
message="success",
data=DeleteDatasetResponse(
id=id,
status="success",
message=f"Successfully deleted mapping and Label Studio project '{labeling_project_name}'"
)
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Error deleting mapping: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
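
To exercise these endpoints end to end, a hedged httpx sketch; the adapter's address and the dataset UUID are placeholders:

import asyncio

import httpx

async def demo():
    async with httpx.AsyncClient(base_url="http://localhost:8000") as http:
        # Create a mapping (and its backing Label Studio project)
        created = await http.post(
            "/api/annotation/project/",
            json={"dataset_id": "<dataset-uuid>"},
        )
        mapping = created.json()["data"]
        # List mappings, first page
        listed = await http.get(
            "/api/annotation/project/",
            params={"page": 1, "page_size": 20},
        )
        print(mapping["id"], listed.json()["data"]["total_elements"])

asyncio.run(demo())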

View File

@@ -2,24 +2,29 @@ from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.services.sync_service import SyncService
from app.infrastructure import DatamateClient, LabelStudioClient
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from app.schemas.dataset_mapping import (
DatasetMappingResponse,
from app.db.session import get_db
from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from app.core.config import settings
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from ..client import LabelStudioClient
from ..service.sync import SyncService
from ..service.mapping import DatasetMappingService
from ..schema import (
SyncDatasetRequest,
SyncDatasetResponse,
)
from app.schemas import StandardResponse
from app.core.logging import get_logger
from app.core.config import settings
from . import project_router
router = APIRouter(
prefix="/task",
tags=["annotation/task"]
)
logger = get_logger(__name__)
@project_router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
@router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
async def sync_dataset_content(
request: SyncDatasetRequest,
db: AsyncSession = Depends(get_db)
@@ -33,22 +38,22 @@ async def sync_dataset_content(
try:
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
dm_client = DatamateClient(db)
dm_client = DatasetManagementService(db)
mapping_service = DatasetMappingService(db)
sync_service = SyncService(dm_client, ls_client, mapping_service)
logger.info(f"Sync dataset content request: mapping_id={request.mapping_id}")
# 根据 mapping_id 获取映射关系
mapping = await mapping_service.get_mapping_by_uuid(request.mapping_id)
logger.info(f"Sync dataset content request: mapping_id={request.id}")
# Validate request.id
mapping = await mapping_service.get_mapping_by_uuid(request.id)
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping not found: {request.mapping_id}"
detail=f"Mapping not found: {request.id}"
)
# Run the sync (using the source dataset UUID from the mapping)
result = await sync_service.sync_dataset_files(request.mapping_id, request.batch_size)
result = await sync_service.sync_dataset_files(request.id, request.batch_size)
logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files")

View File

@@ -0,0 +1,24 @@
from .mapping import (
DatasetMappingBase,
DatasetMappingCreateRequest,
DatasetMappingCreateResponse,
DatasetMappingUpdateRequest,
DatasetMappingResponse,
DeleteDatasetResponse
)
from .sync import (
SyncDatasetRequest,
SyncDatasetResponse
)
__all__ = [
"DatasetMappingBase",
"DatasetMappingCreateRequest",
"DatasetMappingCreateResponse",
"DatasetMappingUpdateRequest",
"DatasetMappingResponse",
"SyncDatasetRequest",
"SyncDatasetResponse",
"DeleteDatasetResponse"
]

View File

@@ -0,0 +1,42 @@
from pydantic import Field
from typing import Optional
from datetime import datetime
from app.module.shared.schema import BaseResponseModel
class DatasetMappingBase(BaseResponseModel):
"""数据集映射 基础模型"""
dataset_id: str = Field(..., description="源数据集ID")
class DatasetMappingCreateRequest(DatasetMappingBase):
"""数据集映射 创建 请求模型"""
pass
class DatasetMappingCreateResponse(BaseResponseModel):
"""数据集映射 创建 响应模型"""
id: str = Field(..., description="映射UUID")
labeling_project_id: str = Field(..., description="Label Studio项目ID")
labeling_project_name: str = Field(..., description="Label Studio项目名称")
message: str = Field(..., description="响应消息")
class DatasetMappingUpdateRequest(BaseResponseModel):
"""数据集映射 更新 请求模型"""
dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(DatasetMappingBase):
"""数据集映射 查询 响应模型"""
id: str = Field(..., description="映射UUID")
labeling_project_id: str = Field(..., description="标注项目ID")
name: Optional[str] = Field(None, description="标注项目名称")
created_at: datetime = Field(..., description="创建时间")
deleted_at: Optional[datetime] = Field(None, description="删除时间")
class Config:
from_attributes = True
populate_by_name = True
class DeleteDatasetResponse(BaseResponseModel):
"""删除数据集响应模型"""
id: str = Field(..., description="映射UUID")
status: str = Field(..., description="删除状态")
message: str = Field(..., description="响应消息")
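
The from_attributes = True setting is what lets the service layer hand an ORM row straight to Pydantic, as the mapping service does; a one-line sketch (db_mapping stands for a LabelingProject row loaded elsewhere):

resp = DatasetMappingResponse.model_validate(db_mapping)
print(resp.id, resp.labeling_project_id, resp.created_at)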

View File

@@ -0,0 +1,19 @@
from pydantic import Field
from typing import Optional
from datetime import datetime
from app.module.shared.schema import BaseResponseModel
class SyncDatasetRequest(BaseResponseModel):
"""同步数据集请求模型"""
id: str = Field(..., description="映射ID(mapping UUID)")
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
class SyncDatasetResponse(BaseResponseModel):
"""同步数据集响应模型"""
id: str = Field(..., description="映射UUID")
status: str = Field(..., description="同步状态")
synced_files: int = Field(..., description="已同步文件数量")
total_files: int = Field(0, description="总文件数量")
message: str = Field(..., description="响应消息")
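
The ge/le bounds on batch_size are enforced at parse time, so out-of-range values never reach the sync service; a small sketch with an illustrative UUID:

from pydantic import ValidationError

req = SyncDatasetRequest(id="00000000-0000-0000-0000-000000000000")
print(req.batch_size)  # 50, the default

try:
    SyncDatasetRequest(id=req.id, batch_size=500)  # violates le=100
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["loc"])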

View File

@@ -5,13 +5,13 @@ from typing import Optional, List, Tuple
from datetime import datetime
import uuid
from app.models.dm.labeling_project import LabelingProject
from app.schemas.dataset_mapping import (
from app.core.logging import get_logger
from app.db.models import LabelingProject
from app.module.annotation.schema import (
DatasetMappingCreateRequest,
DatasetMappingUpdateRequest,
DatasetMappingResponse
)
from app.core.logging import get_logger
logger = get_logger(__name__)
@@ -24,24 +24,24 @@ class DatasetMappingService:
async def create_mapping(
self,
mapping_data: DatasetMappingCreateRequest,
labelling_project_id: str,
labelling_project_name: str
labeling_project_id: str,
labeling_project_name: str
) -> DatasetMappingResponse:
"""创建数据集映射"""
logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labelling_project_id}")
logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labeling_project_id}")
db_mapping = LabelingProject(
mapping_id=str(uuid.uuid4()),
id=str(uuid.uuid4()),
dataset_id=mapping_data.dataset_id,
labelling_project_id=labelling_project_id,
labelling_project_name=labelling_project_name
labeling_project_id=labeling_project_id,
name=labeling_project_name
)
self.db.add(db_mapping)
await self.db.commit()
await self.db.refresh(db_mapping)
logger.info(f"Mapping created: {db_mapping.id}")
logger.debug(f"Mapping created: {db_mapping.id}")
return DatasetMappingResponse.model_validate(db_mapping)
async def get_mapping_by_source_uuid(
@@ -89,16 +89,16 @@ class DatasetMappingService:
logger.debug(f"Found {len(mappings)} mappings")
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings]
async def get_mapping_by_labelling_project_id(
async def get_mapping_by_labeling_project_id(
self,
labelling_project_id: str
labeling_project_id: str
) -> Optional[DatasetMappingResponse]:
"""根据Label Studio项目ID获取映射"""
logger.debug(f"Get mapping by Label Studio project id: {labelling_project_id}")
logger.debug(f"Get mapping by Label Studio project id: {labeling_project_id}")
result = await self.db.execute(
select(LabelingProject).where(
LabelingProject.labeling_project_id == labelling_project_id,
LabelingProject.labeling_project_id == labeling_project_id,
LabelingProject.deleted_at.is_(None)
)
)
@@ -108,7 +108,7 @@ class DatasetMappingService:
logger.debug(f"Found mapping: {mapping.mapping_id}")
return DatasetMappingResponse.model_validate(mapping)
logger.debug(f"No mapping found for Label Studio project id: {labelling_project_id}")
logger.debug(f"No mapping found for Label Studio project id: {labeling_project_id}")
return None
async def get_mapping_by_uuid(self, mapping_id: str) -> Optional[DatasetMappingResponse]:
@@ -156,21 +156,6 @@ class DatasetMappingService:
return await self.get_mapping_by_uuid(mapping_id)
return None
async def update_last_updated_at(self, mapping_id: str) -> bool:
"""更新最后更新时间"""
logger.debug(f"Update mapping last updated at: {mapping_id}")
result = await self.db.execute(
update(LabelingProject)
.where(
LabelingProject.id == mapping_id,
LabelingProject.deleted_at.is_(None)
)
.values(last_updated_at=datetime.utcnow())
)
await self.db.commit()
return result.rowcount > 0
async def soft_delete_mapping(self, mapping_id: str) -> bool:
"""软删除映射"""
logger.info(f"Soft delete mapping: {mapping_id}")

View File

@@ -1,10 +1,13 @@
from typing import Optional, List, Dict, Any, Tuple
from app.infrastructure import LabelStudioClient, DatamateClient
from app.services.dataset_mapping_service import DatasetMappingService
from app.schemas.dataset_mapping import SyncDatasetResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from app.core.config import settings
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from app.exception import NoDatasetInfoFoundError
from ..client import LabelStudioClient
from ..schema import SyncDatasetResponse
from ..service.mapping import DatasetMappingService
logger = get_logger(__name__)
@@ -13,7 +16,7 @@ class SyncService:
def __init__(
self,
dm_client: DatamateClient,
dm_client: DatasetManagementService,
ls_client: LabelStudioClient,
mapping_service: DatasetMappingService
):
@@ -44,7 +47,7 @@ class SyncService:
project_id: Label Studio project ID
Returns:
Mapping dict from dm_file_id to task_id
Mapping dict from file_id to task_id
"""
try:
logger.info(f"Fetching existing task mappings for project {project_id} (page_size={settings.ls_task_page_size})")
@@ -59,24 +62,29 @@ class SyncService:
page=None, # 不指定page,获取所有任务
page_size=page_size
)
logger.info(f"Fetched tasks result: {result}")
if not result:
logger.warning(f"Failed to fetch tasks for project {project_id}")
return {}
logger.info(f"Successfully fetched tasks for project {project_id}")
all_tasks = result.get("tasks", [])
# Walk all tasks and build the mapping
for task in all_tasks:
# Check whether the task's meta field carries a dm_file_id
meta = task.get('meta')
if meta:
dm_file_id = meta.get('dm_file_id')
if dm_file_id:
task_id = task.get('id')
if task_id:
dm_file_to_task_mapping[str(dm_file_id)] = task_id
# logger.debug(task)
try:
file_id = task.get('data', {}).get('file_id')
task_id = task.get('id')
dm_file_to_task_mapping[str(file_id)] = task_id
except Exception as e:
logger.error(f"Error processing task {task.get('id')}: {e}")
continue
logger.debug(dm_file_to_task_mapping)
logger.info(f"Found {len(dm_file_to_task_mapping)} existing task mappings")
return dm_file_to_task_mapping
@@ -86,22 +94,22 @@ class SyncService:
async def sync_dataset_files(
self,
mapping_id: str,
id: str,
batch_size: int = 50
) -> SyncDatasetResponse:
"""同步数据集文件到Label Studio"""
logger.info(f"Start syncing dataset by mapping: {mapping_id}")
logger.info(f"Start syncing dataset by mapping: {id}")
# Look up the mapping
mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
mapping = await self.mapping_service.get_mapping_by_uuid(id)
if not mapping:
logger.error(f"Dataset mapping not found: {mapping_id}")
logger.error(f"Dataset mapping not found: {id}")
return SyncDatasetResponse(
mapping_id="",
id="",
status="error",
synced_files=0,
total_files=0,
message=f"Dataset mapping not found: {mapping_id}"
message=f"Dataset mapping not found: {id}"
)
try:
@@ -118,20 +126,17 @@ class SyncService:
logger.info(f"Total files in dataset: {total_files}")
# Fetch the existing DM file ID -> task ID mapping from Label Studio
existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labelling_project_id)
existing_dm_file_ids = set(existing_dm_file_mapping.keys())
logger.info(f"{len(existing_dm_file_ids)} tasks already exist in Label Studio")
existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labeling_project_id)
existing_file_ids = set(existing_dm_file_mapping.keys())
logger.info(f"{len(existing_file_ids)} tasks already exist in Label Studio")
# Collect all file IDs currently present in DM
current_dm_file_ids = set()
# Fetch pages of files and sync them
current_file_ids = set()
while True:
files_response = await self.dm_client.get_dataset_files(
mapping.dataset_id,
page=page,
size=batch_size,
status="COMPLETED" # 只同步已完成的文件
)
if not files_response or not files_response.content:
@@ -147,19 +152,18 @@ class SyncService:
for file_info in files_response.content:
# Record file IDs currently present in DM
current_dm_file_ids.add(str(file_info.id))
current_file_ids.add(str(file_info.id))
# Check whether the file already exists
if str(file_info.id) in existing_dm_file_ids:
if str(file_info.id) in existing_file_ids:
existing_files_count += 1
logger.debug(f"Skip existing file: {file_info.originalName} (ID: {file_info.id})")
continue
new_files_count += 1
# Determine the data type
data_type = self.determine_data_type(file_info.fileType)
# Swap the file path prefix: only the leading prefix is replaced, so the same string later in the path is untouched
file_path = file_info.filePath.removeprefix(settings.dm_file_path_prefix)
file_path = settings.label_studio_file_path_prefix + file_path
@@ -167,14 +171,11 @@ class SyncService:
# Build the task payload
task_data = {
"data": {
data_type: file_path
},
"meta": {
"file_size": file_info.size,
"file_type": file_info.fileType,
"dm_dataset_id": mapping.dataset_id,
"dm_file_id": file_info.id,
f"{data_type}": file_path,
"file_path": file_info.filePath,
"file_id": file_info.id,
"original_name": file_info.originalName,
"dataset_id": mapping.dataset_id,
}
}
tasks.append(task_data)
@@ -184,7 +185,7 @@ class SyncService:
# Create Label Studio tasks in batch
if tasks:
batch_result = await self.ls_client.create_tasks_batch(
mapping.labelling_project_id,
mapping.labeling_project_id,
tasks
)
@@ -196,7 +197,7 @@ class SyncService:
# If the batch create fails, fall back to creating tasks one by one
for task_data in tasks:
task_result = await self.ls_client.create_task(
mapping.labelling_project_id,
mapping.labeling_project_id,
task_data["data"],
task_data.get("meta")
)
@@ -210,10 +211,10 @@ class SyncService:
# Remove tasks that exist in Label Studio but no longer exist in DM
tasks_to_delete = []
for dm_file_id, task_id in existing_dm_file_mapping.items():
if dm_file_id not in current_dm_file_ids:
for file_id, task_id in existing_dm_file_mapping.items():
if file_id not in current_file_ids:
tasks_to_delete.append(task_id)
logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {dm_file_id})")
logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {file_id})")
if tasks_to_delete:
logger.info(f"Deleting {len(tasks_to_delete)} tasks not present in DM")
@@ -223,13 +224,10 @@ class SyncService:
else:
logger.info("No tasks to delete")
# Update the mapping's last-updated timestamp
await self.mapping_service.update_last_updated_at(mapping.mapping_id)
logger.info(f"Sync completed: total_files={total_files}, created={synced_files}, deleted={deleted_tasks}")
return SyncDatasetResponse(
mapping_id=mapping.mapping_id,
id=mapping.id,
status="success",
synced_files=synced_files,
total_files=total_files,
@@ -239,7 +237,7 @@ class SyncService:
except Exception as e:
logger.error(f"Error while syncing dataset: {e}")
return SyncDatasetResponse(
mapping_id=mapping.mapping_id,
id=mapping.id,
status="error",
synced_files=0,
total_files=0,
@@ -259,13 +257,12 @@ class SyncService:
dataset_info = await self.dm_client.get_dataset(dataset_id)
# Fetch the Label Studio project's task count
tasks_info = await self.ls_client.get_project_tasks(mapping.labelling_project_id)
tasks_info = await self.ls_client.get_project_tasks(mapping.labeling_project_id)
return {
"mapping_id": mapping.mapping_id,
"id": mapping.id,
"dataset_id": dataset_id,
"labelling_project_id": mapping.labelling_project_id,
"last_updated_at": mapping.last_updated_at,
"labeling_project_id": mapping.labeling_project_id,
"dm_total_files": dataset_info.fileCount if dataset_info else 0,
"ls_total_tasks": tasks_info.get("count", 0) if tasks_info else 0,
"sync_ratio": (

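The create/delete reconciliation in this service boils down to set arithmetic over file IDs; a standalone sketch of that core, with illustrative names and data:

def plan_sync(dm_file_ids, existing_mapping):
    """existing_mapping maps file_id -> Label Studio task_id, as built above."""
    dm_ids = set(dm_file_ids)
    to_create = dm_ids - set(existing_mapping)  # files with no task yet
    to_delete = [task_id for file_id, task_id in existing_mapping.items()
                 if file_id not in dm_ids]      # tasks whose file left DM
    return to_create, to_delete

print(plan_sync({"a", "b"}, {"b": 11, "c": 12}))  # ({'a'}, [12])
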
View File

@@ -0,0 +1,3 @@
from .service import DatasetManagementService
__all__ = ["DatasetManagementService"]

View File

@@ -0,0 +1,16 @@
from .dataset_file import (
DatasetFileResponse,
PagedDatasetFileResponse,
)
from .dataset import (
DatasetResponse,
DatasetTypeResponse,
)
__all__ = [
"DatasetResponse",
"DatasetFileResponse",
"PagedDatasetFileResponse",
"DatasetTypeResponse",
]

View File

@@ -2,28 +2,6 @@ from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class DatasetFileResponse(BaseModel):
"""DM服务数据集文件响应模型"""
id: str = Field(..., description="文件ID")
fileName: str = Field(..., description="文件名")
fileType: str = Field(..., description="文件类型")
filePath: str = Field(..., description="文件路径")
originalName: Optional[str] = Field(None, description="原始文件名")
size: Optional[int] = Field(None, description="文件大小(字节)")
status: Optional[str] = Field(None, description="文件状态")
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
description: Optional[str] = Field(None, description="文件描述")
uploadedBy: Optional[str] = Field(None, description="上传者")
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
class PagedDatasetFileResponse(BaseModel):
"""DM服务分页文件响应模型"""
content: List[DatasetFileResponse] = Field(..., description="文件列表")
totalElements: int = Field(..., description="总元素数")
totalPages: int = Field(..., description="总页数")
page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小")
class DatasetTypeResponse(BaseModel):
"""数据集类型响应模型"""
code: str = Field(..., description="类型编码")

View File

@@ -0,0 +1,26 @@
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class DatasetFileResponse(BaseModel):
"""DM服务数据集文件响应模型"""
id: str = Field(..., description="文件ID")
fileName: str = Field(..., description="文件名")
fileType: str = Field(..., description="文件类型")
filePath: str = Field(..., description="文件路径")
originalName: Optional[str] = Field(None, description="原始文件名")
size: Optional[int] = Field(None, description="文件大小(字节)")
status: Optional[str] = Field(None, description="文件状态")
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
description: Optional[str] = Field(None, description="文件描述")
uploadedBy: Optional[str] = Field(None, description="上传者")
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
class PagedDatasetFileResponse(BaseModel):
"""DM服务分页文件响应模型"""
content: List[DatasetFileResponse] = Field(..., description="文件列表")
totalElements: int = Field(..., description="总元素数")
totalPages: int = Field(..., description="总页数")
page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小")

View File

@@ -0,0 +1,3 @@
from .service import Service as DatasetManagementService
__all__ = ["DatasetManagementService"]

View File

@@ -2,15 +2,16 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy import func
from typing import Optional
from app.core.config import settings
from app.core.logging import get_logger
from app.schemas.dm_service import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
from app.models.dm.dataset import Dataset
from app.models.dm.dataset_files import DatasetFiles
from app.db.models import Dataset, DatasetFiles
from ..schema import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
logger = get_logger(__name__)
class Client:
class Service:
"""数据管理服务客户端 - 直接访问数据库"""
def __init__(self, db: AsyncSession):

View File

@@ -0,0 +1,11 @@
from .common import (
BaseResponseModel,
StandardResponse,
PaginatedData
)
__all__ = [
"BaseResponseModel",
"StandardResponse",
"PaginatedData"
]

View File

@@ -42,7 +42,6 @@ class StandardResponse(BaseResponseModel, Generic[T]):
}
}
class PaginatedData(BaseResponseModel, Generic[T]):
"""分页数据容器"""
page: int = Field(..., description="当前页码(从1开始)")
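
The interface code builds this container the same way at every call site; a compact sketch of that pattern, with made-up counts:

import math

total, page, page_size = 42, 2, 20
paginated = PaginatedData(
    page=page,
    size=page_size,
    total_elements=total,
    total_pages=math.ceil(total / page_size) if total > 0 else 0,
    content=[],  # the current page's items would go here
)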

View File

@@ -1,29 +0,0 @@
# app/schemas/__init__.py
from .common import *
from .dataset_mapping import *
from .dm_service import *
from .label_studio import *
__all__ = [
# Common schemas
"StandardResponse",
# Dataset Mapping schemas
"DatasetMappingBase",
"DatasetMappingCreateRequest",
"DatasetMappingUpdateRequest",
"DatasetMappingResponse",
"DatasetMappingCreateResponse",
"SyncDatasetResponse",
"DeleteDatasetResponse",
# DM Service schemas
"DatasetFileResponse",
"PagedDatasetFileResponse",
"DatasetResponse",
# Label Studio schemas
"LabelStudioProject",
"LabelStudioTask"
]

View File

@@ -1,56 +0,0 @@
from pydantic import Field
from typing import Optional
from datetime import datetime
from .common import BaseResponseModel
class DatasetMappingBase(BaseResponseModel):
"""数据集映射 基础模型"""
dataset_id: str = Field(..., description="源数据集ID")
class DatasetMappingCreateRequest(DatasetMappingBase):
"""数据集映射 创建 请求模型"""
pass
class DatasetMappingCreateResponse(BaseResponseModel):
"""数据集映射 创建 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="Label Studio项目ID")
labelling_project_name: str = Field(..., description="Label Studio项目名称")
message: str = Field(..., description="响应消息")
class DatasetMappingUpdateRequest(BaseResponseModel):
"""数据集映射 更新 请求模型"""
dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(DatasetMappingBase):
"""数据集映射 查询 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="标注项目ID")
labelling_project_name: Optional[str] = Field(None, description="标注项目名称")
created_at: datetime = Field(..., description="创建时间")
last_updated_at: datetime = Field(..., description="最后更新时间")
deleted_at: Optional[datetime] = Field(None, description="删除时间")
class Config:
from_attributes = True
populate_by_name = True
class SyncDatasetRequest(BaseResponseModel):
"""同步数据集请求模型"""
mapping_id: str = Field(..., description="映射ID(mapping UUID)")
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
class SyncDatasetResponse(BaseResponseModel):
"""同步数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="同步状态")
synced_files: int = Field(..., description="已同步文件数量")
total_files: int = Field(0, description="总文件数量")
message: str = Field(..., description="响应消息")
class DeleteDatasetResponse(BaseResponseModel):
"""删除数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="删除状态")
message: str = Field(..., description="响应消息")

View File

@@ -1,6 +0,0 @@
# app/services/__init__.py
from .dataset_mapping_service import DatasetMappingService
from .sync_service import SyncService
__all__ = ["DatasetMappingService", "SyncService"]

View File

@@ -0,0 +1,19 @@
CREATE TABLE t_dm_annotation_templates (
id VARCHAR(36) PRIMARY KEY,
name VARCHAR(32) NOT NULL COMMENT '模板名称',
description VARCHAR(255) COMMENT '模板描述',
configuration JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)'
);
CREATE TABLE t_dm_labeling_projects (
id VARCHAR(36) PRIMARY KEY,
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
name VARCHAR(32) NOT NULL COMMENT '项目名称',
labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
configuration JSON,
progress JSON,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)'
);
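
Both tables rely on deleted_at IS NULL as the liveness test; a hedged sketch of querying active projects through the async session, with the raw SQL kept aligned with the DDL above (the helper name is illustrative):

from sqlalchemy import text

ACTIVE_PROJECTS = text(
    "SELECT id, dataset_id, name, labeling_project_id "
    "FROM t_dm_labeling_projects "
    "WHERE deleted_at IS NULL "
    "ORDER BY created_at DESC"
)

async def list_active(db):
    rows = await db.execute(ACTIVE_PROJECTS)
    return rows.mappings().all()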