Merge branch 'main' into develop_deer

This commit is contained in:
hhhhsc
2025-10-28 11:03:01 +08:00
121 changed files with 1999 additions and 935 deletions

View File

@@ -53,9 +53,6 @@ LS_TASK_PAGE_SIZE=1000
# =========================
# Data Management service configuration
# =========================
# DM service address
DM_SERVICE_BASE_URL=http://data-engine:8080
# DM storage folder prefix (usually matches the Label Studio local-files folder mapping)
DM_FILE_PATH_PREFIX=/

View File

@@ -0,0 +1,86 @@
# Label Studio Adapter (DataMate)
This is DataMate's Label Studio Adapter service. It keeps DataMate projects in sync with Label Studio and exposes an external HTTP API (built on FastAPI).
## Overview
- Framework: FastAPI
- Async database/ORM: SQLAlchemy (async)
- Database migrations: Alembic
- Server: uvicorn
## Quick Start (Development)
1. Clone the repository and enter the project directory
2. Create and activate a virtual environment:
```bash
python -m venv .venv
source .venv/bin/activate
```
3. Install dependencies:
```bash
pip install -r requirements.txt
```
4. Prepare environment variables (example)
Create `.env` and set the required variables, for example:
- DATABASE_URL (or the specific variables used by the project configuration)
- LABEL_STUDIO_BASE_URL
- LABEL_STUDIO_USER_TOKEN
(see `.env.example` for the full list; a sketch follows below)
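A minimal `.env` sketch (values are placeholders and the exact variable set is an assumption; treat `.env.example` as authoritative):
```bash
# Illustrative values only
DATABASE_URL=postgresql+asyncpg://user:pass@localhost:5432/ls_adapter
LABEL_STUDIO_BASE_URL=http://localhost:8080
LABEL_STUDIO_USER_TOKEN=<your-label-studio-token>
```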
5. Run database migrations (development):
```bash
alembic upgrade head
```
6. Start the development server (examples with common options):
- Local development (default host/port, auto-reload):
```bash
uvicorn app.main:app --reload
```
- Specify host and port and enable debug logging:
```bash
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload --log-level debug
```
- Multiple workers in production (without --reload):
```bash
uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 4 --log-level info --proxy-headers
```
- Start with environment variables (example):
```bash
HOST=0.0.0.0 PORT=8000 uvicorn app.main:app --reload
```
Notes:
- `--reload` is for development only: it watches for file changes and restarts the process; do not use it in production.
- `--workers` adds concurrency but increases memory usage; in production it is usually combined with a process manager or container orchestration (Kubernetes).
- For a full production deployment, use an ASGI server setup such as gunicorn with uvicorn workers, or uvicorn in a container with process management (a sketch follows below).
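One possible gunicorn invocation for the setup mentioned above (an illustrative sketch, not the project's documented deployment; `uvicorn.workers.UvicornWorker` ships with uvicorn's standard extras):
```bash
gunicorn app.main:app -k uvicorn.workers.UvicornWorker -w 4 -b 0.0.0.0:8000
```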
API documentation:
- Swagger UI: http://127.0.0.1:8000/docs
- ReDoc: http://127.0.0.1:8000/redoc (recommended)
## Usage (Brief)
- All API paths are registered under the `/api` prefix (see `app.include_router(api_router, prefix="/api")` in `app/main.py`).
- The root path `/` returns service information and documentation links; a quick check is shown below.
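A quick smoke test against the root endpoint (assuming the default development host and port):
```bash
curl http://127.0.0.1:8000/
```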
For more details, see `doc/usage.md` (API usage) and `doc/development.md` (development notes).

View File

@@ -4,7 +4,7 @@ from typing import Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.clients import get_clients
from app.infrastructure import DatamateClient, LabelStudioClient
from app.schemas.dataset_mapping import (
DatasetMappingCreateRequest,
DatasetMappingCreateResponse,
@@ -30,18 +30,19 @@ async def create_dataset_mapping(
Note: a single dataset can have multiple labeling projects created for it
"""
try:
# Get the global client instances
dm_client_instance, ls_client_instance = get_clients()
dm_client = DatamateClient(db)
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
logger.info(f"Create dataset mapping request: {request.source_dataset_id}")
logger.info(f"Create dataset mapping request: {request.dataset_id}")
# Fetch dataset info from the DM service
dataset_info = await dm_client_instance.get_dataset(request.source_dataset_id)
dataset_info = await dm_client.get_dataset(request.dataset_id)
if not dataset_info:
raise HTTPException(
status_code=404,
detail=f"Dataset not found in DM service: {request.source_dataset_id}"
detail=f"Dataset not found in DM service: {request.dataset_id}"
)
# Determine the data type (based on the dataset type)
@@ -55,11 +56,10 @@ async def create_dataset_mapping(
elif "text" in type_code:
data_type = "text"
# Generate the project name
project_name = f"{dataset_info.name}"
# Create the project in Label Studio
project_data = await ls_client_instance.create_project(
project_data = await ls_client.create_project(
title=project_name,
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
data_type=data_type
@@ -74,8 +74,8 @@ async def create_dataset_mapping(
project_id = project_data["id"]
# Configure local storage: dataset/<id>
local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.source_dataset_id}"
storage_result = await ls_client_instance.create_local_storage(
local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}"
storage_result = await ls_client.create_local_storage(
project_id=project_id,
path=local_storage_path,
title="Dataset_BLOB",
@@ -85,7 +85,7 @@ async def create_dataset_mapping(
# Configure local storage: upload
local_storage_path = f"{settings.label_studio_local_storage_upload_base_path}"
storage_result = await ls_client_instance.create_local_storage(
storage_result = await ls_client.create_local_storage(
project_id=project_id,
path=local_storage_path,
title="Upload_BLOB",
@@ -107,7 +107,7 @@ async def create_dataset_mapping(
)
logger.debug(
f"Dataset mapping created: {mapping.mapping_id} -> S {mapping.source_dataset_id} <> L {mapping.labelling_project_id}"
f"Dataset mapping created: {mapping.mapping_id} -> S {mapping.dataset_id} <> L {mapping.labelling_project_id}"
)
response_data = DatasetMappingCreateResponse(

View File

@@ -1,13 +1,15 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi import Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.clients import get_clients
from app.infrastructure import DatamateClient, LabelStudioClient
from app.schemas.dataset_mapping import DeleteDatasetResponse
from app.schemas import StandardResponse
from app.core.logging import get_logger
from app.core.config import settings
from . import project_router
logger = get_logger(__name__)
@@ -37,39 +39,39 @@ async def delete_mapping(
status_code=400,
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
)
# Get the global client instances
dm_client_instance, ls_client_instance = get_clients()
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
mapping = None
# Prefer lookup by mapping_id
if m:
logger.info(f"Deleting by mapping UUID: {m}")
logger.debug(f"Deleting by mapping UUID: {m}")
mapping = await service.get_mapping_by_uuid(m)
# If m is not provided, look up by proj
elif proj:
logger.info(f"Deleting by project ID: {proj}")
logger.debug(f"Deleting by project ID: {proj}")
mapping = await service.get_mapping_by_labelling_project_id(proj)
else:
mapping = None
if not mapping:
raise HTTPException(
status_code=404,
detail=f"Mapping not found"
detail=f"Mapping either not found or not specified."
)
mapping_id = mapping.mapping_id
labelling_project_id = mapping.labelling_project_id
labelling_project_name = mapping.labelling_project_name
logger.info(f"Found mapping: {mapping_id}, Label Studio project ID: {labelling_project_id}")
logger.debug(f"Found mapping: {mapping_id}, Label Studio project ID: {labelling_project_id}")
# 1. Delete the Label Studio project
try:
delete_success = await ls_client_instance.delete_project(int(labelling_project_id))
delete_success = await ls_client.delete_project(int(labelling_project_id))
if delete_success:
logger.info(f"Successfully deleted Label Studio project: {labelling_project_id}")
logger.debug(f"Successfully deleted Label Studio project: {labelling_project_id}")
else:
logger.warning(f"Failed to delete Label Studio project or project not found: {labelling_project_id}")
except Exception as e:
@@ -84,19 +86,17 @@ async def delete_mapping(
status_code=500,
detail="Failed to delete mapping record"
)
logger.info(f"Successfully deleted mapping: {mapping_id}")
response_data = DeleteDatasetResponse(
mapping_id=mapping_id,
status="success",
message=f"Successfully deleted mapping and Label Studio project '{labelling_project_name}'"
)
logger.info(f"Successfully deleted mapping: {mapping_id}, Label Studio project: {labelling_project_id}")
return StandardResponse(
code=200,
message="success",
data=response_data
data=DeleteDatasetResponse(
mapping_id=mapping_id,
status="success",
message=f"Successfully deleted mapping and Label Studio project '{labelling_project_name}'"
)
)
except HTTPException:

View File

@@ -98,9 +98,9 @@ async def get_mapping(
raise HTTPException(status_code=500, detail="Internal server error")
@project_router.get("/mappings/by-source/{source_dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
@project_router.get("/mappings/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def get_mappings_by_source(
source_dataset_id: str,
dataset_id: str,
page: int = Query(1, ge=1, description="页码(从1开始)"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
db: AsyncSession = Depends(get_db)
@@ -116,11 +116,11 @@ async def get_mappings_by_source(
# Compute skip
skip = (page - 1) * page_size
logger.info(f"Get mappings by source dataset id: {source_dataset_id}, page={page}, page_size={page_size}")
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}")
# Fetch data and total count
mappings, total = await service.get_mappings_by_source_with_count(
source_dataset_id=source_dataset_id,
dataset_id=dataset_id,
skip=skip,
limit=page_size
)

View File

@@ -5,7 +5,7 @@ from typing import List, Optional
from app.db.database import get_db
from app.services.dataset_mapping_service import DatasetMappingService
from app.services.sync_service import SyncService
from app.clients import get_clients
from app.infrastructure import DatamateClient, LabelStudioClient
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from app.schemas.dataset_mapping import (
DatasetMappingResponse,
@@ -14,6 +14,7 @@ from app.schemas.dataset_mapping import (
)
from app.schemas import StandardResponse
from app.core.logging import get_logger
from app.core.config import settings
from . import project_router
logger = get_logger(__name__)
@@ -30,10 +31,12 @@ async def sync_dataset_content(
Record the update time in the database and return the sync status
"""
try:
dm_client_instance, ls_client_instance = get_clients()
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
dm_client = DatamateClient(db)
mapping_service = DatasetMappingService(db)
sync_service = SyncService(dm_client_instance, ls_client_instance, mapping_service)
sync_service = SyncService(dm_client, ls_client, mapping_service)
logger.info(f"Sync dataset content request: mapping_id={request.mapping_id}")
# Look up the mapping by mapping_id

View File

@@ -27,7 +27,6 @@ async def get_config():
data={
"app_name": settings.app_name,
"version": settings.app_version,
"dm_service_url": settings.dm_service_base_url,
"label_studio_url": settings.label_studio_base_url,
"debug": settings.debug
}

View File

@@ -73,7 +73,6 @@ class Settings(BaseSettings):
# =========================
# Data Management service configuration
# =========================
dm_service_base_url: str = "http://data-engine"
dm_file_path_prefix: str = "/" # DM storage folder prefix

View File

@@ -0,0 +1,6 @@
# app/clients/__init__.py
from .label_studio import Client as LabelStudioClient
from .datamate import Client as DatamateClient
__all__ = ["LabelStudioClient", "DatamateClient"]

View File

@@ -0,0 +1,159 @@
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy import func
from typing import Optional
from app.core.config import settings
from app.core.logging import get_logger
from app.schemas.dm_service import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
from app.models.dm.dataset import Dataset
from app.models.dm.dataset_files import DatasetFiles
logger = get_logger(__name__)
class Client:
"""数据管理服务客户端 - 直接访问数据库"""
def __init__(self, db: AsyncSession):
"""
Initialize the DM client
Args:
db: database session
"""
self.db = db
logger.info("Initialize DM service client (Database mode)")
async def get_dataset(self, dataset_id: str) -> Optional[DatasetResponse]:
"""获取数据集详情"""
try:
logger.info(f"Getting dataset detail: {dataset_id} ...")
result = await self.db.execute(
select(Dataset).where(Dataset.id == dataset_id)
)
dataset = result.scalar_one_or_none()
if not dataset:
logger.error(f"Dataset not found: {dataset_id}")
return None
# Convert the database model to the response model
# type: ignore suppresses SQLAlchemy type-checking complaints
return DatasetResponse(
id=dataset.id, # type: ignore
name=dataset.name, # type: ignore
description=dataset.description or "", # type: ignore
datasetType=dataset.dataset_type, # type: ignore
status=dataset.status, # type: ignore
fileCount=dataset.file_count or 0, # type: ignore
totalSize=dataset.size_bytes or 0, # type: ignore
createdAt=dataset.created_at, # type: ignore
updatedAt=dataset.updated_at, # type: ignore
createdBy=dataset.created_by # type: ignore
)
except Exception as e:
logger.error(f"Failed to get dataset {dataset_id}: {e}")
return None
async def get_dataset_files(
self,
dataset_id: str,
page: int = 0,
size: int = 100,
file_type: Optional[str] = None,
status: Optional[str] = None
) -> Optional[PagedDatasetFileResponse]:
"""获取数据集文件列表"""
try:
logger.info(f"Get dataset files: dataset={dataset_id}, page={page}, size={size}")
# Build the query
query = select(DatasetFiles).where(DatasetFiles.dataset_id == dataset_id)
# Apply optional filters
if file_type:
query = query.where(DatasetFiles.file_type == file_type)
if status:
query = query.where(DatasetFiles.status == status)
# Get the total count
count_query = select(func.count()).select_from(DatasetFiles).where(
DatasetFiles.dataset_id == dataset_id
)
if file_type:
count_query = count_query.where(DatasetFiles.file_type == file_type)
if status:
count_query = count_query.where(DatasetFiles.status == status)
count_result = await self.db.execute(count_query)
total = count_result.scalar_one()
# Paginated query
query = query.offset(page * size).limit(size).order_by(DatasetFiles.created_at.desc())
result = await self.db.execute(query)
files = result.scalars().all()
# Convert to response models
# type: ignore suppresses SQLAlchemy type-checking complaints
content = [
DatasetFileResponse(
id=f.id, # type: ignore
fileName=f.file_name, # type: ignore
fileType=f.file_type or "", # type: ignore
filePath=f.file_path, # type: ignore
originalName=f.file_name, # type: ignore
size=f.file_size, # type: ignore
status=f.status, # type: ignore
uploadedAt=f.upload_time, # type: ignore
description=None,
uploadedBy=None,
lastAccessTime=f.last_access_time # type: ignore
)
for f in files
]
total_pages = (total + size - 1) // size if size > 0 else 0
return PagedDatasetFileResponse(
content=content,
totalElements=total,
totalPages=total_pages,
page=page,
size=size
)
except Exception as e:
logger.error(f"Failed to get dataset files for {dataset_id}: {e}")
return None
async def download_file(self, dataset_id: str, file_id: str) -> Optional[bytes]:
"""
Download file contents
Note: this method is kept for interface compatibility; actual file downloads may need to go through the file system or object storage
"""
logger.warning(f"download_file is deprecated when using database mode. Use get_file_download_url instead.")
return None
async def get_file_download_url(self, dataset_id: str, file_id: str) -> Optional[str]:
"""获取文件下载URL(或文件路径)"""
try:
result = await self.db.execute(
select(DatasetFiles).where(
DatasetFiles.id == file_id,
DatasetFiles.dataset_id == dataset_id
)
)
file = result.scalar_one_or_none()
if not file:
logger.error(f"File not found: {file_id} in dataset {dataset_id}")
return None
# Return the file path (can be a local path or an object storage URL)
return file.file_path # type: ignore
except Exception as e:
logger.error(f"Failed to get file path for {file_id}: {e}")
return None
async def close(self):
"""关闭客户端连接(数据库模式下无需操作)"""
logger.info("DM service client closed (Database mode)")

View File

@@ -12,7 +12,7 @@ from app.schemas.label_studio import (
logger = get_logger(__name__)
class LabelStudioClient:
class Client:
"""Label Studio服务客户端
使用 HTTP REST API 直接与 Label Studio 交互

View File

@@ -8,7 +8,7 @@ from typing import Dict, Any
from .core.config import settings
from .core.logging import setup_logging, get_logger
from .clients import DMServiceClient, LabelStudioClient, set_clients
from .infrastructure import LabelStudioClient
from .api import api_router
from .schemas import StandardResponse
@@ -23,23 +23,12 @@ async def lifespan(app: FastAPI):
# Initialize on startup
logger.info("Starting Label Studio Adapter...")
# Initialize clients
dm_client = DMServiceClient()
# Initialize the Label Studio client, using the HTTP REST API with token authentication
ls_client = LabelStudioClient(
base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token
)
# Set the global clients
set_clients(dm_client, ls_client)
# Database initialization is managed by Alembic
# In the Docker environment, entrypoint.sh runs before startup: alembic upgrade head
# In the development environment, run manually: alembic upgrade head
logger.info("Database schema managed by Alembic")
logger.info("Label Studio Adapter started")
yield
@@ -155,7 +144,6 @@ async def root():
"message": f"{settings.app_name} is running",
"version": settings.app_version,
"docs_url": "/docs",
"dm_service_url": settings.dm_service_base_url,
"label_studio_url": settings.label_studio_base_url
}
)

View File

@@ -0,0 +1,138 @@
# DataMate Data Model Structure
This document lists all Python data models created from the SQL files in `scripts/db`.
## Model Organization
```
app/models/
├── __init__.py                       # Main module export file
├── dm/                               # Data Management module
│   ├── __init__.py
│   ├── annotation_template.py        # Annotation templates
│   ├── labeling_project.py           # Labeling projects
│   ├── dataset.py                    # Datasets
│   ├── dataset_files.py              # Dataset files
│   ├── dataset_statistics.py         # Dataset statistics
│   ├── dataset_tag.py                # Dataset-tag associations
│   ├── tag.py                        # Tags
│   └── user.py                       # Users
├── cleaning/                         # Data Cleaning module
│   ├── __init__.py
│   ├── clean_template.py             # Cleaning templates
│   ├── clean_task.py                 # Cleaning tasks
│   ├── operator_instance.py          # Operator instances
│   └── clean_result.py               # Cleaning results
├── collection/                       # Data Collection module
│   ├── __init__.py
│   ├── task_execution.py             # Task execution details
│   ├── collection_task.py            # Data collection tasks
│   ├── task_log.py                   # Task execution logs
│   └── datax_template.py             # DataX template configuration
├── common/                           # Common module
│   ├── __init__.py
│   └── chunk_upload_request.py       # Chunked file upload requests
└── operator/                         # Operator module
    ├── __init__.py
    ├── operator.py                   # Operators
    ├── operator_category.py          # Operator categories
    └── operator_category_relation.py # Operator-category associations
```
## Module Details
### 1. Data Management (DM) Module
Corresponding SQL: `data-management-init.sql` and `data-annotation-init.sql`
#### Models:
- **AnnotationTemplate** (`t_dm_annotation_templates`) - Annotation templates
- **LabelingProject** (`t_dm_labeling_projects`) - Labeling projects
- **Dataset** (`t_dm_datasets`) - Datasets (supporting medical imaging, text, Q&A, and other types)
- **DatasetFiles** (`t_dm_dataset_files`) - Dataset files
- **DatasetStatistics** (`t_dm_dataset_statistics`) - Dataset statistics
- **Tag** (`t_dm_tags`) - Tags
- **DatasetTag** (`t_dm_dataset_tags`) - Dataset-tag associations
- **User** (`users`) - Users
### 2. Data Cleaning Module
Corresponding SQL: `data-cleaning-init.sql`
#### Models:
- **CleanTemplate** (`t_clean_template`) - Cleaning templates
- **CleanTask** (`t_clean_task`) - Cleaning tasks
- **OperatorInstance** (`t_operator_instance`) - Operator instances
- **CleanResult** (`t_clean_result`) - Cleaning results
### 3. Data Collection (DC) Module
Corresponding SQL: `data-collection-init.sql`
#### Models:
- **TaskExecution** (`t_dc_task_executions`) - Task execution details
- **CollectionTask** (`t_dc_collection_tasks`) - Data collection tasks
- **TaskLog** (`t_dc_task_log`) - Task execution logs
- **DataxTemplate** (`t_dc_datax_templates`) - DataX template configuration
### 4. Common Module
Corresponding SQL: `data-common-init.sql`
#### Models:
- **ChunkUploadRequest** (`t_chunk_upload_request`) - Chunked file upload requests
### 5. Operator Module
Corresponding SQL: `data-operator-init.sql`
#### Models:
- **Operator** (`t_operator`) - Operators
- **OperatorCategory** (`t_operator_category`) - Operator categories
- **OperatorCategoryRelation** (`t_operator_category_relation`) - Operator-category associations
## Usage
```python
# Import all models
from app.models import (
    # DM module
    AnnotationTemplate,
    LabelingProject,
    Dataset,
    DatasetFiles,
    DatasetStatistics,
    DatasetTag,
    Tag,
    User,
    # Cleaning module
    CleanTemplate,
    CleanTask,
    OperatorInstance,
    CleanResult,
    # Collection module
    TaskExecution,
    CollectionTask,
    TaskLog,
    DataxTemplate,
    # Common module
    ChunkUploadRequest,
    # Operator module
    Operator,
    OperatorCategory,
    OperatorCategoryRelation
)
# Or import per module
from app.models.dm import Dataset, DatasetFiles
from app.models.collection import CollectionTask
from app.models.operator import Operator
```
## Notes
1. **UUID primary keys**: Most tables use a UUID (String(36)) as the primary key
2. **Timestamps**: Columns use the `TIMESTAMP` type and are configured to auto-update
3. **Soft delete**: Some models (e.g. AnnotationTemplate, LabelingProject) support soft delete via a `deleted_at` column and an `is_deleted` property; see the query sketch after this list
4. **JSON columns**: Configuration, metadata, and similar information are stored as JSON
5. **Field consistency**: All model fields are created strictly from the SQL definitions, so they match the database table structure exactly
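To illustrate the soft-delete convention in note 3, here is a query sketch that mirrors how `DatasetMappingService` filters rows elsewhere in this commit:
```python
from sqlalchemy import select
from app.models.dm import LabelingProject

async def get_active_labeling_project(db, project_uuid: str):
    # Soft-deleted rows stay in the table with deleted_at set;
    # exclude them by requiring deleted_at IS NULL.
    result = await db.execute(
        select(LabelingProject).where(
            LabelingProject.id == project_uuid,
            LabelingProject.deleted_at.is_(None),
        )
    )
    return result.scalar_one_or_none()
```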
## Changelog
- 2025-10-25: Created all data models from the SQL files in `scripts/db`
- Updated the existing `annotation_template.py`, `labeling_project.py`, and `dataset_files.py` to match the SQL definitions

View File

@@ -0,0 +1,69 @@
# app/models/__init__.py
# Data Management (DM) module
from .dm import (
AnnotationTemplate,
LabelingProject,
Dataset,
DatasetFiles,
DatasetStatistics,
DatasetTag,
Tag,
User
)
# Data Cleaning module
from .cleaning import (
CleanTemplate,
CleanTask,
OperatorInstance,
CleanResult
)
# Data Collection (DC) module
from .collection import (
TaskExecution,
CollectionTask,
TaskLog,
DataxTemplate
)
# Common module
from .common import (
ChunkUploadRequest
)
# Operator module
from .operator import (
Operator,
OperatorCategory,
OperatorCategoryRelation
)
__all__ = [
# DM module
"AnnotationTemplate",
"LabelingProject",
"Dataset",
"DatasetFiles",
"DatasetStatistics",
"DatasetTag",
"Tag",
"User",
# Cleaning module
"CleanTemplate",
"CleanTask",
"OperatorInstance",
"CleanResult",
# Collection module
"TaskExecution",
"CollectionTask",
"TaskLog",
"DataxTemplate",
# Common module
"ChunkUploadRequest",
# Operator module
"Operator",
"OperatorCategory",
"OperatorCategoryRelation"
]

View File

@@ -0,0 +1,13 @@
# app/models/cleaning/__init__.py
from .clean_template import CleanTemplate
from .clean_task import CleanTask
from .operator_instance import OperatorInstance
from .clean_result import CleanResult
__all__ = [
"CleanTemplate",
"CleanTask",
"OperatorInstance",
"CleanResult"
]

View File

@@ -0,0 +1,22 @@
from sqlalchemy import Column, String, BigInteger, Text
from app.db.database import Base
class CleanResult(Base):
"""清洗结果模型"""
__tablename__ = "t_clean_result"
instance_id = Column(String(64), primary_key=True, comment="实例ID")
src_file_id = Column(String(64), nullable=True, comment="源文件ID")
dest_file_id = Column(String(64), primary_key=True, comment="目标文件ID")
src_name = Column(String(256), nullable=True, comment="源文件名")
dest_name = Column(String(256), nullable=True, comment="目标文件名")
src_type = Column(String(256), nullable=True, comment="源文件类型")
dest_type = Column(String(256), nullable=True, comment="目标文件类型")
src_size = Column(BigInteger, nullable=True, comment="源文件大小")
dest_size = Column(BigInteger, nullable=True, comment="目标文件大小")
status = Column(String(256), nullable=True, comment="处理状态")
result = Column(Text, nullable=True, comment="处理结果")
def __repr__(self):
return f"<CleanResult(instance_id={self.instance_id}, dest_file_id={self.dest_file_id}, status={self.status})>"

View File

@@ -0,0 +1,27 @@
from sqlalchemy import Column, String, BigInteger, Integer, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CleanTask(Base):
"""清洗任务模型"""
__tablename__ = "t_clean_task"
id = Column(String(64), primary_key=True, comment="任务ID")
name = Column(String(64), nullable=True, comment="任务名称")
description = Column(String(256), nullable=True, comment="任务描述")
status = Column(String(256), nullable=True, comment="任务状态")
src_dataset_id = Column(String(64), nullable=True, comment="源数据集ID")
src_dataset_name = Column(String(64), nullable=True, comment="源数据集名称")
dest_dataset_id = Column(String(64), nullable=True, comment="目标数据集ID")
dest_dataset_name = Column(String(64), nullable=True, comment="目标数据集名称")
before_size = Column(BigInteger, nullable=True, comment="清洗前大小")
after_size = Column(BigInteger, nullable=True, comment="清洗后大小")
file_count = Column(Integer, nullable=True, comment="文件数量")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
finished_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
created_by = Column(String(256), nullable=True, comment="创建者")
def __repr__(self):
return f"<CleanTask(id={self.id}, name={self.name}, status={self.status})>"

View File

@@ -0,0 +1,18 @@
from sqlalchemy import Column, String, Text, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CleanTemplate(Base):
"""清洗模板模型"""
__tablename__ = "t_clean_template"
id = Column(String(64), primary_key=True, unique=True, comment="模板ID")
name = Column(String(64), nullable=True, comment="模板名称")
description = Column(String(256), nullable=True, comment="模板描述")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(256), nullable=True, comment="创建者")
def __repr__(self):
return f"<CleanTemplate(id={self.id}, name={self.name})>"

View File

@@ -0,0 +1,15 @@
from sqlalchemy import Column, String, Integer, Text
from app.db.database import Base
class OperatorInstance(Base):
"""算子实例模型"""
__tablename__ = "t_operator_instance"
instance_id = Column(String(256), primary_key=True, comment="实例ID")
operator_id = Column(String(256), primary_key=True, comment="算子ID")
op_index = Column(Integer, primary_key=True, comment="算子索引")
settings_override = Column(Text, nullable=True, comment="配置覆盖")
def __repr__(self):
return f"<OperatorInstance(instance_id={self.instance_id}, operator_id={self.operator_id}, index={self.op_index})>"

View File

@@ -0,0 +1,13 @@
# app/models/collection/__init__.py
from .task_execution import TaskExecution
from .collection_task import CollectionTask
from .task_log import TaskLog
from .datax_template import DataxTemplate
__all__ = [
"TaskExecution",
"CollectionTask",
"TaskLog",
"DataxTemplate"
]

View File

@@ -0,0 +1,28 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class CollectionTask(Base):
"""数据归集任务模型"""
__tablename__ = "t_dc_collection_tasks"
id = Column(String(36), primary_key=True, comment="任务ID(UUID)")
name = Column(String(255), nullable=False, comment="任务名称")
description = Column(Text, nullable=True, comment="任务描述")
sync_mode = Column(String(20), default='ONCE', comment="同步模式:ONCE/SCHEDULED")
config = Column(Text, nullable=False, comment="归集配置(DataX配置),包含源端和目标端配置信息")
schedule_expression = Column(String(255), nullable=True, comment="Cron调度表达式")
status = Column(String(20), default='DRAFT', comment="任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED")
retry_count = Column(Integer, default=3, comment="重试次数")
timeout_seconds = Column(Integer, default=3600, comment="超时时间(秒)")
max_records = Column(BigInteger, nullable=True, comment="最大处理记录数")
sort_field = Column(String(100), nullable=True, comment="增量字段")
last_execution_id = Column(String(36), nullable=True, comment="最后执行ID(UUID)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<CollectionTask(id={self.id}, name={self.name}, status={self.status})>"

View File

@@ -0,0 +1,23 @@
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class DataxTemplate(Base):
"""DataX模板配置模型"""
__tablename__ = "t_dc_datax_templates"
id = Column(String(36), primary_key=True, comment="模板ID(UUID)")
name = Column(String(255), nullable=False, unique=True, comment="模板名称")
source_type = Column(String(50), nullable=False, comment="源数据源类型")
target_type = Column(String(50), nullable=False, comment="目标数据源类型")
template_content = Column(Text, nullable=False, comment="模板内容")
description = Column(Text, nullable=True, comment="模板描述")
version = Column(String(20), default='1.0.0', comment="版本号")
is_system = Column(Boolean, default=False, comment="是否系统模板")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
def __repr__(self):
return f"<DataxTemplate(id={self.id}, name={self.name}, source={self.source_type}, target={self.target_type})>"

View File

@@ -0,0 +1,34 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, DECIMAL, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class TaskExecution(Base):
"""任务执行明细模型"""
__tablename__ = "t_dc_task_executions"
id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
task_id = Column(String(36), nullable=False, comment="任务ID")
task_name = Column(String(255), nullable=False, comment="任务名称")
status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
progress = Column(DECIMAL(5, 2), default=0.00, comment="进度百分比")
records_total = Column(BigInteger, default=0, comment="总记录数")
records_processed = Column(BigInteger, default=0, comment="已处理记录数")
records_success = Column(BigInteger, default=0, comment="成功记录数")
records_failed = Column(BigInteger, default=0, comment="失败记录数")
throughput = Column(DECIMAL(10, 2), default=0.00, comment="吞吐量(条/秒)")
data_size_bytes = Column(BigInteger, default=0, comment="数据量(字节)")
started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
duration_seconds = Column(Integer, default=0, comment="执行时长(秒)")
config = Column(JSON, nullable=True, comment="执行配置")
error_message = Column(Text, nullable=True, comment="错误信息")
datax_job_id = Column(Text, nullable=True, comment="datax任务ID")
result = Column(Text, nullable=True, comment="执行结果")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<TaskExecution(id={self.id}, task_id={self.task_id}, status={self.status})>"

View File

@@ -0,0 +1,26 @@
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class TaskLog(Base):
"""任务执行记录模型"""
__tablename__ = "t_dc_task_log"
id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
task_id = Column(String(36), nullable=False, comment="任务ID")
task_name = Column(String(255), nullable=False, comment="任务名称")
sync_mode = Column(String(20), default='FULL', comment="同步模式:FULL/INCREMENTAL")
status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
start_time = Column(TIMESTAMP, nullable=True, comment="开始时间")
end_time = Column(TIMESTAMP, nullable=True, comment="结束时间")
duration = Column(BigInteger, nullable=True, comment="执行时长(毫秒)")
process_id = Column(String(50), nullable=True, comment="进程ID")
log_path = Column(String(500), nullable=True, comment="日志文件路径")
error_msg = Column(Text, nullable=True, comment="错误信息")
result = Column(Text, nullable=True, comment="执行结果")
retry_times = Column(Integer, default=0, comment="重试次数")
create_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
def __repr__(self):
return f"<TaskLog(id={self.id}, task_id={self.task_id}, status={self.status})>"

View File

@@ -0,0 +1,7 @@
# app/models/common/__init__.py
from .chunk_upload_request import ChunkUploadRequest
__all__ = [
"ChunkUploadRequest"
]

View File

@@ -0,0 +1,19 @@
from sqlalchemy import Column, String, Integer, Text, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class ChunkUploadRequest(Base):
"""文件切片上传请求模型"""
__tablename__ = "t_chunk_upload_request"
id = Column(String(36), primary_key=True, comment="UUID")
total_file_num = Column(Integer, nullable=True, comment="总文件数")
uploaded_file_num = Column(Integer, nullable=True, comment="已上传文件数")
upload_path = Column(String(256), nullable=True, comment="文件路径")
timeout = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传请求超时时间")
service_id = Column(String(64), nullable=True, comment="上传请求所属服务:DATA-MANAGEMENT(数据管理)")
check_info = Column(Text, nullable=True, comment="业务信息")
def __repr__(self):
return f"<ChunkUploadRequest(id={self.id}, service_id={self.service_id}, progress={self.uploaded_file_num}/{self.total_file_num})>"

View File

@@ -0,0 +1,21 @@
# app/models/dm/__init__.py
from .annotation_template import AnnotationTemplate
from .labeling_project import LabelingProject
from .dataset import Dataset
from .dataset_files import DatasetFiles
from .dataset_statistics import DatasetStatistics
from .dataset_tag import DatasetTag
from .tag import Tag
from .user import User
__all__ = [
"AnnotationTemplate",
"LabelingProject",
"Dataset",
"DatasetFiles",
"DatasetStatistics",
"DatasetTag",
"Tag",
"User"
]

View File

@@ -0,0 +1,24 @@
from sqlalchemy import Column, String, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class AnnotationTemplate(Base):
"""标注模板模型"""
__tablename__ = "t_dm_annotation_templates"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
name = Column(String(32), nullable=False, comment="模板名称")
description = Column(String(255), nullable=True, comment="模板描述")
configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<AnnotationTemplate(id={self.id}, name={self.name})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None

View File

@@ -0,0 +1,35 @@
from sqlalchemy import Column, String, Text, BigInteger, Integer, Boolean, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class Dataset(Base):
"""数据集模型(支持医学影像、文本、问答等多种类型)"""
__tablename__ = "t_dm_datasets"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(255), nullable=False, comment="数据集名称")
description = Column(Text, nullable=True, comment="数据集描述")
dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER")
category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等")
path = Column(String(500), nullable=True, comment="数据存储路径")
format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等")
schema_info = Column(JSON, nullable=True, comment="数据结构信息")
size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)")
file_count = Column(BigInteger, default=0, comment="文件数量")
record_count = Column(BigInteger, default=0, comment="记录数量")
retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)")
tags = Column(JSON, nullable=True, comment="标签列表")
# NOTE: 'metadata' is a reserved attribute name on SQLAlchemy's declarative Base,
# so the Python attribute is renamed while still mapping to the "metadata" column.
meta_info = Column("metadata", JSON, nullable=True, comment="元数据信息")
status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED")
is_public = Column(Boolean, default=False, comment="是否公开")
is_featured = Column(Boolean, default=False, comment="是否推荐")
version = Column(BigInteger, nullable=False, default=0, comment="版本号")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
def __repr__(self):
return f"<Dataset(id={self.id}, name={self.name}, type={self.dataset_type})>"

View File

@@ -0,0 +1,27 @@
from sqlalchemy import Column, String, JSON, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class DatasetFiles(Base):
"""DM数据集文件模型"""
__tablename__ = "t_dm_dataset_files"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)")
file_name = Column(String(255), nullable=False, comment="文件名")
file_path = Column(String(1000), nullable=False, comment="文件路径")
file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等")
file_size = Column(BigInteger, default=0, comment="文件大小(字节)")
check_sum = Column(String(64), nullable=True, comment="文件校验和")
tags = Column(JSON, nullable=True, comment="文件标签信息")
# NOTE: 'metadata' is a reserved attribute name on SQLAlchemy's declarative Base,
# so the Python attribute is renamed while still mapping to the "metadata" column.
meta_info = Column("metadata", JSON, nullable=True, comment="文件元数据")
status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING")
upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间")
last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<DatasetFiles(id={self.id}, dataset_id={self.dataset_id}, file_name={self.file_name})>"

View File

@@ -0,0 +1,25 @@
from sqlalchemy import Column, String, Date, BigInteger, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class DatasetStatistics(Base):
"""数据集统计信息模型"""
__tablename__ = "t_dm_dataset_statistics"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)")
stat_date = Column(Date, nullable=False, comment="统计日期")
total_files = Column(BigInteger, default=0, comment="总文件数")
total_size = Column(BigInteger, default=0, comment="总大小(字节)")
processed_files = Column(BigInteger, default=0, comment="已处理文件数")
error_files = Column(BigInteger, default=0, comment="错误文件数")
download_count = Column(BigInteger, default=0, comment="下载次数")
view_count = Column(BigInteger, default=0, comment="查看次数")
quality_metrics = Column(JSON, nullable=True, comment="质量指标")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<DatasetStatistics(id={self.id}, dataset_id={self.dataset_id}, date={self.stat_date})>"

View File

@@ -0,0 +1,15 @@
from sqlalchemy import Column, String, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class DatasetTag(Base):
"""数据集标签关联模型"""
__tablename__ = "t_dm_dataset_tags"
dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)")
tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
def __repr__(self):
return f"<DatasetTag(dataset_id={self.dataset_id}, tag_id={self.tag_id})>"

View File

@@ -0,0 +1,26 @@
from sqlalchemy import Column, String, Integer, JSON, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class LabelingProject(Base):
"""DM标注项目模型(原 DatasetMapping)"""
__tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
name = Column(String(32), nullable=False, comment="项目名称")
labeling_project_id = Column(Integer, nullable=False, comment="Label Studio项目ID")
configuration = Column(JSON, nullable=True, comment="标签配置")
progress = Column(JSON, nullable=True, comment="标注进度统计")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<LabelingProject(id={self.id}, dataset_id={self.dataset_id}, name={self.name})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None

View File

@@ -0,0 +1,21 @@
from sqlalchemy import Column, String, Text, BigInteger, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
import uuid
class Tag(Base):
"""标签模型"""
__tablename__ = "t_dm_tags"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(100), nullable=False, unique=True, comment="标签名称")
description = Column(Text, nullable=True, comment="标签描述")
category = Column(String(50), nullable=True, comment="标签分类")
color = Column(String(7), nullable=True, comment="标签颜色(十六进制)")
usage_count = Column(BigInteger, default=0, comment="使用次数")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<Tag(id={self.id}, name={self.name}, category={self.category})>"

View File

@@ -0,0 +1,24 @@
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class User(Base):
"""用户模型"""
__tablename__ = "users"
id = Column(BigInteger, primary_key=True, autoincrement=True, comment="用户ID")
username = Column(String(255), nullable=False, unique=True, comment="用户名")
email = Column(String(255), nullable=False, unique=True, comment="邮箱")
password_hash = Column(String(255), nullable=False, comment="密码哈希")
full_name = Column(String(255), nullable=True, comment="真实姓名")
avatar_url = Column(String(500), nullable=True, comment="头像URL")
role = Column(String(50), nullable=False, default='USER', comment="角色:ADMIN/USER")
organization = Column(String(255), nullable=True, comment="所属机构")
enabled = Column(Boolean, nullable=False, default=True, comment="是否启用")
last_login_at = Column(TIMESTAMP, nullable=True, comment="最后登录时间")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<User(id={self.id}, username={self.username}, role={self.role})>"

View File

@@ -0,0 +1,11 @@
# app/models/operator/__init__.py
from .operator import Operator
from .operator_category import OperatorCategory
from .operator_category_relation import OperatorCategoryRelation
__all__ = [
"Operator",
"OperatorCategory",
"OperatorCategoryRelation"
]

View File

@@ -0,0 +1,24 @@
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
from sqlalchemy.sql import func
from app.db.database import Base
class Operator(Base):
"""算子模型"""
__tablename__ = "t_operator"
id = Column(String(64), primary_key=True, comment="算子ID")
name = Column(String(64), nullable=True, comment="算子名称")
description = Column(String(256), nullable=True, comment="算子描述")
version = Column(String(256), nullable=True, comment="版本")
inputs = Column(String(256), nullable=True, comment="输入类型")
outputs = Column(String(256), nullable=True, comment="输出类型")
runtime = Column(Text, nullable=True, comment="运行时信息")
settings = Column(Text, nullable=True, comment="配置信息")
file_name = Column(Text, nullable=True, comment="文件名")
is_star = Column(Boolean, nullable=True, comment="是否收藏")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<Operator(id={self.id}, name={self.name}, version={self.version})>"

View File

@@ -0,0 +1,15 @@
from sqlalchemy import Column, String, Integer
from app.db.database import Base
class OperatorCategory(Base):
"""算子分类模型"""
__tablename__ = "t_operator_category"
id = Column(Integer, primary_key=True, autoincrement=True, comment="分类ID")
name = Column(String(64), nullable=True, comment="分类名称")
type = Column(String(64), nullable=True, comment="分类类型")
parent_id = Column(Integer, nullable=True, comment="父分类ID")
def __repr__(self):
return f"<OperatorCategory(id={self.id}, name={self.name}, type={self.type})>"

View File

@@ -0,0 +1,13 @@
from sqlalchemy import Column, String, Integer
from app.db.database import Base
class OperatorCategoryRelation(Base):
"""算子分类关联模型"""
__tablename__ = "t_operator_category_relation"
category_id = Column(Integer, primary_key=True, comment="分类ID")
operator_id = Column(String(64), primary_key=True, comment="算子ID")
def __repr__(self):
return f"<OperatorCategoryRelation(category_id={self.category_id}, operator_id={self.operator_id})>"

View File

@@ -6,7 +6,7 @@ from .common import BaseResponseModel
class DatasetMappingBase(BaseResponseModel):
"""数据集映射 基础模型"""
source_dataset_id: str = Field(..., description="源数据集ID")
dataset_id: str = Field(..., description="源数据集ID")
class DatasetMappingCreateRequest(DatasetMappingBase):
"""数据集映射 创建 请求模型"""
@@ -21,7 +21,7 @@ class DatasetMappingCreateResponse(BaseResponseModel):
class DatasetMappingUpdateRequest(BaseResponseModel):
"""数据集映射 更新 请求模型"""
source_dataset_id: Optional[str] = Field(None, description="源数据集ID")
dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(DatasetMappingBase):
"""数据集映射 查询 响应模型"""

View File

@@ -5,7 +5,7 @@ from typing import Optional, List, Tuple
from datetime import datetime
import uuid
from app.models.dataset_mapping import DatasetMapping
from app.models.dm.labeling_project import LabelingProject
from app.schemas.dataset_mapping import (
DatasetMappingCreateRequest,
DatasetMappingUpdateRequest,
@@ -28,11 +28,11 @@ class DatasetMappingService:
labelling_project_name: str
) -> DatasetMappingResponse:
"""创建数据集映射"""
logger.info(f"Create dataset mapping: {mapping_data.source_dataset_id} -> {labelling_project_id}")
logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labelling_project_id}")
db_mapping = DatasetMapping(
db_mapping = LabelingProject(
mapping_id=str(uuid.uuid4()),
source_dataset_id=mapping_data.source_dataset_id,
dataset_id=mapping_data.dataset_id,
labelling_project_id=labelling_project_id,
labelling_project_name=labelling_project_name
)
@@ -41,48 +41,48 @@ class DatasetMappingService:
await self.db.commit()
await self.db.refresh(db_mapping)
logger.info(f"Mapping created: {db_mapping.mapping_id}")
logger.info(f"Mapping created: {db_mapping.id}")
return DatasetMappingResponse.model_validate(db_mapping)
async def get_mapping_by_source_uuid(
self,
source_dataset_id: str
dataset_id: str
) -> Optional[DatasetMappingResponse]:
"""根据源数据集ID获取映射(返回第一个未删除的)"""
logger.debug(f"Get mapping by source dataset id: {source_dataset_id}")
logger.debug(f"Get mapping by source dataset id: {dataset_id}")
result = await self.db.execute(
select(DatasetMapping).where(
DatasetMapping.source_dataset_id == source_dataset_id,
DatasetMapping.deleted_at.is_(None)
select(LabelingProject).where(
LabelingProject.dataset_id == dataset_id,
LabelingProject.deleted_at.is_(None)
)
)
mapping = result.scalar_one_or_none()
if mapping:
logger.debug(f"Found mapping: {mapping.mapping_id}")
logger.debug(f"Found mapping: {mapping.id}")
return DatasetMappingResponse.model_validate(mapping)
logger.debug(f"No mapping found for source dataset id: {source_dataset_id}")
logger.debug(f"No mapping found for source dataset id: {dataset_id}")
return None
async def get_mappings_by_source_dataset_id(
async def get_mappings_by_dataset_id(
self,
source_dataset_id: str,
dataset_id: str,
include_deleted: bool = False
) -> List[DatasetMappingResponse]:
"""根据源数据集ID获取所有映射关系"""
logger.debug(f"Get all mappings by source dataset id: {source_dataset_id}")
logger.debug(f"Get all mappings by source dataset id: {dataset_id}")
query = select(DatasetMapping).where(
DatasetMapping.source_dataset_id == source_dataset_id
query = select(LabelingProject).where(
LabelingProject.dataset_id == dataset_id
)
if not include_deleted:
query = query.where(DatasetMapping.deleted_at.is_(None))
query = query.where(LabelingProject.deleted_at.is_(None))
result = await self.db.execute(
query.order_by(DatasetMapping.created_at.desc())
query.order_by(LabelingProject.created_at.desc())
)
mappings = result.scalars().all()
@@ -97,9 +97,9 @@ class DatasetMappingService:
logger.debug(f"Get mapping by Label Studio project id: {labelling_project_id}")
result = await self.db.execute(
select(DatasetMapping).where(
DatasetMapping.labelling_project_id == labelling_project_id,
DatasetMapping.deleted_at.is_(None)
select(LabelingProject).where(
LabelingProject.labeling_project_id == labelling_project_id,
LabelingProject.deleted_at.is_(None)
)
)
mapping = result.scalar_one_or_none()
@@ -116,15 +116,15 @@ class DatasetMappingService:
logger.debug(f"Get mapping: {mapping_id}")
result = await self.db.execute(
select(DatasetMapping).where(
DatasetMapping.mapping_id == mapping_id,
DatasetMapping.deleted_at.is_(None)
select(LabelingProject).where(
LabelingProject.id == mapping_id,
LabelingProject.deleted_at.is_(None)
)
)
mapping = result.scalar_one_or_none()
if mapping:
logger.debug(f"Found mapping: {mapping.mapping_id}")
logger.debug(f"Found mapping: {mapping.id}")
return DatasetMappingResponse.model_validate(mapping)
logger.debug(f"Mapping not found: {mapping_id}")
@@ -143,11 +143,11 @@ class DatasetMappingService:
return None
update_values = update_data.model_dump(exclude_unset=True)
update_values["last_updated_at"] = datetime.utcnow()
update_values["last_updated_at"] = datetime.now()
result = await self.db.execute(
update(DatasetMapping)
.where(DatasetMapping.mapping_id == mapping_id)
update(LabelingProject)
.where(LabelingProject.id == mapping_id)
.values(**update_values)
)
await self.db.commit()
@@ -161,10 +161,10 @@ class DatasetMappingService:
logger.debug(f"Update mapping last updated at: {mapping_id}")
result = await self.db.execute(
update(DatasetMapping)
update(LabelingProject)
.where(
DatasetMapping.mapping_id == mapping_id,
DatasetMapping.deleted_at.is_(None)
LabelingProject.id == mapping_id,
LabelingProject.deleted_at.is_(None)
)
.values(last_updated_at=datetime.utcnow())
)
@@ -176,12 +176,12 @@ class DatasetMappingService:
logger.info(f"Soft delete mapping: {mapping_id}")
result = await self.db.execute(
update(DatasetMapping)
update(LabelingProject)
.where(
DatasetMapping.mapping_id == mapping_id,
DatasetMapping.deleted_at.is_(None)
LabelingProject.id == mapping_id,
LabelingProject.deleted_at.is_(None)
)
.values(deleted_at=datetime.utcnow())
.values(deleted_at=datetime.now())
)
await self.db.commit()
@@ -202,11 +202,11 @@ class DatasetMappingService:
logger.debug(f"List all mappings, skip: {skip}, limit: {limit}")
result = await self.db.execute(
select(DatasetMapping)
.where(DatasetMapping.deleted_at.is_(None))
select(LabelingProject)
.where(LabelingProject.deleted_at.is_(None))
.offset(skip)
.limit(limit)
.order_by(DatasetMapping.created_at.desc())
.order_by(LabelingProject.created_at.desc())
)
mappings = result.scalars().all()
@@ -215,10 +215,10 @@ class DatasetMappingService:
async def count_mappings(self, include_deleted: bool = False) -> int:
"""统计映射总数"""
query = select(func.count()).select_from(DatasetMapping)
query = select(func.count()).select_from(LabelingProject)
if not include_deleted:
query = query.where(DatasetMapping.deleted_at.is_(None))
query = query.where(LabelingProject.deleted_at.is_(None))
result = await self.db.execute(query)
return result.scalar_one()
@@ -233,14 +233,14 @@ class DatasetMappingService:
logger.debug(f"List all mappings with count, skip: {skip}, limit: {limit}")
# Build the query
query = select(DatasetMapping)
query = select(LabelingProject)
if not include_deleted:
query = query.where(DatasetMapping.deleted_at.is_(None))
query = query.where(LabelingProject.deleted_at.is_(None))
# Get the total count
count_query = select(func.count()).select_from(DatasetMapping)
count_query = select(func.count()).select_from(LabelingProject)
if not include_deleted:
count_query = count_query.where(DatasetMapping.deleted_at.is_(None))
count_query = count_query.where(LabelingProject.deleted_at.is_(None))
count_result = await self.db.execute(count_query)
total = count_result.scalar_one()
@@ -250,7 +250,7 @@ class DatasetMappingService:
query
.offset(skip)
.limit(limit)
.order_by(DatasetMapping.created_at.desc())
.order_by(LabelingProject.created_at.desc())
)
mappings = result.scalars().all()
@@ -259,28 +259,28 @@ class DatasetMappingService:
async def get_mappings_by_source_with_count(
self,
source_dataset_id: str,
dataset_id: str,
skip: int = 0,
limit: int = 100,
include_deleted: bool = False
) -> Tuple[List[DatasetMappingResponse], int]:
"""根据源数据集ID获取映射关系及总数(用于分页)"""
logger.debug(f"Get mappings by source dataset id with count: {source_dataset_id}")
logger.debug(f"Get mappings by source dataset id with count: {dataset_id}")
# Build the query
query = select(DatasetMapping).where(
DatasetMapping.source_dataset_id == source_dataset_id
query = select(LabelingProject).where(
LabelingProject.dataset_id == dataset_id
)
if not include_deleted:
query = query.where(DatasetMapping.deleted_at.is_(None))
query = query.where(LabelingProject.deleted_at.is_(None))
# Get the total count
count_query = select(func.count()).select_from(DatasetMapping).where(
DatasetMapping.source_dataset_id == source_dataset_id
count_query = select(func.count()).select_from(LabelingProject).where(
LabelingProject.dataset_id == dataset_id
)
if not include_deleted:
count_query = count_query.where(DatasetMapping.deleted_at.is_(None))
count_query = count_query.where(LabelingProject.deleted_at.is_(None))
count_result = await self.db.execute(count_query)
total = count_result.scalar_one()
@@ -290,7 +290,7 @@ class DatasetMappingService:
query
.offset(skip)
.limit(limit)
.order_by(DatasetMapping.created_at.desc())
.order_by(LabelingProject.created_at.desc())
)
mappings = result.scalars().all()

View File

@@ -1,6 +1,5 @@
from typing import Optional, List, Dict, Any, Tuple
from app.clients.dm_client import DMServiceClient
from app.clients.label_studio_client import LabelStudioClient
from app.infrastructure import LabelStudioClient, DatamateClient
from app.services.dataset_mapping_service import DatasetMappingService
from app.schemas.dataset_mapping import SyncDatasetResponse
from app.core.logging import get_logger
@@ -14,7 +13,7 @@ class SyncService:
def __init__(
self,
dm_client: DMServiceClient,
dm_client: DatamateClient,
ls_client: LabelStudioClient,
mapping_service: DatasetMappingService
):
@@ -107,9 +106,9 @@ class SyncService:
try:
# Get dataset info
dataset_info = await self.dm_client.get_dataset(mapping.source_dataset_id)
dataset_info = await self.dm_client.get_dataset(mapping.dataset_id)
if not dataset_info:
raise NoDatasetInfoFoundError(mapping.source_dataset_id)
raise NoDatasetInfoFoundError(mapping.dataset_id)
synced_files = 0
deleted_tasks = 0
@@ -129,7 +128,7 @@ class SyncService:
# Fetch and sync files page by page
while True:
files_response = await self.dm_client.get_dataset_files(
mapping.source_dataset_id,
mapping.dataset_id,
page=page,
size=batch_size,
status="COMPLETED" # 只同步已完成的文件
@@ -173,7 +172,7 @@ class SyncService:
"meta": {
"file_size": file_info.size,
"file_type": file_info.fileType,
"dm_dataset_id": mapping.source_dataset_id,
"dm_dataset_id": mapping.dataset_id,
"dm_file_id": file_info.id,
"original_name": file_info.originalName,
}
@@ -249,22 +248,22 @@ class SyncService:
async def get_sync_status(
self,
source_dataset_id: str
dataset_id: str
) -> Optional[Dict[str, Any]]:
"""获取同步状态"""
mapping = await self.mapping_service.get_mapping_by_source_uuid(source_dataset_id)
mapping = await self.mapping_service.get_mapping_by_source_uuid(dataset_id)
if not mapping:
return None
# Get the DM dataset info
dataset_info = await self.dm_client.get_dataset(source_dataset_id)
dataset_info = await self.dm_client.get_dataset(dataset_id)
# Get the Label Studio project task count
tasks_info = await self.ls_client.get_project_tasks(mapping.labelling_project_id)
return {
"mapping_id": mapping.mapping_id,
"source_dataset_id": source_dataset_id,
"dataset_id": dataset_id,
"labelling_project_id": mapping.labelling_project_id,
"last_updated_at": mapping.last_updated_at,
"dm_total_files": dataset_info.fileCount if dataset_info else 0,

View File

@@ -0,0 +1,5 @@
uvicorn app.main:app \
--host 0.0.0.0 \
--port 18000 \
--reload \
--log-level debug

View File

@@ -1,148 +0,0 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s/alembic
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
# database URL. This is consumed by the user-maintained env.py script only.
# other means of configuring database URLs may be customized within the env.py
# file.
# sqlalchemy.url = driver://user:pass@localhost/dbname
# The default URL is commented out; we read it from the application config in env.py instead
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Logging configuration. This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
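Since `script_location` resolves relative to `%(here)s`, migrations are generated and applied from the directory containing this `alembic.ini`, e.g.:

```bash
# run from the directory that holds alembic.ini
alembic revision --autogenerate -m "describe your change"
alembic upgrade head
```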

View File

@@ -1 +0,0 @@
Generic single-database configuration.

View File

@@ -1,145 +0,0 @@
from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy import pool
from sqlalchemy import create_engine, text
from alembic import context
import os
from urllib.parse import quote_plus

# Import the application config and models
from app.core.config import settings
from app.db.database import Base

# Import all models so that autogenerate can detect them
from app.models import dataset_mapping  # noqa

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config


def ensure_database_and_user():
    """
    Ensure the database and the application user exist.

    Connects to MySQL as root and creates the database and the app user.
    """
    # Only run when MySQL is configured
    if not settings.mysql_host:
        return

    mysql_root_password = os.getenv('MYSQL_ROOT_PASSWORD', 'password')

    # URL-encode the password to handle special characters
    encoded_password = quote_plus(mysql_root_password)

    # Connect as root (no database selected)
    root_url = f"mysql+pymysql://root:{encoded_password}@{settings.mysql_host}:{settings.mysql_port}/"

    try:
        root_engine = create_engine(root_url, poolclass=pool.NullPool)
        with root_engine.connect() as conn:
            # Create the database if it does not exist
            conn.execute(text(
                f"CREATE DATABASE IF NOT EXISTS `{settings.mysql_database}` "
                f"CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci"
            ))
            conn.commit()

            # Create the user if it does not exist (MySQL 8 defaults to caching_sha2_password)
            conn.execute(text(
                f"CREATE USER IF NOT EXISTS '{settings.mysql_user}'@'%' "
                f"IDENTIFIED BY '{settings.mysql_password}'"
            ))
            conn.commit()

            # Grant privileges
            conn.execute(text(
                f"GRANT ALL PRIVILEGES ON `{settings.mysql_database}`.* TO '{settings.mysql_user}'@'%'"
            ))
            conn.commit()

            # Flush privileges
            conn.execute(text("FLUSH PRIVILEGES"))
            conn.commit()

        root_engine.dispose()
        print(f"✓ Database '{settings.mysql_database}' and user '{settings.mysql_user}' are ready")
    except Exception as e:
        print(f"⚠️ Warning: Could not ensure database and user: {e}")
        print("   This may be expected if the database already exists or permissions are already set")


# Set the database URL from the application config
config.set_main_option('sqlalchemy.url', settings.sync_database_url)

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    # Ensure the database and user exist first
    ensure_database_and_user()

    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
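Because `ensure_database_and_user()` reads `MYSQL_ROOT_PASSWORD` from the environment (defaulting to `password`), a first online migration can bootstrap the database, user, and grants in one step:

```bash
# the root password is only needed for the bootstrap step in env.py above
MYSQL_ROOT_PASSWORD='my-root-secret' alembic upgrade head
```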

View File

@@ -1,28 +0,0 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}

View File

@@ -1,41 +0,0 @@
"""Initiation
Revision ID: 755dc1afb8ad
Revises:
Create Date: 2025-10-20 19:34:20.258554
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '755dc1afb8ad'
down_revision: Union[str, Sequence[str], None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('mapping',
sa.Column('mapping_id', sa.String(length=36), nullable=False),
sa.Column('source_dataset_id', sa.String(length=36), nullable=False, comment='源数据集ID'),
sa.Column('labelling_project_id', sa.String(length=36), nullable=False, comment='标注项目ID'),
sa.Column('labelling_project_name', sa.String(length=255), nullable=True, comment='标注项目名称'),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True, comment='创建时间'),
sa.Column('last_updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True, comment='最后更新时间'),
sa.Column('deleted_at', sa.DateTime(timezone=True), nullable=True, comment='删除时间'),
sa.PrimaryKeyConstraint('mapping_id')
)
# ### end Alembic commands ###
def downgrade() -> None:
"""Downgrade schema."""
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('mapping')
# ### end Alembic commands ###

View File

@@ -1,8 +0,0 @@
# app/clients/__init__.py
from .dm_client import DMServiceClient
from .label_studio_client import LabelStudioClient
from .client_manager import get_clients, set_clients, get_dm_client, get_ls_client
__all__ = ["DMServiceClient", "LabelStudioClient", "get_clients", "set_clients", "get_dm_client", "get_ls_client"]

View File

@@ -1,34 +0,0 @@
from typing import Optional

from fastapi import HTTPException

from .dm_client import DMServiceClient
from .label_studio_client import LabelStudioClient

# Global client instances (initialized in main.py)
dm_client: Optional[DMServiceClient] = None
ls_client: Optional[LabelStudioClient] = None


def get_clients() -> tuple[DMServiceClient, LabelStudioClient]:
    """Return both client instances."""
    global dm_client, ls_client
    if not dm_client or not ls_client:
        raise HTTPException(status_code=500, detail="Clients not initialized")
    return dm_client, ls_client


def set_clients(dm_client_instance: DMServiceClient, ls_client_instance: LabelStudioClient) -> None:
    """Set the global client instances."""
    global dm_client, ls_client
    dm_client = dm_client_instance
    ls_client = ls_client_instance


def get_dm_client() -> DMServiceClient:
    """Return the DM service client."""
    if not dm_client:
        raise HTTPException(status_code=500, detail="DM client not initialized")
    return dm_client


def get_ls_client() -> LabelStudioClient:
    """Return the Label Studio client."""
    if not ls_client:
        raise HTTPException(status_code=500, detail="Label Studio client not initialized")
    return ls_client
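This module-global pattern only works if the application sets the clients during startup. The actual `main.py` is not part of this hunk; a sketch of what that wiring would look like, assuming the constructors shown elsewhere in this commit:

```python
# A sketch under stated assumptions: the real startup code is not shown in this diff.
from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.clients import set_clients, get_dm_client
from app.clients.dm_client import DMServiceClient
from app.clients.label_studio_client import LabelStudioClient
from app.core.config import settings

@asynccontextmanager
async def lifespan(app: FastAPI):
    set_clients(
        DMServiceClient(),  # falls back to settings.dm_service_base_url
        LabelStudioClient(base_url=settings.label_studio_base_url,
                          token=settings.label_studio_user_token),
    )
    yield
    await get_dm_client().close()

app = FastAPI(lifespan=lifespan)
```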

View File

@@ -1,138 +0,0 @@
import httpx
from typing import Optional

from app.core.config import settings
from app.core.logging import get_logger
from app.schemas.dm_service import DatasetResponse, PagedDatasetFileResponse

logger = get_logger(__name__)


class DMServiceClient:
    """Client for the Data Management (DM) service."""

    def __init__(self, base_url: str | None = None, timeout: float = 30.0):
        self.base_url = base_url or settings.dm_service_base_url
        self.timeout = timeout
        self.client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=self.timeout
        )
        logger.info(f"Initialize DM service client, base url: {self.base_url}")

    @staticmethod
    def _unwrap_payload(data):
        """Unwrap common envelope shapes like {'code': ..., 'message': ..., 'data': {...}}."""
        if isinstance(data, dict) and 'data' in data and isinstance(data['data'], (dict, list)):
            return data['data']
        return data

    @staticmethod
    def _is_error_payload(data) -> bool:
        """Detect error-shaped payloads returned with HTTP 200."""
        if not isinstance(data, dict):
            return False
        # Common patterns: {error, message, ...} or {code, message, ...} without data
        if 'error' in data and 'message' in data:
            return True
        if 'code' in data and 'message' in data and 'data' not in data:
            return True
        return False

    @staticmethod
    def _keys(d):
        return list(d.keys()) if isinstance(d, dict) else []
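    # Illustration (not executed) of the shapes the helpers above normalize:
    #   {"code": 0, "message": "ok", "data": {"id": "d1"}}  -> unwraps to {"id": "d1"}
    #   {"id": "d1", "name": "demo"}                        -> returned unchanged
    #   {"code": 500, "message": "boom"}                    -> flagged as an error payload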
    async def get_dataset(self, dataset_id: str) -> Optional[DatasetResponse]:
        """Fetch dataset details."""
        raw = None  # keep a reference for the catch-all log below (avoids a NameError)
        try:
            logger.info(f"Getting dataset detail: {dataset_id} ...")
            response = await self.client.get(f"/data-management/datasets/{dataset_id}")
            response.raise_for_status()
            raw = response.json()
            data = self._unwrap_payload(raw)
            if self._is_error_payload(data):
                logger.error(f"DM service returned error for dataset {dataset_id}: {data}")
                return None
            if not isinstance(data, dict):
                logger.error(f"Unexpected dataset payload type for {dataset_id}: {type(data).__name__}")
                return None
            required = ["id", "name", "description", "datasetType", "status", "fileCount", "totalSize"]
            if not all(k in data for k in required):
                logger.error(f"Dataset payload missing required fields for {dataset_id}. Keys: {self._keys(data)}")
                return None
            return DatasetResponse(**data)
        except httpx.HTTPError as e:
            logger.error(f"Failed to get dataset {dataset_id}: {e}")
            return None
        except Exception as e:
            logger.error(f"[Unexpected] [GET] dataset {dataset_id}: \n{e}\nRaw JSON received: \n{raw}")
            return None
    async def get_dataset_files(
        self,
        dataset_id: str,
        page: int = 0,
        size: int = 100,
        file_type: Optional[str] = None,
        status: Optional[str] = None
    ) -> Optional[PagedDatasetFileResponse]:
        """Fetch the paged file list of a dataset."""
        raw = None  # keep a reference for the catch-all log below (avoids a NameError)
        try:
            logger.info(f"Get dataset files: dataset={dataset_id}, page={page}, size={size}")
            params: dict = {"page": page, "size": size}
            if file_type:
                params["fileType"] = file_type
            if status:
                params["status"] = status
            response = await self.client.get(
                f"/data-management/datasets/{dataset_id}/files",
                params=params
            )
            response.raise_for_status()
            raw = response.json()
            data = self._unwrap_payload(raw)
            if self._is_error_payload(data):
                logger.error(f"DM service returned error for dataset files {dataset_id}: {data}")
                return None
            if not isinstance(data, dict):
                logger.error(f"Unexpected dataset files payload type for {dataset_id}: {type(data).__name__}")
                return None
            required = ["content", "totalElements", "totalPages", "page", "size"]
            if not all(k in data for k in required):
                logger.error(f"Files payload missing required fields for {dataset_id}. Keys: {self._keys(data)}")
                return None
            return PagedDatasetFileResponse(**data)
        except httpx.HTTPError as e:
            logger.error(f"Failed to get dataset files for {dataset_id}: {e}")
            return None
        except Exception as e:
            logger.error(f"[Unexpected] [GET] dataset files {dataset_id}: \n{e}\nRaw JSON received: \n{raw}")
            return None
    async def download_file(self, dataset_id: str, file_id: str) -> Optional[bytes]:
        """Download a file's raw content."""
        try:
            logger.info(f"Download file: dataset={dataset_id}, file={file_id}")
            response = await self.client.get(
                f"/data-management/datasets/{dataset_id}/files/{file_id}/download"
            )
            response.raise_for_status()
            return response.content
        except httpx.HTTPError as e:
            logger.error(f"Failed to download file {file_id}: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error while downloading file {file_id}: {e}")
            return None

    async def get_file_download_url(self, dataset_id: str, file_id: str) -> str:
        """Build the download URL for a file."""
        return f"{self.base_url}/data-management/datasets/{dataset_id}/files/{file_id}/download"

    async def close(self):
        """Close the underlying HTTP client."""
        await self.client.aclose()
        logger.info("DM service client connection closed")

View File

@@ -1,5 +0,0 @@
# app/models/__init__.py
from .dataset_mapping import DatasetMapping
__all__ = ["DatasetMapping"]

View File

@@ -1,25 +0,0 @@
from sqlalchemy import Column, String, DateTime, Boolean, Text
from sqlalchemy.sql import func
from app.db.database import Base
import uuid


class DatasetMapping(Base):
    """Dataset mapping model."""

    __tablename__ = "mapping"

    mapping_id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    source_dataset_id = Column(String(36), nullable=False, comment="Source dataset ID")
    labelling_project_id = Column(String(36), nullable=False, comment="Labelling project ID")
    labelling_project_name = Column(String(255), nullable=True, comment="Labelling project name")
    created_at = Column(DateTime(timezone=True), server_default=func.now(), comment="Creation time")
    last_updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), comment="Last update time")
    deleted_at = Column(DateTime(timezone=True), nullable=True, comment="Deletion time")

    def __repr__(self):
        return f"<DatasetMapping(uuid={self.mapping_id}, source={self.source_dataset_id}, labelling={self.labelling_project_id})>"

    @property
    def is_deleted(self) -> bool:
        """Return True if the row has been soft-deleted."""
        return self.deleted_at is not None
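The `deleted_at` column drives soft deletion: `is_deleted` is a per-row check, while queries filter live rows explicitly. A query sketch, assuming the async SQLAlchemy session used elsewhere in this service:

```python
# A sketch: the session is assumed to come from app.db.database.
from sqlalchemy import select

from app.models import DatasetMapping

async def list_live_mappings(session):
    stmt = select(DatasetMapping).where(DatasetMapping.deleted_at.is_(None))
    result = await session.execute(stmt)
    return result.scalars().all()
```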