diff --git a/deployment/docker/label-studio/docker-compose.yml b/deployment/docker/label-studio/docker-compose.yml new file mode 100644 index 0000000..059a3e4 --- /dev/null +++ b/deployment/docker/label-studio/docker-compose.yml @@ -0,0 +1,50 @@ +services: + + app: + stdin_open: true + tty: true + image: heartexlabs/label-studio:latest + restart: unless-stopped + user: root + expose: + - "8000" + ports: + - "8000:8000" + depends_on: + - db + environment: + - DJANGO_DB=default + - POSTGRE_NAME=postgres + - POSTGRE_USER=postgres + - POSTGRE_PASSWORD= + - POSTGRE_PORT=5432 + - POSTGRE_HOST=db + - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-} + - LOCAL_FILES_SERVING_ENABLED=true + - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local + - USE_USERNAME_FOR_LOGIN=true + - LABEL_STUDIO_USERNAME=admin@huawei.com + - LABEL_STUDIO_PASSWORD=admin1234 + - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true + - LABEL_STUDIO_USER_TOKEN=abc123abc123 + - LOG_LEVEL=INFO + volumes: + - label-studio-data:/label-studio/data:rw + - dataset_volume:/label-studio/local:rw + command: label-studio-uwsgi + + db: + image: pgautoupgrade/pgautoupgrade:13-alpine + hostname: db + restart: unless-stopped + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + - POSTGRES_USER=postgres + volumes: + - label-studio-db:/var/lib/postgresql/data + +volumes: + label-studio-data: + label-studio-db: + dataset_volume: + name: datamate-dataset-volume \ No newline at end of file diff --git a/runtime/datamate-python/.env.example b/runtime/datamate-python/.env.example index 4f50985..df9a6a7 100644 --- a/runtime/datamate-python/.env.example +++ b/runtime/datamate-python/.env.example @@ -66,37 +66,6 @@ MYSQL_USER=label_studio_user MYSQL_PASSWORD=user_password MYSQL_DATABASE=label_studio_adapter -# ========================= -# Label Studio 数据库配置 (PostgreSQL) -# ========================= -# 仅在使用 docker-compose.label-studio.yml 启动 Label Studio 时需要配置 -POSTGRES_HOST=label-studio-db -POSTGRES_PORT=5432 -POSTGRES_USER=labelstudio -POSTGRES_PASSWORD=labelstudio@4321 -POSTGRES_DATABASE=labelstudio - -# ========================= -# SQLite 数据库配置(兜底选项) -# ========================= -# 优先级3:如果没有配置 MySQL/PostgreSQL,将使用 SQLite -SQLITE_PATH=./data/labelstudio_adapter.db - -# ========================= -# 可选:直接指定数据库 URL -# ========================= -# 如果设置了此项,将覆盖上面的 MySQL/PostgreSQL/SQLite 配置 -# DATABASE_URL=postgresql+asyncpg://user:password@host:port/database - -# ========================= -# 安全配置 -# ========================= -# 密钥(生产环境务必修改) -SECRET_KEY=your-secret-key-change-this-in-production - -# Token 过期时间(分钟) -ACCESS_TOKEN_EXPIRE_MINUTES=30 - # ========================= # CORS 配置 # ========================= diff --git a/runtime/datamate-python/app/__init__.py b/runtime/datamate-python/app/__init__.py deleted file mode 100644 index 4ec4b56..0000000 --- a/runtime/datamate-python/app/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# app/__init__.py \ No newline at end of file diff --git a/runtime/datamate-python/app/api/__init__.py b/runtime/datamate-python/app/api/__init__.py deleted file mode 100644 index ca138c2..0000000 --- a/runtime/datamate-python/app/api/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -API 路由模块 - -集中管理所有API路由的组织结构 -""" -from fastapi import APIRouter - -from .system import router as system_router -from .project import project_router - -# 创建主API路由器 -api_router = APIRouter() - -# 注册到主路由器 -api_router.include_router(system_router, tags=["系统"]) -api_router.include_router(project_router, prefix="/project", tags=["项目"]) - -# 导出路由器供 main.py 使用 -__all__ = ["api_router"] \ No newline at end of file diff --git a/runtime/datamate-python/app/api/project/__init__.py b/runtime/datamate-python/app/api/project/__init__.py deleted file mode 100644 index f499596..0000000 --- a/runtime/datamate-python/app/api/project/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -标注工程相关API路由模块 -""" -from fastapi import APIRouter - -project_router = APIRouter() - -from . import create -from . import sync -from . import list -from . import delete \ No newline at end of file diff --git a/runtime/datamate-python/app/api/project/create.py b/runtime/datamate-python/app/api/project/create.py deleted file mode 100644 index 922df63..0000000 --- a/runtime/datamate-python/app/api/project/create.py +++ /dev/null @@ -1,130 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException -from sqlalchemy.ext.asyncio import AsyncSession -from typing import Optional - -from app.db.database import get_db -from app.services.dataset_mapping_service import DatasetMappingService -from app.infrastructure import DatamateClient, LabelStudioClient -from app.schemas.dataset_mapping import ( - DatasetMappingCreateRequest, - DatasetMappingCreateResponse, -) -from app.schemas import StandardResponse -from app.core.logging import get_logger -from app.core.config import settings -from . import project_router - -logger = get_logger(__name__) - -@project_router.post("/create", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201) -async def create_dataset_mapping( - request: DatasetMappingCreateRequest, - db: AsyncSession = Depends(get_db) -): - """ - 创建数据集映射 - - 根据指定的DM程序中的数据集,创建Label Studio中的数据集, - 在数据库中记录这一关联关系,返回Label Studio数据集的ID - - 注意:一个数据集可以创建多个标注项目 - """ - try: - dm_client = DatamateClient(db) - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) - service = DatasetMappingService(db) - - logger.info(f"Create dataset mapping request: {request.dataset_id}") - - # 从DM服务获取数据集信息 - dataset_info = await dm_client.get_dataset(request.dataset_id) - if not dataset_info: - raise HTTPException( - status_code=404, - detail=f"Dataset not found in DM service: {request.dataset_id}" - ) - - # 确定数据类型(基于数据集类型) - data_type = "image" # 默认值 - if dataset_info.type and dataset_info.type.code: - type_code = dataset_info.type.code.lower() - if "audio" in type_code: - data_type = "audio" - elif "video" in type_code: - data_type = "video" - elif "text" in type_code: - data_type = "text" - - project_name = f"{dataset_info.name}" - - # 在Label Studio中创建项目 - project_data = await ls_client.create_project( - title=project_name, - description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}", - data_type=data_type - ) - - if not project_data: - raise HTTPException( - status_code=500, - detail="Fail to create Label Studio project." - ) - - project_id = project_data["id"] - - # 配置本地存储:dataset/ - local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}" - storage_result = await ls_client.create_local_storage( - project_id=project_id, - path=local_storage_path, - title="Dataset_BLOB", - use_blob_urls=True, - description=f"Local storage for dataset {dataset_info.name}" - ) - - # 配置本地存储:upload - local_storage_path = f"{settings.label_studio_local_storage_upload_base_path}" - storage_result = await ls_client.create_local_storage( - project_id=project_id, - path=local_storage_path, - title="Upload_BLOB", - use_blob_urls=True, - description=f"Local storage for dataset {dataset_info.name}" - ) - - if not storage_result: - # 本地存储配置失败,记录警告但不中断流程 - logger.warning(f"Failed to configure local storage for project {project_id}") - else: - logger.info(f"Local storage configured for project {project_id}: {local_storage_path}") - - # 创建映射关系,包含项目名称 - mapping = await service.create_mapping( - request, - str(project_id), - project_name - ) - - logger.debug( - f"Dataset mapping created: {mapping.mapping_id} -> S {mapping.dataset_id} <> L {mapping.labelling_project_id}" - ) - - response_data = DatasetMappingCreateResponse( - mapping_id=mapping.mapping_id, - labelling_project_id=mapping.labelling_project_id, - labelling_project_name=mapping.labelling_project_name or project_name, - message="Dataset mapping created successfully" - ) - - return StandardResponse( - code=201, - message="success", - data=response_data - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"Error while creating dataset mapping: {e}") - raise HTTPException(status_code=500, detail="Internal server error") \ No newline at end of file diff --git a/runtime/datamate-python/app/api/project/delete.py b/runtime/datamate-python/app/api/project/delete.py deleted file mode 100644 index b1bb8e2..0000000 --- a/runtime/datamate-python/app/api/project/delete.py +++ /dev/null @@ -1,106 +0,0 @@ -from fastapi import Depends, HTTPException, Query -from sqlalchemy.ext.asyncio import AsyncSession -from typing import Optional - -from app.db.database import get_db -from app.services.dataset_mapping_service import DatasetMappingService -from app.infrastructure import DatamateClient, LabelStudioClient -from app.schemas.dataset_mapping import DeleteDatasetResponse -from app.schemas import StandardResponse -from app.core.logging import get_logger -from app.core.config import settings - -from . import project_router - -logger = get_logger(__name__) - -@project_router.delete("/mappings", response_model=StandardResponse[DeleteDatasetResponse]) -async def delete_mapping( - m: Optional[str] = Query(None, description="映射UUID"), - proj: Optional[str] = Query(None, description="Label Studio项目ID"), - db: AsyncSession = Depends(get_db) -): - """ - 删除映射关系和对应的 Label Studio 项目 - - 可以通过以下任一方式指定要删除的映射: - - m: 映射UUID - - proj: Label Studio项目ID - - 两者都提供(优先使用 m) - - 此操作会: - 1. 删除 Label Studio 中的项目 - 2. 软删除数据库中的映射记录 - """ - try: - # 至少需要提供一个参数 - if not m and not proj: - raise HTTPException( - status_code=400, - detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided" - ) - - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) - service = DatasetMappingService(db) - - # 优先使用 mapping_id 查询 - if m: - logger.debug(f"Deleting by mapping UUID: {m}") - mapping = await service.get_mapping_by_uuid(m) - # 如果没有提供 m,使用 proj 查询 - elif proj: - logger.debug(f"Deleting by project ID: {proj}") - mapping = await service.get_mapping_by_labelling_project_id(proj) - else: - mapping = None - - if not mapping: - raise HTTPException( - status_code=404, - detail=f"Mapping either not found or not specified." - ) - - mapping_id = mapping.mapping_id - labelling_project_id = mapping.labelling_project_id - labelling_project_name = mapping.labelling_project_name - - logger.debug(f"Found mapping: {mapping_id}, Label Studio project ID: {labelling_project_id}") - - # 1. 删除 Label Studio 项目 - try: - delete_success = await ls_client.delete_project(int(labelling_project_id)) - if delete_success: - logger.debug(f"Successfully deleted Label Studio project: {labelling_project_id}") - else: - logger.warning(f"Failed to delete Label Studio project or project not found: {labelling_project_id}") - except Exception as e: - logger.error(f"Error deleting Label Studio project: {e}") - # 继续执行,即使 Label Studio 项目删除失败也要删除映射记录 - - # 2. 软删除映射记录 - soft_delete_success = await service.soft_delete_mapping(mapping_id) - - if not soft_delete_success: - raise HTTPException( - status_code=500, - detail="Failed to delete mapping record" - ) - - logger.info(f"Successfully deleted mapping: {mapping_id}, Label Studio project: {labelling_project_id}") - - return StandardResponse( - code=200, - message="success", - data=DeleteDatasetResponse( - mapping_id=mapping_id, - status="success", - message=f"Successfully deleted mapping and Label Studio project '{labelling_project_name}'" - ) - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"Error deleting mapping: {e}") - raise HTTPException(status_code=500, detail="Internal server error") diff --git a/runtime/datamate-python/app/api/project/list.py b/runtime/datamate-python/app/api/project/list.py deleted file mode 100644 index a56a7f4..0000000 --- a/runtime/datamate-python/app/api/project/list.py +++ /dev/null @@ -1,152 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException, Query -from sqlalchemy.ext.asyncio import AsyncSession -from typing import List -import math - -from app.db.database import get_db -from app.services.dataset_mapping_service import DatasetMappingService -from app.schemas.dataset_mapping import DatasetMappingResponse -from app.schemas.common import StandardResponse, PaginatedData -from app.core.logging import get_logger -from . import project_router - -logger = get_logger(__name__) - -@project_router.get("/mappings/list", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]]) -async def list_mappings( - page: int = Query(1, ge=1, description="页码(从1开始)"), - page_size: int = Query(20, ge=1, le=100, description="每页记录数"), - db: AsyncSession = Depends(get_db) -): - """ - 查询所有映射关系(分页) - - 返回所有有效的数据集映射关系(未被软删除的),支持分页查询 - """ - try: - service = DatasetMappingService(db) - - # 计算 skip - skip = (page - 1) * page_size - - logger.info(f"Listing mappings, page={page}, page_size={page_size}") - - # 获取数据和总数 - mappings, total = await service.get_all_mappings_with_count( - skip=skip, - limit=page_size - ) - - # 计算总页数 - total_pages = math.ceil(total / page_size) if total > 0 else 0 - - # 构造分页响应 - paginated_data = PaginatedData( - page=page, - size=page_size, - total_elements=total, - total_pages=total_pages, - content=mappings - ) - - logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}") - - return StandardResponse( - code=200, - message="success", - data=paginated_data - ) - - except Exception as e: - logger.error(f"Error listing mappings: {e}") - raise HTTPException(status_code=500, detail="Internal server error") - - -@project_router.get("/mappings/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse]) -async def get_mapping( - mapping_id: str, - db: AsyncSession = Depends(get_db) -): - """ - 根据 UUID 查询单个映射关系 - """ - try: - service = DatasetMappingService(db) - - logger.info(f"Get mapping: {mapping_id}") - - mapping = await service.get_mapping_by_uuid(mapping_id) - - if not mapping: - raise HTTPException( - status_code=404, - detail=f"Mapping not found: {mapping_id}" - ) - - logger.info(f"Found mapping: {mapping.mapping_id}") - - return StandardResponse( - code=200, - message="success", - data=mapping - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"Error getting mapping: {e}") - raise HTTPException(status_code=500, detail="Internal server error") - - -@project_router.get("/mappings/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]]) -async def get_mappings_by_source( - dataset_id: str, - page: int = Query(1, ge=1, description="页码(从1开始)"), - page_size: int = Query(20, ge=1, le=100, description="每页记录数"), - db: AsyncSession = Depends(get_db) -): - """ - 根据源数据集 ID 查询所有映射关系(分页) - - 返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询 - """ - try: - service = DatasetMappingService(db) - - # 计算 skip - skip = (page - 1) * page_size - - logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}") - - # 获取数据和总数 - mappings, total = await service.get_mappings_by_source_with_count( - dataset_id=dataset_id, - skip=skip, - limit=page_size - ) - - # 计算总页数 - total_pages = math.ceil(total / page_size) if total > 0 else 0 - - # 构造分页响应 - paginated_data = PaginatedData( - page=page, - size=page_size, - total_elements=total, - total_pages=total_pages, - content=mappings - ) - - logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}") - - return StandardResponse( - code=200, - message="success", - data=paginated_data - ) - - except HTTPException: - raise - except Exception as e: - logger.error(f"Error getting mappings: {e}") - raise HTTPException(status_code=500, detail="Internal server error") diff --git a/runtime/datamate-python/app/core/__init__.py b/runtime/datamate-python/app/core/__init__.py index 4402e6f..8f23461 100644 --- a/runtime/datamate-python/app/core/__init__.py +++ b/runtime/datamate-python/app/core/__init__.py @@ -1 +1,7 @@ -# app/core/__init__.py \ No newline at end of file +# app/core/__init__.py + +""" +Core module + + +""" \ No newline at end of file diff --git a/runtime/datamate-python/app/core/config.py b/runtime/datamate-python/app/core/config.py index 57f90bb..3371ccf 100644 --- a/runtime/datamate-python/app/core/config.py +++ b/runtime/datamate-python/app/core/config.py @@ -1,5 +1,5 @@ from pydantic_settings import BaseSettings -from typing import Optional +from typing import Optional, List import os from pathlib import Path @@ -24,9 +24,9 @@ class Settings(BaseSettings): port: int = 8000 # CORS配置 - allowed_origins: list = ["*"] - allowed_methods: list = ["*"] - allowed_headers: list = ["*"] + allowed_origins: List[str] = ["*"] + allowed_methods: List[str] = ["*"] + allowed_headers: List[str] = ["*"] # MySQL数据库配置 (优先级1) mysql_host: Optional[str] = None @@ -49,11 +49,7 @@ class Settings(BaseSettings): database_url: Optional[str] = None # 日志配置 - log_level: str = "INFO" - - # 安全配置 - secret_key: str = "your-secret-key-change-this-in-production" - access_token_expire_minutes: int = 30 + log_level: str = "DEBUG" # ========================= # Label Studio 服务配置 @@ -63,8 +59,7 @@ class Settings(BaseSettings): label_studio_password: Optional[str] = None # Label Studio 密码(用于登录) label_studio_user_token: Optional[str] = None # Legacy Token - label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files/dataset" # Label Studio容器中的本地存储基础路径 - label_studio_local_storage_upload_base_path: str = "/label-studio/local_files/upload" # Label Studio容器中的本地存储基础路径 + label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files" # Label Studio容器中的本地存储基础路径 label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀 ls_task_page_size: int = 1000 @@ -73,7 +68,7 @@ class Settings(BaseSettings): # ========================= # Data Management 服务配置 # ========================= - dm_file_path_prefix: str = "/" # DM存储文件夹前缀 + dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀 @property diff --git a/runtime/datamate-python/app/core/logging.py b/runtime/datamate-python/app/core/logging.py index e474c22..6cb13aa 100644 --- a/runtime/datamate-python/app/core/logging.py +++ b/runtime/datamate-python/app/core/logging.py @@ -11,7 +11,7 @@ def setup_logging(): log_dir.mkdir(exist_ok=True) # 配置日志格式 - log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + log_format = "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s" date_format = "%Y-%m-%d %H:%M:%S" # 创建处理器 @@ -44,9 +44,10 @@ def setup_logging(): root_logger.addHandler(error_handler) # 配置第三方库日志级别(减少详细日志) - logging.getLogger("uvicorn").setLevel(logging.WARNING) + logging.getLogger("uvicorn").setLevel(logging.ERROR) logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR) # 隐藏SQL查询日志 - logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpx").setLevel(logging.ERROR) + logging.getLogger("httpcore").setLevel(logging.ERROR) def get_logger(name: str) -> logging.Logger: """获取指定名称的日志器""" diff --git a/runtime/datamate-python/app/db/__init__.py b/runtime/datamate-python/app/db/__init__.py index 894b869..e69de29 100644 --- a/runtime/datamate-python/app/db/__init__.py +++ b/runtime/datamate-python/app/db/__init__.py @@ -1 +0,0 @@ -# app/db/__init__.py \ No newline at end of file diff --git a/runtime/datamate-python/app/db/models/__init__.py b/runtime/datamate-python/app/db/models/__init__.py new file mode 100644 index 0000000..5a237f3 --- /dev/null +++ b/runtime/datamate-python/app/db/models/__init__.py @@ -0,0 +1,28 @@ + +from .dataset_management import ( + Dataset, + DatasetTag, + DatasetFiles, + DatasetStatistics, + Tag +) + +from .user_management import ( + User +) + +from .annotation_management import ( + AnnotationTemplate, + LabelingProject +) + +__all__ = [ + "Dataset", + "DatasetTag", + "DatasetFiles", + "DatasetStatistics", + "Tag", + "User", + "AnnotationTemplate", + "LabelingProject", +] \ No newline at end of file diff --git a/runtime/datamate-python/app/db/models/annotation_management.py b/runtime/datamate-python/app/db/models/annotation_management.py new file mode 100644 index 0000000..4a21feb --- /dev/null +++ b/runtime/datamate-python/app/db/models/annotation_management.py @@ -0,0 +1,51 @@ +""" +Tables of Annotation Management Module +""" + +import uuid +from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date +from sqlalchemy.sql import func + +from app.db.session import Base + +class AnnotationTemplate(Base): + """标注模板模型""" + + __tablename__ = "t_dm_annotation_templates" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID") + name = Column(String(32), nullable=False, comment="模板名称") + description = Column(String(255), nullable=True, comment="模板描述") + configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)") + + def __repr__(self): + return f"" + + @property + def is_deleted(self) -> bool: + """检查是否已被软删除""" + return self.deleted_at is not None + +class LabelingProject(Base): + """标注工程表""" + + __tablename__ = "t_dm_labeling_projects" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID") + dataset_id = Column(String(36), nullable=False, comment="数据集ID") + name = Column(String(32), nullable=False, comment="项目名称") + labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID") + configuration = Column(JSON, nullable=True, comment="标签配置") + progress = Column(JSON, nullable=True, comment="标注进度统计") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)") + + def __repr__(self): + return f"" + + @property + def is_deleted(self) -> bool: + """检查是否已被软删除""" + return self.deleted_at is not None \ No newline at end of file diff --git a/runtime/datamate-python/app/db/models/dataset_management.py b/runtime/datamate-python/app/db/models/dataset_management.py new file mode 100644 index 0000000..90a0f58 --- /dev/null +++ b/runtime/datamate-python/app/db/models/dataset_management.py @@ -0,0 +1,113 @@ +""" +Tables of Dataset Management Module +""" + +import uuid +from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date +from sqlalchemy.sql import func + +from app.db.session import Base + +class Dataset(Base): + """数据集模型(支持医学影像、文本、问答等多种类型)""" + + __tablename__ = "t_dm_datasets" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") + name = Column(String(255), nullable=False, comment="数据集名称") + description = Column(Text, nullable=True, comment="数据集描述") + dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER") + category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等") + path = Column(String(500), nullable=True, comment="数据存储路径") + format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等") + schema_info = Column(JSON, nullable=True, comment="数据结构信息") + size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)") + file_count = Column(BigInteger, default=0, comment="文件数量") + record_count = Column(BigInteger, default=0, comment="记录数量") + retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)") + tags = Column(JSON, nullable=True, comment="标签列表") + dataset_metadata = Column("metadata", JSON, nullable=True, comment="元数据信息") + status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED") + is_public = Column(Boolean, default=False, comment="是否公开") + is_featured = Column(Boolean, default=False, comment="是否推荐") + version = Column(BigInteger, nullable=False, default=0, comment="版本号") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") + created_by = Column(String(255), nullable=True, comment="创建者") + updated_by = Column(String(255), nullable=True, comment="更新者") + + def __repr__(self): + return f"" + +class DatasetTag(Base): + """数据集标签关联模型""" + + __tablename__ = "t_dm_dataset_tags" + + dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)") + tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + + def __repr__(self): + return f"" + +class DatasetFiles(Base): + """DM数据集文件模型""" + + __tablename__ = "t_dm_dataset_files" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") + dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)") + file_name = Column(String(255), nullable=False, comment="文件名") + file_path = Column(String(1000), nullable=False, comment="文件路径") + file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等") + file_size = Column(BigInteger, default=0, comment="文件大小(字节)") + check_sum = Column(String(64), nullable=True, comment="文件校验和") + tags = Column(JSON, nullable=True, comment="文件标签信息") + dataset_filemetadata = Column("metadata", JSON, nullable=True, comment="文件元数据") + status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING") + upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间") + last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") + + def __repr__(self): + return f"" + +class DatasetStatistics(Base): + """数据集统计信息模型""" + + __tablename__ = "t_dm_dataset_statistics" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") + dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)") + stat_date = Column(Date, nullable=False, comment="统计日期") + total_files = Column(BigInteger, default=0, comment="总文件数") + total_size = Column(BigInteger, default=0, comment="总大小(字节)") + processed_files = Column(BigInteger, default=0, comment="已处理文件数") + error_files = Column(BigInteger, default=0, comment="错误文件数") + download_count = Column(BigInteger, default=0, comment="下载次数") + view_count = Column(BigInteger, default=0, comment="查看次数") + quality_metrics = Column(JSON, nullable=True, comment="质量指标") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") + + def __repr__(self): + return f"" + +class Tag(Base): + """标签集合模型""" + + __tablename__ = "t_dm_tags" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") + name = Column(String(100), nullable=False, unique=True, comment="标签名称") + description = Column(Text, nullable=True, comment="标签描述") + category = Column(String(50), nullable=True, comment="标签分类") + color = Column(String(7), nullable=True, comment="标签颜色(十六进制)") + usage_count = Column(BigInteger, default=0, comment="使用次数") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") + + def __repr__(self): + return f"" \ No newline at end of file diff --git a/runtime/datamate-python/app/models/dm/user.py b/runtime/datamate-python/app/db/models/user_management.py similarity index 94% rename from runtime/datamate-python/app/models/dm/user.py rename to runtime/datamate-python/app/db/models/user_management.py index ef38760..e0d6d6b 100644 --- a/runtime/datamate-python/app/models/dm/user.py +++ b/runtime/datamate-python/app/db/models/user_management.py @@ -1,6 +1,11 @@ +""" +Tables of User Management Module +""" + from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP from sqlalchemy.sql import func -from app.db.database import Base + +from app.db.session import Base class User(Base): """用户模型""" diff --git a/runtime/datamate-python/app/db/database.py b/runtime/datamate-python/app/db/session.py similarity index 91% rename from runtime/datamate-python/app/db/database.py rename to runtime/datamate-python/app/db/session.py index d475a54..7195c8f 100644 --- a/runtime/datamate-python/app/db/database.py +++ b/runtime/datamate-python/app/db/session.py @@ -8,8 +8,7 @@ logger = get_logger(__name__) # 获取数据库配置信息 db_info = settings.get_database_info() -logger.info(f"使用数据库: {db_info['type']}") -logger.info(f"连接URL: {db_info['url']}") +logger.info(f"使用数据库: {db_info['type']} || 连接URL: {db_info['url']}") # 创建数据库引擎 engine = create_async_engine( diff --git a/runtime/datamate-python/app/exception.py b/runtime/datamate-python/app/exception.py new file mode 100644 index 0000000..853e27c --- /dev/null +++ b/runtime/datamate-python/app/exception.py @@ -0,0 +1,97 @@ +""" +全局自定义异常类定义 +""" +from fastapi.responses import JSONResponse +from fastapi.exceptions import RequestValidationError +from starlette.exceptions import HTTPException as StarletteHTTPException +from fastapi import FastAPI, Request, HTTPException, status + +from .core.logging import setup_logging, get_logger + +logger = get_logger(__name__) + +# 自定义异常处理器:StarletteHTTPException (包括404等) +async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException): + """将Starlette的HTTPException转换为标准响应格式""" + return JSONResponse( + status_code=exc.status_code, + content={ + "code": exc.status_code, + "message": "error", + "data": { + "detail": exc.detail + } + } + ) + +# 自定义异常处理器:FastAPI HTTPException +async def fastapi_http_exception_handler(request: Request, exc: HTTPException): + """将FastAPI的HTTPException转换为标准响应格式""" + return JSONResponse( + status_code=exc.status_code, + content={ + "code": exc.status_code, + "message": "error", + "data": { + "detail": exc.detail + } + } + ) + +# 自定义异常处理器:RequestValidationError +async def validation_exception_handler(request: Request, exc: RequestValidationError): + """将请求验证错误转换为标准响应格式""" + return JSONResponse( + status_code=422, + content={ + "code": 422, + "message": "error", + "data": { + "detail": "Validation error", + "errors": exc.errors() + } + } + ) + +# 自定义异常处理器:未捕获的异常 +async def general_exception_handler(request: Request, exc: Exception): + """将未捕获的异常转换为标准响应格式""" + logger.error(f"Unhandled exception: {exc}", exc_info=True) + return JSONResponse( + status_code=500, + content={ + "code": 500, + "message": "error", + "data": { + "detail": "Internal server error" + } + } + ) + +class LabelStudioAdapterException(Exception): + """Label Studio Adapter 基础异常类""" + pass + +class DatasetMappingNotFoundError(LabelStudioAdapterException): + """数据集映射未找到异常""" + def __init__(self, mapping_id: str): + self.mapping_id = mapping_id + super().__init__(f"Dataset mapping not found: {mapping_id}") + +class NoDatasetInfoFoundError(LabelStudioAdapterException): + """无法获取数据集信息异常""" + def __init__(self, dataset_uuid: str): + self.dataset_uuid = dataset_uuid + super().__init__(f"Failed to get dataset info: {dataset_uuid}") + +class LabelStudioClientError(LabelStudioAdapterException): + """Label Studio 客户端错误""" + pass + +class DMServiceClientError(LabelStudioAdapterException): + """DM 服务客户端错误""" + pass + +class SyncServiceError(LabelStudioAdapterException): + """同步服务错误""" + pass \ No newline at end of file diff --git a/runtime/datamate-python/app/exceptions.py b/runtime/datamate-python/app/exceptions.py deleted file mode 100644 index 383e303..0000000 --- a/runtime/datamate-python/app/exceptions.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -自定义异常类定义 -""" - -class LabelStudioAdapterException(Exception): - """Label Studio Adapter 基础异常类""" - pass - -class DatasetMappingNotFoundError(LabelStudioAdapterException): - """数据集映射未找到异常""" - def __init__(self, mapping_id: str): - self.mapping_id = mapping_id - super().__init__(f"Dataset mapping not found: {mapping_id}") - -class NoDatasetInfoFoundError(LabelStudioAdapterException): - """无法获取数据集信息异常""" - def __init__(self, dataset_uuid: str): - self.dataset_uuid = dataset_uuid - super().__init__(f"Failed to get dataset info: {dataset_uuid}") - -class LabelStudioClientError(LabelStudioAdapterException): - """Label Studio 客户端错误""" - pass - -class DMServiceClientError(LabelStudioAdapterException): - """DM 服务客户端错误""" - pass - -class SyncServiceError(LabelStudioAdapterException): - """同步服务错误""" - pass \ No newline at end of file diff --git a/runtime/datamate-python/app/infrastructure/__init__.py b/runtime/datamate-python/app/infrastructure/__init__.py deleted file mode 100644 index 95f29bc..0000000 --- a/runtime/datamate-python/app/infrastructure/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# app/clients/__init__.py - -from .label_studio import Client as LabelStudioClient -from .datamate import Client as DatamateClient - -__all__ = ["LabelStudioClient", "DatamateClient"] \ No newline at end of file diff --git a/runtime/datamate-python/app/main.py b/runtime/datamate-python/app/main.py index db59a46..374d399 100644 --- a/runtime/datamate-python/app/main.py +++ b/runtime/datamate-python/app/main.py @@ -1,16 +1,23 @@ from fastapi import FastAPI, Request, HTTPException, status from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse from fastapi.exceptions import RequestValidationError from starlette.exceptions import HTTPException as StarletteHTTPException + from contextlib import asynccontextmanager from typing import Dict, Any +from sqlalchemy import text from .core.config import settings from .core.logging import setup_logging, get_logger -from .infrastructure import LabelStudioClient -from .api import api_router -from .schemas import StandardResponse +from .db.session import engine, AsyncSessionLocal +from .module.shared.schema import StandardResponse +from .module import router +from .exception import ( + starlette_http_exception_handler, + fastapi_http_exception_handler, + validation_exception_handler, + general_exception_handler +) # 设置日志 setup_logging() @@ -21,23 +28,21 @@ async def lifespan(app: FastAPI): """应用程序生命周期管理""" # 启动时初始化 - logger.info("Starting Label Studio Adapter...") - - # 初始化 Label Studio 客户端,使用 HTTP REST API + Token 认证 - ls_client = LabelStudioClient( - base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token - ) - - logger.info("Label Studio Adapter started") - + logger.info("DataMate Python Backend starting...") + # 数据库连接验证 + try: + async with AsyncSessionLocal() as session: + await session.execute(text("SELECT 1")) + logger.info("Database connection validated successfully.") + except Exception as e: + logger.error(f"Database connection validation failed: {e}") + logger.debug(f"Connection details: {settings.computed_database_url}") + raise + yield # 关闭时清理 - logger.info("Shutting down Label Studio Adapter...") - - # 客户端清理会在客户端管理器中处理 - logger.info("Label Studio Adapter stopped") + logger.info("DataMate Python Backend shutting down ...") # 创建FastAPI应用 app = FastAPI( @@ -57,70 +62,16 @@ app.add_middleware( allow_headers=settings.allowed_headers, ) -# 自定义异常处理器:StarletteHTTPException (包括404等) -@app.exception_handler(StarletteHTTPException) -async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException): - """将Starlette的HTTPException转换为标准响应格式""" - return JSONResponse( - status_code=exc.status_code, - content={ - "code": exc.status_code, - "message": "error", - "data": { - "detail": exc.detail - } - } - ) - -# 自定义异常处理器:FastAPI HTTPException -@app.exception_handler(HTTPException) -async def fastapi_http_exception_handler(request: Request, exc: HTTPException): - """将FastAPI的HTTPException转换为标准响应格式""" - return JSONResponse( - status_code=exc.status_code, - content={ - "code": exc.status_code, - "message": "error", - "data": { - "detail": exc.detail - } - } - ) - -# 自定义异常处理器:RequestValidationError -@app.exception_handler(RequestValidationError) -async def validation_exception_handler(request: Request, exc: RequestValidationError): - """将请求验证错误转换为标准响应格式""" - return JSONResponse( - status_code=422, - content={ - "code": 422, - "message": "error", - "data": { - "detail": "Validation error", - "errors": exc.errors() - } - } - ) - -# 自定义异常处理器:未捕获的异常 -@app.exception_handler(Exception) -async def general_exception_handler(request: Request, exc: Exception): - """将未捕获的异常转换为标准响应格式""" - logger.error(f"Unhandled exception: {exc}", exc_info=True) - return JSONResponse( - status_code=500, - content={ - "code": 500, - "message": "error", - "data": { - "detail": "Internal server error" - } - } - ) - # 注册路由 -app.include_router(api_router, prefix="/api") +app.include_router(router) + +logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes]) + +# 注册全局异常处理器 +app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore +app.add_exception_handler(HTTPException, fastapi_http_exception_handler) # type: ignore +app.add_exception_handler(RequestValidationError, validation_exception_handler) # type: ignore +app.add_exception_handler(Exception, general_exception_handler) # 测试端点:验证异常处理 @app.get("/test-404", include_in_schema=False) diff --git a/runtime/datamate-python/app/models/README.md b/runtime/datamate-python/app/models/README.md deleted file mode 100644 index 506f19e..0000000 --- a/runtime/datamate-python/app/models/README.md +++ /dev/null @@ -1,138 +0,0 @@ -# DataMate 数据模型结构 - -本文档列出了根据 `scripts/db` 中的 SQL 文件创建的所有 Python 数据模型。 - -## 模型组织结构 - -``` -app/models/ -├── __init__.py # 主模块导出文件 -├── dm/ # 数据管理 (Data Management) 模块 -│ ├── __init__.py -│ ├── annotation_template.py # 标注模板 -│ ├── labeling_project.py # 标注项目 -│ ├── dataset.py # 数据集 -│ ├── dataset_files.py # 数据集文件 -│ ├── dataset_statistics.py # 数据集统计 -│ ├── dataset_tag.py # 数据集标签关联 -│ ├── tag.py # 标签 -│ └── user.py # 用户 -├── cleaning/ # 数据清洗 (Data Cleaning) 模块 -│ ├── __init__.py -│ ├── clean_template.py # 清洗模板 -│ ├── clean_task.py # 清洗任务 -│ ├── operator_instance.py # 算子实例 -│ └── clean_result.py # 清洗结果 -├── collection/ # 数据归集 (Data Collection) 模块 -│ ├── __init__.py -│ ├── task_execution.py # 任务执行明细 -│ ├── collection_task.py # 数据归集任务 -│ ├── task_log.py # 任务执行记录 -│ └── datax_template.py # DataX模板配置 -├── common/ # 通用 (Common) 模块 -│ ├── __init__.py -│ └── chunk_upload_request.py # 文件切片上传请求 -└── operator/ # 算子 (Operator) 模块 - ├── __init__.py - ├── operator.py # 算子 - ├── operator_category.py # 算子分类 - └── operator_category_relation.py # 算子分类关联 -``` - -## 模块详情 - -### 1. Data Management (DM) 模块 -对应 SQL: `data-management-init.sql` 和 `data-annotation-init.sql` - -#### 模型列表: -- **AnnotationTemplate** (`t_dm_annotation_templates`) - 标注模板 -- **LabelingProject** (`t_dm_labeling_projects`) - 标注项目 -- **Dataset** (`t_dm_datasets`) - 数据集(支持医学影像、文本、问答等多种类型) -- **DatasetFiles** (`t_dm_dataset_files`) - 数据集文件 -- **DatasetStatistics** (`t_dm_dataset_statistics`) - 数据集统计信息 -- **Tag** (`t_dm_tags`) - 标签 -- **DatasetTag** (`t_dm_dataset_tags`) - 数据集标签关联 -- **User** (`users`) - 用户 - -### 2. Data Cleaning 模块 -对应 SQL: `data-cleaning-init.sql` - -#### 模型列表: -- **CleanTemplate** (`t_clean_template`) - 清洗模板 -- **CleanTask** (`t_clean_task`) - 清洗任务 -- **OperatorInstance** (`t_operator_instance`) - 算子实例 -- **CleanResult** (`t_clean_result`) - 清洗结果 - -### 3. Data Collection (DC) 模块 -对应 SQL: `data-collection-init.sql` - -#### 模型列表: -- **TaskExecution** (`t_dc_task_executions`) - 任务执行明细 -- **CollectionTask** (`t_dc_collection_tasks`) - 数据归集任务 -- **TaskLog** (`t_dc_task_log`) - 任务执行记录 -- **DataxTemplate** (`t_dc_datax_templates`) - DataX模板配置 - -### 4. Common 模块 -对应 SQL: `data-common-init.sql` - -#### 模型列表: -- **ChunkUploadRequest** (`t_chunk_upload_request`) - 文件切片上传请求 - -### 5. Operator 模块 -对应 SQL: `data-operator-init.sql` - -#### 模型列表: -- **Operator** (`t_operator`) - 算子 -- **OperatorCategory** (`t_operator_category`) - 算子分类 -- **OperatorCategoryRelation** (`t_operator_category_relation`) - 算子分类关联 - -## 使用方式 - -```python -# 导入所有模型 -from app.models import ( - # DM 模块 - AnnotationTemplate, - LabelingProject, - Dataset, - DatasetFiles, - DatasetStatistics, - DatasetTag, - Tag, - User, - # Cleaning 模块 - CleanTemplate, - CleanTask, - OperatorInstance, - CleanResult, - # Collection 模块 - TaskExecution, - CollectionTask, - TaskLog, - DataxTemplate, - # Common 模块 - ChunkUploadRequest, - # Operator 模块 - Operator, - OperatorCategory, - OperatorCategoryRelation -) - -# 或者按模块导入 -from app.models.dm import Dataset, DatasetFiles -from app.models.collection import CollectionTask -from app.models.operator import Operator -``` - -## 注意事项 - -1. **UUID 主键**: 大部分表使用 UUID (String(36)) 作为主键 -2. **时间戳**: 使用 `TIMESTAMP` 类型,并配置自动更新 -3. **软删除**: 部分模型(如 AnnotationTemplate, LabelingProject)支持软删除,包含 `deleted_at` 字段和 `is_deleted` 属性 -4. **JSON 字段**: 配置信息、元数据等使用 JSON 类型存储 -5. **字段一致性**: 所有模型字段都严格按照 SQL 定义创建,确保与数据库表结构完全一致 - -## 更新记录 - -- 2025-10-25: 根据 `scripts/db` 中的 SQL 文件创建所有数据模型 -- 已更新现有的 `annotation_template.py`、`labeling_project.py`、`dataset_files.py` 以匹配 SQL 定义 diff --git a/runtime/datamate-python/app/models/__init__.py b/runtime/datamate-python/app/models/__init__.py deleted file mode 100644 index 11cb8d7..0000000 --- a/runtime/datamate-python/app/models/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -# app/models/__init__.py - -# Data Management (DM) 模块 -from .dm import ( - AnnotationTemplate, - LabelingProject, - Dataset, - DatasetFiles, - DatasetStatistics, - DatasetTag, - Tag, - User -) - -# Data Cleaning 模块 -from .cleaning import ( - CleanTemplate, - CleanTask, - OperatorInstance, - CleanResult -) - -# Data Collection (DC) 模块 -from .collection import ( - TaskExecution, - CollectionTask, - TaskLog, - DataxTemplate -) - -# Common 模块 -from .common import ( - ChunkUploadRequest -) - -# Operator 模块 -from .operator import ( - Operator, - OperatorCategory, - OperatorCategoryRelation -) - -__all__ = [ - # DM 模块 - "AnnotationTemplate", - "LabelingProject", - "Dataset", - "DatasetFiles", - "DatasetStatistics", - "DatasetTag", - "Tag", - "User", - # Cleaning 模块 - "CleanTemplate", - "CleanTask", - "OperatorInstance", - "CleanResult", - # Collection 模块 - "TaskExecution", - "CollectionTask", - "TaskLog", - "DataxTemplate", - # Common 模块 - "ChunkUploadRequest", - # Operator 模块 - "Operator", - "OperatorCategory", - "OperatorCategoryRelation" -] \ No newline at end of file diff --git a/runtime/datamate-python/app/models/cleaning/__init__.py b/runtime/datamate-python/app/models/cleaning/__init__.py deleted file mode 100644 index 9d19aa1..0000000 --- a/runtime/datamate-python/app/models/cleaning/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# app/models/cleaning/__init__.py - -from .clean_template import CleanTemplate -from .clean_task import CleanTask -from .operator_instance import OperatorInstance -from .clean_result import CleanResult - -__all__ = [ - "CleanTemplate", - "CleanTask", - "OperatorInstance", - "CleanResult" -] diff --git a/runtime/datamate-python/app/models/cleaning/clean_result.py b/runtime/datamate-python/app/models/cleaning/clean_result.py deleted file mode 100644 index 9ece0af..0000000 --- a/runtime/datamate-python/app/models/cleaning/clean_result.py +++ /dev/null @@ -1,22 +0,0 @@ -from sqlalchemy import Column, String, BigInteger, Text -from app.db.database import Base - -class CleanResult(Base): - """清洗结果模型""" - - __tablename__ = "t_clean_result" - - instance_id = Column(String(64), primary_key=True, comment="实例ID") - src_file_id = Column(String(64), nullable=True, comment="源文件ID") - dest_file_id = Column(String(64), primary_key=True, comment="目标文件ID") - src_name = Column(String(256), nullable=True, comment="源文件名") - dest_name = Column(String(256), nullable=True, comment="目标文件名") - src_type = Column(String(256), nullable=True, comment="源文件类型") - dest_type = Column(String(256), nullable=True, comment="目标文件类型") - src_size = Column(BigInteger, nullable=True, comment="源文件大小") - dest_size = Column(BigInteger, nullable=True, comment="目标文件大小") - status = Column(String(256), nullable=True, comment="处理状态") - result = Column(Text, nullable=True, comment="处理结果") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/cleaning/clean_task.py b/runtime/datamate-python/app/models/cleaning/clean_task.py deleted file mode 100644 index 6ae984c..0000000 --- a/runtime/datamate-python/app/models/cleaning/clean_task.py +++ /dev/null @@ -1,27 +0,0 @@ -from sqlalchemy import Column, String, BigInteger, Integer, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class CleanTask(Base): - """清洗任务模型""" - - __tablename__ = "t_clean_task" - - id = Column(String(64), primary_key=True, comment="任务ID") - name = Column(String(64), nullable=True, comment="任务名称") - description = Column(String(256), nullable=True, comment="任务描述") - status = Column(String(256), nullable=True, comment="任务状态") - src_dataset_id = Column(String(64), nullable=True, comment="源数据集ID") - src_dataset_name = Column(String(64), nullable=True, comment="源数据集名称") - dest_dataset_id = Column(String(64), nullable=True, comment="目标数据集ID") - dest_dataset_name = Column(String(64), nullable=True, comment="目标数据集名称") - before_size = Column(BigInteger, nullable=True, comment="清洗前大小") - after_size = Column(BigInteger, nullable=True, comment="清洗后大小") - file_count = Column(Integer, nullable=True, comment="文件数量") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - started_at = Column(TIMESTAMP, nullable=True, comment="开始时间") - finished_at = Column(TIMESTAMP, nullable=True, comment="完成时间") - created_by = Column(String(256), nullable=True, comment="创建者") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/cleaning/clean_template.py b/runtime/datamate-python/app/models/cleaning/clean_template.py deleted file mode 100644 index f156a66..0000000 --- a/runtime/datamate-python/app/models/cleaning/clean_template.py +++ /dev/null @@ -1,18 +0,0 @@ -from sqlalchemy import Column, String, Text, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class CleanTemplate(Base): - """清洗模板模型""" - - __tablename__ = "t_clean_template" - - id = Column(String(64), primary_key=True, unique=True, comment="模板ID") - name = Column(String(64), nullable=True, comment="模板名称") - description = Column(String(256), nullable=True, comment="模板描述") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - created_by = Column(String(256), nullable=True, comment="创建者") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/cleaning/operator_instance.py b/runtime/datamate-python/app/models/cleaning/operator_instance.py deleted file mode 100644 index 5fa7114..0000000 --- a/runtime/datamate-python/app/models/cleaning/operator_instance.py +++ /dev/null @@ -1,15 +0,0 @@ -from sqlalchemy import Column, String, Integer, Text -from app.db.database import Base - -class OperatorInstance(Base): - """算子实例模型""" - - __tablename__ = "t_operator_instance" - - instance_id = Column(String(256), primary_key=True, comment="实例ID") - operator_id = Column(String(256), primary_key=True, comment="算子ID") - op_index = Column(Integer, primary_key=True, comment="算子索引") - settings_override = Column(Text, nullable=True, comment="配置覆盖") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/collection/__init__.py b/runtime/datamate-python/app/models/collection/__init__.py deleted file mode 100644 index a89330f..0000000 --- a/runtime/datamate-python/app/models/collection/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# app/models/collection/__init__.py - -from .task_execution import TaskExecution -from .collection_task import CollectionTask -from .task_log import TaskLog -from .datax_template import DataxTemplate - -__all__ = [ - "TaskExecution", - "CollectionTask", - "TaskLog", - "DataxTemplate" -] diff --git a/runtime/datamate-python/app/models/collection/collection_task.py b/runtime/datamate-python/app/models/collection/collection_task.py deleted file mode 100644 index db2c48a..0000000 --- a/runtime/datamate-python/app/models/collection/collection_task.py +++ /dev/null @@ -1,28 +0,0 @@ -from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class CollectionTask(Base): - """数据归集任务模型""" - - __tablename__ = "t_dc_collection_tasks" - - id = Column(String(36), primary_key=True, comment="任务ID(UUID)") - name = Column(String(255), nullable=False, comment="任务名称") - description = Column(Text, nullable=True, comment="任务描述") - sync_mode = Column(String(20), default='ONCE', comment="同步模式:ONCE/SCHEDULED") - config = Column(Text, nullable=False, comment="归集配置(DataX配置),包含源端和目标端配置信息") - schedule_expression = Column(String(255), nullable=True, comment="Cron调度表达式") - status = Column(String(20), default='DRAFT', comment="任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED") - retry_count = Column(Integer, default=3, comment="重试次数") - timeout_seconds = Column(Integer, default=3600, comment="超时时间(秒)") - max_records = Column(BigInteger, nullable=True, comment="最大处理记录数") - sort_field = Column(String(100), nullable=True, comment="增量字段") - last_execution_id = Column(String(36), nullable=True, comment="最后执行ID(UUID)") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - created_by = Column(String(255), nullable=True, comment="创建者") - updated_by = Column(String(255), nullable=True, comment="更新者") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/collection/datax_template.py b/runtime/datamate-python/app/models/collection/datax_template.py deleted file mode 100644 index eadcde2..0000000 --- a/runtime/datamate-python/app/models/collection/datax_template.py +++ /dev/null @@ -1,23 +0,0 @@ -from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class DataxTemplate(Base): - """DataX模板配置模型""" - - __tablename__ = "t_dc_datax_templates" - - id = Column(String(36), primary_key=True, comment="模板ID(UUID)") - name = Column(String(255), nullable=False, unique=True, comment="模板名称") - source_type = Column(String(50), nullable=False, comment="源数据源类型") - target_type = Column(String(50), nullable=False, comment="目标数据源类型") - template_content = Column(Text, nullable=False, comment="模板内容") - description = Column(Text, nullable=True, comment="模板描述") - version = Column(String(20), default='1.0.0', comment="版本号") - is_system = Column(Boolean, default=False, comment="是否系统模板") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - created_by = Column(String(255), nullable=True, comment="创建者") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/collection/task_execution.py b/runtime/datamate-python/app/models/collection/task_execution.py deleted file mode 100644 index 1450ff5..0000000 --- a/runtime/datamate-python/app/models/collection/task_execution.py +++ /dev/null @@ -1,34 +0,0 @@ -from sqlalchemy import Column, String, Text, Integer, BigInteger, DECIMAL, JSON, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class TaskExecution(Base): - """任务执行明细模型""" - - __tablename__ = "t_dc_task_executions" - - id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)") - task_id = Column(String(36), nullable=False, comment="任务ID") - task_name = Column(String(255), nullable=False, comment="任务名称") - status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED") - progress = Column(DECIMAL(5, 2), default=0.00, comment="进度百分比") - records_total = Column(BigInteger, default=0, comment="总记录数") - records_processed = Column(BigInteger, default=0, comment="已处理记录数") - records_success = Column(BigInteger, default=0, comment="成功记录数") - records_failed = Column(BigInteger, default=0, comment="失败记录数") - throughput = Column(DECIMAL(10, 2), default=0.00, comment="吞吐量(条/秒)") - data_size_bytes = Column(BigInteger, default=0, comment="数据量(字节)") - started_at = Column(TIMESTAMP, nullable=True, comment="开始时间") - completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间") - duration_seconds = Column(Integer, default=0, comment="执行时长(秒)") - config = Column(JSON, nullable=True, comment="执行配置") - error_message = Column(Text, nullable=True, comment="错误信息") - datax_job_id = Column(Text, nullable=True, comment="datax任务ID") - result = Column(Text, nullable=True, comment="执行结果") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - created_by = Column(String(255), nullable=True, comment="创建者") - updated_by = Column(String(255), nullable=True, comment="更新者") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/collection/task_log.py b/runtime/datamate-python/app/models/collection/task_log.py deleted file mode 100644 index 7bc2c1e..0000000 --- a/runtime/datamate-python/app/models/collection/task_log.py +++ /dev/null @@ -1,26 +0,0 @@ -from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class TaskLog(Base): - """任务执行记录模型""" - - __tablename__ = "t_dc_task_log" - - id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)") - task_id = Column(String(36), nullable=False, comment="任务ID") - task_name = Column(String(255), nullable=False, comment="任务名称") - sync_mode = Column(String(20), default='FULL', comment="同步模式:FULL/INCREMENTAL") - status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED") - start_time = Column(TIMESTAMP, nullable=True, comment="开始时间") - end_time = Column(TIMESTAMP, nullable=True, comment="结束时间") - duration = Column(BigInteger, nullable=True, comment="执行时长(毫秒)") - process_id = Column(String(50), nullable=True, comment="进程ID") - log_path = Column(String(500), nullable=True, comment="日志文件路径") - error_msg = Column(Text, nullable=True, comment="错误信息") - result = Column(Text, nullable=True, comment="执行结果") - retry_times = Column(Integer, default=0, comment="重试次数") - create_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/common/__init__.py b/runtime/datamate-python/app/models/common/__init__.py deleted file mode 100644 index c90cfdf..0000000 --- a/runtime/datamate-python/app/models/common/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# app/models/common/__init__.py - -from .chunk_upload_request import ChunkUploadRequest - -__all__ = [ - "ChunkUploadRequest" -] diff --git a/runtime/datamate-python/app/models/common/chunk_upload_request.py b/runtime/datamate-python/app/models/common/chunk_upload_request.py deleted file mode 100644 index 91e830e..0000000 --- a/runtime/datamate-python/app/models/common/chunk_upload_request.py +++ /dev/null @@ -1,19 +0,0 @@ -from sqlalchemy import Column, String, Integer, Text, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class ChunkUploadRequest(Base): - """文件切片上传请求模型""" - - __tablename__ = "t_chunk_upload_request" - - id = Column(String(36), primary_key=True, comment="UUID") - total_file_num = Column(Integer, nullable=True, comment="总文件数") - uploaded_file_num = Column(Integer, nullable=True, comment="已上传文件数") - upload_path = Column(String(256), nullable=True, comment="文件路径") - timeout = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传请求超时时间") - service_id = Column(String(64), nullable=True, comment="上传请求所属服务:DATA-MANAGEMENT(数据管理)") - check_info = Column(Text, nullable=True, comment="业务信息") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/dm/__init__.py b/runtime/datamate-python/app/models/dm/__init__.py deleted file mode 100644 index 7d1c9ae..0000000 --- a/runtime/datamate-python/app/models/dm/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# app/models/dm/__init__.py - -from .annotation_template import AnnotationTemplate -from .labeling_project import LabelingProject -from .dataset import Dataset -from .dataset_files import DatasetFiles -from .dataset_statistics import DatasetStatistics -from .dataset_tag import DatasetTag -from .tag import Tag -from .user import User - -__all__ = [ - "AnnotationTemplate", - "LabelingProject", - "Dataset", - "DatasetFiles", - "DatasetStatistics", - "DatasetTag", - "Tag", - "User" -] diff --git a/runtime/datamate-python/app/models/dm/annotation_template.py b/runtime/datamate-python/app/models/dm/annotation_template.py deleted file mode 100644 index b26a054..0000000 --- a/runtime/datamate-python/app/models/dm/annotation_template.py +++ /dev/null @@ -1,24 +0,0 @@ -from sqlalchemy import Column, String, JSON, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base -import uuid - -class AnnotationTemplate(Base): - """标注模板模型""" - - __tablename__ = "t_dm_annotation_templates" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID") - name = Column(String(32), nullable=False, comment="模板名称") - description = Column(String(255), nullable=True, comment="模板描述") - configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)") - - def __repr__(self): - return f"" - - @property - def is_deleted(self) -> bool: - """检查是否已被软删除""" - return self.deleted_at is not None diff --git a/runtime/datamate-python/app/models/dm/dataset.py b/runtime/datamate-python/app/models/dm/dataset.py deleted file mode 100644 index cab75f3..0000000 --- a/runtime/datamate-python/app/models/dm/dataset.py +++ /dev/null @@ -1,35 +0,0 @@ -from sqlalchemy import Column, String, Text, BigInteger, Integer, Boolean, JSON, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base -import uuid - -class Dataset(Base): - """数据集模型(支持医学影像、文本、问答等多种类型)""" - - __tablename__ = "t_dm_datasets" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") - name = Column(String(255), nullable=False, comment="数据集名称") - description = Column(Text, nullable=True, comment="数据集描述") - dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER") - category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等") - path = Column(String(500), nullable=True, comment="数据存储路径") - format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等") - schema_info = Column(JSON, nullable=True, comment="数据结构信息") - size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)") - file_count = Column(BigInteger, default=0, comment="文件数量") - record_count = Column(BigInteger, default=0, comment="记录数量") - retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)") - tags = Column(JSON, nullable=True, comment="标签列表") - metadata = Column(JSON, nullable=True, comment="元数据信息") - status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED") - is_public = Column(Boolean, default=False, comment="是否公开") - is_featured = Column(Boolean, default=False, comment="是否推荐") - version = Column(BigInteger, nullable=False, default=0, comment="版本号") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - created_by = Column(String(255), nullable=True, comment="创建者") - updated_by = Column(String(255), nullable=True, comment="更新者") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/dm/dataset_files.py b/runtime/datamate-python/app/models/dm/dataset_files.py deleted file mode 100644 index bbe41a9..0000000 --- a/runtime/datamate-python/app/models/dm/dataset_files.py +++ /dev/null @@ -1,27 +0,0 @@ -from sqlalchemy import Column, String, JSON, BigInteger, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base -import uuid - -class DatasetFiles(Base): - """DM数据集文件模型""" - - __tablename__ = "t_dm_dataset_files" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") - dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)") - file_name = Column(String(255), nullable=False, comment="文件名") - file_path = Column(String(1000), nullable=False, comment="文件路径") - file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等") - file_size = Column(BigInteger, default=0, comment="文件大小(字节)") - check_sum = Column(String(64), nullable=True, comment="文件校验和") - tags = Column(JSON, nullable=True, comment="文件标签信息") - metadata = Column(JSON, nullable=True, comment="文件元数据") - status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING") - upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间") - last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - - def __repr__(self): - return f"" \ No newline at end of file diff --git a/runtime/datamate-python/app/models/dm/dataset_statistics.py b/runtime/datamate-python/app/models/dm/dataset_statistics.py deleted file mode 100644 index 00bcadd..0000000 --- a/runtime/datamate-python/app/models/dm/dataset_statistics.py +++ /dev/null @@ -1,25 +0,0 @@ -from sqlalchemy import Column, String, Date, BigInteger, JSON, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base -import uuid - -class DatasetStatistics(Base): - """数据集统计信息模型""" - - __tablename__ = "t_dm_dataset_statistics" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") - dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)") - stat_date = Column(Date, nullable=False, comment="统计日期") - total_files = Column(BigInteger, default=0, comment="总文件数") - total_size = Column(BigInteger, default=0, comment="总大小(字节)") - processed_files = Column(BigInteger, default=0, comment="已处理文件数") - error_files = Column(BigInteger, default=0, comment="错误文件数") - download_count = Column(BigInteger, default=0, comment="下载次数") - view_count = Column(BigInteger, default=0, comment="查看次数") - quality_metrics = Column(JSON, nullable=True, comment="质量指标") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/dm/dataset_tag.py b/runtime/datamate-python/app/models/dm/dataset_tag.py deleted file mode 100644 index 617b0c4..0000000 --- a/runtime/datamate-python/app/models/dm/dataset_tag.py +++ /dev/null @@ -1,15 +0,0 @@ -from sqlalchemy import Column, String, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class DatasetTag(Base): - """数据集标签关联模型""" - - __tablename__ = "t_dm_dataset_tags" - - dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)") - tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/dm/labeling_project.py b/runtime/datamate-python/app/models/dm/labeling_project.py deleted file mode 100644 index 4be5394..0000000 --- a/runtime/datamate-python/app/models/dm/labeling_project.py +++ /dev/null @@ -1,26 +0,0 @@ -from sqlalchemy import Column, String, Integer, JSON, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base -import uuid - -class LabelingProject(Base): - """DM标注项目模型(原 DatasetMapping)""" - - __tablename__ = "t_dm_labeling_projects" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID") - dataset_id = Column(String(36), nullable=False, comment="数据集ID") - name = Column(String(32), nullable=False, comment="项目名称") - labeling_project_id = Column(Integer, nullable=False, comment="Label Studio项目ID") - configuration = Column(JSON, nullable=True, comment="标签配置") - progress = Column(JSON, nullable=True, comment="标注进度统计") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)") - - def __repr__(self): - return f"" - - @property - def is_deleted(self) -> bool: - """检查是否已被软删除""" - return self.deleted_at is not None \ No newline at end of file diff --git a/runtime/datamate-python/app/models/dm/tag.py b/runtime/datamate-python/app/models/dm/tag.py deleted file mode 100644 index 095e1e4..0000000 --- a/runtime/datamate-python/app/models/dm/tag.py +++ /dev/null @@ -1,21 +0,0 @@ -from sqlalchemy import Column, String, Text, BigInteger, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base -import uuid - -class Tag(Base): - """标签模型""" - - __tablename__ = "t_dm_tags" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") - name = Column(String(100), nullable=False, unique=True, comment="标签名称") - description = Column(Text, nullable=True, comment="标签描述") - category = Column(String(50), nullable=True, comment="标签分类") - color = Column(String(7), nullable=True, comment="标签颜色(十六进制)") - usage_count = Column(BigInteger, default=0, comment="使用次数") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/operator/__init__.py b/runtime/datamate-python/app/models/operator/__init__.py deleted file mode 100644 index 6cf1324..0000000 --- a/runtime/datamate-python/app/models/operator/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# app/models/operator/__init__.py - -from .operator import Operator -from .operator_category import OperatorCategory -from .operator_category_relation import OperatorCategoryRelation - -__all__ = [ - "Operator", - "OperatorCategory", - "OperatorCategoryRelation" -] diff --git a/runtime/datamate-python/app/models/operator/operator.py b/runtime/datamate-python/app/models/operator/operator.py deleted file mode 100644 index db53ef5..0000000 --- a/runtime/datamate-python/app/models/operator/operator.py +++ /dev/null @@ -1,24 +0,0 @@ -from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP -from sqlalchemy.sql import func -from app.db.database import Base - -class Operator(Base): - """算子模型""" - - __tablename__ = "t_operator" - - id = Column(String(64), primary_key=True, comment="算子ID") - name = Column(String(64), nullable=True, comment="算子名称") - description = Column(String(256), nullable=True, comment="算子描述") - version = Column(String(256), nullable=True, comment="版本") - inputs = Column(String(256), nullable=True, comment="输入类型") - outputs = Column(String(256), nullable=True, comment="输出类型") - runtime = Column(Text, nullable=True, comment="运行时信息") - settings = Column(Text, nullable=True, comment="配置信息") - file_name = Column(Text, nullable=True, comment="文件名") - is_star = Column(Boolean, nullable=True, comment="是否收藏") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/operator/operator_category.py b/runtime/datamate-python/app/models/operator/operator_category.py deleted file mode 100644 index cc27d93..0000000 --- a/runtime/datamate-python/app/models/operator/operator_category.py +++ /dev/null @@ -1,15 +0,0 @@ -from sqlalchemy import Column, String, Integer -from app.db.database import Base - -class OperatorCategory(Base): - """算子分类模型""" - - __tablename__ = "t_operator_category" - - id = Column(Integer, primary_key=True, autoincrement=True, comment="分类ID") - name = Column(String(64), nullable=True, comment="分类名称") - type = Column(String(64), nullable=True, comment="分类类型") - parent_id = Column(Integer, nullable=True, comment="父分类ID") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/models/operator/operator_category_relation.py b/runtime/datamate-python/app/models/operator/operator_category_relation.py deleted file mode 100644 index bc55523..0000000 --- a/runtime/datamate-python/app/models/operator/operator_category_relation.py +++ /dev/null @@ -1,13 +0,0 @@ -from sqlalchemy import Column, String, Integer -from app.db.database import Base - -class OperatorCategoryRelation(Base): - """算子分类关联模型""" - - __tablename__ = "t_operator_category_relation" - - category_id = Column(Integer, primary_key=True, comment="分类ID") - operator_id = Column(String(64), primary_key=True, comment="算子ID") - - def __repr__(self): - return f"" diff --git a/runtime/datamate-python/app/module/__init__.py b/runtime/datamate-python/app/module/__init__.py new file mode 100644 index 0000000..4ca3782 --- /dev/null +++ b/runtime/datamate-python/app/module/__init__.py @@ -0,0 +1,11 @@ +from fastapi import APIRouter + +from .annotation.interface import router as annotation_router + +router = APIRouter( + prefix="/api" +) + +router.include_router(annotation_router) + +__all__ = ["router"] \ No newline at end of file diff --git a/runtime/datamate-python/app/module/annotation/__init__.py b/runtime/datamate-python/app/module/annotation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runtime/datamate-python/app/module/annotation/client/__init__.py b/runtime/datamate-python/app/module/annotation/client/__init__.py new file mode 100644 index 0000000..aeb4e2a --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/client/__init__.py @@ -0,0 +1,3 @@ +from .labelstudio import LabelStudioClient + +__all__ = ["LabelStudioClient"] \ No newline at end of file diff --git a/runtime/datamate-python/app/module/annotation/client/labelstudio/__init__.py b/runtime/datamate-python/app/module/annotation/client/labelstudio/__init__.py new file mode 100644 index 0000000..e46d4e8 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/client/labelstudio/__init__.py @@ -0,0 +1,3 @@ +from .client import Client as LabelStudioClient + +__all__ = ["LabelStudioClient"] \ No newline at end of file diff --git a/runtime/datamate-python/app/infrastructure/label_studio.py b/runtime/datamate-python/app/module/annotation/client/labelstudio/client.py similarity index 82% rename from runtime/datamate-python/app/infrastructure/label_studio.py rename to runtime/datamate-python/app/module/annotation/client/labelstudio/client.py index cef979c..49225a1 100644 --- a/runtime/datamate-python/app/infrastructure/label_studio.py +++ b/runtime/datamate-python/app/module/annotation/client/labelstudio/client.py @@ -1,10 +1,10 @@ import httpx from typing import Optional, Dict, Any, List -import json from app.core.config import settings from app.core.logging import get_logger -from app.schemas.label_studio import ( + +from .schema import ( LabelStudioProject, LabelStudioCreateProjectRequest, LabelStudioCreateTaskRequest @@ -88,7 +88,7 @@ class Client: } ) - logger.info(f"Label Studio client initialized: {self.base_url}") + logger.debug(f"Label Studio client initialized: {self.base_url}") def get_label_config_by_type(self, data_type: str) -> str: """根据数据类型获取标注配置""" @@ -103,7 +103,7 @@ class Client: ) -> Optional[Dict[str, Any]]: """创建Label Studio项目""" try: - logger.info(f"Creating Label Studio project: {title}") + logger.debug(f"Creating Label Studio project: {title}") if not label_config: label_config = self.get_label_config_by_type(data_type) @@ -123,7 +123,7 @@ class Client: if not project_id: raise Exception("Label Studio response does not contain project ID") - logger.info(f"Project created successfully, ID: {project_id}") + logger.debug(f"Project created successfully, ID: {project_id}") return project except httpx.HTTPStatusError as e: @@ -142,7 +142,7 @@ class Client: ) -> Optional[Dict[str, Any]]: """批量导入任务到Label Studio项目""" try: - logger.info(f"Importing {len(tasks)} tasks into project {project_id}") + logger.debug(f"Importing {len(tasks)} tasks into project {project_id}") response = await self.client.post( f"/api/projects/{project_id}/import", @@ -157,7 +157,7 @@ class Client: result = response.json() task_count = result.get("task_count", len(tasks)) - logger.info(f"Tasks imported successfully: {task_count}") + logger.debug(f"Tasks imported successfully: {task_count}") return result except httpx.HTTPStatusError as e: @@ -236,11 +236,12 @@ class Client: # 如果指定了page,直接获取单页任务 if page is not None: - logger.info(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})") + logger.debug(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})") response = await self.client.get( - f"/api/projects/{pid}/tasks", + f"/api/tasks", params={ + "project": pid, "page": page, "page_size": page_size } @@ -259,48 +260,27 @@ class Client: } # 如果未指定page,获取所有任务 - logger.info(f"Start fetching all tasks for project {pid} (page_size={page_size})") + logger.debug(f"(page) not specified, fetching all tasks.") all_tasks = [] - current_page = 1 + + response = await self.client.get( + f"/api/tasks", + params={ + "project": pid + } + ) + response.raise_for_status() - while True: - try: - response = await self.client.get( - f"/api/projects/{pid}/tasks", - params={ - "page": current_page, - "page_size": page_size - } - ) - response.raise_for_status() - - result = response.json() - tasks = result.get("tasks", []) - - if not tasks: - logger.debug(f"No more tasks on page {current_page}") - break - - all_tasks.extend(tasks) - logger.debug(f"Fetched page {current_page}, {len(tasks)} tasks") - - # 检查是否还有更多页 - total = result.get("total", 0) - if len(all_tasks) >= total: - break - - current_page += 1 - - except httpx.HTTPStatusError as e: - if e.response.status_code == 404: - # 超出页数范围,结束分页 - logger.debug(f"Reached last page (page {current_page})") - break - else: - raise + result = response.json() + tasks = result.get("tasks", []) - logger.info(f"Fetched all tasks for project {pid}, total {len(all_tasks)}") + if not tasks: + logger.debug(f"No tasks found for this project.") + + all_tasks.extend(tasks) + logger.debug(f"Fetched {len(tasks)} tasks.") + # 返回所有任务,不包含分页信息 return { "count": len(all_tasks), @@ -321,12 +301,12 @@ class Client: ) -> bool: """删除单个任务""" try: - logger.info(f"Deleting task: {task_id}") + logger.debug(f"Deleting task: {task_id}") response = await self.client.delete(f"/api/tasks/{task_id}") response.raise_for_status() - logger.info(f"Task deleted: {task_id}") + logger.debug(f"Task deleted: {task_id}") return True except httpx.HTTPStatusError as e: @@ -342,7 +322,7 @@ class Client: ) -> Dict[str, int]: """批量删除任务""" try: - logger.info(f"Deleting {len(task_ids)} tasks in batch") + logger.debug(f"Deleting {len(task_ids)} tasks in batch") successful_deletions = 0 failed_deletions = 0 @@ -353,7 +333,7 @@ class Client: else: failed_deletions += 1 - logger.info(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}") + logger.debug(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}") return { "successful": successful_deletions, @@ -372,7 +352,7 @@ class Client: async def get_project(self, project_id: int) -> Optional[Dict[str, Any]]: """获取项目信息""" try: - logger.info(f"Fetching project info: {project_id}") + logger.debug(f"Fetching project info: {project_id}") response = await self.client.get(f"/api/projects/{project_id}") response.raise_for_status() @@ -389,12 +369,12 @@ class Client: async def delete_project(self, project_id: int) -> bool: """删除项目""" try: - logger.info(f"Deleting project: {project_id}") + logger.debug(f"Deleting project: {project_id}") response = await self.client.delete(f"/api/projects/{project_id}") response.raise_for_status() - logger.info(f"Project deleted: {project_id}") + logger.debug(f"Project deleted: {project_id}") return True except httpx.HTTPStatusError as e: @@ -427,7 +407,7 @@ class Client: 创建的存储配置信息,失败返回 None """ try: - logger.info(f"Creating local storage for project {project_id}: {path}") + logger.debug(f"Creating local storage for project {project_id}: {path}") storage_data = { "project": project_id, @@ -450,7 +430,7 @@ class Client: storage = response.json() storage_id = storage.get("id") - logger.info(f"Local storage created successfully, ID: {storage_id}") + logger.debug(f"Local storage created successfully, ID: {storage_id}") return storage except httpx.HTTPStatusError as e: @@ -464,6 +444,6 @@ class Client: """关闭客户端连接""" try: await self.client.aclose() - logger.info("Label Studio client closed") + logger.debug("Label Studio client closed") except Exception as e: logger.error(f"Error while closing Label Studio client: {e}") diff --git a/runtime/datamate-python/app/schemas/label_studio.py b/runtime/datamate-python/app/module/annotation/client/labelstudio/schema.py similarity index 94% rename from runtime/datamate-python/app/schemas/label_studio.py rename to runtime/datamate-python/app/module/annotation/client/labelstudio/schema.py index 00e5417..b308ed0 100644 --- a/runtime/datamate-python/app/schemas/label_studio.py +++ b/runtime/datamate-python/app/module/annotation/client/labelstudio/schema.py @@ -1,7 +1,9 @@ from pydantic import Field -from typing import Dict, Any, Optional, List +from typing import Dict, Any, Optional from datetime import datetime -from .common import BaseResponseModel + +from app.module.shared.schema import BaseResponseModel + class LabelStudioProject(BaseResponseModel): """Label Studio项目模型""" diff --git a/runtime/datamate-python/app/module/annotation/interface/__init__.py b/runtime/datamate-python/app/module/annotation/interface/__init__.py new file mode 100644 index 0000000..47744f6 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/interface/__init__.py @@ -0,0 +1,12 @@ +from fastapi import APIRouter + +from .project import router as project_router +from .task import router as task_router + +router = APIRouter( + prefix="/annotation", + tags = ["annotation"] +) + +router.include_router(project_router) +router.include_router(task_router) \ No newline at end of file diff --git a/runtime/datamate-python/app/module/annotation/interface/project.py b/runtime/datamate-python/app/module/annotation/interface/project.py new file mode 100644 index 0000000..c22a3d9 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/interface/project.py @@ -0,0 +1,353 @@ +from typing import Optional +import math + +from fastapi import APIRouter, Depends, HTTPException, Query +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.session import get_db +from app.module.shared.schema import StandardResponse, PaginatedData +from app.module.dataset import DatasetManagementService +from app.core.logging import get_logger +from app.core.config import settings + +from ..client import LabelStudioClient +from ..service.mapping import DatasetMappingService +from ..schema import ( + DatasetMappingCreateRequest, + DatasetMappingCreateResponse, + DeleteDatasetResponse, + DatasetMappingResponse, +) + +router = APIRouter( + prefix="/project", + tags=["annotation/project"] +) +logger = get_logger(__name__) + +@router.post("/", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201) +async def create_mapping( + request: DatasetMappingCreateRequest, + db: AsyncSession = Depends(get_db) +): + """ + 创建数据集映射 + + 根据指定的DM程序中的数据集,创建Label Studio中的数据集, + 在数据库中记录这一关联关系,返回Label Studio数据集的ID + + 注意:一个数据集可以创建多个标注项目 + """ + try: + dm_client = DatasetManagementService(db) + ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, + token=settings.label_studio_user_token) + service = DatasetMappingService(db) + + logger.info(f"Create dataset mapping request: {request.dataset_id}") + + # 从DM服务获取数据集信息 + dataset_info = await dm_client.get_dataset(request.dataset_id) + if not dataset_info: + raise HTTPException( + status_code=404, + detail=f"Dataset not found in DM service: {request.dataset_id}" + ) + + # 确定数据类型(基于数据集类型) + data_type = "image" # 默认值 + if dataset_info.type and dataset_info.type.code: + type_code = dataset_info.type.code.lower() + if "audio" in type_code: + data_type = "audio" + elif "video" in type_code: + data_type = "video" + elif "text" in type_code: + data_type = "text" + + project_name = f"{dataset_info.name}" + + # 在Label Studio中创建项目 + project_data = await ls_client.create_project( + title=project_name, + description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}", + data_type=data_type + ) + + if not project_data: + raise HTTPException( + status_code=500, + detail="Fail to create Label Studio project." + ) + + project_id = project_data["id"] + + # 配置本地存储:dataset/ + local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}" + storage_result = await ls_client.create_local_storage( + project_id=project_id, + path=local_storage_path, + title="Dataset_BLOB", + use_blob_urls=True, + description=f"Local storage for dataset {dataset_info.name}" + ) + + if not storage_result: + # 本地存储配置失败,记录警告但不中断流程 + logger.warning(f"Failed to configure local storage for project {project_id}") + else: + logger.info(f"Local storage configured for project {project_id}: {local_storage_path}") + + # 创建映射关系,包含项目名称 + mapping = await service.create_mapping( + request, + str(project_id), + project_name + ) + + response_data = DatasetMappingCreateResponse( + id=mapping.id, + labeling_project_id=str(mapping.labeling_project_id), + labeling_project_name=mapping.name or project_name, + message="Dataset mapping created successfully" + ) + + return StandardResponse( + code=201, + message="success", + data=response_data + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error while creating dataset mapping: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + +@router.get("/", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]]) +async def list_mappings( + page: int = Query(1, ge=1, description="页码(从1开始)"), + page_size: int = Query(20, ge=1, le=100, description="每页记录数"), + db: AsyncSession = Depends(get_db) +): + """ + 查询所有映射关系(分页) + + 返回所有有效的数据集映射关系(未被软删除的),支持分页查询 + """ + try: + service = DatasetMappingService(db) + + # 计算 skip + skip = (page - 1) * page_size + + logger.info(f"Listing mappings, page={page}, page_size={page_size}") + + # 获取数据和总数 + mappings, total = await service.get_all_mappings_with_count( + skip=skip, + limit=page_size + ) + + # 计算总页数 + total_pages = math.ceil(total / page_size) if total > 0 else 0 + + # 构造分页响应 + paginated_data = PaginatedData( + page=page, + size=page_size, + total_elements=total, + total_pages=total_pages, + content=mappings + ) + + logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}") + + return StandardResponse( + code=200, + message="success", + data=paginated_data + ) + + except Exception as e: + logger.error(f"Error listing mappings: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + +@router.get("/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse]) +async def get_mapping( + mapping_id: str, + db: AsyncSession = Depends(get_db) +): + """ + 根据 UUID 查询单个映射关系 + """ + try: + service = DatasetMappingService(db) + + logger.info(f"Get mapping: {mapping_id}") + + mapping = await service.get_mapping_by_uuid(mapping_id) + + if not mapping: + raise HTTPException( + status_code=404, + detail=f"Mapping not found: {mapping_id}" + ) + + logger.info(f"Found mapping: {mapping.id}") + + return StandardResponse( + code=200, + message="success", + data=mapping + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting mapping: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + +@router.get("/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]]) +async def get_mappings_by_source( + dataset_id: str, + page: int = Query(1, ge=1, description="页码(从1开始)"), + page_size: int = Query(20, ge=1, le=100, description="每页记录数"), + db: AsyncSession = Depends(get_db) +): + """ + 根据源数据集 ID 查询所有映射关系(分页) + + 返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询 + """ + try: + service = DatasetMappingService(db) + + # 计算 skip + skip = (page - 1) * page_size + + logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}") + + # 获取数据和总数 + mappings, total = await service.get_mappings_by_source_with_count( + dataset_id=dataset_id, + skip=skip, + limit=page_size + ) + + # 计算总页数 + total_pages = math.ceil(total / page_size) if total > 0 else 0 + + # 构造分页响应 + paginated_data = PaginatedData( + page=page, + size=page_size, + total_elements=total, + total_pages=total_pages, + content=mappings + ) + + logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}") + + return StandardResponse( + code=200, + message="success", + data=paginated_data + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting mappings: {e}") + raise HTTPException(status_code=500, detail="Internal server error") + +@router.delete("/", response_model=StandardResponse[DeleteDatasetResponse]) +async def delete_mapping( + m: Optional[str] = Query(None, description="映射UUID"), + proj: Optional[str] = Query(None, description="Label Studio项目ID"), + db: AsyncSession = Depends(get_db) +): + """ + 删除映射关系和对应的 Label Studio 项目 + + 可以通过以下任一方式指定要删除的映射: + - m: 映射UUID + - proj: Label Studio项目ID + - 两者都提供(优先使用 m) + + 此操作会: + 1. 删除 Label Studio 中的项目 + 2. 软删除数据库中的映射记录 + """ + try: + # 至少需要提供一个参数 + if not m and not proj: + raise HTTPException( + status_code=400, + detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided" + ) + + ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, + token=settings.label_studio_user_token) + service = DatasetMappingService(db) + + # 优先使用 mapping_id 查询 + if m: + logger.debug(f"Deleting by mapping UUID: {m}") + mapping = await service.get_mapping_by_uuid(m) + # 如果没有提供 m,使用 proj 查询 + elif proj: + logger.debug(f"Deleting by project ID: {proj}") + mapping = await service.get_mapping_by_labeling_project_id(proj) + else: + mapping = None + + if not mapping: + raise HTTPException( + status_code=404, + detail=f"Mapping either not found or not specified." + ) + + id = mapping.id + labeling_project_id = mapping.labeling_project_id + labeling_project_name = mapping.name + + logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}") + + # 1. 删除 Label Studio 项目 + try: + delete_success = await ls_client.delete_project(int(labeling_project_id)) + if delete_success: + logger.debug(f"Successfully deleted Label Studio project: {labeling_project_id}") + else: + logger.warning(f"Failed to delete Label Studio project or project not found: {labeling_project_id}") + except Exception as e: + logger.error(f"Error deleting Label Studio project: {e}") + # 继续执行,即使 Label Studio 项目删除失败也要删除映射记录 + + # 2. 软删除映射记录 + soft_delete_success = await service.soft_delete_mapping(id) + + if not soft_delete_success: + raise HTTPException( + status_code=500, + detail="Failed to delete mapping record" + ) + + logger.info(f"Successfully deleted mapping: {id}, Label Studio project: {labeling_project_id}") + + return StandardResponse( + code=200, + message="success", + data=DeleteDatasetResponse( + id=id, + status="success", + message=f"Successfully deleted mapping and Label Studio project '{labeling_project_name}'" + ) + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error deleting mapping: {e}") + raise HTTPException(status_code=500, detail="Internal server error") diff --git a/runtime/datamate-python/app/api/project/sync.py b/runtime/datamate-python/app/module/annotation/interface/task.py similarity index 70% rename from runtime/datamate-python/app/api/project/sync.py rename to runtime/datamate-python/app/module/annotation/interface/task.py index f0d35aa..2f472f5 100644 --- a/runtime/datamate-python/app/api/project/sync.py +++ b/runtime/datamate-python/app/module/annotation/interface/task.py @@ -2,24 +2,29 @@ from fastapi import APIRouter, Depends, HTTPException, Query from sqlalchemy.ext.asyncio import AsyncSession from typing import List, Optional -from app.db.database import get_db -from app.services.dataset_mapping_service import DatasetMappingService -from app.services.sync_service import SyncService -from app.infrastructure import DatamateClient, LabelStudioClient -from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError -from app.schemas.dataset_mapping import ( - DatasetMappingResponse, +from app.db.session import get_db +from app.module.shared.schema import StandardResponse +from app.module.dataset import DatasetManagementService +from app.core.logging import get_logger +from app.core.config import settings +from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError + +from ..client import LabelStudioClient +from ..service.sync import SyncService +from ..service.mapping import DatasetMappingService +from ..schema import ( SyncDatasetRequest, SyncDatasetResponse, ) -from app.schemas import StandardResponse -from app.core.logging import get_logger -from app.core.config import settings -from . import project_router + +router = APIRouter( + prefix="/task", + tags=["annotation/task"] +) logger = get_logger(__name__) -@project_router.post("/sync", response_model=StandardResponse[SyncDatasetResponse]) +@router.post("/sync", response_model=StandardResponse[SyncDatasetResponse]) async def sync_dataset_content( request: SyncDatasetRequest, db: AsyncSession = Depends(get_db) @@ -33,22 +38,22 @@ async def sync_dataset_content( try: ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, token=settings.label_studio_user_token) - dm_client = DatamateClient(db) + dm_client = DatasetManagementService(db) mapping_service = DatasetMappingService(db) sync_service = SyncService(dm_client, ls_client, mapping_service) - logger.info(f"Sync dataset content request: mapping_id={request.mapping_id}") - - # 根据 mapping_id 获取映射关系 - mapping = await mapping_service.get_mapping_by_uuid(request.mapping_id) + logger.info(f"Sync dataset content request: mapping_id={request.id}") + + # request.id 合法性校验 + mapping = await mapping_service.get_mapping_by_uuid(request.id) if not mapping: raise HTTPException( status_code=404, - detail=f"Mapping not found: {request.mapping_id}" + detail=f"Mapping not found: {request.id}" ) # 执行同步(使用映射中的源数据集UUID) - result = await sync_service.sync_dataset_files(request.mapping_id, request.batch_size) + result = await sync_service.sync_dataset_files(request.id, request.batch_size) logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files") diff --git a/runtime/datamate-python/app/module/annotation/schema/__init__.py b/runtime/datamate-python/app/module/annotation/schema/__init__.py new file mode 100644 index 0000000..289a327 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/schema/__init__.py @@ -0,0 +1,24 @@ +from .mapping import ( + DatasetMappingBase, + DatasetMappingCreateRequest, + DatasetMappingCreateResponse, + DatasetMappingUpdateRequest, + DatasetMappingResponse, + DeleteDatasetResponse +) + +from .sync import ( + SyncDatasetRequest, + SyncDatasetResponse +) + +__all__ = [ + "DatasetMappingBase", + "DatasetMappingCreateRequest", + "DatasetMappingCreateResponse", + "DatasetMappingUpdateRequest", + "DatasetMappingResponse", + "SyncDatasetRequest", + "SyncDatasetResponse", + "DeleteDatasetResponse" +] \ No newline at end of file diff --git a/runtime/datamate-python/app/module/annotation/schema/mapping.py b/runtime/datamate-python/app/module/annotation/schema/mapping.py new file mode 100644 index 0000000..d0c80d8 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/schema/mapping.py @@ -0,0 +1,42 @@ +from pydantic import Field +from typing import Optional +from datetime import datetime + +from app.module.shared.schema import BaseResponseModel + +class DatasetMappingBase(BaseResponseModel): + """数据集映射 基础模型""" + dataset_id: str = Field(..., description="源数据集ID") + +class DatasetMappingCreateRequest(DatasetMappingBase): + """数据集映射 创建 请求模型""" + pass + +class DatasetMappingCreateResponse(BaseResponseModel): + """数据集映射 创建 响应模型""" + id: str = Field(..., description="映射UUID") + labeling_project_id: str = Field(..., description="Label Studio项目ID") + labeling_project_name: str = Field(..., description="Label Studio项目名称") + message: str = Field(..., description="响应消息") + +class DatasetMappingUpdateRequest(BaseResponseModel): + """数据集映射 更新 请求模型""" + dataset_id: Optional[str] = Field(None, description="源数据集ID") + +class DatasetMappingResponse(DatasetMappingBase): + """数据集映射 查询 响应模型""" + id: str = Field(..., description="映射UUID") + labeling_project_id: str = Field(..., description="标注项目ID") + name: Optional[str] = Field(None, description="标注项目名称") + created_at: datetime = Field(..., description="创建时间") + deleted_at: Optional[datetime] = Field(None, description="删除时间") + + class Config: + from_attributes = True + populate_by_name = True + +class DeleteDatasetResponse(BaseResponseModel): + """删除数据集响应模型""" + id: str = Field(..., description="映射UUID") + status: str = Field(..., description="删除状态") + message: str = Field(..., description="响应消息") \ No newline at end of file diff --git a/runtime/datamate-python/app/module/annotation/schema/sync.py b/runtime/datamate-python/app/module/annotation/schema/sync.py new file mode 100644 index 0000000..492d372 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/schema/sync.py @@ -0,0 +1,19 @@ +from pydantic import Field +from typing import Optional +from datetime import datetime + +from app.module.shared.schema import BaseResponseModel + + +class SyncDatasetRequest(BaseResponseModel): + """同步数据集请求模型""" + id: str = Field(..., description="映射ID(mapping UUID)") + batch_size: int = Field(50, ge=1, le=100, description="批处理大小") + +class SyncDatasetResponse(BaseResponseModel): + """同步数据集响应模型""" + id: str = Field(..., description="映射UUID") + status: str = Field(..., description="同步状态") + synced_files: int = Field(..., description="已同步文件数量") + total_files: int = Field(0, description="总文件数量") + message: str = Field(..., description="响应消息") diff --git a/runtime/datamate-python/app/module/annotation/service/__init__.py b/runtime/datamate-python/app/module/annotation/service/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runtime/datamate-python/app/services/dataset_mapping_service.py b/runtime/datamate-python/app/module/annotation/service/mapping.py similarity index 88% rename from runtime/datamate-python/app/services/dataset_mapping_service.py rename to runtime/datamate-python/app/module/annotation/service/mapping.py index 8e03ac5..fd8f4c3 100644 --- a/runtime/datamate-python/app/services/dataset_mapping_service.py +++ b/runtime/datamate-python/app/module/annotation/service/mapping.py @@ -5,13 +5,13 @@ from typing import Optional, List, Tuple from datetime import datetime import uuid -from app.models.dm.labeling_project import LabelingProject -from app.schemas.dataset_mapping import ( +from app.core.logging import get_logger +from app.db.models import LabelingProject +from app.module.annotation.schema import ( DatasetMappingCreateRequest, DatasetMappingUpdateRequest, DatasetMappingResponse ) -from app.core.logging import get_logger logger = get_logger(__name__) @@ -24,24 +24,24 @@ class DatasetMappingService: async def create_mapping( self, mapping_data: DatasetMappingCreateRequest, - labelling_project_id: str, - labelling_project_name: str + labeling_project_id: str, + labeling_project_name: str ) -> DatasetMappingResponse: """创建数据集映射""" - logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labelling_project_id}") + logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labeling_project_id}") db_mapping = LabelingProject( - mapping_id=str(uuid.uuid4()), + id=str(uuid.uuid4()), dataset_id=mapping_data.dataset_id, - labelling_project_id=labelling_project_id, - labelling_project_name=labelling_project_name + labeling_project_id=labeling_project_id, + name=labeling_project_name ) - + self.db.add(db_mapping) await self.db.commit() await self.db.refresh(db_mapping) - logger.info(f"Mapping created: {db_mapping.id}") + logger.debug(f"Mapping created: {db_mapping.id}") return DatasetMappingResponse.model_validate(db_mapping) async def get_mapping_by_source_uuid( @@ -89,16 +89,16 @@ class DatasetMappingService: logger.debug(f"Found {len(mappings)} mappings") return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings] - async def get_mapping_by_labelling_project_id( + async def get_mapping_by_labeling_project_id( self, - labelling_project_id: str + labeling_project_id: str ) -> Optional[DatasetMappingResponse]: """根据Label Studio项目ID获取映射""" - logger.debug(f"Get mapping by Label Studio project id: {labelling_project_id}") + logger.debug(f"Get mapping by Label Studio project id: {labeling_project_id}") result = await self.db.execute( select(LabelingProject).where( - LabelingProject.labeling_project_id == labelling_project_id, + LabelingProject.labeling_project_id == labeling_project_id, LabelingProject.deleted_at.is_(None) ) ) @@ -108,7 +108,7 @@ class DatasetMappingService: logger.debug(f"Found mapping: {mapping.mapping_id}") return DatasetMappingResponse.model_validate(mapping) - logger.debug(f"No mapping found for Label Studio project id: {labelling_project_id}") + logger.debug(f"No mapping found for Label Studio project id: {labeling_project_id}") return None async def get_mapping_by_uuid(self, mapping_id: str) -> Optional[DatasetMappingResponse]: @@ -156,21 +156,6 @@ class DatasetMappingService: return await self.get_mapping_by_uuid(mapping_id) return None - async def update_last_updated_at(self, mapping_id: str) -> bool: - """更新最后更新时间""" - logger.debug(f"Update mapping last updated at: {mapping_id}") - - result = await self.db.execute( - update(LabelingProject) - .where( - LabelingProject.id == mapping_id, - LabelingProject.deleted_at.is_(None) - ) - .values(last_updated_at=datetime.utcnow()) - ) - await self.db.commit() - return result.rowcount > 0 - async def soft_delete_mapping(self, mapping_id: str) -> bool: """软删除映射""" logger.info(f"Soft delete mapping: {mapping_id}") diff --git a/runtime/datamate-python/app/services/sync_service.py b/runtime/datamate-python/app/module/annotation/service/sync.py similarity index 78% rename from runtime/datamate-python/app/services/sync_service.py rename to runtime/datamate-python/app/module/annotation/service/sync.py index bda8345..e2610c5 100644 --- a/runtime/datamate-python/app/services/sync_service.py +++ b/runtime/datamate-python/app/module/annotation/service/sync.py @@ -1,10 +1,13 @@ from typing import Optional, List, Dict, Any, Tuple -from app.infrastructure import LabelStudioClient, DatamateClient -from app.services.dataset_mapping_service import DatasetMappingService -from app.schemas.dataset_mapping import SyncDatasetResponse +from app.module.dataset import DatasetManagementService + from app.core.logging import get_logger from app.core.config import settings -from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError +from app.exception import NoDatasetInfoFoundError + +from ..client import LabelStudioClient +from ..schema import SyncDatasetResponse +from ..service.mapping import DatasetMappingService logger = get_logger(__name__) @@ -13,7 +16,7 @@ class SyncService: def __init__( self, - dm_client: DatamateClient, + dm_client: DatasetManagementService, ls_client: LabelStudioClient, mapping_service: DatasetMappingService ): @@ -44,7 +47,7 @@ class SyncService: project_id: Label Studio项目ID Returns: - dm_file_id到task_id的映射字典 + file_id到task_id的映射字典 """ try: logger.info(f"Fetching existing task mappings for project {project_id} (page_size={settings.ls_task_page_size})") @@ -59,24 +62,29 @@ class SyncService: page=None, # 不指定page,获取所有任务 page_size=page_size ) + + logger.info(f"Fetched tasks result: {result}") if not result: logger.warning(f"Failed to fetch tasks for project {project_id}") return {} + logger.info(f"Successfully fetched tasks for project {project_id}") + all_tasks = result.get("tasks", []) # 遍历所有任务,构建映射 for task in all_tasks: - # 检查任务的meta字段中是否有dm_file_id - meta = task.get('meta') - if meta: - dm_file_id = meta.get('dm_file_id') - if dm_file_id: - task_id = task.get('id') - if task_id: - dm_file_to_task_mapping[str(dm_file_id)] = task_id - + # logger.debug(task) + try: + file_id = task.get('data', {}).get('file_id') + task_id = task.get('id') + + dm_file_to_task_mapping[str(file_id)] = task_id + except Exception as e: + logger.error(f"Error processing task {task.get('id')}: {e}") + continue + logger.debug(dm_file_to_task_mapping) logger.info(f"Found {len(dm_file_to_task_mapping)} existing task mappings") return dm_file_to_task_mapping @@ -86,22 +94,22 @@ class SyncService: async def sync_dataset_files( self, - mapping_id: str, + id: str, batch_size: int = 50 ) -> SyncDatasetResponse: """同步数据集文件到Label Studio""" - logger.info(f"Start syncing dataset by mapping: {mapping_id}") + logger.info(f"Start syncing dataset by mapping: {id}") # 获取映射关系 - mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id) + mapping = await self.mapping_service.get_mapping_by_uuid(id) if not mapping: - logger.error(f"Dataset mapping not found: {mapping_id}") + logger.error(f"Dataset mapping not found: {id}") return SyncDatasetResponse( - mapping_id="", + id="", status="error", synced_files=0, total_files=0, - message=f"Dataset mapping not found: {mapping_id}" + message=f"Dataset mapping not found: {id}" ) try: @@ -118,20 +126,17 @@ class SyncService: logger.info(f"Total files in dataset: {total_files}") # 获取Label Studio中已存在的DM文件ID到任务ID的映射 - existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labelling_project_id) - existing_dm_file_ids = set(existing_dm_file_mapping.keys()) - logger.info(f"{len(existing_dm_file_ids)} tasks already exist in Label Studio") + existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labeling_project_id) + existing_file_ids = set(existing_dm_file_mapping.keys()) + logger.info(f"{len(existing_file_ids)} tasks already exist in Label Studio") # 收集DM中当前存在的所有文件ID - current_dm_file_ids = set() - - # 分页获取并同步文件 + current_file_ids = set() while True: files_response = await self.dm_client.get_dataset_files( mapping.dataset_id, page=page, size=batch_size, - status="COMPLETED" # 只同步已完成的文件 ) if not files_response or not files_response.content: @@ -147,19 +152,18 @@ class SyncService: for file_info in files_response.content: # 记录当前DM中存在的文件ID - current_dm_file_ids.add(str(file_info.id)) + current_file_ids.add(str(file_info.id)) # 检查文件是否已存在 - if str(file_info.id) in existing_dm_file_ids: + if str(file_info.id) in existing_file_ids: existing_files_count += 1 logger.debug(f"Skip existing file: {file_info.originalName} (ID: {file_info.id})") continue new_files_count += 1 - # 确定数据类型 data_type = self.determine_data_type(file_info.fileType) - + # 替换文件路径前缀:只替换开头的前缀,不影响路径中间可能出现的相同字符串 file_path = file_info.filePath.removeprefix(settings.dm_file_path_prefix) file_path = settings.label_studio_file_path_prefix + file_path @@ -167,14 +171,11 @@ class SyncService: # 构造任务数据 task_data = { "data": { - data_type: file_path - }, - "meta": { - "file_size": file_info.size, - "file_type": file_info.fileType, - "dm_dataset_id": mapping.dataset_id, - "dm_file_id": file_info.id, + f"{data_type}": file_path, + "file_path": file_info.filePath, + "file_id": file_info.id, "original_name": file_info.originalName, + "dataset_id": mapping.dataset_id, } } tasks.append(task_data) @@ -184,7 +185,7 @@ class SyncService: # 批量创建Label Studio任务 if tasks: batch_result = await self.ls_client.create_tasks_batch( - mapping.labelling_project_id, + mapping.labeling_project_id, tasks ) @@ -196,7 +197,7 @@ class SyncService: # 如果批量创建失败,尝试单个创建 for task_data in tasks: task_result = await self.ls_client.create_task( - mapping.labelling_project_id, + mapping.labeling_project_id, task_data["data"], task_data.get("meta") ) @@ -210,10 +211,10 @@ class SyncService: # 清理在DM中不存在但在Label Studio中存在的任务 tasks_to_delete = [] - for dm_file_id, task_id in existing_dm_file_mapping.items(): - if dm_file_id not in current_dm_file_ids: + for file_id, task_id in existing_dm_file_mapping.items(): + if file_id not in current_file_ids: tasks_to_delete.append(task_id) - logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {dm_file_id})") + logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {file_id})") if tasks_to_delete: logger.info(f"Deleting {len(tasks_to_delete)} tasks not present in DM") @@ -223,13 +224,10 @@ class SyncService: else: logger.info("No tasks to delete") - # 更新映射的最后更新时间 - await self.mapping_service.update_last_updated_at(mapping.mapping_id) - logger.info(f"Sync completed: total_files={total_files}, created={synced_files}, deleted={deleted_tasks}") return SyncDatasetResponse( - mapping_id=mapping.mapping_id, + id=mapping.id, status="success", synced_files=synced_files, total_files=total_files, @@ -239,7 +237,7 @@ class SyncService: except Exception as e: logger.error(f"Error while syncing dataset: {e}") return SyncDatasetResponse( - mapping_id=mapping.mapping_id, + id=mapping.id, status="error", synced_files=0, total_files=0, @@ -259,13 +257,12 @@ class SyncService: dataset_info = await self.dm_client.get_dataset(dataset_id) # 获取Label Studio项目任务数量 - tasks_info = await self.ls_client.get_project_tasks(mapping.labelling_project_id) + tasks_info = await self.ls_client.get_project_tasks(mapping.labeling_project_id) return { - "mapping_id": mapping.mapping_id, + "id": mapping.id, "dataset_id": dataset_id, - "labelling_project_id": mapping.labelling_project_id, - "last_updated_at": mapping.last_updated_at, + "labeling_project_id": mapping.labeling_project_id, "dm_total_files": dataset_info.fileCount if dataset_info else 0, "ls_total_tasks": tasks_info.get("count", 0) if tasks_info else 0, "sync_ratio": ( diff --git a/runtime/datamate-python/app/module/dataset/__init__.py b/runtime/datamate-python/app/module/dataset/__init__.py new file mode 100644 index 0000000..330cd7f --- /dev/null +++ b/runtime/datamate-python/app/module/dataset/__init__.py @@ -0,0 +1,3 @@ +from .service import DatasetManagementService + +__all__ = ["DatasetManagementService"] \ No newline at end of file diff --git a/runtime/datamate-python/app/module/dataset/schema/__init__.py b/runtime/datamate-python/app/module/dataset/schema/__init__.py new file mode 100644 index 0000000..b64b5bf --- /dev/null +++ b/runtime/datamate-python/app/module/dataset/schema/__init__.py @@ -0,0 +1,16 @@ +from .dataset_file import ( + DatasetFileResponse, + PagedDatasetFileResponse, +) + +from .dataset import ( + DatasetResponse, + DatasetTypeResponse, +) + +__all__ = [ + "DatasetResponse", + "DatasetFileResponse", + "PagedDatasetFileResponse", + "DatasetTypeResponse", +] \ No newline at end of file diff --git a/runtime/datamate-python/app/schemas/dm_service.py b/runtime/datamate-python/app/module/dataset/schema/dataset.py similarity index 57% rename from runtime/datamate-python/app/schemas/dm_service.py rename to runtime/datamate-python/app/module/dataset/schema/dataset.py index aca3c40..8c35e56 100644 --- a/runtime/datamate-python/app/schemas/dm_service.py +++ b/runtime/datamate-python/app/module/dataset/schema/dataset.py @@ -2,28 +2,6 @@ from pydantic import BaseModel, Field from typing import List, Optional, Dict, Any from datetime import datetime -class DatasetFileResponse(BaseModel): - """DM服务数据集文件响应模型""" - id: str = Field(..., description="文件ID") - fileName: str = Field(..., description="文件名") - fileType: str = Field(..., description="文件类型") - filePath: str = Field(..., description="文件路径") - originalName: Optional[str] = Field(None, description="原始文件名") - size: Optional[int] = Field(None, description="文件大小(字节)") - status: Optional[str] = Field(None, description="文件状态") - uploadedAt: Optional[datetime] = Field(None, description="上传时间") - description: Optional[str] = Field(None, description="文件描述") - uploadedBy: Optional[str] = Field(None, description="上传者") - lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间") - -class PagedDatasetFileResponse(BaseModel): - """DM服务分页文件响应模型""" - content: List[DatasetFileResponse] = Field(..., description="文件列表") - totalElements: int = Field(..., description="总元素数") - totalPages: int = Field(..., description="总页数") - page: int = Field(..., description="当前页码") - size: int = Field(..., description="每页大小") - class DatasetTypeResponse(BaseModel): """数据集类型响应模型""" code: str = Field(..., description="类型编码") diff --git a/runtime/datamate-python/app/module/dataset/schema/dataset_file.py b/runtime/datamate-python/app/module/dataset/schema/dataset_file.py new file mode 100644 index 0000000..95c457f --- /dev/null +++ b/runtime/datamate-python/app/module/dataset/schema/dataset_file.py @@ -0,0 +1,26 @@ +from pydantic import BaseModel, Field +from typing import List, Optional, Dict, Any +from datetime import datetime + +class DatasetFileResponse(BaseModel): + """DM服务数据集文件响应模型""" + id: str = Field(..., description="文件ID") + fileName: str = Field(..., description="文件名") + fileType: str = Field(..., description="文件类型") + filePath: str = Field(..., description="文件路径") + originalName: Optional[str] = Field(None, description="原始文件名") + size: Optional[int] = Field(None, description="文件大小(字节)") + status: Optional[str] = Field(None, description="文件状态") + uploadedAt: Optional[datetime] = Field(None, description="上传时间") + description: Optional[str] = Field(None, description="文件描述") + uploadedBy: Optional[str] = Field(None, description="上传者") + lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间") + +class PagedDatasetFileResponse(BaseModel): + """DM服务分页文件响应模型""" + content: List[DatasetFileResponse] = Field(..., description="文件列表") + totalElements: int = Field(..., description="总元素数") + totalPages: int = Field(..., description="总页数") + page: int = Field(..., description="当前页码") + size: int = Field(..., description="每页大小") + diff --git a/runtime/datamate-python/app/module/dataset/service/__init__.py b/runtime/datamate-python/app/module/dataset/service/__init__.py new file mode 100644 index 0000000..688cf26 --- /dev/null +++ b/runtime/datamate-python/app/module/dataset/service/__init__.py @@ -0,0 +1,3 @@ +from .service import Service as DatasetManagementService + +__all__ = ["DatasetManagementService"] \ No newline at end of file diff --git a/runtime/datamate-python/app/infrastructure/datamate.py b/runtime/datamate-python/app/module/dataset/service/service.py similarity index 96% rename from runtime/datamate-python/app/infrastructure/datamate.py rename to runtime/datamate-python/app/module/dataset/service/service.py index d3082ad..6acc38e 100644 --- a/runtime/datamate-python/app/infrastructure/datamate.py +++ b/runtime/datamate-python/app/module/dataset/service/service.py @@ -2,15 +2,16 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy import func from typing import Optional + from app.core.config import settings from app.core.logging import get_logger -from app.schemas.dm_service import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse -from app.models.dm.dataset import Dataset -from app.models.dm.dataset_files import DatasetFiles +from app.db.models import Dataset, DatasetFiles + +from ..schema import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse logger = get_logger(__name__) -class Client: +class Service: """数据管理服务客户端 - 直接访问数据库""" def __init__(self, db: AsyncSession): diff --git a/runtime/datamate-python/app/module/management/__init__.py b/runtime/datamate-python/app/module/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runtime/datamate-python/app/module/management/api/__init__.py b/runtime/datamate-python/app/module/management/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runtime/datamate-python/app/api/system.py b/runtime/datamate-python/app/module/management/api/system.py similarity index 100% rename from runtime/datamate-python/app/api/system.py rename to runtime/datamate-python/app/module/management/api/system.py diff --git a/runtime/datamate-python/app/module/management/service/__init__.py b/runtime/datamate-python/app/module/management/service/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runtime/datamate-python/app/module/shared/__init__.py b/runtime/datamate-python/app/module/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runtime/datamate-python/app/module/shared/schema/__init__.py b/runtime/datamate-python/app/module/shared/schema/__init__.py new file mode 100644 index 0000000..88cf3b0 --- /dev/null +++ b/runtime/datamate-python/app/module/shared/schema/__init__.py @@ -0,0 +1,11 @@ +from .common import ( + BaseResponseModel, + StandardResponse, + PaginatedData +) + +__all__ = [ + "BaseResponseModel", + "StandardResponse", + "PaginatedData" +] \ No newline at end of file diff --git a/runtime/datamate-python/app/schemas/common.py b/runtime/datamate-python/app/module/shared/schema/common.py similarity index 99% rename from runtime/datamate-python/app/schemas/common.py rename to runtime/datamate-python/app/module/shared/schema/common.py index 00f73f5..b0681d1 100644 --- a/runtime/datamate-python/app/schemas/common.py +++ b/runtime/datamate-python/app/module/shared/schema/common.py @@ -42,7 +42,6 @@ class StandardResponse(BaseResponseModel, Generic[T]): } } - class PaginatedData(BaseResponseModel, Generic[T]): """分页数据容器""" page: int = Field(..., description="当前页码(从1开始)") diff --git a/runtime/datamate-python/app/schemas/__init__.py b/runtime/datamate-python/app/schemas/__init__.py deleted file mode 100644 index 7941ab7..0000000 --- a/runtime/datamate-python/app/schemas/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# app/schemas/__init__.py - -from .common import * -from .dataset_mapping import * -from .dm_service import * -from .label_studio import * - -__all__ = [ - # Common schemas - "StandardResponse", - - # Dataset Mapping schemas - "DatasetMappingBase", - "DatasetMappingCreateRequest", - "DatasetMappingUpdateRequest", - "DatasetMappingResponse", - "DatasetMappingCreateResponse", - "SyncDatasetResponse", - "DeleteDatasetResponse", - - # DM Service schemas - "DatasetFileResponse", - "PagedDatasetFileResponse", - "DatasetResponse", - - # Label Studio schemas - "LabelStudioProject", - "LabelStudioTask" -] \ No newline at end of file diff --git a/runtime/datamate-python/app/schemas/dataset_mapping.py b/runtime/datamate-python/app/schemas/dataset_mapping.py deleted file mode 100644 index de6d85d..0000000 --- a/runtime/datamate-python/app/schemas/dataset_mapping.py +++ /dev/null @@ -1,56 +0,0 @@ -from pydantic import Field -from typing import Optional -from datetime import datetime - -from .common import BaseResponseModel - -class DatasetMappingBase(BaseResponseModel): - """数据集映射 基础模型""" - dataset_id: str = Field(..., description="源数据集ID") - -class DatasetMappingCreateRequest(DatasetMappingBase): - """数据集映射 创建 请求模型""" - pass - -class DatasetMappingCreateResponse(BaseResponseModel): - """数据集映射 创建 响应模型""" - mapping_id: str = Field(..., description="映射UUID") - labelling_project_id: str = Field(..., description="Label Studio项目ID") - labelling_project_name: str = Field(..., description="Label Studio项目名称") - message: str = Field(..., description="响应消息") - -class DatasetMappingUpdateRequest(BaseResponseModel): - """数据集映射 更新 请求模型""" - dataset_id: Optional[str] = Field(None, description="源数据集ID") - -class DatasetMappingResponse(DatasetMappingBase): - """数据集映射 查询 响应模型""" - mapping_id: str = Field(..., description="映射UUID") - labelling_project_id: str = Field(..., description="标注项目ID") - labelling_project_name: Optional[str] = Field(None, description="标注项目名称") - created_at: datetime = Field(..., description="创建时间") - last_updated_at: datetime = Field(..., description="最后更新时间") - deleted_at: Optional[datetime] = Field(None, description="删除时间") - - class Config: - from_attributes = True - populate_by_name = True - -class SyncDatasetRequest(BaseResponseModel): - """同步数据集请求模型""" - mapping_id: str = Field(..., description="映射ID(mapping UUID)") - batch_size: int = Field(50, ge=1, le=100, description="批处理大小") - -class SyncDatasetResponse(BaseResponseModel): - """同步数据集响应模型""" - mapping_id: str = Field(..., description="映射UUID") - status: str = Field(..., description="同步状态") - synced_files: int = Field(..., description="已同步文件数量") - total_files: int = Field(0, description="总文件数量") - message: str = Field(..., description="响应消息") - -class DeleteDatasetResponse(BaseResponseModel): - """删除数据集响应模型""" - mapping_id: str = Field(..., description="映射UUID") - status: str = Field(..., description="删除状态") - message: str = Field(..., description="响应消息") \ No newline at end of file diff --git a/runtime/datamate-python/app/services/__init__.py b/runtime/datamate-python/app/services/__init__.py deleted file mode 100644 index 7818db1..0000000 --- a/runtime/datamate-python/app/services/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# app/services/__init__.py - -from .dataset_mapping_service import DatasetMappingService -from .sync_service import SyncService - -__all__ = ["DatasetMappingService", "SyncService"] \ No newline at end of file diff --git a/scripts/db/data-annotation-init.sql b/scripts/db/data-annotation-init.sql new file mode 100644 index 0000000..09de2c6 --- /dev/null +++ b/scripts/db/data-annotation-init.sql @@ -0,0 +1,19 @@ +CREATE TABLE t_dm_annotation_templates ( + id VARCHAR(36) PRIMARY KEY, + name VARCHAR(32) NOT NULL COMMENT '模板名称', + description VARCHAR(255) COMMENT '模板描述', + configuration JSON, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)' +); + +CREATE TABLE t_dm_labeling_projects ( + id VARCHAR(36) PRIMARY KEY, + dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID', + name VARCHAR(32) NOT NULL COMMENT '项目名称', + labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID', + configuration JSON, + progress JSON, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)' +); \ No newline at end of file