You've already forked DataMate
refactor: Reorganize datamate-python (#34)
refactor: Reorganize datamate-python (previously label-studio-adapter) into a DDD style structure.
This commit is contained in:
50
deployment/docker/label-studio/docker-compose.yml
Normal file
50
deployment/docker/label-studio/docker-compose.yml
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
services:
|
||||||
|
|
||||||
|
app:
|
||||||
|
stdin_open: true
|
||||||
|
tty: true
|
||||||
|
image: heartexlabs/label-studio:latest
|
||||||
|
restart: unless-stopped
|
||||||
|
user: root
|
||||||
|
expose:
|
||||||
|
- "8000"
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
depends_on:
|
||||||
|
- db
|
||||||
|
environment:
|
||||||
|
- DJANGO_DB=default
|
||||||
|
- POSTGRE_NAME=postgres
|
||||||
|
- POSTGRE_USER=postgres
|
||||||
|
- POSTGRE_PASSWORD=
|
||||||
|
- POSTGRE_PORT=5432
|
||||||
|
- POSTGRE_HOST=db
|
||||||
|
- LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-}
|
||||||
|
- LOCAL_FILES_SERVING_ENABLED=true
|
||||||
|
- LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local
|
||||||
|
- USE_USERNAME_FOR_LOGIN=true
|
||||||
|
- LABEL_STUDIO_USERNAME=admin@huawei.com
|
||||||
|
- LABEL_STUDIO_PASSWORD=admin1234
|
||||||
|
- LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true
|
||||||
|
- LABEL_STUDIO_USER_TOKEN=abc123abc123
|
||||||
|
- LOG_LEVEL=INFO
|
||||||
|
volumes:
|
||||||
|
- label-studio-data:/label-studio/data:rw
|
||||||
|
- dataset_volume:/label-studio/local:rw
|
||||||
|
command: label-studio-uwsgi
|
||||||
|
|
||||||
|
db:
|
||||||
|
image: pgautoupgrade/pgautoupgrade:13-alpine
|
||||||
|
hostname: db
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
- POSTGRES_HOST_AUTH_METHOD=trust
|
||||||
|
- POSTGRES_USER=postgres
|
||||||
|
volumes:
|
||||||
|
- label-studio-db:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
label-studio-data:
|
||||||
|
label-studio-db:
|
||||||
|
dataset_volume:
|
||||||
|
name: datamate-dataset-volume
|
||||||
@@ -66,37 +66,6 @@ MYSQL_USER=label_studio_user
|
|||||||
MYSQL_PASSWORD=user_password
|
MYSQL_PASSWORD=user_password
|
||||||
MYSQL_DATABASE=label_studio_adapter
|
MYSQL_DATABASE=label_studio_adapter
|
||||||
|
|
||||||
# =========================
|
|
||||||
# Label Studio 数据库配置 (PostgreSQL)
|
|
||||||
# =========================
|
|
||||||
# 仅在使用 docker-compose.label-studio.yml 启动 Label Studio 时需要配置
|
|
||||||
POSTGRES_HOST=label-studio-db
|
|
||||||
POSTGRES_PORT=5432
|
|
||||||
POSTGRES_USER=labelstudio
|
|
||||||
POSTGRES_PASSWORD=labelstudio@4321
|
|
||||||
POSTGRES_DATABASE=labelstudio
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# SQLite 数据库配置(兜底选项)
|
|
||||||
# =========================
|
|
||||||
# 优先级3:如果没有配置 MySQL/PostgreSQL,将使用 SQLite
|
|
||||||
SQLITE_PATH=./data/labelstudio_adapter.db
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# 可选:直接指定数据库 URL
|
|
||||||
# =========================
|
|
||||||
# 如果设置了此项,将覆盖上面的 MySQL/PostgreSQL/SQLite 配置
|
|
||||||
# DATABASE_URL=postgresql+asyncpg://user:password@host:port/database
|
|
||||||
|
|
||||||
# =========================
|
|
||||||
# 安全配置
|
|
||||||
# =========================
|
|
||||||
# 密钥(生产环境务必修改)
|
|
||||||
SECRET_KEY=your-secret-key-change-this-in-production
|
|
||||||
|
|
||||||
# Token 过期时间(分钟)
|
|
||||||
ACCESS_TOKEN_EXPIRE_MINUTES=30
|
|
||||||
|
|
||||||
# =========================
|
# =========================
|
||||||
# CORS 配置
|
# CORS 配置
|
||||||
# =========================
|
# =========================
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
# app/__init__.py
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
"""
|
|
||||||
API 路由模块
|
|
||||||
|
|
||||||
集中管理所有API路由的组织结构
|
|
||||||
"""
|
|
||||||
from fastapi import APIRouter
|
|
||||||
|
|
||||||
from .system import router as system_router
|
|
||||||
from .project import project_router
|
|
||||||
|
|
||||||
# 创建主API路由器
|
|
||||||
api_router = APIRouter()
|
|
||||||
|
|
||||||
# 注册到主路由器
|
|
||||||
api_router.include_router(system_router, tags=["系统"])
|
|
||||||
api_router.include_router(project_router, prefix="/project", tags=["项目"])
|
|
||||||
|
|
||||||
# 导出路由器供 main.py 使用
|
|
||||||
__all__ = ["api_router"]
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
"""
|
|
||||||
标注工程相关API路由模块
|
|
||||||
"""
|
|
||||||
from fastapi import APIRouter
|
|
||||||
|
|
||||||
project_router = APIRouter()
|
|
||||||
|
|
||||||
from . import create
|
|
||||||
from . import sync
|
|
||||||
from . import list
|
|
||||||
from . import delete
|
|
||||||
@@ -1,130 +0,0 @@
|
|||||||
from fastapi import APIRouter, Depends, HTTPException
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from app.db.database import get_db
|
|
||||||
from app.services.dataset_mapping_service import DatasetMappingService
|
|
||||||
from app.infrastructure import DatamateClient, LabelStudioClient
|
|
||||||
from app.schemas.dataset_mapping import (
|
|
||||||
DatasetMappingCreateRequest,
|
|
||||||
DatasetMappingCreateResponse,
|
|
||||||
)
|
|
||||||
from app.schemas import StandardResponse
|
|
||||||
from app.core.logging import get_logger
|
|
||||||
from app.core.config import settings
|
|
||||||
from . import project_router
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
@project_router.post("/create", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
|
|
||||||
async def create_dataset_mapping(
|
|
||||||
request: DatasetMappingCreateRequest,
|
|
||||||
db: AsyncSession = Depends(get_db)
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
创建数据集映射
|
|
||||||
|
|
||||||
根据指定的DM程序中的数据集,创建Label Studio中的数据集,
|
|
||||||
在数据库中记录这一关联关系,返回Label Studio数据集的ID
|
|
||||||
|
|
||||||
注意:一个数据集可以创建多个标注项目
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
dm_client = DatamateClient(db)
|
|
||||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
|
||||||
token=settings.label_studio_user_token)
|
|
||||||
service = DatasetMappingService(db)
|
|
||||||
|
|
||||||
logger.info(f"Create dataset mapping request: {request.dataset_id}")
|
|
||||||
|
|
||||||
# 从DM服务获取数据集信息
|
|
||||||
dataset_info = await dm_client.get_dataset(request.dataset_id)
|
|
||||||
if not dataset_info:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404,
|
|
||||||
detail=f"Dataset not found in DM service: {request.dataset_id}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# 确定数据类型(基于数据集类型)
|
|
||||||
data_type = "image" # 默认值
|
|
||||||
if dataset_info.type and dataset_info.type.code:
|
|
||||||
type_code = dataset_info.type.code.lower()
|
|
||||||
if "audio" in type_code:
|
|
||||||
data_type = "audio"
|
|
||||||
elif "video" in type_code:
|
|
||||||
data_type = "video"
|
|
||||||
elif "text" in type_code:
|
|
||||||
data_type = "text"
|
|
||||||
|
|
||||||
project_name = f"{dataset_info.name}"
|
|
||||||
|
|
||||||
# 在Label Studio中创建项目
|
|
||||||
project_data = await ls_client.create_project(
|
|
||||||
title=project_name,
|
|
||||||
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
|
|
||||||
data_type=data_type
|
|
||||||
)
|
|
||||||
|
|
||||||
if not project_data:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500,
|
|
||||||
detail="Fail to create Label Studio project."
|
|
||||||
)
|
|
||||||
|
|
||||||
project_id = project_data["id"]
|
|
||||||
|
|
||||||
# 配置本地存储:dataset/<id>
|
|
||||||
local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}"
|
|
||||||
storage_result = await ls_client.create_local_storage(
|
|
||||||
project_id=project_id,
|
|
||||||
path=local_storage_path,
|
|
||||||
title="Dataset_BLOB",
|
|
||||||
use_blob_urls=True,
|
|
||||||
description=f"Local storage for dataset {dataset_info.name}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# 配置本地存储:upload
|
|
||||||
local_storage_path = f"{settings.label_studio_local_storage_upload_base_path}"
|
|
||||||
storage_result = await ls_client.create_local_storage(
|
|
||||||
project_id=project_id,
|
|
||||||
path=local_storage_path,
|
|
||||||
title="Upload_BLOB",
|
|
||||||
use_blob_urls=True,
|
|
||||||
description=f"Local storage for dataset {dataset_info.name}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if not storage_result:
|
|
||||||
# 本地存储配置失败,记录警告但不中断流程
|
|
||||||
logger.warning(f"Failed to configure local storage for project {project_id}")
|
|
||||||
else:
|
|
||||||
logger.info(f"Local storage configured for project {project_id}: {local_storage_path}")
|
|
||||||
|
|
||||||
# 创建映射关系,包含项目名称
|
|
||||||
mapping = await service.create_mapping(
|
|
||||||
request,
|
|
||||||
str(project_id),
|
|
||||||
project_name
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
f"Dataset mapping created: {mapping.mapping_id} -> S {mapping.dataset_id} <> L {mapping.labelling_project_id}"
|
|
||||||
)
|
|
||||||
|
|
||||||
response_data = DatasetMappingCreateResponse(
|
|
||||||
mapping_id=mapping.mapping_id,
|
|
||||||
labelling_project_id=mapping.labelling_project_id,
|
|
||||||
labelling_project_name=mapping.labelling_project_name or project_name,
|
|
||||||
message="Dataset mapping created successfully"
|
|
||||||
)
|
|
||||||
|
|
||||||
return StandardResponse(
|
|
||||||
code=201,
|
|
||||||
message="success",
|
|
||||||
data=response_data
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error while creating dataset mapping: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
from fastapi import Depends, HTTPException, Query
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from app.db.database import get_db
|
|
||||||
from app.services.dataset_mapping_service import DatasetMappingService
|
|
||||||
from app.infrastructure import DatamateClient, LabelStudioClient
|
|
||||||
from app.schemas.dataset_mapping import DeleteDatasetResponse
|
|
||||||
from app.schemas import StandardResponse
|
|
||||||
from app.core.logging import get_logger
|
|
||||||
from app.core.config import settings
|
|
||||||
|
|
||||||
from . import project_router
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
@project_router.delete("/mappings", response_model=StandardResponse[DeleteDatasetResponse])
|
|
||||||
async def delete_mapping(
|
|
||||||
m: Optional[str] = Query(None, description="映射UUID"),
|
|
||||||
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
|
|
||||||
db: AsyncSession = Depends(get_db)
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
删除映射关系和对应的 Label Studio 项目
|
|
||||||
|
|
||||||
可以通过以下任一方式指定要删除的映射:
|
|
||||||
- m: 映射UUID
|
|
||||||
- proj: Label Studio项目ID
|
|
||||||
- 两者都提供(优先使用 m)
|
|
||||||
|
|
||||||
此操作会:
|
|
||||||
1. 删除 Label Studio 中的项目
|
|
||||||
2. 软删除数据库中的映射记录
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# 至少需要提供一个参数
|
|
||||||
if not m and not proj:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
|
|
||||||
)
|
|
||||||
|
|
||||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
|
||||||
token=settings.label_studio_user_token)
|
|
||||||
service = DatasetMappingService(db)
|
|
||||||
|
|
||||||
# 优先使用 mapping_id 查询
|
|
||||||
if m:
|
|
||||||
logger.debug(f"Deleting by mapping UUID: {m}")
|
|
||||||
mapping = await service.get_mapping_by_uuid(m)
|
|
||||||
# 如果没有提供 m,使用 proj 查询
|
|
||||||
elif proj:
|
|
||||||
logger.debug(f"Deleting by project ID: {proj}")
|
|
||||||
mapping = await service.get_mapping_by_labelling_project_id(proj)
|
|
||||||
else:
|
|
||||||
mapping = None
|
|
||||||
|
|
||||||
if not mapping:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404,
|
|
||||||
detail=f"Mapping either not found or not specified."
|
|
||||||
)
|
|
||||||
|
|
||||||
mapping_id = mapping.mapping_id
|
|
||||||
labelling_project_id = mapping.labelling_project_id
|
|
||||||
labelling_project_name = mapping.labelling_project_name
|
|
||||||
|
|
||||||
logger.debug(f"Found mapping: {mapping_id}, Label Studio project ID: {labelling_project_id}")
|
|
||||||
|
|
||||||
# 1. 删除 Label Studio 项目
|
|
||||||
try:
|
|
||||||
delete_success = await ls_client.delete_project(int(labelling_project_id))
|
|
||||||
if delete_success:
|
|
||||||
logger.debug(f"Successfully deleted Label Studio project: {labelling_project_id}")
|
|
||||||
else:
|
|
||||||
logger.warning(f"Failed to delete Label Studio project or project not found: {labelling_project_id}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error deleting Label Studio project: {e}")
|
|
||||||
# 继续执行,即使 Label Studio 项目删除失败也要删除映射记录
|
|
||||||
|
|
||||||
# 2. 软删除映射记录
|
|
||||||
soft_delete_success = await service.soft_delete_mapping(mapping_id)
|
|
||||||
|
|
||||||
if not soft_delete_success:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500,
|
|
||||||
detail="Failed to delete mapping record"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Successfully deleted mapping: {mapping_id}, Label Studio project: {labelling_project_id}")
|
|
||||||
|
|
||||||
return StandardResponse(
|
|
||||||
code=200,
|
|
||||||
message="success",
|
|
||||||
data=DeleteDatasetResponse(
|
|
||||||
mapping_id=mapping_id,
|
|
||||||
status="success",
|
|
||||||
message=f"Successfully deleted mapping and Label Studio project '{labelling_project_name}'"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error deleting mapping: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
@@ -1,152 +0,0 @@
|
|||||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
|
||||||
from typing import List
|
|
||||||
import math
|
|
||||||
|
|
||||||
from app.db.database import get_db
|
|
||||||
from app.services.dataset_mapping_service import DatasetMappingService
|
|
||||||
from app.schemas.dataset_mapping import DatasetMappingResponse
|
|
||||||
from app.schemas.common import StandardResponse, PaginatedData
|
|
||||||
from app.core.logging import get_logger
|
|
||||||
from . import project_router
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
|
||||||
|
|
||||||
@project_router.get("/mappings/list", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
|
||||||
async def list_mappings(
|
|
||||||
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
|
||||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
|
||||||
db: AsyncSession = Depends(get_db)
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
查询所有映射关系(分页)
|
|
||||||
|
|
||||||
返回所有有效的数据集映射关系(未被软删除的),支持分页查询
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
service = DatasetMappingService(db)
|
|
||||||
|
|
||||||
# 计算 skip
|
|
||||||
skip = (page - 1) * page_size
|
|
||||||
|
|
||||||
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
|
|
||||||
|
|
||||||
# 获取数据和总数
|
|
||||||
mappings, total = await service.get_all_mappings_with_count(
|
|
||||||
skip=skip,
|
|
||||||
limit=page_size
|
|
||||||
)
|
|
||||||
|
|
||||||
# 计算总页数
|
|
||||||
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
|
||||||
|
|
||||||
# 构造分页响应
|
|
||||||
paginated_data = PaginatedData(
|
|
||||||
page=page,
|
|
||||||
size=page_size,
|
|
||||||
total_elements=total,
|
|
||||||
total_pages=total_pages,
|
|
||||||
content=mappings
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
|
|
||||||
|
|
||||||
return StandardResponse(
|
|
||||||
code=200,
|
|
||||||
message="success",
|
|
||||||
data=paginated_data
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error listing mappings: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
|
|
||||||
|
|
||||||
@project_router.get("/mappings/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse])
|
|
||||||
async def get_mapping(
|
|
||||||
mapping_id: str,
|
|
||||||
db: AsyncSession = Depends(get_db)
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
根据 UUID 查询单个映射关系
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
service = DatasetMappingService(db)
|
|
||||||
|
|
||||||
logger.info(f"Get mapping: {mapping_id}")
|
|
||||||
|
|
||||||
mapping = await service.get_mapping_by_uuid(mapping_id)
|
|
||||||
|
|
||||||
if not mapping:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404,
|
|
||||||
detail=f"Mapping not found: {mapping_id}"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Found mapping: {mapping.mapping_id}")
|
|
||||||
|
|
||||||
return StandardResponse(
|
|
||||||
code=200,
|
|
||||||
message="success",
|
|
||||||
data=mapping
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting mapping: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
|
|
||||||
|
|
||||||
@project_router.get("/mappings/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
|
||||||
async def get_mappings_by_source(
|
|
||||||
dataset_id: str,
|
|
||||||
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
|
||||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
|
||||||
db: AsyncSession = Depends(get_db)
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
根据源数据集 ID 查询所有映射关系(分页)
|
|
||||||
|
|
||||||
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
service = DatasetMappingService(db)
|
|
||||||
|
|
||||||
# 计算 skip
|
|
||||||
skip = (page - 1) * page_size
|
|
||||||
|
|
||||||
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}")
|
|
||||||
|
|
||||||
# 获取数据和总数
|
|
||||||
mappings, total = await service.get_mappings_by_source_with_count(
|
|
||||||
dataset_id=dataset_id,
|
|
||||||
skip=skip,
|
|
||||||
limit=page_size
|
|
||||||
)
|
|
||||||
|
|
||||||
# 计算总页数
|
|
||||||
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
|
||||||
|
|
||||||
# 构造分页响应
|
|
||||||
paginated_data = PaginatedData(
|
|
||||||
page=page,
|
|
||||||
size=page_size,
|
|
||||||
total_elements=total,
|
|
||||||
total_pages=total_pages,
|
|
||||||
content=mappings
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
|
|
||||||
|
|
||||||
return StandardResponse(
|
|
||||||
code=200,
|
|
||||||
message="success",
|
|
||||||
data=paginated_data
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting mappings: {e}")
|
|
||||||
raise HTTPException(status_code=500, detail="Internal server error")
|
|
||||||
@@ -1 +1,7 @@
|
|||||||
# app/core/__init__.py
|
# app/core/__init__.py
|
||||||
|
|
||||||
|
"""
|
||||||
|
Core module
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
from typing import Optional
|
from typing import Optional, List
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -24,9 +24,9 @@ class Settings(BaseSettings):
|
|||||||
port: int = 8000
|
port: int = 8000
|
||||||
|
|
||||||
# CORS配置
|
# CORS配置
|
||||||
allowed_origins: list = ["*"]
|
allowed_origins: List[str] = ["*"]
|
||||||
allowed_methods: list = ["*"]
|
allowed_methods: List[str] = ["*"]
|
||||||
allowed_headers: list = ["*"]
|
allowed_headers: List[str] = ["*"]
|
||||||
|
|
||||||
# MySQL数据库配置 (优先级1)
|
# MySQL数据库配置 (优先级1)
|
||||||
mysql_host: Optional[str] = None
|
mysql_host: Optional[str] = None
|
||||||
@@ -49,11 +49,7 @@ class Settings(BaseSettings):
|
|||||||
database_url: Optional[str] = None
|
database_url: Optional[str] = None
|
||||||
|
|
||||||
# 日志配置
|
# 日志配置
|
||||||
log_level: str = "INFO"
|
log_level: str = "DEBUG"
|
||||||
|
|
||||||
# 安全配置
|
|
||||||
secret_key: str = "your-secret-key-change-this-in-production"
|
|
||||||
access_token_expire_minutes: int = 30
|
|
||||||
|
|
||||||
# =========================
|
# =========================
|
||||||
# Label Studio 服务配置
|
# Label Studio 服务配置
|
||||||
@@ -63,8 +59,7 @@ class Settings(BaseSettings):
|
|||||||
label_studio_password: Optional[str] = None # Label Studio 密码(用于登录)
|
label_studio_password: Optional[str] = None # Label Studio 密码(用于登录)
|
||||||
label_studio_user_token: Optional[str] = None # Legacy Token
|
label_studio_user_token: Optional[str] = None # Legacy Token
|
||||||
|
|
||||||
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files/dataset" # Label Studio容器中的本地存储基础路径
|
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files" # Label Studio容器中的本地存储基础路径
|
||||||
label_studio_local_storage_upload_base_path: str = "/label-studio/local_files/upload" # Label Studio容器中的本地存储基础路径
|
|
||||||
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
|
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
|
||||||
|
|
||||||
ls_task_page_size: int = 1000
|
ls_task_page_size: int = 1000
|
||||||
@@ -73,7 +68,7 @@ class Settings(BaseSettings):
|
|||||||
# =========================
|
# =========================
|
||||||
# Data Management 服务配置
|
# Data Management 服务配置
|
||||||
# =========================
|
# =========================
|
||||||
dm_file_path_prefix: str = "/" # DM存储文件夹前缀
|
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ def setup_logging():
|
|||||||
log_dir.mkdir(exist_ok=True)
|
log_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
# 配置日志格式
|
# 配置日志格式
|
||||||
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
log_format = "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s"
|
||||||
date_format = "%Y-%m-%d %H:%M:%S"
|
date_format = "%Y-%m-%d %H:%M:%S"
|
||||||
|
|
||||||
# 创建处理器
|
# 创建处理器
|
||||||
@@ -44,9 +44,10 @@ def setup_logging():
|
|||||||
root_logger.addHandler(error_handler)
|
root_logger.addHandler(error_handler)
|
||||||
|
|
||||||
# 配置第三方库日志级别(减少详细日志)
|
# 配置第三方库日志级别(减少详细日志)
|
||||||
logging.getLogger("uvicorn").setLevel(logging.WARNING)
|
logging.getLogger("uvicorn").setLevel(logging.ERROR)
|
||||||
logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR) # 隐藏SQL查询日志
|
logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR) # 隐藏SQL查询日志
|
||||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
logging.getLogger("httpx").setLevel(logging.ERROR)
|
||||||
|
logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||||
|
|
||||||
def get_logger(name: str) -> logging.Logger:
|
def get_logger(name: str) -> logging.Logger:
|
||||||
"""获取指定名称的日志器"""
|
"""获取指定名称的日志器"""
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
# app/db/__init__.py
|
|
||||||
28
runtime/datamate-python/app/db/models/__init__.py
Normal file
28
runtime/datamate-python/app/db/models/__init__.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
from .dataset_management import (
|
||||||
|
Dataset,
|
||||||
|
DatasetTag,
|
||||||
|
DatasetFiles,
|
||||||
|
DatasetStatistics,
|
||||||
|
Tag
|
||||||
|
)
|
||||||
|
|
||||||
|
from .user_management import (
|
||||||
|
User
|
||||||
|
)
|
||||||
|
|
||||||
|
from .annotation_management import (
|
||||||
|
AnnotationTemplate,
|
||||||
|
LabelingProject
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Dataset",
|
||||||
|
"DatasetTag",
|
||||||
|
"DatasetFiles",
|
||||||
|
"DatasetStatistics",
|
||||||
|
"Tag",
|
||||||
|
"User",
|
||||||
|
"AnnotationTemplate",
|
||||||
|
"LabelingProject",
|
||||||
|
]
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
"""
|
||||||
|
Tables of Annotation Management Module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
from app.db.session import Base
|
||||||
|
|
||||||
|
class AnnotationTemplate(Base):
|
||||||
|
"""标注模板模型"""
|
||||||
|
|
||||||
|
__tablename__ = "t_dm_annotation_templates"
|
||||||
|
|
||||||
|
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
|
||||||
|
name = Column(String(32), nullable=False, comment="模板名称")
|
||||||
|
description = Column(String(255), nullable=True, comment="模板描述")
|
||||||
|
configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)")
|
||||||
|
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||||
|
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<AnnotationTemplate(id={self.id}, name={self.name})>"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_deleted(self) -> bool:
|
||||||
|
"""检查是否已被软删除"""
|
||||||
|
return self.deleted_at is not None
|
||||||
|
|
||||||
|
class LabelingProject(Base):
|
||||||
|
"""标注工程表"""
|
||||||
|
|
||||||
|
__tablename__ = "t_dm_labeling_projects"
|
||||||
|
|
||||||
|
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
|
||||||
|
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
|
||||||
|
name = Column(String(32), nullable=False, comment="项目名称")
|
||||||
|
labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
|
||||||
|
configuration = Column(JSON, nullable=True, comment="标签配置")
|
||||||
|
progress = Column(JSON, nullable=True, comment="标注进度统计")
|
||||||
|
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||||
|
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<LabelingProject(id={self.id}, dataset_id={self.dataset_id}, name={self.name})>"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_deleted(self) -> bool:
|
||||||
|
"""检查是否已被软删除"""
|
||||||
|
return self.deleted_at is not None
|
||||||
113
runtime/datamate-python/app/db/models/dataset_management.py
Normal file
113
runtime/datamate-python/app/db/models/dataset_management.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""
|
||||||
|
Tables of Dataset Management Module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
from app.db.session import Base
|
||||||
|
|
||||||
|
class Dataset(Base):
|
||||||
|
"""数据集模型(支持医学影像、文本、问答等多种类型)"""
|
||||||
|
|
||||||
|
__tablename__ = "t_dm_datasets"
|
||||||
|
|
||||||
|
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||||
|
name = Column(String(255), nullable=False, comment="数据集名称")
|
||||||
|
description = Column(Text, nullable=True, comment="数据集描述")
|
||||||
|
dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER")
|
||||||
|
category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等")
|
||||||
|
path = Column(String(500), nullable=True, comment="数据存储路径")
|
||||||
|
format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等")
|
||||||
|
schema_info = Column(JSON, nullable=True, comment="数据结构信息")
|
||||||
|
size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)")
|
||||||
|
file_count = Column(BigInteger, default=0, comment="文件数量")
|
||||||
|
record_count = Column(BigInteger, default=0, comment="记录数量")
|
||||||
|
retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)")
|
||||||
|
tags = Column(JSON, nullable=True, comment="标签列表")
|
||||||
|
dataset_metadata = Column("metadata", JSON, nullable=True, comment="元数据信息")
|
||||||
|
status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED")
|
||||||
|
is_public = Column(Boolean, default=False, comment="是否公开")
|
||||||
|
is_featured = Column(Boolean, default=False, comment="是否推荐")
|
||||||
|
version = Column(BigInteger, nullable=False, default=0, comment="版本号")
|
||||||
|
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||||
|
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||||
|
created_by = Column(String(255), nullable=True, comment="创建者")
|
||||||
|
updated_by = Column(String(255), nullable=True, comment="更新者")
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<Dataset(id={self.id}, name={self.name}, type={self.dataset_type})>"
|
||||||
|
|
||||||
|
class DatasetTag(Base):
|
||||||
|
"""数据集标签关联模型"""
|
||||||
|
|
||||||
|
__tablename__ = "t_dm_dataset_tags"
|
||||||
|
|
||||||
|
dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)")
|
||||||
|
tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)")
|
||||||
|
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<DatasetTag(dataset_id={self.dataset_id}, tag_id={self.tag_id})>"
|
||||||
|
|
||||||
|
class DatasetFiles(Base):
|
||||||
|
"""DM数据集文件模型"""
|
||||||
|
|
||||||
|
__tablename__ = "t_dm_dataset_files"
|
||||||
|
|
||||||
|
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||||
|
dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)")
|
||||||
|
file_name = Column(String(255), nullable=False, comment="文件名")
|
||||||
|
file_path = Column(String(1000), nullable=False, comment="文件路径")
|
||||||
|
file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等")
|
||||||
|
file_size = Column(BigInteger, default=0, comment="文件大小(字节)")
|
||||||
|
check_sum = Column(String(64), nullable=True, comment="文件校验和")
|
||||||
|
tags = Column(JSON, nullable=True, comment="文件标签信息")
|
||||||
|
dataset_filemetadata = Column("metadata", JSON, nullable=True, comment="文件元数据")
|
||||||
|
status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING")
|
||||||
|
upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间")
|
||||||
|
last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间")
|
||||||
|
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||||
|
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<DatasetFiles(id={self.id}, dataset_id={self.dataset_id}, file_name={self.file_name})>"
|
||||||
|
|
||||||
|
class DatasetStatistics(Base):
    """ORM model for daily dataset statistics (table ``t_dm_dataset_statistics``).

    One row per dataset per ``stat_date``, accumulating file counts, sizes
    and access counters. ``quality_metrics`` holds free-form JSON.
    """

    __tablename__ = "t_dm_dataset_statistics"

    # Primary key is a client-generated UUID string (not DB auto-generated).
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)")
    stat_date = Column(Date, nullable=False, comment="统计日期")
    total_files = Column(BigInteger, default=0, comment="总文件数")
    total_size = Column(BigInteger, default=0, comment="总大小(字节)")
    processed_files = Column(BigInteger, default=0, comment="已处理文件数")
    error_files = Column(BigInteger, default=0, comment="错误文件数")
    download_count = Column(BigInteger, default=0, comment="下载次数")
    view_count = Column(BigInteger, default=0, comment="查看次数")
    quality_metrics = Column(JSON, nullable=True, comment="质量指标")
    # Timestamps are maintained by the database (server_default / onupdate).
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")

    def __repr__(self):
        return f"<DatasetStatistics(id={self.id}, dataset_id={self.dataset_id}, date={self.stat_date})>"
||||||
|
class Tag(Base):
    """ORM model for the tag catalogue (table ``t_dm_tags``).

    Tag names are globally unique; ``usage_count`` tracks how often a tag
    is attached to datasets.
    """

    __tablename__ = "t_dm_tags"

    # Primary key is a client-generated UUID string.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(100), nullable=False, unique=True, comment="标签名称")
    description = Column(Text, nullable=True, comment="标签描述")
    category = Column(String(50), nullable=True, comment="标签分类")
    # 7 chars fits a "#RRGGBB" hex colour string.
    color = Column(String(7), nullable=True, comment="标签颜色(十六进制)")
    usage_count = Column(BigInteger, default=0, comment="使用次数")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")

    def __repr__(self):
        return f"<Tag(id={self.id}, name={self.name}, category={self.category})>"
@@ -1,6 +1,11 @@
|
|||||||
|
"""
|
||||||
|
Tables of User Management Module
|
||||||
|
"""
|
||||||
|
|
||||||
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP
|
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP
|
||||||
from sqlalchemy.sql import func
|
from sqlalchemy.sql import func
|
||||||
from app.db.database import Base
|
|
||||||
|
from app.db.session import Base
|
||||||
|
|
||||||
class User(Base):
|
class User(Base):
|
||||||
"""用户模型"""
|
"""用户模型"""
|
||||||
@@ -8,8 +8,7 @@ logger = get_logger(__name__)
|
|||||||
|
|
||||||
# 获取数据库配置信息
|
# 获取数据库配置信息
|
||||||
db_info = settings.get_database_info()
|
db_info = settings.get_database_info()
|
||||||
logger.info(f"使用数据库: {db_info['type']}")
|
logger.info(f"使用数据库: {db_info['type']} || 连接URL: {db_info['url']}")
|
||||||
logger.info(f"连接URL: {db_info['url']}")
|
|
||||||
|
|
||||||
# 创建数据库引擎
|
# 创建数据库引擎
|
||||||
engine = create_async_engine(
|
engine = create_async_engine(
|
||||||
97
runtime/datamate-python/app/exception.py
Normal file
97
runtime/datamate-python/app/exception.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
"""
|
||||||
|
全局自定义异常类定义
|
||||||
|
"""
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from fastapi.exceptions import RequestValidationError
|
||||||
|
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||||
|
from fastapi import FastAPI, Request, HTTPException, status
|
||||||
|
|
||||||
|
from .core.logging import setup_logging, get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
# Exception handler: StarletteHTTPException (covers framework 404s etc.).
async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException):
    """Convert Starlette's HTTPException into the standard response envelope.

    The envelope mirrors the app-wide shape: ``code`` / ``message`` / ``data``,
    with the original detail nested under ``data.detail``.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "code": exc.status_code,
            "message": "error",
            "data": {
                "detail": exc.detail
            }
        }
    )
|
# Exception handler: FastAPI HTTPException raised by route code.
async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
    """Convert FastAPI's HTTPException into the standard response envelope.

    Same shape as the Starlette handler; kept separate because FastAPI
    registers the two exception types independently.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "code": exc.status_code,
            "message": "error",
            "data": {
                "detail": exc.detail
            }
        }
    )
|
# Exception handler: request-body/query validation failures.
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Convert request validation errors into the standard response envelope.

    Always returns HTTP 422; the pydantic error list is exposed under
    ``data.errors`` so clients can show field-level messages.
    """
    return JSONResponse(
        status_code=422,
        content={
            "code": 422,
            "message": "error",
            "data": {
                "detail": "Validation error",
                "errors": exc.errors()
            }
        }
    )
|
# Exception handler: last-resort catch-all for uncaught exceptions.
async def general_exception_handler(request: Request, exc: Exception):
    """Convert any uncaught exception into a generic 500 response.

    Logs the full traceback server-side but deliberately returns only a
    generic message to avoid leaking internals to clients.
    """
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    return JSONResponse(
        status_code=500,
        content={
            "code": 500,
            "message": "error",
            "data": {
                "detail": "Internal server error"
            }
        }
    )
|
class LabelStudioAdapterException(Exception):
    """Root of the Label Studio adapter exception hierarchy.

    Catch this to handle any adapter-specific failure in one place.
    """
    pass
|
class DatasetMappingNotFoundError(LabelStudioAdapterException):
    """Raised when a dataset mapping cannot be found by its id."""

    def __init__(self, mapping_id: str):
        # Keep the id on the instance so callers can act on it programmatically.
        self.mapping_id = mapping_id
        super().__init__(f"Dataset mapping not found: {mapping_id}")
|
class NoDatasetInfoFoundError(LabelStudioAdapterException):
    """Raised when dataset information cannot be retrieved for a UUID."""

    def __init__(self, dataset_uuid: str):
        # Keep the uuid on the instance so callers can act on it programmatically.
        self.dataset_uuid = dataset_uuid
        super().__init__(f"Failed to get dataset info: {dataset_uuid}")
|
class LabelStudioClientError(LabelStudioAdapterException):
    """Raised for failures in the Label Studio HTTP client."""
    pass
|
class DMServiceClientError(LabelStudioAdapterException):
    """Raised for failures in the DM (data management) service client."""
    pass
|
class SyncServiceError(LabelStudioAdapterException):
    """Raised for failures in the dataset synchronization service."""
    pass
@@ -1,31 +0,0 @@
|
|||||||
"""
|
|
||||||
自定义异常类定义
|
|
||||||
"""
|
|
||||||
|
|
||||||
class LabelStudioAdapterException(Exception):
|
|
||||||
"""Label Studio Adapter 基础异常类"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DatasetMappingNotFoundError(LabelStudioAdapterException):
|
|
||||||
"""数据集映射未找到异常"""
|
|
||||||
def __init__(self, mapping_id: str):
|
|
||||||
self.mapping_id = mapping_id
|
|
||||||
super().__init__(f"Dataset mapping not found: {mapping_id}")
|
|
||||||
|
|
||||||
class NoDatasetInfoFoundError(LabelStudioAdapterException):
|
|
||||||
"""无法获取数据集信息异常"""
|
|
||||||
def __init__(self, dataset_uuid: str):
|
|
||||||
self.dataset_uuid = dataset_uuid
|
|
||||||
super().__init__(f"Failed to get dataset info: {dataset_uuid}")
|
|
||||||
|
|
||||||
class LabelStudioClientError(LabelStudioAdapterException):
|
|
||||||
"""Label Studio 客户端错误"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DMServiceClientError(LabelStudioAdapterException):
|
|
||||||
"""DM 服务客户端错误"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class SyncServiceError(LabelStudioAdapterException):
|
|
||||||
"""同步服务错误"""
|
|
||||||
pass
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
# app/clients/__init__.py
|
|
||||||
|
|
||||||
from .label_studio import Client as LabelStudioClient
|
|
||||||
from .datamate import Client as DatamateClient
|
|
||||||
|
|
||||||
__all__ = ["LabelStudioClient", "DatamateClient"]
|
|
||||||
@@ -1,16 +1,23 @@
|
|||||||
from fastapi import FastAPI, Request, HTTPException, status
|
from fastapi import FastAPI, Request, HTTPException, status
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import JSONResponse
|
|
||||||
from fastapi.exceptions import RequestValidationError
|
from fastapi.exceptions import RequestValidationError
|
||||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||||
|
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
from .core.config import settings
|
from .core.config import settings
|
||||||
from .core.logging import setup_logging, get_logger
|
from .core.logging import setup_logging, get_logger
|
||||||
from .infrastructure import LabelStudioClient
|
from .db.session import engine, AsyncSessionLocal
|
||||||
from .api import api_router
|
from .module.shared.schema import StandardResponse
|
||||||
from .schemas import StandardResponse
|
from .module import router
|
||||||
|
from .exception import (
|
||||||
|
starlette_http_exception_handler,
|
||||||
|
fastapi_http_exception_handler,
|
||||||
|
validation_exception_handler,
|
||||||
|
general_exception_handler
|
||||||
|
)
|
||||||
|
|
||||||
# 设置日志
|
# 设置日志
|
||||||
setup_logging()
|
setup_logging()
|
||||||
@@ -21,23 +28,21 @@ async def lifespan(app: FastAPI):
|
|||||||
"""应用程序生命周期管理"""
|
"""应用程序生命周期管理"""
|
||||||
|
|
||||||
# 启动时初始化
|
# 启动时初始化
|
||||||
logger.info("Starting Label Studio Adapter...")
|
logger.info("DataMate Python Backend starting...")
|
||||||
|
# 数据库连接验证
|
||||||
# 初始化 Label Studio 客户端,使用 HTTP REST API + Token 认证
|
try:
|
||||||
ls_client = LabelStudioClient(
|
async with AsyncSessionLocal() as session:
|
||||||
base_url=settings.label_studio_base_url,
|
await session.execute(text("SELECT 1"))
|
||||||
token=settings.label_studio_user_token
|
logger.info("Database connection validated successfully.")
|
||||||
)
|
except Exception as e:
|
||||||
|
logger.error(f"Database connection validation failed: {e}")
|
||||||
logger.info("Label Studio Adapter started")
|
logger.debug(f"Connection details: {settings.computed_database_url}")
|
||||||
|
raise
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
# 关闭时清理
|
# 关闭时清理
|
||||||
logger.info("Shutting down Label Studio Adapter...")
|
logger.info("DataMate Python Backend shutting down ...")
|
||||||
|
|
||||||
# 客户端清理会在客户端管理器中处理
|
|
||||||
logger.info("Label Studio Adapter stopped")
|
|
||||||
|
|
||||||
# 创建FastAPI应用
|
# 创建FastAPI应用
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
@@ -57,70 +62,16 @@ app.add_middleware(
|
|||||||
allow_headers=settings.allowed_headers,
|
allow_headers=settings.allowed_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 自定义异常处理器:StarletteHTTPException (包括404等)
|
|
||||||
@app.exception_handler(StarletteHTTPException)
|
|
||||||
async def starlette_http_exception_handler(request: Request, exc: StarletteHTTPException):
|
|
||||||
"""将Starlette的HTTPException转换为标准响应格式"""
|
|
||||||
return JSONResponse(
|
|
||||||
status_code=exc.status_code,
|
|
||||||
content={
|
|
||||||
"code": exc.status_code,
|
|
||||||
"message": "error",
|
|
||||||
"data": {
|
|
||||||
"detail": exc.detail
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 自定义异常处理器:FastAPI HTTPException
|
|
||||||
@app.exception_handler(HTTPException)
|
|
||||||
async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
|
|
||||||
"""将FastAPI的HTTPException转换为标准响应格式"""
|
|
||||||
return JSONResponse(
|
|
||||||
status_code=exc.status_code,
|
|
||||||
content={
|
|
||||||
"code": exc.status_code,
|
|
||||||
"message": "error",
|
|
||||||
"data": {
|
|
||||||
"detail": exc.detail
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 自定义异常处理器:RequestValidationError
|
|
||||||
@app.exception_handler(RequestValidationError)
|
|
||||||
async def validation_exception_handler(request: Request, exc: RequestValidationError):
|
|
||||||
"""将请求验证错误转换为标准响应格式"""
|
|
||||||
return JSONResponse(
|
|
||||||
status_code=422,
|
|
||||||
content={
|
|
||||||
"code": 422,
|
|
||||||
"message": "error",
|
|
||||||
"data": {
|
|
||||||
"detail": "Validation error",
|
|
||||||
"errors": exc.errors()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 自定义异常处理器:未捕获的异常
|
|
||||||
@app.exception_handler(Exception)
|
|
||||||
async def general_exception_handler(request: Request, exc: Exception):
|
|
||||||
"""将未捕获的异常转换为标准响应格式"""
|
|
||||||
logger.error(f"Unhandled exception: {exc}", exc_info=True)
|
|
||||||
return JSONResponse(
|
|
||||||
status_code=500,
|
|
||||||
content={
|
|
||||||
"code": 500,
|
|
||||||
"message": "error",
|
|
||||||
"data": {
|
|
||||||
"detail": "Internal server error"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 注册路由
|
# 注册路由
|
||||||
app.include_router(api_router, prefix="/api")
|
app.include_router(router)
|
||||||
|
|
||||||
|
logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes])
|
||||||
|
|
||||||
|
# 注册全局异常处理器
|
||||||
|
app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore
|
||||||
|
app.add_exception_handler(HTTPException, fastapi_http_exception_handler) # type: ignore
|
||||||
|
app.add_exception_handler(RequestValidationError, validation_exception_handler) # type: ignore
|
||||||
|
app.add_exception_handler(Exception, general_exception_handler)
|
||||||
|
|
||||||
# 测试端点:验证异常处理
|
# 测试端点:验证异常处理
|
||||||
@app.get("/test-404", include_in_schema=False)
|
@app.get("/test-404", include_in_schema=False)
|
||||||
|
|||||||
@@ -1,138 +0,0 @@
|
|||||||
# DataMate 数据模型结构
|
|
||||||
|
|
||||||
本文档列出了根据 `scripts/db` 中的 SQL 文件创建的所有 Python 数据模型。
|
|
||||||
|
|
||||||
## 模型组织结构
|
|
||||||
|
|
||||||
```
|
|
||||||
app/models/
|
|
||||||
├── __init__.py # 主模块导出文件
|
|
||||||
├── dm/ # 数据管理 (Data Management) 模块
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── annotation_template.py # 标注模板
|
|
||||||
│ ├── labeling_project.py # 标注项目
|
|
||||||
│ ├── dataset.py # 数据集
|
|
||||||
│ ├── dataset_files.py # 数据集文件
|
|
||||||
│ ├── dataset_statistics.py # 数据集统计
|
|
||||||
│ ├── dataset_tag.py # 数据集标签关联
|
|
||||||
│ ├── tag.py # 标签
|
|
||||||
│ └── user.py # 用户
|
|
||||||
├── cleaning/ # 数据清洗 (Data Cleaning) 模块
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── clean_template.py # 清洗模板
|
|
||||||
│ ├── clean_task.py # 清洗任务
|
|
||||||
│ ├── operator_instance.py # 算子实例
|
|
||||||
│ └── clean_result.py # 清洗结果
|
|
||||||
├── collection/ # 数据归集 (Data Collection) 模块
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── task_execution.py # 任务执行明细
|
|
||||||
│ ├── collection_task.py # 数据归集任务
|
|
||||||
│ ├── task_log.py # 任务执行记录
|
|
||||||
│ └── datax_template.py # DataX模板配置
|
|
||||||
├── common/ # 通用 (Common) 模块
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ └── chunk_upload_request.py # 文件切片上传请求
|
|
||||||
└── operator/ # 算子 (Operator) 模块
|
|
||||||
├── __init__.py
|
|
||||||
├── operator.py # 算子
|
|
||||||
├── operator_category.py # 算子分类
|
|
||||||
└── operator_category_relation.py # 算子分类关联
|
|
||||||
```
|
|
||||||
|
|
||||||
## 模块详情
|
|
||||||
|
|
||||||
### 1. Data Management (DM) 模块
|
|
||||||
对应 SQL: `data-management-init.sql` 和 `data-annotation-init.sql`
|
|
||||||
|
|
||||||
#### 模型列表:
|
|
||||||
- **AnnotationTemplate** (`t_dm_annotation_templates`) - 标注模板
|
|
||||||
- **LabelingProject** (`t_dm_labeling_projects`) - 标注项目
|
|
||||||
- **Dataset** (`t_dm_datasets`) - 数据集(支持医学影像、文本、问答等多种类型)
|
|
||||||
- **DatasetFiles** (`t_dm_dataset_files`) - 数据集文件
|
|
||||||
- **DatasetStatistics** (`t_dm_dataset_statistics`) - 数据集统计信息
|
|
||||||
- **Tag** (`t_dm_tags`) - 标签
|
|
||||||
- **DatasetTag** (`t_dm_dataset_tags`) - 数据集标签关联
|
|
||||||
- **User** (`users`) - 用户
|
|
||||||
|
|
||||||
### 2. Data Cleaning 模块
|
|
||||||
对应 SQL: `data-cleaning-init.sql`
|
|
||||||
|
|
||||||
#### 模型列表:
|
|
||||||
- **CleanTemplate** (`t_clean_template`) - 清洗模板
|
|
||||||
- **CleanTask** (`t_clean_task`) - 清洗任务
|
|
||||||
- **OperatorInstance** (`t_operator_instance`) - 算子实例
|
|
||||||
- **CleanResult** (`t_clean_result`) - 清洗结果
|
|
||||||
|
|
||||||
### 3. Data Collection (DC) 模块
|
|
||||||
对应 SQL: `data-collection-init.sql`
|
|
||||||
|
|
||||||
#### 模型列表:
|
|
||||||
- **TaskExecution** (`t_dc_task_executions`) - 任务执行明细
|
|
||||||
- **CollectionTask** (`t_dc_collection_tasks`) - 数据归集任务
|
|
||||||
- **TaskLog** (`t_dc_task_log`) - 任务执行记录
|
|
||||||
- **DataxTemplate** (`t_dc_datax_templates`) - DataX模板配置
|
|
||||||
|
|
||||||
### 4. Common 模块
|
|
||||||
对应 SQL: `data-common-init.sql`
|
|
||||||
|
|
||||||
#### 模型列表:
|
|
||||||
- **ChunkUploadRequest** (`t_chunk_upload_request`) - 文件切片上传请求
|
|
||||||
|
|
||||||
### 5. Operator 模块
|
|
||||||
对应 SQL: `data-operator-init.sql`
|
|
||||||
|
|
||||||
#### 模型列表:
|
|
||||||
- **Operator** (`t_operator`) - 算子
|
|
||||||
- **OperatorCategory** (`t_operator_category`) - 算子分类
|
|
||||||
- **OperatorCategoryRelation** (`t_operator_category_relation`) - 算子分类关联
|
|
||||||
|
|
||||||
## 使用方式
|
|
||||||
|
|
||||||
```python
|
|
||||||
# 导入所有模型
|
|
||||||
from app.models import (
|
|
||||||
# DM 模块
|
|
||||||
AnnotationTemplate,
|
|
||||||
LabelingProject,
|
|
||||||
Dataset,
|
|
||||||
DatasetFiles,
|
|
||||||
DatasetStatistics,
|
|
||||||
DatasetTag,
|
|
||||||
Tag,
|
|
||||||
User,
|
|
||||||
# Cleaning 模块
|
|
||||||
CleanTemplate,
|
|
||||||
CleanTask,
|
|
||||||
OperatorInstance,
|
|
||||||
CleanResult,
|
|
||||||
# Collection 模块
|
|
||||||
TaskExecution,
|
|
||||||
CollectionTask,
|
|
||||||
TaskLog,
|
|
||||||
DataxTemplate,
|
|
||||||
# Common 模块
|
|
||||||
ChunkUploadRequest,
|
|
||||||
# Operator 模块
|
|
||||||
Operator,
|
|
||||||
OperatorCategory,
|
|
||||||
OperatorCategoryRelation
|
|
||||||
)
|
|
||||||
|
|
||||||
# 或者按模块导入
|
|
||||||
from app.models.dm import Dataset, DatasetFiles
|
|
||||||
from app.models.collection import CollectionTask
|
|
||||||
from app.models.operator import Operator
|
|
||||||
```
|
|
||||||
|
|
||||||
## 注意事项
|
|
||||||
|
|
||||||
1. **UUID 主键**: 大部分表使用 UUID (String(36)) 作为主键
|
|
||||||
2. **时间戳**: 使用 `TIMESTAMP` 类型,并配置自动更新
|
|
||||||
3. **软删除**: 部分模型(如 AnnotationTemplate, LabelingProject)支持软删除,包含 `deleted_at` 字段和 `is_deleted` 属性
|
|
||||||
4. **JSON 字段**: 配置信息、元数据等使用 JSON 类型存储
|
|
||||||
5. **字段一致性**: 所有模型字段都严格按照 SQL 定义创建,确保与数据库表结构完全一致
|
|
||||||
|
|
||||||
## 更新记录
|
|
||||||
|
|
||||||
- 2025-10-25: 根据 `scripts/db` 中的 SQL 文件创建所有数据模型
|
|
||||||
- 已更新现有的 `annotation_template.py`、`labeling_project.py`、`dataset_files.py` 以匹配 SQL 定义
|
|
||||||
@@ -1,69 +0,0 @@
|
|||||||
# app/models/__init__.py
|
|
||||||
|
|
||||||
# Data Management (DM) 模块
|
|
||||||
from .dm import (
|
|
||||||
AnnotationTemplate,
|
|
||||||
LabelingProject,
|
|
||||||
Dataset,
|
|
||||||
DatasetFiles,
|
|
||||||
DatasetStatistics,
|
|
||||||
DatasetTag,
|
|
||||||
Tag,
|
|
||||||
User
|
|
||||||
)
|
|
||||||
|
|
||||||
# Data Cleaning 模块
|
|
||||||
from .cleaning import (
|
|
||||||
CleanTemplate,
|
|
||||||
CleanTask,
|
|
||||||
OperatorInstance,
|
|
||||||
CleanResult
|
|
||||||
)
|
|
||||||
|
|
||||||
# Data Collection (DC) 模块
|
|
||||||
from .collection import (
|
|
||||||
TaskExecution,
|
|
||||||
CollectionTask,
|
|
||||||
TaskLog,
|
|
||||||
DataxTemplate
|
|
||||||
)
|
|
||||||
|
|
||||||
# Common 模块
|
|
||||||
from .common import (
|
|
||||||
ChunkUploadRequest
|
|
||||||
)
|
|
||||||
|
|
||||||
# Operator 模块
|
|
||||||
from .operator import (
|
|
||||||
Operator,
|
|
||||||
OperatorCategory,
|
|
||||||
OperatorCategoryRelation
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
# DM 模块
|
|
||||||
"AnnotationTemplate",
|
|
||||||
"LabelingProject",
|
|
||||||
"Dataset",
|
|
||||||
"DatasetFiles",
|
|
||||||
"DatasetStatistics",
|
|
||||||
"DatasetTag",
|
|
||||||
"Tag",
|
|
||||||
"User",
|
|
||||||
# Cleaning 模块
|
|
||||||
"CleanTemplate",
|
|
||||||
"CleanTask",
|
|
||||||
"OperatorInstance",
|
|
||||||
"CleanResult",
|
|
||||||
# Collection 模块
|
|
||||||
"TaskExecution",
|
|
||||||
"CollectionTask",
|
|
||||||
"TaskLog",
|
|
||||||
"DataxTemplate",
|
|
||||||
# Common 模块
|
|
||||||
"ChunkUploadRequest",
|
|
||||||
# Operator 模块
|
|
||||||
"Operator",
|
|
||||||
"OperatorCategory",
|
|
||||||
"OperatorCategoryRelation"
|
|
||||||
]
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
# app/models/cleaning/__init__.py
|
|
||||||
|
|
||||||
from .clean_template import CleanTemplate
|
|
||||||
from .clean_task import CleanTask
|
|
||||||
from .operator_instance import OperatorInstance
|
|
||||||
from .clean_result import CleanResult
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"CleanTemplate",
|
|
||||||
"CleanTask",
|
|
||||||
"OperatorInstance",
|
|
||||||
"CleanResult"
|
|
||||||
]
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, BigInteger, Text
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class CleanResult(Base):
    """ORM model for a per-file cleaning result (table ``t_clean_result``).

    Records the source/destination file pair produced by one cleaning
    instance, plus the processing status and result text.
    """

    __tablename__ = "t_clean_result"

    # Composite primary key: (instance_id, dest_file_id).
    instance_id = Column(String(64), primary_key=True, comment="实例ID")
    src_file_id = Column(String(64), nullable=True, comment="源文件ID")
    dest_file_id = Column(String(64), primary_key=True, comment="目标文件ID")
    src_name = Column(String(256), nullable=True, comment="源文件名")
    dest_name = Column(String(256), nullable=True, comment="目标文件名")
    src_type = Column(String(256), nullable=True, comment="源文件类型")
    dest_type = Column(String(256), nullable=True, comment="目标文件类型")
    src_size = Column(BigInteger, nullable=True, comment="源文件大小")
    dest_size = Column(BigInteger, nullable=True, comment="目标文件大小")
    status = Column(String(256), nullable=True, comment="处理状态")
    result = Column(Text, nullable=True, comment="处理结果")

    def __repr__(self):
        return f"<CleanResult(instance_id={self.instance_id}, dest_file_id={self.dest_file_id}, status={self.status})>"
@@ -1,27 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, BigInteger, Integer, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class CleanTask(Base):
    """ORM model for a data-cleaning task (table ``t_clean_task``).

    Tracks the source/destination datasets, before/after sizes and the
    task lifecycle timestamps.
    """

    __tablename__ = "t_clean_task"

    id = Column(String(64), primary_key=True, comment="任务ID")
    name = Column(String(64), nullable=True, comment="任务名称")
    description = Column(String(256), nullable=True, comment="任务描述")
    status = Column(String(256), nullable=True, comment="任务状态")
    src_dataset_id = Column(String(64), nullable=True, comment="源数据集ID")
    src_dataset_name = Column(String(64), nullable=True, comment="源数据集名称")
    dest_dataset_id = Column(String(64), nullable=True, comment="目标数据集ID")
    dest_dataset_name = Column(String(64), nullable=True, comment="目标数据集名称")
    before_size = Column(BigInteger, nullable=True, comment="清洗前大小")
    after_size = Column(BigInteger, nullable=True, comment="清洗后大小")
    file_count = Column(Integer, nullable=True, comment="文件数量")
    # created_at is DB-generated; started_at/finished_at are set by the app.
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
    finished_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
    created_by = Column(String(256), nullable=True, comment="创建者")

    def __repr__(self):
        return f"<CleanTask(id={self.id}, name={self.name}, status={self.status})>"
@@ -1,18 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class CleanTemplate(Base):
    """ORM model for a reusable cleaning template (table ``t_clean_template``)."""

    __tablename__ = "t_clean_template"

    id = Column(String(64), primary_key=True, unique=True, comment="模板ID")
    name = Column(String(64), nullable=True, comment="模板名称")
    description = Column(String(256), nullable=True, comment="模板描述")
    # Timestamps are maintained by the database (server_default / onupdate).
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(256), nullable=True, comment="创建者")

    def __repr__(self):
        return f"<CleanTemplate(id={self.id}, name={self.name})>"
@@ -1,15 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Integer, Text
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class OperatorInstance(Base):
    """ORM model binding an operator to a cleaning instance (table ``t_operator_instance``)."""

    __tablename__ = "t_operator_instance"

    # Composite primary key: (instance_id, operator_id, op_index) —
    # the same operator may appear multiple times at different indexes.
    instance_id = Column(String(256), primary_key=True, comment="实例ID")
    operator_id = Column(String(256), primary_key=True, comment="算子ID")
    op_index = Column(Integer, primary_key=True, comment="算子索引")
    settings_override = Column(Text, nullable=True, comment="配置覆盖")

    def __repr__(self):
        return f"<OperatorInstance(instance_id={self.instance_id}, operator_id={self.operator_id}, index={self.op_index})>"
@@ -1,13 +0,0 @@
|
|||||||
# app/models/collection/__init__.py
|
|
||||||
|
|
||||||
from .task_execution import TaskExecution
|
|
||||||
from .collection_task import CollectionTask
|
|
||||||
from .task_log import TaskLog
|
|
||||||
from .datax_template import DataxTemplate
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"TaskExecution",
|
|
||||||
"CollectionTask",
|
|
||||||
"TaskLog",
|
|
||||||
"DataxTemplate"
|
|
||||||
]
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class CollectionTask(Base):
    """ORM model for a data-collection task (table ``t_dc_collection_tasks``).

    ``config`` holds the DataX job configuration (source + target); scheduled
    tasks additionally carry a cron expression.
    """

    __tablename__ = "t_dc_collection_tasks"

    id = Column(String(36), primary_key=True, comment="任务ID(UUID)")
    name = Column(String(255), nullable=False, comment="任务名称")
    description = Column(Text, nullable=True, comment="任务描述")
    sync_mode = Column(String(20), default='ONCE', comment="同步模式:ONCE/SCHEDULED")
    config = Column(Text, nullable=False, comment="归集配置(DataX配置),包含源端和目标端配置信息")
    schedule_expression = Column(String(255), nullable=True, comment="Cron调度表达式")
    status = Column(String(20), default='DRAFT', comment="任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED")
    retry_count = Column(Integer, default=3, comment="重试次数")
    timeout_seconds = Column(Integer, default=3600, comment="超时时间(秒)")
    max_records = Column(BigInteger, nullable=True, comment="最大处理记录数")
    sort_field = Column(String(100), nullable=True, comment="增量字段")
    last_execution_id = Column(String(36), nullable=True, comment="最后执行ID(UUID)")
    # Timestamps are maintained by the database (server_default / onupdate).
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self):
        return f"<CollectionTask(id={self.id}, name={self.name}, status={self.status})>"
@@ -1,23 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class DataxTemplate(Base):
    """ORM model for a DataX job template (table ``t_dc_datax_templates``).

    A template maps a (source_type, target_type) pair to reusable job
    content; names are unique and system templates are flagged.
    """

    __tablename__ = "t_dc_datax_templates"

    id = Column(String(36), primary_key=True, comment="模板ID(UUID)")
    name = Column(String(255), nullable=False, unique=True, comment="模板名称")
    source_type = Column(String(50), nullable=False, comment="源数据源类型")
    target_type = Column(String(50), nullable=False, comment="目标数据源类型")
    template_content = Column(Text, nullable=False, comment="模板内容")
    description = Column(Text, nullable=True, comment="模板描述")
    version = Column(String(20), default='1.0.0', comment="版本号")
    is_system = Column(Boolean, default=False, comment="是否系统模板")
    # Timestamps are maintained by the database (server_default / onupdate).
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")

    def __repr__(self):
        return f"<DataxTemplate(id={self.id}, name={self.name}, source={self.source_type}, target={self.target_type})>"
@@ -1,34 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, Integer, BigInteger, DECIMAL, JSON, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class TaskExecution(Base):
    """ORM model for one execution of a collection task (table ``t_dc_task_executions``).

    Tracks progress, record counters, throughput and timing for a single
    run; ``datax_job_id`` links back to the underlying DataX job.
    """

    __tablename__ = "t_dc_task_executions"

    id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
    task_id = Column(String(36), nullable=False, comment="任务ID")
    task_name = Column(String(255), nullable=False, comment="任务名称")
    status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
    progress = Column(DECIMAL(5, 2), default=0.00, comment="进度百分比")
    records_total = Column(BigInteger, default=0, comment="总记录数")
    records_processed = Column(BigInteger, default=0, comment="已处理记录数")
    records_success = Column(BigInteger, default=0, comment="成功记录数")
    records_failed = Column(BigInteger, default=0, comment="失败记录数")
    throughput = Column(DECIMAL(10, 2), default=0.00, comment="吞吐量(条/秒)")
    data_size_bytes = Column(BigInteger, default=0, comment="数据量(字节)")
    started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
    completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
    duration_seconds = Column(Integer, default=0, comment="执行时长(秒)")
    config = Column(JSON, nullable=True, comment="执行配置")
    error_message = Column(Text, nullable=True, comment="错误信息")
    datax_job_id = Column(Text, nullable=True, comment="datax任务ID")
    result = Column(Text, nullable=True, comment="执行结果")
    # Timestamps are maintained by the database (server_default / onupdate).
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self):
        return f"<TaskExecution(id={self.id}, task_id={self.task_id}, status={self.status})>"
@@ -1,26 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, Integer, BigInteger, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class TaskLog(Base):
    """ORM model for a task execution log entry (table ``t_dc_task_log``).

    One row per run attempt, including timing (milliseconds), the worker
    process id, log file location and retry counter.
    """

    __tablename__ = "t_dc_task_log"

    id = Column(String(36), primary_key=True, comment="执行记录ID(UUID)")
    task_id = Column(String(36), nullable=False, comment="任务ID")
    task_name = Column(String(255), nullable=False, comment="任务名称")
    sync_mode = Column(String(20), default='FULL', comment="同步模式:FULL/INCREMENTAL")
    status = Column(String(20), default='RUNNING', comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
    start_time = Column(TIMESTAMP, nullable=True, comment="开始时间")
    end_time = Column(TIMESTAMP, nullable=True, comment="结束时间")
    duration = Column(BigInteger, nullable=True, comment="执行时长(毫秒)")
    process_id = Column(String(50), nullable=True, comment="进程ID")
    log_path = Column(String(500), nullable=True, comment="日志文件路径")
    error_msg = Column(Text, nullable=True, comment="错误信息")
    result = Column(Text, nullable=True, comment="执行结果")
    retry_times = Column(Integer, default=0, comment="重试次数")
    create_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")

    def __repr__(self):
        return f"<TaskLog(id={self.id}, task_id={self.task_id}, status={self.status})>"
@@ -1,7 +0,0 @@
|
|||||||
# app/models/common/__init__.py
|
|
||||||
|
|
||||||
from .chunk_upload_request import ChunkUploadRequest
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"ChunkUploadRequest"
|
|
||||||
]
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Integer, Text, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class ChunkUploadRequest(Base):
|
|
||||||
"""文件切片上传请求模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_chunk_upload_request"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, comment="UUID")
|
|
||||||
total_file_num = Column(Integer, nullable=True, comment="总文件数")
|
|
||||||
uploaded_file_num = Column(Integer, nullable=True, comment="已上传文件数")
|
|
||||||
upload_path = Column(String(256), nullable=True, comment="文件路径")
|
|
||||||
timeout = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传请求超时时间")
|
|
||||||
service_id = Column(String(64), nullable=True, comment="上传请求所属服务:DATA-MANAGEMENT(数据管理)")
|
|
||||||
check_info = Column(Text, nullable=True, comment="业务信息")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<ChunkUploadRequest(id={self.id}, service_id={self.service_id}, progress={self.uploaded_file_num}/{self.total_file_num})>"
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# app/models/dm/__init__.py
|
|
||||||
|
|
||||||
from .annotation_template import AnnotationTemplate
|
|
||||||
from .labeling_project import LabelingProject
|
|
||||||
from .dataset import Dataset
|
|
||||||
from .dataset_files import DatasetFiles
|
|
||||||
from .dataset_statistics import DatasetStatistics
|
|
||||||
from .dataset_tag import DatasetTag
|
|
||||||
from .tag import Tag
|
|
||||||
from .user import User
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"AnnotationTemplate",
|
|
||||||
"LabelingProject",
|
|
||||||
"Dataset",
|
|
||||||
"DatasetFiles",
|
|
||||||
"DatasetStatistics",
|
|
||||||
"DatasetTag",
|
|
||||||
"Tag",
|
|
||||||
"User"
|
|
||||||
]
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, JSON, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
class AnnotationTemplate(Base):
|
|
||||||
"""标注模板模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_annotation_templates"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
|
|
||||||
name = Column(String(32), nullable=False, comment="模板名称")
|
|
||||||
description = Column(String(255), nullable=True, comment="模板描述")
|
|
||||||
configuration = Column(JSON, nullable=True, comment="配置信息(JSON格式)")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<AnnotationTemplate(id={self.id}, name={self.name})>"
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_deleted(self) -> bool:
|
|
||||||
"""检查是否已被软删除"""
|
|
||||||
return self.deleted_at is not None
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, BigInteger, Integer, Boolean, JSON, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
class Dataset(Base):
|
|
||||||
"""数据集模型(支持医学影像、文本、问答等多种类型)"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_datasets"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
|
||||||
name = Column(String(255), nullable=False, comment="数据集名称")
|
|
||||||
description = Column(Text, nullable=True, comment="数据集描述")
|
|
||||||
dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER")
|
|
||||||
category = Column(String(100), nullable=True, comment="数据集分类:医学影像/问答/文献等")
|
|
||||||
path = Column(String(500), nullable=True, comment="数据存储路径")
|
|
||||||
format = Column(String(50), nullable=True, comment="数据格式:DCM/JPG/JSON/CSV等")
|
|
||||||
schema_info = Column(JSON, nullable=True, comment="数据结构信息")
|
|
||||||
size_bytes = Column(BigInteger, default=0, comment="数据大小(字节)")
|
|
||||||
file_count = Column(BigInteger, default=0, comment="文件数量")
|
|
||||||
record_count = Column(BigInteger, default=0, comment="记录数量")
|
|
||||||
retention_days = Column(Integer, default=0, comment="数据保留天数(0表示长期保留)")
|
|
||||||
tags = Column(JSON, nullable=True, comment="标签列表")
|
|
||||||
metadata = Column(JSON, nullable=True, comment="元数据信息")
|
|
||||||
status = Column(String(50), default='DRAFT', comment="状态:DRAFT/ACTIVE/ARCHIVED")
|
|
||||||
is_public = Column(Boolean, default=False, comment="是否公开")
|
|
||||||
is_featured = Column(Boolean, default=False, comment="是否推荐")
|
|
||||||
version = Column(BigInteger, nullable=False, default=0, comment="版本号")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
|
||||||
created_by = Column(String(255), nullable=True, comment="创建者")
|
|
||||||
updated_by = Column(String(255), nullable=True, comment="更新者")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<Dataset(id={self.id}, name={self.name}, type={self.dataset_type})>"
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, JSON, BigInteger, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
class DatasetFiles(Base):
|
|
||||||
"""DM数据集文件模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_dataset_files"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
|
||||||
dataset_id = Column(String(36), nullable=False, comment="所属数据集ID(UUID)")
|
|
||||||
file_name = Column(String(255), nullable=False, comment="文件名")
|
|
||||||
file_path = Column(String(1000), nullable=False, comment="文件路径")
|
|
||||||
file_type = Column(String(50), nullable=True, comment="文件格式:JPG/PNG/DCM/TXT等")
|
|
||||||
file_size = Column(BigInteger, default=0, comment="文件大小(字节)")
|
|
||||||
check_sum = Column(String(64), nullable=True, comment="文件校验和")
|
|
||||||
tags = Column(JSON, nullable=True, comment="文件标签信息")
|
|
||||||
metadata = Column(JSON, nullable=True, comment="文件元数据")
|
|
||||||
status = Column(String(50), default='ACTIVE', comment="文件状态:ACTIVE/DELETED/PROCESSING")
|
|
||||||
upload_time = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="上传时间")
|
|
||||||
last_access_time = Column(TIMESTAMP, nullable=True, comment="最后访问时间")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<DatasetFiles(id={self.id}, dataset_id={self.dataset_id}, file_name={self.file_name})>"
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Date, BigInteger, JSON, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
class DatasetStatistics(Base):
|
|
||||||
"""数据集统计信息模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_dataset_statistics"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
|
||||||
dataset_id = Column(String(36), nullable=False, comment="数据集ID(UUID)")
|
|
||||||
stat_date = Column(Date, nullable=False, comment="统计日期")
|
|
||||||
total_files = Column(BigInteger, default=0, comment="总文件数")
|
|
||||||
total_size = Column(BigInteger, default=0, comment="总大小(字节)")
|
|
||||||
processed_files = Column(BigInteger, default=0, comment="已处理文件数")
|
|
||||||
error_files = Column(BigInteger, default=0, comment="错误文件数")
|
|
||||||
download_count = Column(BigInteger, default=0, comment="下载次数")
|
|
||||||
view_count = Column(BigInteger, default=0, comment="查看次数")
|
|
||||||
quality_metrics = Column(JSON, nullable=True, comment="质量指标")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<DatasetStatistics(id={self.id}, dataset_id={self.dataset_id}, date={self.stat_date})>"
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class DatasetTag(Base):
|
|
||||||
"""数据集标签关联模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_dataset_tags"
|
|
||||||
|
|
||||||
dataset_id = Column(String(36), primary_key=True, comment="数据集ID(UUID)")
|
|
||||||
tag_id = Column(String(36), primary_key=True, comment="标签ID(UUID)")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<DatasetTag(dataset_id={self.dataset_id}, tag_id={self.tag_id})>"
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Integer, JSON, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
class LabelingProject(Base):
|
|
||||||
"""DM标注项目模型(原 DatasetMapping)"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_labeling_projects"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID主键ID")
|
|
||||||
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
|
|
||||||
name = Column(String(32), nullable=False, comment="项目名称")
|
|
||||||
labeling_project_id = Column(Integer, nullable=False, comment="Label Studio项目ID")
|
|
||||||
configuration = Column(JSON, nullable=True, comment="标签配置")
|
|
||||||
progress = Column(JSON, nullable=True, comment="标注进度统计")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<LabelingProject(id={self.id}, dataset_id={self.dataset_id}, name={self.name})>"
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_deleted(self) -> bool:
|
|
||||||
"""检查是否已被软删除"""
|
|
||||||
return self.deleted_at is not None
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, BigInteger, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
class Tag(Base):
|
|
||||||
"""标签模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_dm_tags"
|
|
||||||
|
|
||||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
|
||||||
name = Column(String(100), nullable=False, unique=True, comment="标签名称")
|
|
||||||
description = Column(Text, nullable=True, comment="标签描述")
|
|
||||||
category = Column(String(50), nullable=True, comment="标签分类")
|
|
||||||
color = Column(String(7), nullable=True, comment="标签颜色(十六进制)")
|
|
||||||
usage_count = Column(BigInteger, default=0, comment="使用次数")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<Tag(id={self.id}, name={self.name}, category={self.category})>"
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
# app/models/operator/__init__.py
|
|
||||||
|
|
||||||
from .operator import Operator
|
|
||||||
from .operator_category import OperatorCategory
|
|
||||||
from .operator_category_relation import OperatorCategoryRelation
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"Operator",
|
|
||||||
"OperatorCategory",
|
|
||||||
"OperatorCategoryRelation"
|
|
||||||
]
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Text, Boolean, TIMESTAMP
|
|
||||||
from sqlalchemy.sql import func
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class Operator(Base):
|
|
||||||
"""算子模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_operator"
|
|
||||||
|
|
||||||
id = Column(String(64), primary_key=True, comment="算子ID")
|
|
||||||
name = Column(String(64), nullable=True, comment="算子名称")
|
|
||||||
description = Column(String(256), nullable=True, comment="算子描述")
|
|
||||||
version = Column(String(256), nullable=True, comment="版本")
|
|
||||||
inputs = Column(String(256), nullable=True, comment="输入类型")
|
|
||||||
outputs = Column(String(256), nullable=True, comment="输出类型")
|
|
||||||
runtime = Column(Text, nullable=True, comment="运行时信息")
|
|
||||||
settings = Column(Text, nullable=True, comment="配置信息")
|
|
||||||
file_name = Column(Text, nullable=True, comment="文件名")
|
|
||||||
is_star = Column(Boolean, nullable=True, comment="是否收藏")
|
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
|
||||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<Operator(id={self.id}, name={self.name}, version={self.version})>"
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Integer
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class OperatorCategory(Base):
|
|
||||||
"""算子分类模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_operator_category"
|
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True, autoincrement=True, comment="分类ID")
|
|
||||||
name = Column(String(64), nullable=True, comment="分类名称")
|
|
||||||
type = Column(String(64), nullable=True, comment="分类类型")
|
|
||||||
parent_id = Column(Integer, nullable=True, comment="父分类ID")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<OperatorCategory(id={self.id}, name={self.name}, type={self.type})>"
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
from sqlalchemy import Column, String, Integer
|
|
||||||
from app.db.database import Base
|
|
||||||
|
|
||||||
class OperatorCategoryRelation(Base):
|
|
||||||
"""算子分类关联模型"""
|
|
||||||
|
|
||||||
__tablename__ = "t_operator_category_relation"
|
|
||||||
|
|
||||||
category_id = Column(Integer, primary_key=True, comment="分类ID")
|
|
||||||
operator_id = Column(String(64), primary_key=True, comment="算子ID")
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"<OperatorCategoryRelation(category_id={self.category_id}, operator_id={self.operator_id})>"
|
|
||||||
11
runtime/datamate-python/app/module/__init__.py
Normal file
11
runtime/datamate-python/app/module/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from .annotation.interface import router as annotation_router
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
router.include_router(annotation_router)
|
||||||
|
|
||||||
|
__all__ = ["router"]
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .labelstudio import LabelStudioClient
|
||||||
|
|
||||||
|
__all__ = ["LabelStudioClient"]
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .client import Client as LabelStudioClient
|
||||||
|
|
||||||
|
__all__ = ["LabelStudioClient"]
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
import httpx
|
import httpx
|
||||||
from typing import Optional, Dict, Any, List
|
from typing import Optional, Dict, Any, List
|
||||||
import json
|
|
||||||
|
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.core.logging import get_logger
|
from app.core.logging import get_logger
|
||||||
from app.schemas.label_studio import (
|
|
||||||
|
from .schema import (
|
||||||
LabelStudioProject,
|
LabelStudioProject,
|
||||||
LabelStudioCreateProjectRequest,
|
LabelStudioCreateProjectRequest,
|
||||||
LabelStudioCreateTaskRequest
|
LabelStudioCreateTaskRequest
|
||||||
@@ -88,7 +88,7 @@ class Client:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Label Studio client initialized: {self.base_url}")
|
logger.debug(f"Label Studio client initialized: {self.base_url}")
|
||||||
|
|
||||||
def get_label_config_by_type(self, data_type: str) -> str:
|
def get_label_config_by_type(self, data_type: str) -> str:
|
||||||
"""根据数据类型获取标注配置"""
|
"""根据数据类型获取标注配置"""
|
||||||
@@ -103,7 +103,7 @@ class Client:
|
|||||||
) -> Optional[Dict[str, Any]]:
|
) -> Optional[Dict[str, Any]]:
|
||||||
"""创建Label Studio项目"""
|
"""创建Label Studio项目"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Creating Label Studio project: {title}")
|
logger.debug(f"Creating Label Studio project: {title}")
|
||||||
|
|
||||||
if not label_config:
|
if not label_config:
|
||||||
label_config = self.get_label_config_by_type(data_type)
|
label_config = self.get_label_config_by_type(data_type)
|
||||||
@@ -123,7 +123,7 @@ class Client:
|
|||||||
if not project_id:
|
if not project_id:
|
||||||
raise Exception("Label Studio response does not contain project ID")
|
raise Exception("Label Studio response does not contain project ID")
|
||||||
|
|
||||||
logger.info(f"Project created successfully, ID: {project_id}")
|
logger.debug(f"Project created successfully, ID: {project_id}")
|
||||||
return project
|
return project
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
@@ -142,7 +142,7 @@ class Client:
|
|||||||
) -> Optional[Dict[str, Any]]:
|
) -> Optional[Dict[str, Any]]:
|
||||||
"""批量导入任务到Label Studio项目"""
|
"""批量导入任务到Label Studio项目"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Importing {len(tasks)} tasks into project {project_id}")
|
logger.debug(f"Importing {len(tasks)} tasks into project {project_id}")
|
||||||
|
|
||||||
response = await self.client.post(
|
response = await self.client.post(
|
||||||
f"/api/projects/{project_id}/import",
|
f"/api/projects/{project_id}/import",
|
||||||
@@ -157,7 +157,7 @@ class Client:
|
|||||||
result = response.json()
|
result = response.json()
|
||||||
task_count = result.get("task_count", len(tasks))
|
task_count = result.get("task_count", len(tasks))
|
||||||
|
|
||||||
logger.info(f"Tasks imported successfully: {task_count}")
|
logger.debug(f"Tasks imported successfully: {task_count}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
@@ -236,11 +236,12 @@ class Client:
|
|||||||
|
|
||||||
# 如果指定了page,直接获取单页任务
|
# 如果指定了page,直接获取单页任务
|
||||||
if page is not None:
|
if page is not None:
|
||||||
logger.info(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})")
|
logger.debug(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})")
|
||||||
|
|
||||||
response = await self.client.get(
|
response = await self.client.get(
|
||||||
f"/api/projects/{pid}/tasks",
|
f"/api/tasks",
|
||||||
params={
|
params={
|
||||||
|
"project": pid,
|
||||||
"page": page,
|
"page": page,
|
||||||
"page_size": page_size
|
"page_size": page_size
|
||||||
}
|
}
|
||||||
@@ -259,17 +260,13 @@ class Client:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 如果未指定page,获取所有任务
|
# 如果未指定page,获取所有任务
|
||||||
logger.info(f"Start fetching all tasks for project {pid} (page_size={page_size})")
|
logger.debug(f"(page) not specified, fetching all tasks.")
|
||||||
all_tasks = []
|
all_tasks = []
|
||||||
current_page = 1
|
|
||||||
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
response = await self.client.get(
|
response = await self.client.get(
|
||||||
f"/api/projects/{pid}/tasks",
|
f"/api/tasks",
|
||||||
params={
|
params={
|
||||||
"page": current_page,
|
"project": pid
|
||||||
"page_size": page_size
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
@@ -278,28 +275,11 @@ class Client:
|
|||||||
tasks = result.get("tasks", [])
|
tasks = result.get("tasks", [])
|
||||||
|
|
||||||
if not tasks:
|
if not tasks:
|
||||||
logger.debug(f"No more tasks on page {current_page}")
|
logger.debug(f"No tasks found for this project.")
|
||||||
break
|
|
||||||
|
|
||||||
all_tasks.extend(tasks)
|
all_tasks.extend(tasks)
|
||||||
logger.debug(f"Fetched page {current_page}, {len(tasks)} tasks")
|
logger.debug(f"Fetched {len(tasks)} tasks.")
|
||||||
|
|
||||||
# 检查是否还有更多页
|
|
||||||
total = result.get("total", 0)
|
|
||||||
if len(all_tasks) >= total:
|
|
||||||
break
|
|
||||||
|
|
||||||
current_page += 1
|
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
|
||||||
if e.response.status_code == 404:
|
|
||||||
# 超出页数范围,结束分页
|
|
||||||
logger.debug(f"Reached last page (page {current_page})")
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
logger.info(f"Fetched all tasks for project {pid}, total {len(all_tasks)}")
|
|
||||||
|
|
||||||
# 返回所有任务,不包含分页信息
|
# 返回所有任务,不包含分页信息
|
||||||
return {
|
return {
|
||||||
@@ -321,12 +301,12 @@ class Client:
|
|||||||
) -> bool:
|
) -> bool:
|
||||||
"""删除单个任务"""
|
"""删除单个任务"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Deleting task: {task_id}")
|
logger.debug(f"Deleting task: {task_id}")
|
||||||
|
|
||||||
response = await self.client.delete(f"/api/tasks/{task_id}")
|
response = await self.client.delete(f"/api/tasks/{task_id}")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
logger.info(f"Task deleted: {task_id}")
|
logger.debug(f"Task deleted: {task_id}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
@@ -342,7 +322,7 @@ class Client:
|
|||||||
) -> Dict[str, int]:
|
) -> Dict[str, int]:
|
||||||
"""批量删除任务"""
|
"""批量删除任务"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Deleting {len(task_ids)} tasks in batch")
|
logger.debug(f"Deleting {len(task_ids)} tasks in batch")
|
||||||
|
|
||||||
successful_deletions = 0
|
successful_deletions = 0
|
||||||
failed_deletions = 0
|
failed_deletions = 0
|
||||||
@@ -353,7 +333,7 @@ class Client:
|
|||||||
else:
|
else:
|
||||||
failed_deletions += 1
|
failed_deletions += 1
|
||||||
|
|
||||||
logger.info(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}")
|
logger.debug(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"successful": successful_deletions,
|
"successful": successful_deletions,
|
||||||
@@ -372,7 +352,7 @@ class Client:
|
|||||||
async def get_project(self, project_id: int) -> Optional[Dict[str, Any]]:
|
async def get_project(self, project_id: int) -> Optional[Dict[str, Any]]:
|
||||||
"""获取项目信息"""
|
"""获取项目信息"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Fetching project info: {project_id}")
|
logger.debug(f"Fetching project info: {project_id}")
|
||||||
|
|
||||||
response = await self.client.get(f"/api/projects/{project_id}")
|
response = await self.client.get(f"/api/projects/{project_id}")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
@@ -389,12 +369,12 @@ class Client:
|
|||||||
async def delete_project(self, project_id: int) -> bool:
|
async def delete_project(self, project_id: int) -> bool:
|
||||||
"""删除项目"""
|
"""删除项目"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Deleting project: {project_id}")
|
logger.debug(f"Deleting project: {project_id}")
|
||||||
|
|
||||||
response = await self.client.delete(f"/api/projects/{project_id}")
|
response = await self.client.delete(f"/api/projects/{project_id}")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
logger.info(f"Project deleted: {project_id}")
|
logger.debug(f"Project deleted: {project_id}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
@@ -427,7 +407,7 @@ class Client:
|
|||||||
创建的存储配置信息,失败返回 None
|
创建的存储配置信息,失败返回 None
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Creating local storage for project {project_id}: {path}")
|
logger.debug(f"Creating local storage for project {project_id}: {path}")
|
||||||
|
|
||||||
storage_data = {
|
storage_data = {
|
||||||
"project": project_id,
|
"project": project_id,
|
||||||
@@ -450,7 +430,7 @@ class Client:
|
|||||||
storage = response.json()
|
storage = response.json()
|
||||||
storage_id = storage.get("id")
|
storage_id = storage.get("id")
|
||||||
|
|
||||||
logger.info(f"Local storage created successfully, ID: {storage_id}")
|
logger.debug(f"Local storage created successfully, ID: {storage_id}")
|
||||||
return storage
|
return storage
|
||||||
|
|
||||||
except httpx.HTTPStatusError as e:
|
except httpx.HTTPStatusError as e:
|
||||||
@@ -464,6 +444,6 @@ class Client:
|
|||||||
"""关闭客户端连接"""
|
"""关闭客户端连接"""
|
||||||
try:
|
try:
|
||||||
await self.client.aclose()
|
await self.client.aclose()
|
||||||
logger.info("Label Studio client closed")
|
logger.debug("Label Studio client closed")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while closing Label Studio client: {e}")
|
logger.error(f"Error while closing Label Studio client: {e}")
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from .common import BaseResponseModel
|
|
||||||
|
from app.module.shared.schema import BaseResponseModel
|
||||||
|
|
||||||
|
|
||||||
class LabelStudioProject(BaseResponseModel):
|
class LabelStudioProject(BaseResponseModel):
|
||||||
"""Label Studio项目模型"""
|
"""Label Studio项目模型"""
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from .project import router as project_router
|
||||||
|
from .task import router as task_router
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/annotation",
|
||||||
|
tags = ["annotation"]
|
||||||
|
)
|
||||||
|
|
||||||
|
router.include_router(project_router)
|
||||||
|
router.include_router(task_router)
|
||||||
@@ -0,0 +1,353 @@
|
|||||||
|
from typing import Optional
|
||||||
|
import math
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db.session import get_db
|
||||||
|
from app.module.shared.schema import StandardResponse, PaginatedData
|
||||||
|
from app.module.dataset import DatasetManagementService
|
||||||
|
from app.core.logging import get_logger
|
||||||
|
from app.core.config import settings
|
||||||
|
|
||||||
|
from ..client import LabelStudioClient
|
||||||
|
from ..service.mapping import DatasetMappingService
|
||||||
|
from ..schema import (
|
||||||
|
DatasetMappingCreateRequest,
|
||||||
|
DatasetMappingCreateResponse,
|
||||||
|
DeleteDatasetResponse,
|
||||||
|
DatasetMappingResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/project",
|
||||||
|
tags=["annotation/project"]
|
||||||
|
)
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
@router.post("/", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
|
||||||
|
async def create_mapping(
|
||||||
|
request: DatasetMappingCreateRequest,
|
||||||
|
db: AsyncSession = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
创建数据集映射
|
||||||
|
|
||||||
|
根据指定的DM程序中的数据集,创建Label Studio中的数据集,
|
||||||
|
在数据库中记录这一关联关系,返回Label Studio数据集的ID
|
||||||
|
|
||||||
|
注意:一个数据集可以创建多个标注项目
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
dm_client = DatasetManagementService(db)
|
||||||
|
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||||
|
token=settings.label_studio_user_token)
|
||||||
|
service = DatasetMappingService(db)
|
||||||
|
|
||||||
|
logger.info(f"Create dataset mapping request: {request.dataset_id}")
|
||||||
|
|
||||||
|
# 从DM服务获取数据集信息
|
||||||
|
dataset_info = await dm_client.get_dataset(request.dataset_id)
|
||||||
|
if not dataset_info:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Dataset not found in DM service: {request.dataset_id}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 确定数据类型(基于数据集类型)
|
||||||
|
data_type = "image" # 默认值
|
||||||
|
if dataset_info.type and dataset_info.type.code:
|
||||||
|
type_code = dataset_info.type.code.lower()
|
||||||
|
if "audio" in type_code:
|
||||||
|
data_type = "audio"
|
||||||
|
elif "video" in type_code:
|
||||||
|
data_type = "video"
|
||||||
|
elif "text" in type_code:
|
||||||
|
data_type = "text"
|
||||||
|
|
||||||
|
project_name = f"{dataset_info.name}"
|
||||||
|
|
||||||
|
# 在Label Studio中创建项目
|
||||||
|
project_data = await ls_client.create_project(
|
||||||
|
title=project_name,
|
||||||
|
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
|
||||||
|
data_type=data_type
|
||||||
|
)
|
||||||
|
|
||||||
|
if not project_data:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail="Fail to create Label Studio project."
|
||||||
|
)
|
||||||
|
|
||||||
|
project_id = project_data["id"]
|
||||||
|
|
||||||
|
# 配置本地存储:dataset/<id>
|
||||||
|
local_storage_path = f"{settings.label_studio_local_storage_dataset_base_path}/{request.dataset_id}"
|
||||||
|
storage_result = await ls_client.create_local_storage(
|
||||||
|
project_id=project_id,
|
||||||
|
path=local_storage_path,
|
||||||
|
title="Dataset_BLOB",
|
||||||
|
use_blob_urls=True,
|
||||||
|
description=f"Local storage for dataset {dataset_info.name}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not storage_result:
|
||||||
|
# 本地存储配置失败,记录警告但不中断流程
|
||||||
|
logger.warning(f"Failed to configure local storage for project {project_id}")
|
||||||
|
else:
|
||||||
|
logger.info(f"Local storage configured for project {project_id}: {local_storage_path}")
|
||||||
|
|
||||||
|
# 创建映射关系,包含项目名称
|
||||||
|
mapping = await service.create_mapping(
|
||||||
|
request,
|
||||||
|
str(project_id),
|
||||||
|
project_name
|
||||||
|
)
|
||||||
|
|
||||||
|
response_data = DatasetMappingCreateResponse(
|
||||||
|
id=mapping.id,
|
||||||
|
labeling_project_id=str(mapping.labeling_project_id),
|
||||||
|
labeling_project_name=mapping.name or project_name,
|
||||||
|
message="Dataset mapping created successfully"
|
||||||
|
)
|
||||||
|
|
||||||
|
return StandardResponse(
|
||||||
|
code=201,
|
||||||
|
message="success",
|
||||||
|
data=response_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error while creating dataset mapping: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
|
|
||||||
|
@router.get("/", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
||||||
|
async def list_mappings(
|
||||||
|
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
||||||
|
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
||||||
|
db: AsyncSession = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
查询所有映射关系(分页)
|
||||||
|
|
||||||
|
返回所有有效的数据集映射关系(未被软删除的),支持分页查询
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
service = DatasetMappingService(db)
|
||||||
|
|
||||||
|
# 计算 skip
|
||||||
|
skip = (page - 1) * page_size
|
||||||
|
|
||||||
|
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
|
||||||
|
|
||||||
|
# 获取数据和总数
|
||||||
|
mappings, total = await service.get_all_mappings_with_count(
|
||||||
|
skip=skip,
|
||||||
|
limit=page_size
|
||||||
|
)
|
||||||
|
|
||||||
|
# 计算总页数
|
||||||
|
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
||||||
|
|
||||||
|
# 构造分页响应
|
||||||
|
paginated_data = PaginatedData(
|
||||||
|
page=page,
|
||||||
|
size=page_size,
|
||||||
|
total_elements=total,
|
||||||
|
total_pages=total_pages,
|
||||||
|
content=mappings
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
|
||||||
|
|
||||||
|
return StandardResponse(
|
||||||
|
code=200,
|
||||||
|
message="success",
|
||||||
|
data=paginated_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error listing mappings: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
|
|
||||||
|
@router.get("/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse])
|
||||||
|
async def get_mapping(
|
||||||
|
mapping_id: str,
|
||||||
|
db: AsyncSession = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
根据 UUID 查询单个映射关系
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
service = DatasetMappingService(db)
|
||||||
|
|
||||||
|
logger.info(f"Get mapping: {mapping_id}")
|
||||||
|
|
||||||
|
mapping = await service.get_mapping_by_uuid(mapping_id)
|
||||||
|
|
||||||
|
if not mapping:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Mapping not found: {mapping_id}"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Found mapping: {mapping.id}")
|
||||||
|
|
||||||
|
return StandardResponse(
|
||||||
|
code=200,
|
||||||
|
message="success",
|
||||||
|
data=mapping
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting mapping: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
|
|
||||||
|
@router.get("/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
||||||
|
async def get_mappings_by_source(
|
||||||
|
dataset_id: str,
|
||||||
|
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
||||||
|
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
||||||
|
db: AsyncSession = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
根据源数据集 ID 查询所有映射关系(分页)
|
||||||
|
|
||||||
|
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
service = DatasetMappingService(db)
|
||||||
|
|
||||||
|
# 计算 skip
|
||||||
|
skip = (page - 1) * page_size
|
||||||
|
|
||||||
|
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}")
|
||||||
|
|
||||||
|
# 获取数据和总数
|
||||||
|
mappings, total = await service.get_mappings_by_source_with_count(
|
||||||
|
dataset_id=dataset_id,
|
||||||
|
skip=skip,
|
||||||
|
limit=page_size
|
||||||
|
)
|
||||||
|
|
||||||
|
# 计算总页数
|
||||||
|
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
||||||
|
|
||||||
|
# 构造分页响应
|
||||||
|
paginated_data = PaginatedData(
|
||||||
|
page=page,
|
||||||
|
size=page_size,
|
||||||
|
total_elements=total,
|
||||||
|
total_pages=total_pages,
|
||||||
|
content=mappings
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
|
||||||
|
|
||||||
|
return StandardResponse(
|
||||||
|
code=200,
|
||||||
|
message="success",
|
||||||
|
data=paginated_data
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting mappings: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
|
|
||||||
|
@router.delete("/", response_model=StandardResponse[DeleteDatasetResponse])
|
||||||
|
async def delete_mapping(
|
||||||
|
m: Optional[str] = Query(None, description="映射UUID"),
|
||||||
|
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
|
||||||
|
db: AsyncSession = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
删除映射关系和对应的 Label Studio 项目
|
||||||
|
|
||||||
|
可以通过以下任一方式指定要删除的映射:
|
||||||
|
- m: 映射UUID
|
||||||
|
- proj: Label Studio项目ID
|
||||||
|
- 两者都提供(优先使用 m)
|
||||||
|
|
||||||
|
此操作会:
|
||||||
|
1. 删除 Label Studio 中的项目
|
||||||
|
2. 软删除数据库中的映射记录
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 至少需要提供一个参数
|
||||||
|
if not m and not proj:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
|
||||||
|
)
|
||||||
|
|
||||||
|
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||||
|
token=settings.label_studio_user_token)
|
||||||
|
service = DatasetMappingService(db)
|
||||||
|
|
||||||
|
# 优先使用 mapping_id 查询
|
||||||
|
if m:
|
||||||
|
logger.debug(f"Deleting by mapping UUID: {m}")
|
||||||
|
mapping = await service.get_mapping_by_uuid(m)
|
||||||
|
# 如果没有提供 m,使用 proj 查询
|
||||||
|
elif proj:
|
||||||
|
logger.debug(f"Deleting by project ID: {proj}")
|
||||||
|
mapping = await service.get_mapping_by_labeling_project_id(proj)
|
||||||
|
else:
|
||||||
|
mapping = None
|
||||||
|
|
||||||
|
if not mapping:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Mapping either not found or not specified."
|
||||||
|
)
|
||||||
|
|
||||||
|
id = mapping.id
|
||||||
|
labeling_project_id = mapping.labeling_project_id
|
||||||
|
labeling_project_name = mapping.name
|
||||||
|
|
||||||
|
logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}")
|
||||||
|
|
||||||
|
# 1. 删除 Label Studio 项目
|
||||||
|
try:
|
||||||
|
delete_success = await ls_client.delete_project(int(labeling_project_id))
|
||||||
|
if delete_success:
|
||||||
|
logger.debug(f"Successfully deleted Label Studio project: {labeling_project_id}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Failed to delete Label Studio project or project not found: {labeling_project_id}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error deleting Label Studio project: {e}")
|
||||||
|
# 继续执行,即使 Label Studio 项目删除失败也要删除映射记录
|
||||||
|
|
||||||
|
# 2. 软删除映射记录
|
||||||
|
soft_delete_success = await service.soft_delete_mapping(id)
|
||||||
|
|
||||||
|
if not soft_delete_success:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail="Failed to delete mapping record"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Successfully deleted mapping: {id}, Label Studio project: {labeling_project_id}")
|
||||||
|
|
||||||
|
return StandardResponse(
|
||||||
|
code=200,
|
||||||
|
message="success",
|
||||||
|
data=DeleteDatasetResponse(
|
||||||
|
id=id,
|
||||||
|
status="success",
|
||||||
|
message=f"Successfully deleted mapping and Label Studio project '{labeling_project_name}'"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error deleting mapping: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail="Internal server error")
|
||||||
@@ -2,24 +2,29 @@ from fastapi import APIRouter, Depends, HTTPException, Query
|
|||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from app.db.database import get_db
|
from app.db.session import get_db
|
||||||
from app.services.dataset_mapping_service import DatasetMappingService
|
from app.module.shared.schema import StandardResponse
|
||||||
from app.services.sync_service import SyncService
|
from app.module.dataset import DatasetManagementService
|
||||||
from app.infrastructure import DatamateClient, LabelStudioClient
|
from app.core.logging import get_logger
|
||||||
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
|
from app.core.config import settings
|
||||||
from app.schemas.dataset_mapping import (
|
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
|
||||||
DatasetMappingResponse,
|
|
||||||
|
from ..client import LabelStudioClient
|
||||||
|
from ..service.sync import SyncService
|
||||||
|
from ..service.mapping import DatasetMappingService
|
||||||
|
from ..schema import (
|
||||||
SyncDatasetRequest,
|
SyncDatasetRequest,
|
||||||
SyncDatasetResponse,
|
SyncDatasetResponse,
|
||||||
)
|
)
|
||||||
from app.schemas import StandardResponse
|
|
||||||
from app.core.logging import get_logger
|
|
||||||
from app.core.config import settings
|
|
||||||
from . import project_router
|
|
||||||
|
|
||||||
|
|
||||||
|
router = APIRouter(
|
||||||
|
prefix="/task",
|
||||||
|
tags=["annotation/task"]
|
||||||
|
)
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@project_router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
|
@router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
|
||||||
async def sync_dataset_content(
|
async def sync_dataset_content(
|
||||||
request: SyncDatasetRequest,
|
request: SyncDatasetRequest,
|
||||||
db: AsyncSession = Depends(get_db)
|
db: AsyncSession = Depends(get_db)
|
||||||
@@ -33,22 +38,22 @@ async def sync_dataset_content(
|
|||||||
try:
|
try:
|
||||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||||
token=settings.label_studio_user_token)
|
token=settings.label_studio_user_token)
|
||||||
dm_client = DatamateClient(db)
|
dm_client = DatasetManagementService(db)
|
||||||
mapping_service = DatasetMappingService(db)
|
mapping_service = DatasetMappingService(db)
|
||||||
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
||||||
|
|
||||||
logger.info(f"Sync dataset content request: mapping_id={request.mapping_id}")
|
logger.info(f"Sync dataset content request: mapping_id={request.id}")
|
||||||
|
|
||||||
# 根据 mapping_id 获取映射关系
|
# request.id 合法性校验
|
||||||
mapping = await mapping_service.get_mapping_by_uuid(request.mapping_id)
|
mapping = await mapping_service.get_mapping_by_uuid(request.id)
|
||||||
if not mapping:
|
if not mapping:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=404,
|
status_code=404,
|
||||||
detail=f"Mapping not found: {request.mapping_id}"
|
detail=f"Mapping not found: {request.id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 执行同步(使用映射中的源数据集UUID)
|
# 执行同步(使用映射中的源数据集UUID)
|
||||||
result = await sync_service.sync_dataset_files(request.mapping_id, request.batch_size)
|
result = await sync_service.sync_dataset_files(request.id, request.batch_size)
|
||||||
|
|
||||||
logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files")
|
logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files")
|
||||||
|
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
from .mapping import (
|
||||||
|
DatasetMappingBase,
|
||||||
|
DatasetMappingCreateRequest,
|
||||||
|
DatasetMappingCreateResponse,
|
||||||
|
DatasetMappingUpdateRequest,
|
||||||
|
DatasetMappingResponse,
|
||||||
|
DeleteDatasetResponse
|
||||||
|
)
|
||||||
|
|
||||||
|
from .sync import (
|
||||||
|
SyncDatasetRequest,
|
||||||
|
SyncDatasetResponse
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"DatasetMappingBase",
|
||||||
|
"DatasetMappingCreateRequest",
|
||||||
|
"DatasetMappingCreateResponse",
|
||||||
|
"DatasetMappingUpdateRequest",
|
||||||
|
"DatasetMappingResponse",
|
||||||
|
"SyncDatasetRequest",
|
||||||
|
"SyncDatasetResponse",
|
||||||
|
"DeleteDatasetResponse"
|
||||||
|
]
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
from pydantic import Field
|
||||||
|
from typing import Optional
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from app.module.shared.schema import BaseResponseModel
|
||||||
|
|
||||||
|
class DatasetMappingBase(BaseResponseModel):
|
||||||
|
"""数据集映射 基础模型"""
|
||||||
|
dataset_id: str = Field(..., description="源数据集ID")
|
||||||
|
|
||||||
|
class DatasetMappingCreateRequest(DatasetMappingBase):
|
||||||
|
"""数据集映射 创建 请求模型"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
class DatasetMappingCreateResponse(BaseResponseModel):
|
||||||
|
"""数据集映射 创建 响应模型"""
|
||||||
|
id: str = Field(..., description="映射UUID")
|
||||||
|
labeling_project_id: str = Field(..., description="Label Studio项目ID")
|
||||||
|
labeling_project_name: str = Field(..., description="Label Studio项目名称")
|
||||||
|
message: str = Field(..., description="响应消息")
|
||||||
|
|
||||||
|
class DatasetMappingUpdateRequest(BaseResponseModel):
|
||||||
|
"""数据集映射 更新 请求模型"""
|
||||||
|
dataset_id: Optional[str] = Field(None, description="源数据集ID")
|
||||||
|
|
||||||
|
class DatasetMappingResponse(DatasetMappingBase):
|
||||||
|
"""数据集映射 查询 响应模型"""
|
||||||
|
id: str = Field(..., description="映射UUID")
|
||||||
|
labeling_project_id: str = Field(..., description="标注项目ID")
|
||||||
|
name: Optional[str] = Field(None, description="标注项目名称")
|
||||||
|
created_at: datetime = Field(..., description="创建时间")
|
||||||
|
deleted_at: Optional[datetime] = Field(None, description="删除时间")
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
from_attributes = True
|
||||||
|
populate_by_name = True
|
||||||
|
|
||||||
|
class DeleteDatasetResponse(BaseResponseModel):
|
||||||
|
"""删除数据集响应模型"""
|
||||||
|
id: str = Field(..., description="映射UUID")
|
||||||
|
status: str = Field(..., description="删除状态")
|
||||||
|
message: str = Field(..., description="响应消息")
|
||||||
19
runtime/datamate-python/app/module/annotation/schema/sync.py
Normal file
19
runtime/datamate-python/app/module/annotation/schema/sync.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
from pydantic import Field
|
||||||
|
from typing import Optional
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from app.module.shared.schema import BaseResponseModel
|
||||||
|
|
||||||
|
|
||||||
|
class SyncDatasetRequest(BaseResponseModel):
|
||||||
|
"""同步数据集请求模型"""
|
||||||
|
id: str = Field(..., description="映射ID(mapping UUID)")
|
||||||
|
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
|
||||||
|
|
||||||
|
class SyncDatasetResponse(BaseResponseModel):
|
||||||
|
"""同步数据集响应模型"""
|
||||||
|
id: str = Field(..., description="映射UUID")
|
||||||
|
status: str = Field(..., description="同步状态")
|
||||||
|
synced_files: int = Field(..., description="已同步文件数量")
|
||||||
|
total_files: int = Field(0, description="总文件数量")
|
||||||
|
message: str = Field(..., description="响应消息")
|
||||||
@@ -5,13 +5,13 @@ from typing import Optional, List, Tuple
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from app.models.dm.labeling_project import LabelingProject
|
from app.core.logging import get_logger
|
||||||
from app.schemas.dataset_mapping import (
|
from app.db.models import LabelingProject
|
||||||
|
from app.module.annotation.schema import (
|
||||||
DatasetMappingCreateRequest,
|
DatasetMappingCreateRequest,
|
||||||
DatasetMappingUpdateRequest,
|
DatasetMappingUpdateRequest,
|
||||||
DatasetMappingResponse
|
DatasetMappingResponse
|
||||||
)
|
)
|
||||||
from app.core.logging import get_logger
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -24,24 +24,24 @@ class DatasetMappingService:
|
|||||||
async def create_mapping(
|
async def create_mapping(
|
||||||
self,
|
self,
|
||||||
mapping_data: DatasetMappingCreateRequest,
|
mapping_data: DatasetMappingCreateRequest,
|
||||||
labelling_project_id: str,
|
labeling_project_id: str,
|
||||||
labelling_project_name: str
|
labeling_project_name: str
|
||||||
) -> DatasetMappingResponse:
|
) -> DatasetMappingResponse:
|
||||||
"""创建数据集映射"""
|
"""创建数据集映射"""
|
||||||
logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labelling_project_id}")
|
logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labeling_project_id}")
|
||||||
|
|
||||||
db_mapping = LabelingProject(
|
db_mapping = LabelingProject(
|
||||||
mapping_id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
dataset_id=mapping_data.dataset_id,
|
dataset_id=mapping_data.dataset_id,
|
||||||
labelling_project_id=labelling_project_id,
|
labeling_project_id=labeling_project_id,
|
||||||
labelling_project_name=labelling_project_name
|
name=labeling_project_name
|
||||||
)
|
)
|
||||||
|
|
||||||
self.db.add(db_mapping)
|
self.db.add(db_mapping)
|
||||||
await self.db.commit()
|
await self.db.commit()
|
||||||
await self.db.refresh(db_mapping)
|
await self.db.refresh(db_mapping)
|
||||||
|
|
||||||
logger.info(f"Mapping created: {db_mapping.id}")
|
logger.debug(f"Mapping created: {db_mapping.id}")
|
||||||
return DatasetMappingResponse.model_validate(db_mapping)
|
return DatasetMappingResponse.model_validate(db_mapping)
|
||||||
|
|
||||||
async def get_mapping_by_source_uuid(
|
async def get_mapping_by_source_uuid(
|
||||||
@@ -89,16 +89,16 @@ class DatasetMappingService:
|
|||||||
logger.debug(f"Found {len(mappings)} mappings")
|
logger.debug(f"Found {len(mappings)} mappings")
|
||||||
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings]
|
return [DatasetMappingResponse.model_validate(mapping) for mapping in mappings]
|
||||||
|
|
||||||
async def get_mapping_by_labelling_project_id(
|
async def get_mapping_by_labeling_project_id(
|
||||||
self,
|
self,
|
||||||
labelling_project_id: str
|
labeling_project_id: str
|
||||||
) -> Optional[DatasetMappingResponse]:
|
) -> Optional[DatasetMappingResponse]:
|
||||||
"""根据Label Studio项目ID获取映射"""
|
"""根据Label Studio项目ID获取映射"""
|
||||||
logger.debug(f"Get mapping by Label Studio project id: {labelling_project_id}")
|
logger.debug(f"Get mapping by Label Studio project id: {labeling_project_id}")
|
||||||
|
|
||||||
result = await self.db.execute(
|
result = await self.db.execute(
|
||||||
select(LabelingProject).where(
|
select(LabelingProject).where(
|
||||||
LabelingProject.labeling_project_id == labelling_project_id,
|
LabelingProject.labeling_project_id == labeling_project_id,
|
||||||
LabelingProject.deleted_at.is_(None)
|
LabelingProject.deleted_at.is_(None)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -108,7 +108,7 @@ class DatasetMappingService:
|
|||||||
logger.debug(f"Found mapping: {mapping.mapping_id}")
|
logger.debug(f"Found mapping: {mapping.mapping_id}")
|
||||||
return DatasetMappingResponse.model_validate(mapping)
|
return DatasetMappingResponse.model_validate(mapping)
|
||||||
|
|
||||||
logger.debug(f"No mapping found for Label Studio project id: {labelling_project_id}")
|
logger.debug(f"No mapping found for Label Studio project id: {labeling_project_id}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def get_mapping_by_uuid(self, mapping_id: str) -> Optional[DatasetMappingResponse]:
|
async def get_mapping_by_uuid(self, mapping_id: str) -> Optional[DatasetMappingResponse]:
|
||||||
@@ -156,21 +156,6 @@ class DatasetMappingService:
|
|||||||
return await self.get_mapping_by_uuid(mapping_id)
|
return await self.get_mapping_by_uuid(mapping_id)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def update_last_updated_at(self, mapping_id: str) -> bool:
|
|
||||||
"""更新最后更新时间"""
|
|
||||||
logger.debug(f"Update mapping last updated at: {mapping_id}")
|
|
||||||
|
|
||||||
result = await self.db.execute(
|
|
||||||
update(LabelingProject)
|
|
||||||
.where(
|
|
||||||
LabelingProject.id == mapping_id,
|
|
||||||
LabelingProject.deleted_at.is_(None)
|
|
||||||
)
|
|
||||||
.values(last_updated_at=datetime.utcnow())
|
|
||||||
)
|
|
||||||
await self.db.commit()
|
|
||||||
return result.rowcount > 0
|
|
||||||
|
|
||||||
async def soft_delete_mapping(self, mapping_id: str) -> bool:
|
async def soft_delete_mapping(self, mapping_id: str) -> bool:
|
||||||
"""软删除映射"""
|
"""软删除映射"""
|
||||||
logger.info(f"Soft delete mapping: {mapping_id}")
|
logger.info(f"Soft delete mapping: {mapping_id}")
|
||||||
@@ -1,10 +1,13 @@
|
|||||||
from typing import Optional, List, Dict, Any, Tuple
|
from typing import Optional, List, Dict, Any, Tuple
|
||||||
from app.infrastructure import LabelStudioClient, DatamateClient
|
from app.module.dataset import DatasetManagementService
|
||||||
from app.services.dataset_mapping_service import DatasetMappingService
|
|
||||||
from app.schemas.dataset_mapping import SyncDatasetResponse
|
|
||||||
from app.core.logging import get_logger
|
from app.core.logging import get_logger
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.exceptions import NoDatasetInfoFoundError, DatasetMappingNotFoundError
|
from app.exception import NoDatasetInfoFoundError
|
||||||
|
|
||||||
|
from ..client import LabelStudioClient
|
||||||
|
from ..schema import SyncDatasetResponse
|
||||||
|
from ..service.mapping import DatasetMappingService
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -13,7 +16,7 @@ class SyncService:
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
dm_client: DatamateClient,
|
dm_client: DatasetManagementService,
|
||||||
ls_client: LabelStudioClient,
|
ls_client: LabelStudioClient,
|
||||||
mapping_service: DatasetMappingService
|
mapping_service: DatasetMappingService
|
||||||
):
|
):
|
||||||
@@ -44,7 +47,7 @@ class SyncService:
|
|||||||
project_id: Label Studio项目ID
|
project_id: Label Studio项目ID
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dm_file_id到task_id的映射字典
|
file_id到task_id的映射字典
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Fetching existing task mappings for project {project_id} (page_size={settings.ls_task_page_size})")
|
logger.info(f"Fetching existing task mappings for project {project_id} (page_size={settings.ls_task_page_size})")
|
||||||
@@ -60,23 +63,28 @@ class SyncService:
|
|||||||
page_size=page_size
|
page_size=page_size
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info(f"Fetched tasks result: {result}")
|
||||||
|
|
||||||
if not result:
|
if not result:
|
||||||
logger.warning(f"Failed to fetch tasks for project {project_id}")
|
logger.warning(f"Failed to fetch tasks for project {project_id}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
logger.info(f"Successfully fetched tasks for project {project_id}")
|
||||||
|
|
||||||
all_tasks = result.get("tasks", [])
|
all_tasks = result.get("tasks", [])
|
||||||
|
|
||||||
# 遍历所有任务,构建映射
|
# 遍历所有任务,构建映射
|
||||||
for task in all_tasks:
|
for task in all_tasks:
|
||||||
# 检查任务的meta字段中是否有dm_file_id
|
# logger.debug(task)
|
||||||
meta = task.get('meta')
|
try:
|
||||||
if meta:
|
file_id = task.get('data', {}).get('file_id')
|
||||||
dm_file_id = meta.get('dm_file_id')
|
|
||||||
if dm_file_id:
|
|
||||||
task_id = task.get('id')
|
task_id = task.get('id')
|
||||||
if task_id:
|
|
||||||
dm_file_to_task_mapping[str(dm_file_id)] = task_id
|
|
||||||
|
|
||||||
|
dm_file_to_task_mapping[str(file_id)] = task_id
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing task {task.get('id')}: {e}")
|
||||||
|
continue
|
||||||
|
logger.debug(dm_file_to_task_mapping)
|
||||||
logger.info(f"Found {len(dm_file_to_task_mapping)} existing task mappings")
|
logger.info(f"Found {len(dm_file_to_task_mapping)} existing task mappings")
|
||||||
return dm_file_to_task_mapping
|
return dm_file_to_task_mapping
|
||||||
|
|
||||||
@@ -86,22 +94,22 @@ class SyncService:
|
|||||||
|
|
||||||
async def sync_dataset_files(
|
async def sync_dataset_files(
|
||||||
self,
|
self,
|
||||||
mapping_id: str,
|
id: str,
|
||||||
batch_size: int = 50
|
batch_size: int = 50
|
||||||
) -> SyncDatasetResponse:
|
) -> SyncDatasetResponse:
|
||||||
"""同步数据集文件到Label Studio"""
|
"""同步数据集文件到Label Studio"""
|
||||||
logger.info(f"Start syncing dataset by mapping: {mapping_id}")
|
logger.info(f"Start syncing dataset by mapping: {id}")
|
||||||
|
|
||||||
# 获取映射关系
|
# 获取映射关系
|
||||||
mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
|
mapping = await self.mapping_service.get_mapping_by_uuid(id)
|
||||||
if not mapping:
|
if not mapping:
|
||||||
logger.error(f"Dataset mapping not found: {mapping_id}")
|
logger.error(f"Dataset mapping not found: {id}")
|
||||||
return SyncDatasetResponse(
|
return SyncDatasetResponse(
|
||||||
mapping_id="",
|
id="",
|
||||||
status="error",
|
status="error",
|
||||||
synced_files=0,
|
synced_files=0,
|
||||||
total_files=0,
|
total_files=0,
|
||||||
message=f"Dataset mapping not found: {mapping_id}"
|
message=f"Dataset mapping not found: {id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -118,20 +126,17 @@ class SyncService:
|
|||||||
logger.info(f"Total files in dataset: {total_files}")
|
logger.info(f"Total files in dataset: {total_files}")
|
||||||
|
|
||||||
# 获取Label Studio中已存在的DM文件ID到任务ID的映射
|
# 获取Label Studio中已存在的DM文件ID到任务ID的映射
|
||||||
existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labelling_project_id)
|
existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labeling_project_id)
|
||||||
existing_dm_file_ids = set(existing_dm_file_mapping.keys())
|
existing_file_ids = set(existing_dm_file_mapping.keys())
|
||||||
logger.info(f"{len(existing_dm_file_ids)} tasks already exist in Label Studio")
|
logger.info(f"{len(existing_file_ids)} tasks already exist in Label Studio")
|
||||||
|
|
||||||
# 收集DM中当前存在的所有文件ID
|
# 收集DM中当前存在的所有文件ID
|
||||||
current_dm_file_ids = set()
|
current_file_ids = set()
|
||||||
|
|
||||||
# 分页获取并同步文件
|
|
||||||
while True:
|
while True:
|
||||||
files_response = await self.dm_client.get_dataset_files(
|
files_response = await self.dm_client.get_dataset_files(
|
||||||
mapping.dataset_id,
|
mapping.dataset_id,
|
||||||
page=page,
|
page=page,
|
||||||
size=batch_size,
|
size=batch_size,
|
||||||
status="COMPLETED" # 只同步已完成的文件
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not files_response or not files_response.content:
|
if not files_response or not files_response.content:
|
||||||
@@ -147,17 +152,16 @@ class SyncService:
|
|||||||
|
|
||||||
for file_info in files_response.content:
|
for file_info in files_response.content:
|
||||||
# 记录当前DM中存在的文件ID
|
# 记录当前DM中存在的文件ID
|
||||||
current_dm_file_ids.add(str(file_info.id))
|
current_file_ids.add(str(file_info.id))
|
||||||
|
|
||||||
# 检查文件是否已存在
|
# 检查文件是否已存在
|
||||||
if str(file_info.id) in existing_dm_file_ids:
|
if str(file_info.id) in existing_file_ids:
|
||||||
existing_files_count += 1
|
existing_files_count += 1
|
||||||
logger.debug(f"Skip existing file: {file_info.originalName} (ID: {file_info.id})")
|
logger.debug(f"Skip existing file: {file_info.originalName} (ID: {file_info.id})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
new_files_count += 1
|
new_files_count += 1
|
||||||
|
|
||||||
# 确定数据类型
|
|
||||||
data_type = self.determine_data_type(file_info.fileType)
|
data_type = self.determine_data_type(file_info.fileType)
|
||||||
|
|
||||||
# 替换文件路径前缀:只替换开头的前缀,不影响路径中间可能出现的相同字符串
|
# 替换文件路径前缀:只替换开头的前缀,不影响路径中间可能出现的相同字符串
|
||||||
@@ -167,14 +171,11 @@ class SyncService:
|
|||||||
# 构造任务数据
|
# 构造任务数据
|
||||||
task_data = {
|
task_data = {
|
||||||
"data": {
|
"data": {
|
||||||
data_type: file_path
|
f"{data_type}": file_path,
|
||||||
},
|
"file_path": file_info.filePath,
|
||||||
"meta": {
|
"file_id": file_info.id,
|
||||||
"file_size": file_info.size,
|
|
||||||
"file_type": file_info.fileType,
|
|
||||||
"dm_dataset_id": mapping.dataset_id,
|
|
||||||
"dm_file_id": file_info.id,
|
|
||||||
"original_name": file_info.originalName,
|
"original_name": file_info.originalName,
|
||||||
|
"dataset_id": mapping.dataset_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tasks.append(task_data)
|
tasks.append(task_data)
|
||||||
@@ -184,7 +185,7 @@ class SyncService:
|
|||||||
# 批量创建Label Studio任务
|
# 批量创建Label Studio任务
|
||||||
if tasks:
|
if tasks:
|
||||||
batch_result = await self.ls_client.create_tasks_batch(
|
batch_result = await self.ls_client.create_tasks_batch(
|
||||||
mapping.labelling_project_id,
|
mapping.labeling_project_id,
|
||||||
tasks
|
tasks
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -196,7 +197,7 @@ class SyncService:
|
|||||||
# 如果批量创建失败,尝试单个创建
|
# 如果批量创建失败,尝试单个创建
|
||||||
for task_data in tasks:
|
for task_data in tasks:
|
||||||
task_result = await self.ls_client.create_task(
|
task_result = await self.ls_client.create_task(
|
||||||
mapping.labelling_project_id,
|
mapping.labeling_project_id,
|
||||||
task_data["data"],
|
task_data["data"],
|
||||||
task_data.get("meta")
|
task_data.get("meta")
|
||||||
)
|
)
|
||||||
@@ -210,10 +211,10 @@ class SyncService:
|
|||||||
|
|
||||||
# 清理在DM中不存在但在Label Studio中存在的任务
|
# 清理在DM中不存在但在Label Studio中存在的任务
|
||||||
tasks_to_delete = []
|
tasks_to_delete = []
|
||||||
for dm_file_id, task_id in existing_dm_file_mapping.items():
|
for file_id, task_id in existing_dm_file_mapping.items():
|
||||||
if dm_file_id not in current_dm_file_ids:
|
if file_id not in current_file_ids:
|
||||||
tasks_to_delete.append(task_id)
|
tasks_to_delete.append(task_id)
|
||||||
logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {dm_file_id})")
|
logger.debug(f"Mark task for deletion: {task_id} (DM file ID: {file_id})")
|
||||||
|
|
||||||
if tasks_to_delete:
|
if tasks_to_delete:
|
||||||
logger.info(f"Deleting {len(tasks_to_delete)} tasks not present in DM")
|
logger.info(f"Deleting {len(tasks_to_delete)} tasks not present in DM")
|
||||||
@@ -223,13 +224,10 @@ class SyncService:
|
|||||||
else:
|
else:
|
||||||
logger.info("No tasks to delete")
|
logger.info("No tasks to delete")
|
||||||
|
|
||||||
# 更新映射的最后更新时间
|
|
||||||
await self.mapping_service.update_last_updated_at(mapping.mapping_id)
|
|
||||||
|
|
||||||
logger.info(f"Sync completed: total_files={total_files}, created={synced_files}, deleted={deleted_tasks}")
|
logger.info(f"Sync completed: total_files={total_files}, created={synced_files}, deleted={deleted_tasks}")
|
||||||
|
|
||||||
return SyncDatasetResponse(
|
return SyncDatasetResponse(
|
||||||
mapping_id=mapping.mapping_id,
|
id=mapping.id,
|
||||||
status="success",
|
status="success",
|
||||||
synced_files=synced_files,
|
synced_files=synced_files,
|
||||||
total_files=total_files,
|
total_files=total_files,
|
||||||
@@ -239,7 +237,7 @@ class SyncService:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while syncing dataset: {e}")
|
logger.error(f"Error while syncing dataset: {e}")
|
||||||
return SyncDatasetResponse(
|
return SyncDatasetResponse(
|
||||||
mapping_id=mapping.mapping_id,
|
id=mapping.id,
|
||||||
status="error",
|
status="error",
|
||||||
synced_files=0,
|
synced_files=0,
|
||||||
total_files=0,
|
total_files=0,
|
||||||
@@ -259,13 +257,12 @@ class SyncService:
|
|||||||
dataset_info = await self.dm_client.get_dataset(dataset_id)
|
dataset_info = await self.dm_client.get_dataset(dataset_id)
|
||||||
|
|
||||||
# 获取Label Studio项目任务数量
|
# 获取Label Studio项目任务数量
|
||||||
tasks_info = await self.ls_client.get_project_tasks(mapping.labelling_project_id)
|
tasks_info = await self.ls_client.get_project_tasks(mapping.labeling_project_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"mapping_id": mapping.mapping_id,
|
"id": mapping.id,
|
||||||
"dataset_id": dataset_id,
|
"dataset_id": dataset_id,
|
||||||
"labelling_project_id": mapping.labelling_project_id,
|
"labeling_project_id": mapping.labeling_project_id,
|
||||||
"last_updated_at": mapping.last_updated_at,
|
|
||||||
"dm_total_files": dataset_info.fileCount if dataset_info else 0,
|
"dm_total_files": dataset_info.fileCount if dataset_info else 0,
|
||||||
"ls_total_tasks": tasks_info.get("count", 0) if tasks_info else 0,
|
"ls_total_tasks": tasks_info.get("count", 0) if tasks_info else 0,
|
||||||
"sync_ratio": (
|
"sync_ratio": (
|
||||||
3
runtime/datamate-python/app/module/dataset/__init__.py
Normal file
3
runtime/datamate-python/app/module/dataset/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from .service import DatasetManagementService
|
||||||
|
|
||||||
|
__all__ = ["DatasetManagementService"]
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
from .dataset_file import (
|
||||||
|
DatasetFileResponse,
|
||||||
|
PagedDatasetFileResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
from .dataset import (
|
||||||
|
DatasetResponse,
|
||||||
|
DatasetTypeResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"DatasetResponse",
|
||||||
|
"DatasetFileResponse",
|
||||||
|
"PagedDatasetFileResponse",
|
||||||
|
"DatasetTypeResponse",
|
||||||
|
]
|
||||||
@@ -2,28 +2,6 @@ from pydantic import BaseModel, Field
|
|||||||
from typing import List, Optional, Dict, Any
|
from typing import List, Optional, Dict, Any
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
class DatasetFileResponse(BaseModel):
|
|
||||||
"""DM服务数据集文件响应模型"""
|
|
||||||
id: str = Field(..., description="文件ID")
|
|
||||||
fileName: str = Field(..., description="文件名")
|
|
||||||
fileType: str = Field(..., description="文件类型")
|
|
||||||
filePath: str = Field(..., description="文件路径")
|
|
||||||
originalName: Optional[str] = Field(None, description="原始文件名")
|
|
||||||
size: Optional[int] = Field(None, description="文件大小(字节)")
|
|
||||||
status: Optional[str] = Field(None, description="文件状态")
|
|
||||||
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
|
|
||||||
description: Optional[str] = Field(None, description="文件描述")
|
|
||||||
uploadedBy: Optional[str] = Field(None, description="上传者")
|
|
||||||
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
|
|
||||||
|
|
||||||
class PagedDatasetFileResponse(BaseModel):
|
|
||||||
"""DM服务分页文件响应模型"""
|
|
||||||
content: List[DatasetFileResponse] = Field(..., description="文件列表")
|
|
||||||
totalElements: int = Field(..., description="总元素数")
|
|
||||||
totalPages: int = Field(..., description="总页数")
|
|
||||||
page: int = Field(..., description="当前页码")
|
|
||||||
size: int = Field(..., description="每页大小")
|
|
||||||
|
|
||||||
class DatasetTypeResponse(BaseModel):
|
class DatasetTypeResponse(BaseModel):
|
||||||
"""数据集类型响应模型"""
|
"""数据集类型响应模型"""
|
||||||
code: str = Field(..., description="类型编码")
|
code: str = Field(..., description="类型编码")
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from typing import List, Optional, Dict, Any
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
class DatasetFileResponse(BaseModel):
|
||||||
|
"""DM服务数据集文件响应模型"""
|
||||||
|
id: str = Field(..., description="文件ID")
|
||||||
|
fileName: str = Field(..., description="文件名")
|
||||||
|
fileType: str = Field(..., description="文件类型")
|
||||||
|
filePath: str = Field(..., description="文件路径")
|
||||||
|
originalName: Optional[str] = Field(None, description="原始文件名")
|
||||||
|
size: Optional[int] = Field(None, description="文件大小(字节)")
|
||||||
|
status: Optional[str] = Field(None, description="文件状态")
|
||||||
|
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
|
||||||
|
description: Optional[str] = Field(None, description="文件描述")
|
||||||
|
uploadedBy: Optional[str] = Field(None, description="上传者")
|
||||||
|
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
|
||||||
|
|
||||||
|
class PagedDatasetFileResponse(BaseModel):
|
||||||
|
"""DM服务分页文件响应模型"""
|
||||||
|
content: List[DatasetFileResponse] = Field(..., description="文件列表")
|
||||||
|
totalElements: int = Field(..., description="总元素数")
|
||||||
|
totalPages: int = Field(..., description="总页数")
|
||||||
|
page: int = Field(..., description="当前页码")
|
||||||
|
size: int = Field(..., description="每页大小")
|
||||||
|
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .service import Service as DatasetManagementService
|
||||||
|
|
||||||
|
__all__ = ["DatasetManagementService"]
|
||||||
@@ -2,15 +2,16 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
from app.core.logging import get_logger
|
from app.core.logging import get_logger
|
||||||
from app.schemas.dm_service import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
|
from app.db.models import Dataset, DatasetFiles
|
||||||
from app.models.dm.dataset import Dataset
|
|
||||||
from app.models.dm.dataset_files import DatasetFiles
|
from ..schema import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
class Client:
|
class Service:
|
||||||
"""数据管理服务客户端 - 直接访问数据库"""
|
"""数据管理服务客户端 - 直接访问数据库"""
|
||||||
|
|
||||||
def __init__(self, db: AsyncSession):
|
def __init__(self, db: AsyncSession):
|
||||||
11
runtime/datamate-python/app/module/shared/schema/__init__.py
Normal file
11
runtime/datamate-python/app/module/shared/schema/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from .common import (
|
||||||
|
BaseResponseModel,
|
||||||
|
StandardResponse,
|
||||||
|
PaginatedData
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"BaseResponseModel",
|
||||||
|
"StandardResponse",
|
||||||
|
"PaginatedData"
|
||||||
|
]
|
||||||
@@ -42,7 +42,6 @@ class StandardResponse(BaseResponseModel, Generic[T]):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class PaginatedData(BaseResponseModel, Generic[T]):
|
class PaginatedData(BaseResponseModel, Generic[T]):
|
||||||
"""分页数据容器"""
|
"""分页数据容器"""
|
||||||
page: int = Field(..., description="当前页码(从1开始)")
|
page: int = Field(..., description="当前页码(从1开始)")
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
# app/schemas/__init__.py
|
|
||||||
|
|
||||||
from .common import *
|
|
||||||
from .dataset_mapping import *
|
|
||||||
from .dm_service import *
|
|
||||||
from .label_studio import *
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
# Common schemas
|
|
||||||
"StandardResponse",
|
|
||||||
|
|
||||||
# Dataset Mapping schemas
|
|
||||||
"DatasetMappingBase",
|
|
||||||
"DatasetMappingCreateRequest",
|
|
||||||
"DatasetMappingUpdateRequest",
|
|
||||||
"DatasetMappingResponse",
|
|
||||||
"DatasetMappingCreateResponse",
|
|
||||||
"SyncDatasetResponse",
|
|
||||||
"DeleteDatasetResponse",
|
|
||||||
|
|
||||||
# DM Service schemas
|
|
||||||
"DatasetFileResponse",
|
|
||||||
"PagedDatasetFileResponse",
|
|
||||||
"DatasetResponse",
|
|
||||||
|
|
||||||
# Label Studio schemas
|
|
||||||
"LabelStudioProject",
|
|
||||||
"LabelStudioTask"
|
|
||||||
]
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
from pydantic import Field
|
|
||||||
from typing import Optional
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from .common import BaseResponseModel
|
|
||||||
|
|
||||||
class DatasetMappingBase(BaseResponseModel):
|
|
||||||
"""数据集映射 基础模型"""
|
|
||||||
dataset_id: str = Field(..., description="源数据集ID")
|
|
||||||
|
|
||||||
class DatasetMappingCreateRequest(DatasetMappingBase):
|
|
||||||
"""数据集映射 创建 请求模型"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DatasetMappingCreateResponse(BaseResponseModel):
|
|
||||||
"""数据集映射 创建 响应模型"""
|
|
||||||
mapping_id: str = Field(..., description="映射UUID")
|
|
||||||
labelling_project_id: str = Field(..., description="Label Studio项目ID")
|
|
||||||
labelling_project_name: str = Field(..., description="Label Studio项目名称")
|
|
||||||
message: str = Field(..., description="响应消息")
|
|
||||||
|
|
||||||
class DatasetMappingUpdateRequest(BaseResponseModel):
|
|
||||||
"""数据集映射 更新 请求模型"""
|
|
||||||
dataset_id: Optional[str] = Field(None, description="源数据集ID")
|
|
||||||
|
|
||||||
class DatasetMappingResponse(DatasetMappingBase):
|
|
||||||
"""数据集映射 查询 响应模型"""
|
|
||||||
mapping_id: str = Field(..., description="映射UUID")
|
|
||||||
labelling_project_id: str = Field(..., description="标注项目ID")
|
|
||||||
labelling_project_name: Optional[str] = Field(None, description="标注项目名称")
|
|
||||||
created_at: datetime = Field(..., description="创建时间")
|
|
||||||
last_updated_at: datetime = Field(..., description="最后更新时间")
|
|
||||||
deleted_at: Optional[datetime] = Field(None, description="删除时间")
|
|
||||||
|
|
||||||
class Config:
|
|
||||||
from_attributes = True
|
|
||||||
populate_by_name = True
|
|
||||||
|
|
||||||
class SyncDatasetRequest(BaseResponseModel):
|
|
||||||
"""同步数据集请求模型"""
|
|
||||||
mapping_id: str = Field(..., description="映射ID(mapping UUID)")
|
|
||||||
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
|
|
||||||
|
|
||||||
class SyncDatasetResponse(BaseResponseModel):
|
|
||||||
"""同步数据集响应模型"""
|
|
||||||
mapping_id: str = Field(..., description="映射UUID")
|
|
||||||
status: str = Field(..., description="同步状态")
|
|
||||||
synced_files: int = Field(..., description="已同步文件数量")
|
|
||||||
total_files: int = Field(0, description="总文件数量")
|
|
||||||
message: str = Field(..., description="响应消息")
|
|
||||||
|
|
||||||
class DeleteDatasetResponse(BaseResponseModel):
|
|
||||||
"""删除数据集响应模型"""
|
|
||||||
mapping_id: str = Field(..., description="映射UUID")
|
|
||||||
status: str = Field(..., description="删除状态")
|
|
||||||
message: str = Field(..., description="响应消息")
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
# app/services/__init__.py
|
|
||||||
|
|
||||||
from .dataset_mapping_service import DatasetMappingService
|
|
||||||
from .sync_service import SyncService
|
|
||||||
|
|
||||||
__all__ = ["DatasetMappingService", "SyncService"]
|
|
||||||
19
scripts/db/data-annotation-init.sql
Normal file
19
scripts/db/data-annotation-init.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
CREATE TABLE t_dm_annotation_templates (
|
||||||
|
id VARCHAR(36) PRIMARY KEY,
|
||||||
|
name VARCHAR(32) NOT NULL COMMENT '模板名称',
|
||||||
|
description VARCHAR(255) COMMENT '模板描述',
|
||||||
|
configuration JSON,
|
||||||
|
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||||
|
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)'
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE t_dm_labeling_projects (
|
||||||
|
id VARCHAR(36) PRIMARY KEY,
|
||||||
|
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
|
||||||
|
name VARCHAR(32) NOT NULL COMMENT '项目名称',
|
||||||
|
labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
|
||||||
|
configuration JSON,
|
||||||
|
progress JSON,
|
||||||
|
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
|
||||||
|
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)'
|
||||||
|
);
|
||||||
Reference in New Issue
Block a user