You've already forked DataMate
feat: add labeling template. refactor: switch to Poetry, build and deploy of backend Python (#79)
* feat: Enhance annotation module with template management and validation - Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support. - Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates. - Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation. - Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats. - Updated database schema for annotation templates and labeling projects to include new fields and constraints. - Seeded initial annotation templates for various use cases including image classification, object detection, and text classification. * feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support * feat: Update docker-compose.yml to mark datamate dataset volume and network as external * feat: Add tag configuration management and related components - Introduced new components for tag selection and browsing in the frontend. - Added API endpoint to fetch tag configuration from the backend. - Implemented tag configuration management in the backend, including loading from YAML. - Enhanced template service to support dynamic tag rendering based on configuration. - Updated validation utilities to incorporate tag configuration checks. - Refactored existing code to utilize the new tag configuration structure. 
* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation * feat: Update Makefile to include backend-python-docker-build in the build process * feat: Migrate to Poetry for better dependency management * Add pyyaml dependency and update Dockerfile to use Poetry for dependency management - Added pyyaml (>=6.0.3,<7.0.0) to pyproject.toml dependencies. - Updated Dockerfile to install Poetry and manage dependencies using it. - Improved layer caching by copying only dependency files before the application code. - Removed unnecessary installation of build dependencies to keep the final image size small. * feat: Remove duplicated backend-python-docker-build target from Makefile * fix: Airflow is not ready to be added yet * feat: update Python version to 3.12 and remove project installation step in Dockerfile
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from .about import router as about_router
|
||||
from .config import router as about_router
|
||||
from .project import router as project_router
|
||||
from .task import router as task_router
|
||||
from .template import router as template_router
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.module.shared.schema import StandardResponse
|
||||
from app.core.logging import get_logger
|
||||
from app.core.config import settings
|
||||
|
||||
from ..schema import ConfigResponse
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/about",
|
||||
tags=["annotation/about"]
|
||||
)
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@router.get("", response_model=StandardResponse[ConfigResponse])
async def get_config():
    """Return the annotation module's configuration.

    The payload carries the Label Studio base URL read from the application
    settings, wrapped in a standard success response.
    """
    payload = ConfigResponse(label_studio_url=settings.label_studio_base_url)
    return StandardResponse(code=200, message="success", data=payload)
|
||||
@@ -0,0 +1,47 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.module.shared.schema import StandardResponse
|
||||
from app.core.logging import get_logger
|
||||
from app.core.config import settings
|
||||
|
||||
from ..schema import (
|
||||
ConfigResponse,
|
||||
TagConfigResponse
|
||||
)
|
||||
from ..config.tag_config import LabelStudioTagConfig
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/tags",
|
||||
tags=["annotation/config"]
|
||||
)
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@router.get("", response_model=StandardResponse[ConfigResponse])
async def get_config():
    """Return configuration info (deprecated; use /api/annotation/about instead).

    The payload carries the Label Studio base URL read from the application
    settings, wrapped in a standard success response.
    """
    payload = ConfigResponse(label_studio_url=settings.label_studio_base_url)
    return StandardResponse(code=200, message="success", data=payload)
|
||||
|
||||
@router.get("/config", response_model=StandardResponse[TagConfigResponse], summary="获取标签配置")
async def get_tag_config():
    """Return the configuration of all Label Studio tag types.

    The configuration covers both object tags and control tags and is used by
    the frontend for dynamic rendering.

    Returns:
        A ``StandardResponse`` with code 200 and the loaded configuration, or
        a ``StandardResponse`` with code 500 and empty ``objects`` /
        ``controls`` mappings when no configuration is available.
    """
    # Instantiating the class ensures the configuration is loaded. The
    # instance itself is never used, so it is deliberately not bound to a name.
    LabelStudioTagConfig()
    config = LabelStudioTagConfig._config

    if not config:
        logger.error("Failed to load tag configuration")
        return StandardResponse(
            code=500,
            message="Failed to load tag configuration",
            data={"objects": {}, "controls": {}},
        )

    return StandardResponse(code=200, message="success", data=config)
|
||||
@@ -2,7 +2,7 @@ from typing import Optional
|
||||
import math
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
@@ -149,7 +149,7 @@ async def create_mapping(
|
||||
@router.get("", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
||||
async def list_mappings(
|
||||
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数", alias="pageSize"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
@@ -163,8 +163,6 @@ async def list_mappings(
|
||||
# 计算 skip
|
||||
skip = (page - 1) * page_size
|
||||
|
||||
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
|
||||
|
||||
# 获取数据和总数
|
||||
mappings, total = await service.get_all_mappings_with_count(
|
||||
skip=skip,
|
||||
@@ -183,7 +181,7 @@ async def list_mappings(
|
||||
content=mappings
|
||||
)
|
||||
|
||||
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
|
||||
logger.info(f"List mappings: page={page}, returned {len(mappings)}/{total}")
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
@@ -234,7 +232,7 @@ async def get_mapping(
|
||||
async def get_mappings_by_source(
|
||||
dataset_id: str,
|
||||
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数", alias="pageSize"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
@@ -283,49 +281,30 @@ async def get_mappings_by_source(
|
||||
logger.error(f"Error getting mappings: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@router.delete("", response_model=StandardResponse[DeleteDatasetResponse])
|
||||
@router.delete("/{project_id}", response_model=StandardResponse[DeleteDatasetResponse])
|
||||
async def delete_mapping(
|
||||
m: Optional[str] = Query(None, description="映射UUID"),
|
||||
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
|
||||
project_id: str = Path(..., description="映射UUID(path param)"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
删除映射关系和对应的 Label Studio 项目
|
||||
|
||||
可以通过以下任一方式指定要删除的映射:
|
||||
- m: 映射UUID
|
||||
- proj: Label Studio项目ID
|
||||
- 两者都提供(优先使用 m)
|
||||
|
||||
|
||||
通过 path 参数 `project_id` 指定要删除的映射(映射的 UUID)。
|
||||
|
||||
此操作会:
|
||||
1. 删除 Label Studio 中的项目
|
||||
2. 软删除数据库中的映射记录
|
||||
"""
|
||||
try:
|
||||
# Log incoming request parameters for debugging
|
||||
logger.debug(f"Delete mapping request received: m={m!r}, proj={proj!r}")
|
||||
# 至少需要提供一个参数
|
||||
if not m and not proj:
|
||||
logger.debug("Missing both 'm' and 'proj' in delete request")
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
|
||||
)
|
||||
logger.debug(f"Delete mapping request received: project_id={project_id!r}")
|
||||
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
service = DatasetMappingService(db)
|
||||
|
||||
# 优先使用 mapping_id 查询
|
||||
if m:
|
||||
logger.debug(f"Deleting by mapping UUID: {m}")
|
||||
mapping = await service.get_mapping_by_uuid(m)
|
||||
# 如果没有提供 m,使用 proj 查询
|
||||
elif proj:
|
||||
logger.debug(f"Deleting by project ID: {proj}")
|
||||
mapping = await service.get_mapping_by_labeling_project_id(proj)
|
||||
else:
|
||||
mapping = None
|
||||
# 使用 mapping UUID 查询映射记录
|
||||
logger.debug(f"Deleting by mapping UUID: {project_id}")
|
||||
mapping = await service.get_mapping_by_uuid(project_id)
|
||||
|
||||
logger.debug(f"Mapping lookup result: {mapping}")
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field, ConfigDict
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.module.shared.schema import StandardResponse
|
||||
@@ -17,6 +19,10 @@ from ..schema import (
|
||||
SyncDatasetResponse,
|
||||
SyncAnnotationsRequest,
|
||||
SyncAnnotationsResponse,
|
||||
UpdateFileTagsRequest,
|
||||
UpdateFileTagsResponse,
|
||||
UpdateFileTagsRequest,
|
||||
UpdateFileTagsResponse
|
||||
)
|
||||
|
||||
|
||||
@@ -32,24 +38,10 @@ async def sync_dataset_content(
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
同步数据集内容(包括文件和标注)
|
||||
Sync Dataset Content (Files and Annotations)
|
||||
|
||||
根据指定的mapping ID,同步DM程序数据集中的内容到Label Studio数据集中。
|
||||
默认同时同步文件和标注数据。
|
||||
|
||||
Args:
|
||||
request: 同步请求,包含:
|
||||
- id: 映射ID(mapping UUID)
|
||||
- batchSize: 批处理大小
|
||||
- filePriority: 文件同步优先级
|
||||
- labelPriority: 标签同步优先级
|
||||
- syncAnnotations: 是否同步标注(默认True)
|
||||
- annotationDirection: 标注同步方向(默认bidirectional)
|
||||
- overwrite: 是否允许覆盖DataMate中的标注(默认True)
|
||||
- overwriteLabelingProject: 是否允许覆盖Label Studio中的标注(默认True)
|
||||
|
||||
Returns:
|
||||
同步结果
|
||||
"""
|
||||
try:
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
@@ -123,28 +115,10 @@ async def sync_annotations(
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
仅同步标注结果(支持双向同步)
|
||||
|
||||
根据指定的mapping ID和同步方向,在DM数据集和Label Studio之间同步标注结果。
|
||||
标注结果存储在数据集文件表的tags字段中,使用简化格式。
|
||||
|
||||
同步策略:
|
||||
- 默认为双向同步,基于时间戳自动解决冲突
|
||||
- overwrite: 控制是否允许用Label Studio的标注覆盖DataMate(基于时间戳比较)
|
||||
- overwriteLabelingProject: 控制是否允许用DataMate的标注覆盖Label Studio(基于时间戳比较)
|
||||
- 如果Label Studio标注的updated_at更新,且overwrite=True,则覆盖DataMate
|
||||
- 如果DataMate标注的updated_at更新,且overwriteLabelingProject=True,则覆盖Label Studio
|
||||
|
||||
Args:
|
||||
request: 同步请求,包含:
|
||||
- id: 映射ID(mapping UUID)
|
||||
- batchSize: 批处理大小
|
||||
- direction: 同步方向 (ls_to_dm/dm_to_ls/bidirectional)
|
||||
- overwrite: 是否允许覆盖DataMate中的标注(默认True)
|
||||
- overwriteLabelingProject: 是否允许覆盖Label Studio中的标注(默认True)
|
||||
|
||||
Returns:
|
||||
同步结果,包含同步统计信息和冲突解决情况
|
||||
Sync Annotations Only (Bidirectional Support)
|
||||
|
||||
同步指定 mapping 下的标注数据,支持单向或双向同步,基于时间戳自动解决冲突。
|
||||
请求与响应由 Pydantic 模型 `SyncAnnotationsRequest` / `SyncAnnotationsResponse` 定义。
|
||||
"""
|
||||
try:
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
@@ -207,9 +181,9 @@ async def sync_annotations(
|
||||
@router.get("/check-ls-connection")
|
||||
async def check_label_studio_connection():
|
||||
"""
|
||||
检查Label Studio连接状态
|
||||
|
||||
用于诊断Label Studio连接问题,返回连接状态和配置信息
|
||||
Check Label Studio Connection Status
|
||||
|
||||
诊断 Label Studio 连接并返回简要连接信息(状态、base URL、token 摘要、项目统计)。
|
||||
"""
|
||||
try:
|
||||
ls_client = LabelStudioClient(
|
||||
@@ -258,4 +232,55 @@ async def check_label_studio_connection():
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking Label Studio connection: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@router.put(
    "/{file_id}",
    response_model=StandardResponse[UpdateFileTagsResponse],
)
async def update_file_tags(
    request: UpdateFileTagsRequest,
    file_id: str = Path(..., description="文件ID"),
    db: AsyncSession = Depends(get_db)
):
    """Update File Tags (Partial Update)

    Accept a partial tag update and merge it into the given file: only the
    submitted tags are modified, the rest are left unchanged, and
    `tags_updated_at` is refreshed. The request and response are described by
    the Pydantic models `UpdateFileTagsRequest` / `UpdateFileTagsResponse`.
    """
    ok, failure_reason, tags_updated_at = await DatasetManagementService(db).update_file_tags_partial(
        file_id=file_id,
        new_tags=request.tags,
    )

    if not ok:
        # A "not found" message from the service maps to 404; any other
        # failure is reported as an internal error.
        is_missing = "not found" in (failure_reason or "").lower()
        if is_missing:
            raise HTTPException(status_code=404, detail=failure_reason)
        raise HTTPException(status_code=500, detail=failure_reason or "更新标签失败")

    # Fetch the complete tag list after the update
    from sqlalchemy.future import select
    from app.db.models import DatasetFiles

    lookup = select(DatasetFiles).where(DatasetFiles.id == file_id)
    file_record = (await db.execute(lookup)).scalar_one_or_none()

    if file_record is None or not file_record:
        raise HTTPException(status_code=404, detail=f"File not found: {file_id}")

    # Fall back to the current time when the service reported no timestamp.
    effective_timestamp = tags_updated_at if tags_updated_at else datetime.now()
    body = UpdateFileTagsResponse(
        fileId=file_id,
        tags=file_record.tags or [],  # type: ignore
        tagsUpdatedAt=effective_timestamp,
    )

    return StandardResponse(code=200, message="标签更新成功", data=body)
|
||||
|
||||
@@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.module.shared.schema import StandardResponse
|
||||
from app.module.annotation.schema.template import (
|
||||
from app.module.annotation.schema import (
|
||||
CreateAnnotationTemplateRequest,
|
||||
UpdateAnnotationTemplateRequest,
|
||||
AnnotationTemplateResponse,
|
||||
@@ -15,7 +15,7 @@ from app.module.annotation.schema.template import (
|
||||
)
|
||||
from app.module.annotation.service.template import AnnotationTemplateService
|
||||
|
||||
router = APIRouter(prefix="/templates", tags=["Annotation Template"])
|
||||
router = APIRouter(prefix="/template", tags=["annotation/template"])
|
||||
|
||||
template_service = AnnotationTemplateService()
|
||||
|
||||
@@ -23,7 +23,6 @@ template_service = AnnotationTemplateService()
|
||||
@router.post(
|
||||
"",
|
||||
response_model=StandardResponse[AnnotationTemplateResponse],
|
||||
summary="创建标注模板"
|
||||
)
|
||||
async def create_template(
|
||||
request: CreateAnnotationTemplateRequest,
|
||||
@@ -47,7 +46,6 @@ async def create_template(
|
||||
@router.get(
|
||||
"/{template_id}",
|
||||
response_model=StandardResponse[AnnotationTemplateResponse],
|
||||
summary="获取模板详情"
|
||||
)
|
||||
async def get_template(
|
||||
template_id: str,
|
||||
@@ -65,9 +63,8 @@ async def get_template(
|
||||
@router.get(
|
||||
"",
|
||||
response_model=StandardResponse[AnnotationTemplateListResponse],
|
||||
summary="获取模板列表"
|
||||
)
|
||||
async def list_templates(
|
||||
async def list_template(
|
||||
page: int = Query(1, ge=1, description="页码"),
|
||||
size: int = Query(10, ge=1, le=100, description="每页大小"),
|
||||
category: Optional[str] = Query(None, description="分类筛选"),
|
||||
@@ -101,7 +98,6 @@ async def list_templates(
|
||||
@router.put(
|
||||
"/{template_id}",
|
||||
response_model=StandardResponse[AnnotationTemplateResponse],
|
||||
summary="更新模板"
|
||||
)
|
||||
async def update_template(
|
||||
template_id: str,
|
||||
@@ -122,7 +118,6 @@ async def update_template(
|
||||
@router.delete(
|
||||
"/{template_id}",
|
||||
response_model=StandardResponse[bool],
|
||||
summary="删除模板"
|
||||
)
|
||||
async def delete_template(
|
||||
template_id: str,
|
||||
|
||||
Reference in New Issue
Block a user