feat: add labeling template. refactor: switch to Poetry, build and deploy of backend Python (#79)

* feat: Enhance annotation module with template management and validation

- Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
- Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates.
- Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation.
- Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats.
- Updated database schema for annotation templates and labeling projects to include new fields and constraints.
- Seeded initial annotation templates for various use cases including image classification, object detection, and text classification.

* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support

* feat: Update docker-compose.yml to mark datamate dataset volume and network as external

* feat: Add tag configuration management and related components

- Introduced new components for tag selection and browsing in the frontend.
- Added API endpoint to fetch tag configuration from the backend.
- Implemented tag configuration management in the backend, including loading from YAML.
- Enhanced template service to support dynamic tag rendering based on configuration.
- Updated validation utilities to incorporate tag configuration checks.
- Refactored existing code to utilize the new tag configuration structure.

* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation

* feat: Update Makefile to include backend-python-docker-build in the build process

* feat: Migrate to poetry for better deps management

* Add pyyaml dependency and update Dockerfile to use Poetry for dependency management

- Added pyyaml (>=6.0.3,<7.0.0) to pyproject.toml dependencies.
- Updated Dockerfile to install Poetry and manage dependencies using it.
- Improved layer caching by copying only dependency files before the application code.
- Removed unnecessary installation of build dependencies to keep the final image size small.

* feat: Remove duplicated backend-python-docker-build target from Makefile

* fix: airflow is not ready for adding yet

* feat: update Python version to 3.12 and remove project installation step in Dockerfile
This commit is contained in:
Jason Wang
2025-11-13 15:32:30 +08:00
committed by GitHub
parent 2660845b74
commit 45743f39f5
40 changed files with 3223 additions and 262 deletions

View File

@@ -2,7 +2,7 @@ from typing import Optional
import math
import uuid
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
@@ -149,7 +149,7 @@ async def create_mapping(
@router.get("", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def list_mappings(
page: int = Query(1, ge=1, description="页码(从1开始)"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数", alias="pageSize"),
db: AsyncSession = Depends(get_db)
):
"""
@@ -163,8 +163,6 @@ async def list_mappings(
# 计算 skip
skip = (page - 1) * page_size
logger.info(f"Listing mappings, page={page}, page_size={page_size}")
# 获取数据和总数
mappings, total = await service.get_all_mappings_with_count(
skip=skip,
@@ -183,7 +181,7 @@ async def list_mappings(
content=mappings
)
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}")
logger.info(f"List mappings: page={page}, returned {len(mappings)}/{total}")
return StandardResponse(
code=200,
@@ -234,7 +232,7 @@ async def get_mapping(
async def get_mappings_by_source(
dataset_id: str,
page: int = Query(1, ge=1, description="页码(从1开始)"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
page_size: int = Query(20, ge=1, le=100, description="每页记录数", alias="pageSize"),
db: AsyncSession = Depends(get_db)
):
"""
@@ -283,49 +281,30 @@ async def get_mappings_by_source(
logger.error(f"Error getting mappings: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("", response_model=StandardResponse[DeleteDatasetResponse])
@router.delete("/{project_id}", response_model=StandardResponse[DeleteDatasetResponse])
async def delete_mapping(
m: Optional[str] = Query(None, description="映射UUID"),
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
project_id: str = Path(..., description="映射UUID(path param)"),
db: AsyncSession = Depends(get_db)
):
"""
删除映射关系和对应的 Label Studio 项目
可以通过以下任一方式指定要删除的映射:
- m: 映射UUID
- proj: Label Studio项目ID
- 两者都提供(优先使用 m)
通过 path 参数 `project_id` 指定要删除的映射(映射的 UUID)。
此操作会:
1. 删除 Label Studio 中的项目
2. 软删除数据库中的映射记录
"""
try:
# Log incoming request parameters for debugging
logger.debug(f"Delete mapping request received: m={m!r}, proj={proj!r}")
# 至少需要提供一个参数
if not m and not proj:
logger.debug("Missing both 'm' and 'proj' in delete request")
raise HTTPException(
status_code=400,
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
)
logger.debug(f"Delete mapping request received: project_id={project_id!r}")
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token)
service = DatasetMappingService(db)
# 优先使用 mapping_id 查询
if m:
logger.debug(f"Deleting by mapping UUID: {m}")
mapping = await service.get_mapping_by_uuid(m)
# 如果没有提供 m,使用 proj 查询
elif proj:
logger.debug(f"Deleting by project ID: {proj}")
mapping = await service.get_mapping_by_labeling_project_id(proj)
else:
mapping = None
# 使用 mapping UUID 查询映射记录
logger.debug(f"Deleting by mapping UUID: {project_id}")
mapping = await service.get_mapping_by_uuid(project_id)
logger.debug(f"Mapping lookup result: {mapping}")