feat: add labeling template; refactor: switch to Poetry for backend Python build and deploy (#79)
* feat: Enhance annotation module with template management and validation
  - Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
  - Introduced annotation template schemas, including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse, for managing annotation templates.
  - Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including configuration validation and XML generation.
  - Added the LabelStudioConfigValidator utility class for validating Label Studio configurations and XML formats.
  - Updated the database schema for annotation templates and labeling projects to include new fields and constraints.
  - Seeded initial annotation templates for common use cases, including image classification, object detection, and text classification.
* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update Label Studio config validation for camelCase support
* feat: Update docker-compose.yml to mark the datamate dataset volume and network as external
* feat: Add tag configuration management and related components
  - Introduced new components for tag selection and browsing in the frontend.
  - Added an API endpoint to fetch the tag configuration from the backend.
  - Implemented tag configuration management in the backend, including loading from YAML (see the sketch after this list).
  - Enhanced the template service to support dynamic tag rendering based on the configuration.
  - Updated validation utilities to incorporate tag configuration checks.
  - Refactored existing code to use the new tag configuration structure.
* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation
* feat: Update the Makefile to include backend-python-docker-build in the build process
* feat: Migrate to Poetry for better dependency management
* Add the pyyaml dependency and update the Dockerfile to use Poetry for dependency management
  - Added pyyaml (>=6.0.3,<7.0.0) to the pyproject.toml dependencies.
  - Updated the Dockerfile to install Poetry and manage dependencies with it.
  - Improved layer caching by copying only the dependency files before the application code.
  - Removed unnecessary build-dependency installation to keep the final image small.
* feat: Remove the duplicated backend-python-docker-build target from the Makefile
* fix: airflow is not ready to be added yet
* feat: Update the Python version to 3.12 and remove the project installation step in the Dockerfile
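The YAML-backed tag configuration loading mentioned above is not part of the diff shown below. As a rough illustration of the pattern the commit message describes, a minimal Python sketch might look like the following; the file name, YAML schema, and method names are hypothetical, and only the pyyaml dependency and the LabelStudioTagConfig name come from the commit message.

# Hypothetical sketch of YAML-backed tag configuration loading; the path,
# schema, and helper methods are assumptions, not the actual DataMate code.
from dataclasses import dataclass, field
from pathlib import Path

import yaml  # from the pyyaml (>=6.0.3,<7.0.0) dependency added in this commit


@dataclass
class LabelStudioTagConfig:
    # Assumed shape: tag name -> list of allowed attributes.
    tags: dict[str, list[str]] = field(default_factory=dict)

    @classmethod
    def load(cls, path: Path) -> "LabelStudioTagConfig":
        # safe_load refuses arbitrary Python object tags embedded in the YAML
        raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
        return cls(tags={
            name: spec.get("attributes", [])
            for name, spec in raw.get("tags", {}).items()
        })

    def is_known_tag(self, name: str) -> bool:
        # Validation helpers can check templates against the loaded config.
        return name in self.tags

A validator such as LabelStudioConfigValidator could then reject template XML that references tags absent from the loaded configuration.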
@@ -122,7 +122,7 @@ class SyncService:
                 return {}
 
             all_tasks = result.get("tasks", [])
-            logger.info(f"Successfully fetched {len(all_tasks)} tasks")
+            logger.debug(f"Successfully fetched {len(all_tasks)} tasks")
 
             # Build the mapping with a dict comprehension
             dm_file_to_task_mapping = {
@@ -131,7 +131,7 @@ class SyncService:
                 if task.get('data', {}).get('file_id') is not None
             }
 
-            logger.info(f"Found {len(dm_file_to_task_mapping)} existing task mappings")
+            logger.debug(f"Found {len(dm_file_to_task_mapping)} existing task mappings")
             return dm_file_to_task_mapping
 
         except Exception as e:
@@ -163,10 +163,10 @@ class SyncService:
             )
 
             if not files_response or not files_response.content:
-                logger.info(f"No more files on page {page + 1}")
+                logger.debug(f"No more files on page {page + 1}")
                 break
 
-            logger.info(f"Processing page {page + 1}, {len(files_response.content)} files")
+            logger.debug(f"Processing page {page + 1}, {len(files_response.content)} files")
 
             # Filter new files and build task data
             new_tasks = []
@@ -178,7 +178,7 @@ class SyncService:
                     task_data = self._build_task_data(file_info, dataset_id)
                     new_tasks.append(task_data)
 
-            logger.info(f"Page {page + 1}: {len(new_tasks)} new files, {len(files_response.content) - len(new_tasks)} existing")
+            logger.debug(f"Page {page + 1}: {len(new_tasks)} new files, {len(files_response.content) - len(new_tasks)} existing")
 
             # Create tasks in batches
             if new_tasks:
@@ -202,16 +202,16 @@ class SyncService:
         deleted_file_ids = set(existing_dm_file_mapping.keys()) - current_file_ids
 
         if not deleted_file_ids:
-            logger.info("No tasks to delete")
+            logger.debug("No tasks to delete")
             return 0
 
         tasks_to_delete = [existing_dm_file_mapping[fid] for fid in deleted_file_ids]
-        logger.info(f"Deleting {len(tasks_to_delete)} orphaned tasks")
+        logger.debug(f"Deleting {len(tasks_to_delete)} orphaned tasks")
 
         delete_result = await self.ls_client.delete_tasks_batch(tasks_to_delete)
         deleted_count = delete_result.get("successful", 0)
 
-        logger.info(f"Successfully deleted {deleted_count} tasks")
+        logger.debug(f"Successfully deleted {deleted_count} tasks")
         return deleted_count
 
     async def sync_dataset_files(
@@ -229,7 +229,7 @@ class SyncService:
         Returns:
             The sync result response
         """
-        logger.info(f"Start syncing dataset files by mapping: {mapping_id}")
+        logger.debug(f"Start syncing dataset files by mapping: {mapping_id}")
 
         # Fetch the dataset mapping
         mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
@@ -247,7 +247,7 @@ class SyncService:
         # Delegate the actual sync to sync_files
         result = await self.sync_files(mapping, batch_size)
 
-        logger.info(f"Sync completed: created={result['created']}, deleted={result['deleted']}, total={result['total']}")
+        logger.info(f"Sync files completed: created={result['created']}, deleted={result['deleted']}, total={result['total']}")
 
         return SyncDatasetResponse(
             id=mapping.id,
@@ -342,7 +342,7 @@ class SyncService:
         Returns:
             Sync statistics: {"created": int, "deleted": int, "total": int}
         """
-        logger.info(f"Syncing files for dataset {mapping.dataset_id} to project {mapping.labeling_project_id}")
+        logger.debug(f"Syncing files for dataset {mapping.dataset_id} to project {mapping.labeling_project_id}")
 
         # Fetch the DM dataset info
         dataset_info = await self.dm_client.get_dataset(mapping.dataset_id)
@@ -350,12 +350,12 @@ class SyncService:
             raise NoDatasetInfoFoundError(mapping.dataset_id)
 
         total_files = dataset_info.fileCount
-        logger.info(f"Total files in DM dataset: {total_files}")
+        logger.debug(f"Total files in DM dataset: {total_files}")
 
         # Fetch file mappings that already exist in Label Studio
         existing_dm_file_mapping = await self.get_existing_dm_file_mapping(mapping.labeling_project_id)
         existing_file_ids = set(existing_dm_file_mapping.keys())
-        logger.info(f"{len(existing_file_ids)} tasks already exist in Label Studio")
+        logger.debug(f"{len(existing_file_ids)} tasks already exist in Label Studio")
 
         # Fetch DM files page by page and create new tasks
         current_file_ids, created_count = await self._fetch_dm_files_paginated(
@@ -371,7 +371,7 @@ class SyncService:
             current_file_ids
         )
 
-        logger.info(f"File sync completed: total={total_files}, created={created_count}, deleted={deleted_count}")
+        logger.debug(f"File sync completed: total={total_files}, created={created_count}, deleted={deleted_count}")
 
         return {
             "created": created_count,
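Aside from the one reworded "Sync files completed" summary, every change in this diff demotes a per-page or per-item message from INFO to DEBUG, so a deployment logging at INFO emits only the final summaries. A minimal sketch of that effect, with an assumed logger name and illustrative messages:

import logging

# With the root level at INFO, the demoted per-page messages are suppressed
# while the end-of-sync summary still appears.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("sync_service")  # assumed name, for illustration

logger.debug("Processing page 1, 500 files")  # hidden at INFO level
logger.info("Sync files completed: created=500, deleted=0, total=500")  # shown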