Files
DataMate/runtime/datamate-python/app/module/dataset/schema/dataset_file.py
Jason Wang 45743f39f5 feat: add labeling template. refactor: switch to Poetry, build and deploy of backend Python (#79)
* feat: Enhance annotation module with template management and validation

- Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
- Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates.
- Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation.
- Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats.
- Updated database schema for annotation templates and labeling projects to include new fields and constraints.
- Seeded initial annotation templates for various use cases including image classification, object detection, and text classification.

* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support

* feat: Update docker-compose.yml to mark datamate dataset volume and network as external

* feat: Add tag configuration management and related components

- Introduced new components for tag selection and browsing in the frontend.
- Added API endpoint to fetch tag configuration from the backend.
- Implemented tag configuration management in the backend, including loading from YAML.
- Enhanced template service to support dynamic tag rendering based on configuration.
- Updated validation utilities to incorporate tag configuration checks.
- Refactored existing code to utilize the new tag configuration structure.

* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation

* feat: Update Makefile to include backend-python-docker-build in the build process

* feat: Migrate to Poetry for better dependency management

* Add pyyaml dependency and update Dockerfile to use Poetry for dependency management

- Added pyyaml (>=6.0.3,<7.0.0) to pyproject.toml dependencies.
- Updated Dockerfile to install Poetry and manage dependencies using it.
- Improved layer caching by copying only dependency files before the application code.
- Removed unnecessary installation of build dependencies to keep the final image size small.

* feat: Remove duplicated backend-python-docker-build target from Makefile

* fix: Airflow is not ready to be added yet

* feat: update Python version to 3.12 and remove project installation step in Dockerfile
2025-11-13 15:32:30 +08:00

91 lines
3.8 KiB
Python

from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field
class DatasetFileResponse(BaseModel):
    """Response model for a single dataset file of the DM service.

    ``tags_updated_at`` is exposed under the camelCase alias ``tagsUpdatedAt``.
    ``populate_by_name`` is enabled so the field can also be set through its
    Python name, in line with the other aliased models in this module.
    """

    # Accept both the alias (``tagsUpdatedAt``) and the field name
    # (``tags_updated_at``) on input. Without this, a value passed by field
    # name would not populate the aliased field.
    model_config = ConfigDict(populate_by_name=True)

    # Required identifying attributes of the file.
    id: str = Field(..., description="文件ID")
    fileName: str = Field(..., description="文件名")
    fileType: str = Field(..., description="文件类型")
    filePath: str = Field(..., description="文件路径")

    # Optional metadata; every field below defaults to None when absent.
    originalName: Optional[str] = Field(None, description="原始文件名")
    size: Optional[int] = Field(None, description="文件大小(字节)")
    status: Optional[str] = Field(None, description="文件状态")
    uploadedAt: Optional[datetime] = Field(None, description="上传时间")
    description: Optional[str] = Field(None, description="文件描述")
    uploadedBy: Optional[str] = Field(None, description="上传者")
    lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")

    # Tag / annotation payload and the time it was last changed.
    tags: Optional[List[Dict[str, Any]]] = Field(None, description="文件标签/标注信息")
    tags_updated_at: Optional[datetime] = Field(None, description="标签最后更新时间", alias="tagsUpdatedAt")
class PagedDatasetFileResponse(BaseModel):
    """Paginated file response model of the DM service."""

    # Files on the current page.
    content: List[DatasetFileResponse] = Field(
        ...,
        description="文件列表",
    )

    # Totals across all pages.
    totalElements: int = Field(
        ...,
        description="总元素数",
    )
    totalPages: int = Field(
        ...,
        description="总页数",
    )

    # Position and size of the current page.
    page: int = Field(
        ...,
        description="当前页码",
    )
    size: int = Field(
        ...,
        description="每页大小",
    )
class DatasetFileTag(BaseModel):
    # Tag entry made of an id, a type, a ``from_name`` and a ``value`` mapping.
    # ``get_tags`` flattens it into a list of display strings.

    id: str = Field(..., description="标签ID")
    type: str = Field(..., description="类型")
    from_name: str = Field(..., description="标签名称")
    value: dict = Field(..., description="标签值")

    def get_tags(self) -> List[str]:
        """Return the string labels stored in ``value`` under the key ``type``.

        The entry ``value[type]`` may be a list (only its string items are
        kept) or a single string. Any other shape, or a missing key, yields an
        empty list. When ``from_name`` is non-empty, each label is prefixed
        with ``"<from_name> "``.

        Returns:
            The collected labels, possibly empty.
        """
        # Look up the payload keyed by this tag's type; default to "no labels".
        raw = self.value.get(self.type, [])

        if isinstance(raw, list):
            # Non-string items are silently skipped.
            labels = [str(item) for item in raw if isinstance(item, str)]
        elif isinstance(raw, str):
            labels = [raw]
        else:
            labels = []

        # No prefix is applied when from_name is empty.
        if not self.from_name:
            return labels
        return [f"{self.from_name} {label}" for label in labels]
class FileTagUpdate(BaseModel):
    """Tag update request for a single file."""

    # Pydantic v2 configuration, replacing the deprecated class-based
    # ``Config``. ``file_id`` may be supplied either by its alias ``fileId``
    # or by its field name.
    model_config = ConfigDict(populate_by_name=True)

    file_id: str = Field(..., alias="fileId", description="文件ID")
    # The description marks this list as a partial update.
    tags: List[Dict[str, Any]] = Field(..., description="要更新的标签列表(部分更新)")
class BatchUpdateFileTagsRequest(BaseModel):
    """Request to update the tags of several files in one call."""

    # Pydantic v2 configuration, replacing the deprecated class-based
    # ``Config``. No field here declares an alias; the setting is kept so the
    # model configuration is unchanged.
    model_config = ConfigDict(populate_by_name=True)

    # At least one update entry is required (min_length=1).
    updates: List[FileTagUpdate] = Field(..., description="文件标签更新列表", min_length=1)
class FileTagUpdateResult(BaseModel):
    """Result of a tag update for a single file."""

    # Pydantic v2 configuration, replacing the deprecated class-based
    # ``Config``. Aliased fields may be supplied by alias or by field name.
    model_config = ConfigDict(populate_by_name=True)

    file_id: str = Field(..., alias="fileId", description="文件ID")
    success: bool = Field(..., description="是否更新成功")
    # Optional result message; None when not provided.
    message: Optional[str] = Field(None, description="结果信息")
    # Optional tag update timestamp; None when not provided.
    tags_updated_at: Optional[datetime] = Field(None, alias="tagsUpdatedAt", description="标签更新时间")
class BatchUpdateFileTagsResponse(BaseModel):
    """Response for a batch file-tag update."""

    # Pydantic v2 configuration, replacing the deprecated class-based
    # ``Config``. Aliased fields may be supplied by alias or by field name.
    model_config = ConfigDict(populate_by_name=True)

    # Per-file results.
    results: List[FileTagUpdateResult] = Field(..., description="更新结果列表")

    # Aggregate counters for the batch.
    total: int = Field(..., description="总更新数量")
    success_count: int = Field(..., alias="successCount", description="成功数量")
    failure_count: int = Field(..., alias="failureCount", description="失败数量")