* feat: Enhance annotation module with template management and validation
  - Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
  - Introduced annotation template schemas, including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse, for managing annotation templates.
  - Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation.
  - Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats.
  - Updated the database schema for annotation templates and labeling projects to include new fields and constraints.
  - Seeded initial annotation templates for common use cases, including image classification, object detection, and text classification.
* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support
* feat: Update docker-compose.yml to mark the datamate dataset volume and network as external
* feat: Add tag configuration management and related components
  - Introduced new components for tag selection and browsing in the frontend.
  - Added an API endpoint to fetch the tag configuration from the backend.
  - Implemented tag configuration management in the backend, including loading from YAML.
  - Enhanced the template service to support dynamic tag rendering based on the configuration.
  - Updated validation utilities to incorporate tag configuration checks.
  - Refactored existing code to use the new tag configuration structure.
* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation
* feat: Update Makefile to include backend-python-docker-build in the build process
* feat: Migrate to Poetry for better dependency management
* Add pyyaml dependency and update Dockerfile to use Poetry for dependency management
  - Added pyyaml (>=6.0.3,<7.0.0) to the pyproject.toml dependencies.
  - Updated the Dockerfile to install Poetry and manage dependencies with it.
  - Improved layer caching by copying only dependency files before the application code.
  - Removed unnecessary installation of build dependencies to keep the final image size small.
* feat: Remove duplicated backend-python-docker-build target from Makefile
* fix: airflow is not ready for adding yet
* feat: Update Python version to 3.12 and remove project installation step in Dockerfile
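The camelCase/snake_case request handling mentioned in the first commit follows the usual Pydantic alias pattern, the same one used by the schemas further down this page. A minimal sketch, assuming Pydantic v2; the class and field names below are hypothetical and not taken from the actual DatasetMappingCreateRequest:

from typing import Optional
from pydantic import BaseModel, Field


class DatasetMappingRequestSketch(BaseModel):
    """Hypothetical request shape: accepts both camelCase and snake_case keys."""

    dataset_id: str = Field(..., alias="datasetId", description="Dataset ID")
    mapping_name: Optional[str] = Field(None, alias="mappingName", description="Mapping name")

    class Config:
        populate_by_name = True  # accept snake_case field names in addition to the camelCase aliases


# Both spellings parse to the same model:
assert (
    DatasetMappingRequestSketch(datasetId="d-1", mappingName="m")
    == DatasetMappingRequestSketch(dataset_id="d-1", mapping_name="m")
)

The file below defines the dataset file and tag-update schemas that rely on this pattern.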
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime


class DatasetFileResponse(BaseModel):
    """Dataset file response model from the DM service."""

    id: str = Field(..., description="File ID")
    fileName: str = Field(..., description="File name")
    fileType: str = Field(..., description="File type")
    filePath: str = Field(..., description="File path")
    originalName: Optional[str] = Field(None, description="Original file name")
    size: Optional[int] = Field(None, description="File size in bytes")
    status: Optional[str] = Field(None, description="File status")
    uploadedAt: Optional[datetime] = Field(None, description="Upload time")
    description: Optional[str] = Field(None, description="File description")
    uploadedBy: Optional[str] = Field(None, description="Uploader")
    lastAccessTime: Optional[datetime] = Field(None, description="Last access time")
    tags: Optional[List[Dict[str, Any]]] = Field(None, description="File tags/annotation info")
    tags_updated_at: Optional[datetime] = Field(None, description="Time the tags were last updated", alias="tagsUpdatedAt")


class PagedDatasetFileResponse(BaseModel):
    """Paged dataset file response model from the DM service."""

    content: List[DatasetFileResponse] = Field(..., description="File list")
    totalElements: int = Field(..., description="Total number of elements")
    totalPages: int = Field(..., description="Total number of pages")
    page: int = Field(..., description="Current page number")
    size: int = Field(..., description="Page size")


class DatasetFileTag(BaseModel):
    id: str = Field(..., description="Tag ID")
    type: str = Field(..., description="Tag type")
    from_name: str = Field(..., description="Tag name")
    value: dict = Field(..., description="Tag value")

    def get_tags(self) -> List[str]:
        tags: List[str] = []

        # `value` is a dict; the key matching `type` holds the tag value(s).
        tag_values = self.value.get(self.type, [])

        # Normalize the tag value(s) into a list of strings.
        if isinstance(tag_values, list):
            for tag in tag_values:
                if isinstance(tag, str):
                    tags.append(tag)
        elif isinstance(tag_values, str):
            tags.append(tag_values)

        # If from_name is set, prefix each tag with it.
        if self.from_name:
            tags = [f"{self.from_name} {tag}" for tag in tags]

        return tags


class FileTagUpdate(BaseModel):
    """Tag update request for a single file."""

    file_id: str = Field(..., alias="fileId", description="File ID")
    tags: List[Dict[str, Any]] = Field(..., description="Tags to update (partial update)")

    class Config:
        populate_by_name = True


class BatchUpdateFileTagsRequest(BaseModel):
    """Batch file tag update request."""

    updates: List[FileTagUpdate] = Field(..., description="List of file tag updates", min_length=1)

    class Config:
        populate_by_name = True


class FileTagUpdateResult(BaseModel):
    """Tag update result for a single file."""

    file_id: str = Field(..., alias="fileId", description="File ID")
    success: bool = Field(..., description="Whether the update succeeded")
    message: Optional[str] = Field(None, description="Result message")
    tags_updated_at: Optional[datetime] = Field(None, alias="tagsUpdatedAt", description="Time the tags were updated")

    class Config:
        populate_by_name = True


class BatchUpdateFileTagsResponse(BaseModel):
    """Batch file tag update response."""

    results: List[FileTagUpdateResult] = Field(..., description="List of update results")
    total: int = Field(..., description="Total number of updates")
    success_count: int = Field(..., alias="successCount", description="Number of successful updates")
    failure_count: int = Field(..., alias="failureCount", description="Number of failed updates")

    class Config:
        populate_by_name = True
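A small usage sketch follows (hypothetical values, not taken from the repository); it assumes the schemas above are in scope and shows the camelCase alias handling together with DatasetFileTag.get_tags flattening a Label Studio style "choices" result:

# Hypothetical example: "fileId" uses the camelCase alias, and the tag dict mimics
# a Label Studio "choices" annotation result.
request = BatchUpdateFileTagsRequest(
    updates=[
        {
            "fileId": "f-001",
            "tags": [
                {
                    "id": "t-1",
                    "type": "choices",
                    "from_name": "sentiment",
                    "value": {"choices": ["positive"]},
                }
            ],
        }
    ]
)

tag = DatasetFileTag(**request.updates[0].tags[0])
print(tag.get_tags())  # ['sentiment positive']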