Develop labeling module (#25)

* refactor: remove db table management from LS adapter (mv to scripts later); change adapter to use the same MySQL DB as other modules.

* refactor: Rename LS Adapter module to datamate-python
This commit is contained in:
Jinglong Wang
2025-10-27 16:16:14 +08:00
committed by GitHub
parent 46dfb389f1
commit 7f819563db
69 changed files with 1104 additions and 703 deletions

View File

@@ -0,0 +1,29 @@
# app/schemas/__init__.py
from .common import *
from .dataset_mapping import *
from .dm_service import *
from .label_studio import *
__all__ = [
# Common schemas
"StandardResponse",
# Dataset Mapping schemas
"DatasetMappingBase",
"DatasetMappingCreateRequest",
"DatasetMappingUpdateRequest",
"DatasetMappingResponse",
"DatasetMappingCreateResponse",
"SyncDatasetResponse",
"DeleteDatasetResponse",
# DM Service schemas
"DatasetFileResponse",
"PagedDatasetFileResponse",
"DatasetResponse",
# Label Studio schemas
"LabelStudioProject",
"LabelStudioTask"
]

View File

@@ -0,0 +1,63 @@
"""
通用响应模型
"""
from typing import Generic, TypeVar, Optional, List
from pydantic import BaseModel, Field
# 定义泛型类型变量
T = TypeVar('T')
# 定义一个将 snake_case 转换为 camelCase 的函数
def to_camel(string: str) -> str:
"""将 snake_case 字符串转换为 camelCase"""
components = string.split('_')
# 首字母小写,其余单词首字母大写
return components[0] + ''.join(x.title() for x in components[1:])
class BaseResponseModel(BaseModel):
"""基础响应模型,启用别名生成器"""
class Config:
populate_by_name = True
alias_generator = to_camel
class StandardResponse(BaseResponseModel, Generic[T]):
"""
标准API响应格式
所有API端点应返回此格式,确保响应的一致性
"""
code: int = Field(..., description="HTTP状态码")
message: str = Field(..., description="响应消息")
data: Optional[T] = Field(None, description="响应数据")
class Config:
populate_by_name = True
alias_generator = to_camel
json_schema_extra = {
"example": {
"code": 200,
"message": "success",
"data": {}
}
}
class PaginatedData(BaseResponseModel, Generic[T]):
"""分页数据容器"""
page: int = Field(..., description="当前页码(从1开始)")
size: int = Field(..., description="页大小")
total_elements: int = Field(..., description="总条数")
total_pages: int = Field(..., description="总页数")
content: List[T] = Field(..., description="当前页数据")
class Config:
json_schema_extra = {
"example": {
"page": 1,
"size": 20,
"totalElements": 100,
"totalPages": 5,
"content": []
}
}

View File

@@ -0,0 +1,56 @@
from pydantic import Field
from typing import Optional
from datetime import datetime
from .common import BaseResponseModel
class DatasetMappingBase(BaseResponseModel):
"""数据集映射 基础模型"""
dataset_id: str = Field(..., description="源数据集ID")
class DatasetMappingCreateRequest(DatasetMappingBase):
"""数据集映射 创建 请求模型"""
pass
class DatasetMappingCreateResponse(BaseResponseModel):
"""数据集映射 创建 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="Label Studio项目ID")
labelling_project_name: str = Field(..., description="Label Studio项目名称")
message: str = Field(..., description="响应消息")
class DatasetMappingUpdateRequest(BaseResponseModel):
"""数据集映射 更新 请求模型"""
dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(DatasetMappingBase):
"""数据集映射 查询 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="标注项目ID")
labelling_project_name: Optional[str] = Field(None, description="标注项目名称")
created_at: datetime = Field(..., description="创建时间")
last_updated_at: datetime = Field(..., description="最后更新时间")
deleted_at: Optional[datetime] = Field(None, description="删除时间")
class Config:
from_attributes = True
populate_by_name = True
class SyncDatasetRequest(BaseResponseModel):
"""同步数据集请求模型"""
mapping_id: str = Field(..., description="映射ID(mapping UUID)")
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
class SyncDatasetResponse(BaseResponseModel):
"""同步数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="同步状态")
synced_files: int = Field(..., description="已同步文件数量")
total_files: int = Field(0, description="总文件数量")
message: str = Field(..., description="响应消息")
class DeleteDatasetResponse(BaseResponseModel):
"""删除数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="删除状态")
message: str = Field(..., description="响应消息")

View File

@@ -0,0 +1,58 @@
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class DatasetFileResponse(BaseModel):
"""DM服务数据集文件响应模型"""
id: str = Field(..., description="文件ID")
fileName: str = Field(..., description="文件名")
fileType: str = Field(..., description="文件类型")
filePath: str = Field(..., description="文件路径")
originalName: Optional[str] = Field(None, description="原始文件名")
size: Optional[int] = Field(None, description="文件大小(字节)")
status: Optional[str] = Field(None, description="文件状态")
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
description: Optional[str] = Field(None, description="文件描述")
uploadedBy: Optional[str] = Field(None, description="上传者")
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
class PagedDatasetFileResponse(BaseModel):
"""DM服务分页文件响应模型"""
content: List[DatasetFileResponse] = Field(..., description="文件列表")
totalElements: int = Field(..., description="总元素数")
totalPages: int = Field(..., description="总页数")
page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小")
class DatasetTypeResponse(BaseModel):
"""数据集类型响应模型"""
code: str = Field(..., description="类型编码")
name: str = Field(..., description="类型名称")
description: Optional[str] = Field(None, description="类型描述")
supportedFormats: List[str] = Field(default_factory=list, description="支持的文件格式")
icon: Optional[str] = Field(None, description="图标")
class DatasetResponse(BaseModel):
"""DM服务数据集响应模型"""
id: str = Field(..., description="数据集ID")
name: str = Field(..., description="数据集名称")
description: Optional[str] = Field(None, description="数据集描述")
datasetType: str = Field(..., description="数据集类型", alias="datasetType")
status: str = Field(..., description="数据集状态")
fileCount: int = Field(..., description="文件数量")
totalSize: int = Field(..., description="总大小(字节)")
createdAt: Optional[datetime] = Field(None, description="创建时间")
updatedAt: Optional[datetime] = Field(None, description="更新时间")
createdBy: Optional[str] = Field(None, description="创建者")
# 为了向后兼容,添加一个属性方法返回类型对象
@property
def type(self) -> DatasetTypeResponse:
"""兼容属性:返回类型对象"""
return DatasetTypeResponse(
code=self.datasetType,
name=self.datasetType,
description=None,
supportedFormats=[],
icon=None
)

View File

@@ -0,0 +1,38 @@
from pydantic import Field
from typing import Dict, Any, Optional, List
from datetime import datetime
from .common import BaseResponseModel
class LabelStudioProject(BaseResponseModel):
"""Label Studio项目模型"""
id: int = Field(..., description="项目ID")
title: str = Field(..., description="项目标题")
description: Optional[str] = Field(None, description="项目描述")
label_config: str = Field(..., description="标注配置")
created_at: Optional[datetime] = Field(None, description="创建时间")
updated_at: Optional[datetime] = Field(None, description="更新时间")
class LabelStudioTaskData(BaseResponseModel):
"""Label Studio任务数据模型"""
image: Optional[str] = Field(None, description="图像URL")
text: Optional[str] = Field(None, description="文本内容")
audio: Optional[str] = Field(None, description="音频URL")
video: Optional[str] = Field(None, description="视频URL")
filename: Optional[str] = Field(None, description="文件名")
class LabelStudioTask(BaseResponseModel):
"""Label Studio任务模型"""
data: LabelStudioTaskData = Field(..., description="任务数据")
project: Optional[int] = Field(None, description="项目ID")
meta: Optional[Dict[str, Any]] = Field(None, description="元数据")
class LabelStudioCreateProjectRequest(BaseResponseModel):
"""创建Label Studio项目请求模型"""
title: str = Field(..., description="项目标题")
description: str = Field("", description="项目描述")
label_config: str = Field(..., description="标注配置")
class LabelStudioCreateTaskRequest(BaseResponseModel):
"""创建Label Studio任务请求模型"""
data: Dict[str, Any] = Field(..., description="任务数据")
project: Optional[int] = Field(None, description="项目ID")