Add Label Studio adapter module and its build scipts.

This commit is contained in:
Jason Wang
2025-10-22 15:14:01 +08:00
parent 1c97afed7d
commit c640105333
40 changed files with 2902 additions and 0 deletions

View File

@@ -0,0 +1,29 @@
# app/schemas/__init__.py
from .common import *
from .dataset_mapping import *
from .dm_service import *
from .label_studio import *
__all__ = [
# Common schemas
"StandardResponse",
# Dataset Mapping schemas
"DatasetMappingBase",
"DatasetMappingCreateRequest",
"DatasetMappingUpdateRequest",
"DatasetMappingResponse",
"DatasetMappingCreateResponse",
"SyncDatasetResponse",
"DeleteDatasetResponse",
# DM Service schemas
"DatasetFileResponse",
"PagedDatasetFileResponse",
"DatasetResponse",
# Label Studio schemas
"LabelStudioProject",
"LabelStudioTask"
]

View File

@@ -0,0 +1,27 @@
"""
通用响应模型
"""
from typing import Generic, TypeVar, Optional
from pydantic import BaseModel, Field
# 定义泛型类型变量
T = TypeVar('T')
class StandardResponse(BaseModel, Generic[T]):
"""
标准API响应格式
所有API端点应返回此格式,确保响应的一致性
"""
code: int = Field(..., description="HTTP状态码")
message: str = Field(..., description="响应消息")
data: Optional[T] = Field(None, description="响应数据")
class Config:
json_schema_extra = {
"example": {
"code": 200,
"message": "success",
"data": {}
}
}

View File

@@ -0,0 +1,53 @@
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime
class DatasetMappingBase(BaseModel):
"""数据集映射 基础模型"""
source_dataset_id: str = Field(..., description="源数据集ID")
class DatasetMappingCreateRequest(DatasetMappingBase):
"""数据集映射 创建 请求模型"""
pass
class DatasetMappingCreateResponse(BaseModel):
"""数据集映射 创建 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="Label Studio项目ID")
labelling_project_name: str = Field(..., description="Label Studio项目名称")
message: str = Field(..., description="响应消息")
class DatasetMappingUpdateRequest(BaseModel):
"""数据集映射 更新 请求模型"""
source_dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(DatasetMappingBase):
"""数据集映射 查询 响应模型"""
mapping_id: str = Field(..., description="映射UUID")
labelling_project_id: str = Field(..., description="标注项目ID")
labelling_project_name: Optional[str] = Field(None, description="标注项目名称")
created_at: datetime = Field(..., description="创建时间")
last_updated_at: datetime = Field(..., description="最后更新时间")
deleted_at: Optional[datetime] = Field(None, description="删除时间")
class Config:
from_attributes = True
class SyncDatasetRequest(BaseModel):
"""同步数据集请求模型"""
mapping_id: str = Field(..., description="映射ID(mapping UUID)")
batch_size: int = Field(50, ge=1, le=100, description="批处理大小")
class SyncDatasetResponse(BaseModel):
"""同步数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="同步状态")
synced_files: int = Field(..., description="已同步文件数量")
total_files: int = Field(0, description="总文件数量")
message: str = Field(..., description="响应消息")
class DeleteDatasetResponse(BaseModel):
"""删除数据集响应模型"""
mapping_id: str = Field(..., description="映射UUID")
status: str = Field(..., description="删除状态")
message: str = Field(..., description="响应消息")

View File

@@ -0,0 +1,58 @@
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class DatasetFileResponse(BaseModel):
"""DM服务数据集文件响应模型"""
id: str = Field(..., description="文件ID")
fileName: str = Field(..., description="文件名")
fileType: str = Field(..., description="文件类型")
filePath: str = Field(..., description="文件路径")
originalName: Optional[str] = Field(None, description="原始文件名")
size: Optional[int] = Field(None, description="文件大小(字节)")
status: Optional[str] = Field(None, description="文件状态")
uploadedAt: Optional[datetime] = Field(None, description="上传时间")
description: Optional[str] = Field(None, description="文件描述")
uploadedBy: Optional[str] = Field(None, description="上传者")
lastAccessTime: Optional[datetime] = Field(None, description="最后访问时间")
class PagedDatasetFileResponse(BaseModel):
"""DM服务分页文件响应模型"""
content: List[DatasetFileResponse] = Field(..., description="文件列表")
totalElements: int = Field(..., description="总元素数")
totalPages: int = Field(..., description="总页数")
page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小")
class DatasetTypeResponse(BaseModel):
"""数据集类型响应模型"""
code: str = Field(..., description="类型编码")
name: str = Field(..., description="类型名称")
description: Optional[str] = Field(None, description="类型描述")
supportedFormats: List[str] = Field(default_factory=list, description="支持的文件格式")
icon: Optional[str] = Field(None, description="图标")
class DatasetResponse(BaseModel):
"""DM服务数据集响应模型"""
id: str = Field(..., description="数据集ID")
name: str = Field(..., description="数据集名称")
description: Optional[str] = Field(None, description="数据集描述")
datasetType: str = Field(..., description="数据集类型", alias="datasetType")
status: str = Field(..., description="数据集状态")
fileCount: int = Field(..., description="文件数量")
totalSize: int = Field(..., description="总大小(字节)")
createdAt: Optional[datetime] = Field(None, description="创建时间")
updatedAt: Optional[datetime] = Field(None, description="更新时间")
createdBy: Optional[str] = Field(None, description="创建者")
# 为了向后兼容,添加一个属性方法返回类型对象
@property
def type(self) -> DatasetTypeResponse:
"""兼容属性:返回类型对象"""
return DatasetTypeResponse(
code=self.datasetType,
name=self.datasetType,
description=None,
supportedFormats=[],
icon=None
)

View File

@@ -0,0 +1,37 @@
from pydantic import BaseModel, Field
from typing import Dict, Any, Optional, List
from datetime import datetime
class LabelStudioProject(BaseModel):
"""Label Studio项目模型"""
id: int = Field(..., description="项目ID")
title: str = Field(..., description="项目标题")
description: Optional[str] = Field(None, description="项目描述")
label_config: str = Field(..., description="标注配置")
created_at: Optional[datetime] = Field(None, description="创建时间")
updated_at: Optional[datetime] = Field(None, description="更新时间")
class LabelStudioTaskData(BaseModel):
"""Label Studio任务数据模型"""
image: Optional[str] = Field(None, description="图像URL")
text: Optional[str] = Field(None, description="文本内容")
audio: Optional[str] = Field(None, description="音频URL")
video: Optional[str] = Field(None, description="视频URL")
filename: Optional[str] = Field(None, description="文件名")
class LabelStudioTask(BaseModel):
"""Label Studio任务模型"""
data: LabelStudioTaskData = Field(..., description="任务数据")
project: Optional[int] = Field(None, description="项目ID")
meta: Optional[Dict[str, Any]] = Field(None, description="元数据")
class LabelStudioCreateProjectRequest(BaseModel):
"""创建Label Studio项目请求模型"""
title: str = Field(..., description="项目标题")
description: str = Field("", description="项目描述")
label_config: str = Field(..., description="标注配置")
class LabelStudioCreateTaskRequest(BaseModel):
"""创建Label Studio任务请求模型"""
data: Dict[str, Any] = Field(..., description="任务数据")
project: Optional[int] = Field(None, description="项目ID")