Files
DataMate/runtime/datamate-python/app/module/annotation/schema/mapping.py
Jerry Yan 371df12a96 feat(annotation): 添加文本数据集段落切片功能
- 在前端组件中新增 segmentationEnabled 字段控制切片开关
- 为文本数据集添加段落切片配置选项,默认启用切片功能
- 在后端接口中新增 segmentation_enabled 参数传递给标注项目
- 实现切片逻辑控制,支持文本数据的自动段落分割
- 添加数据集类型判断,仅文本数据集支持切片配置
- 更新标注任务创建和编辑表单中的切片相关字段处理
2026-01-26 12:14:27 +08:00

76 lines
3.7 KiB
Python

from pydantic import Field, BaseModel
from typing import Optional, TYPE_CHECKING
from datetime import datetime
from app.module.shared.schema import BaseResponseModel
from app.module.shared.schema import StandardResponse
if TYPE_CHECKING:
from .template import AnnotationTemplateResponse
class DatasetMappingCreateRequest(BaseModel):
"""数据集映射 创建 请求模型
Accept both snake_case and camelCase field names from frontend JSON by
declaring explicit aliases. Frontend sends `datasetId`, `name`,
`description`, `templateId` (camelCase), so provide aliases so pydantic will map them
to the internal attributes used in the service code (dataset_id, name,
description, template_id).
"""
dataset_id: str = Field(..., alias="datasetId", description="源数据集ID")
name: Optional[str] = Field(None, alias="name", description="标注项目名称")
description: Optional[str] = Field(None, alias="description", description="标注项目描述")
template_id: Optional[str] = Field(None, alias="templateId", description="标注模板ID")
label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML配置")
segmentation_enabled: Optional[bool] = Field(
None,
alias="segmentationEnabled",
description="是否启用文本分段",
)
class Config:
# allow population by field name when constructing model programmatically
validate_by_name = True
class DatasetMappingCreateResponse(BaseResponseModel):
"""数据集映射 创建 响应模型"""
id: str = Field(..., description="映射UUID")
labeling_project_id: str = Field(..., description="Label Studio项目ID")
labeling_project_name: str = Field(..., description="Label Studio项目名称")
class DatasetMappingUpdateRequest(BaseResponseModel):
"""数据集映射 更新 请求模型"""
dataset_id: Optional[str] = Field(None, description="源数据集ID")
class DatasetMappingResponse(BaseModel):
"""数据集映射 查询 响应模型"""
id: str = Field(..., description="映射UUID")
dataset_id: str = Field(..., alias="datasetId", description="源数据集ID")
dataset_name: Optional[str] = Field(None, alias="datasetName", description="数据集名称")
labeling_project_id: str = Field(..., alias="labelingProjectId", description="标注项目ID")
name: Optional[str] = Field(None, description="标注项目名称")
description: Optional[str] = Field(None, description="标注项目描述")
template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID")
template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情")
label_config: Optional[str] = Field(None, alias="labelConfig", description="实际使用的 Label Studio XML 配置")
segmentation_enabled: Optional[bool] = Field(
None,
alias="segmentationEnabled",
description="是否启用文本分段",
)
total_count: int = Field(0, alias="totalCount", description="数据集总数据量")
annotated_count: int = Field(0, alias="annotatedCount", description="已标注数据量")
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间")
deleted_at: Optional[datetime] = Field(None, alias="deletedAt", description="删除时间")
class Config:
from_attributes = True
populate_by_name = True
class DeleteDatasetResponse(BaseResponseModel):
"""删除数据集响应模型"""
id: str = Field(..., description="映射UUID")
status: str = Field(..., description="删除状态")