feat(annotation): 添加文本数据集段落切片功能

- 在前端组件中新增 segmentationEnabled 字段控制切片开关
- 为文本数据集添加段落切片配置选项,默认启用切片功能
- 在后端接口中新增 segmentation_enabled 参数传递给标注项目
- 实现切片逻辑控制,支持文本数据的自动段落分割
- 添加数据集类型判断,仅文本数据集支持切片配置
- 更新标注任务创建和编辑表单中的切片相关字段处理
This commit is contained in:
2026-01-26 12:05:21 +08:00
parent fa160164d2
commit 371df12a96
7 changed files with 218 additions and 32 deletions

View File

@@ -25,6 +25,7 @@ router = APIRouter(
tags=["annotation/project"]
)
logger = get_logger(__name__)
# Dataset type value that gates the segmentation option: create_mapping
# upper-cases the dataset's type and only forwards `segmentation_enabled`
# into the project configuration when it equals this constant.
TEXT_DATASET_TYPE = "TEXT"
@router.get("/{mapping_id}/login")
async def login_label_studio(
@@ -62,6 +63,12 @@ async def create_mapping(
detail=f"Dataset not found in DM service: {request.dataset_id}"
)
dataset_type = (
getattr(dataset_info, "datasetType", None)
or getattr(dataset_info, "dataset_type", None)
or ""
).upper()
project_name = request.name or \
dataset_info.name or \
"A new project from DataMate"
@@ -97,6 +104,8 @@ async def create_mapping(
project_configuration["label_config"] = label_config
if project_description:
project_configuration["description"] = project_description
if dataset_type == TEXT_DATASET_TYPE and request.segmentation_enabled is not None:
project_configuration["segmentation_enabled"] = bool(request.segmentation_enabled)
labeling_project = LabelingProject(
id=str(uuid.uuid4()), # Generate UUID here