feat(annotation): 添加文本分段标注功能

- 引入文本分割器实现长文本按200字符自动分段
- 增加分段状态管理和段落导航界面
- 支持按段落保存和加载标注数据
- 实现分段模式下的标注状态跟踪
- 扩展API接口支持段落索引参数
- 添加分段相关的数据模型定义
This commit is contained in:
2026-01-19 18:18:19 +08:00
parent 3af0f0b3a1
commit 71c4a8d8a6
6 changed files with 395 additions and 41 deletions

View File

@@ -51,12 +51,30 @@ class EditorTaskListResponse(BaseModel):
model_config = ConfigDict(populate_by_name=True)
class SegmentInfo(BaseModel):
    """Describe one segment of a text for segment-by-segment annotation.

    Every field may be supplied either under its Python attribute name or
    under its alias, because ``populate_by_name`` is enabled.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Index of this segment.
    idx: int = Field(
        default=...,
        description="段落索引",
    )
    # Text content of this segment.
    text: str = Field(
        default=...,
        description="段落文本",
    )
    # Start position of the segment within the original text.
    start: int = Field(
        default=...,
        description="在原文中的起始位置",
    )
    # End position of the segment within the original text.
    end: int = Field(
        default=...,
        description="在原文中的结束位置",
    )
    # Whether the segment already has an annotation; defaults to False.
    has_annotation: bool = Field(
        default=False,
        alias="hasAnnotation",
        description="该段落是否已有标注",
    )
class EditorTaskResponse(BaseModel):
    """Editor task detail response.

    The ``task`` payload is a Label Studio task object that can be handed
    directly to the Label Studio Editor. The remaining fields carry the
    annotation timestamp and the optional text-segmentation state.

    Every field may be supplied either under its Python attribute name or
    under its alias, because ``populate_by_name`` is enabled.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Label Studio task object (required).
    task: Dict[str, Any] = Field(
        default=...,
        description="Label Studio task 对象",
    )
    # Time the annotation was last updated; None when not provided.
    annotation_updated_at: Optional[datetime] = Field(
        default=None,
        alias="annotationUpdatedAt",
        description="标注更新时间",
    )

    # Segmentation-related fields
    # Whether segmented mode is enabled; defaults to False.
    segmented: bool = Field(
        default=False,
        description="是否启用分段模式",
    )
    # List of segments; defaults to None.
    segments: Optional[List[SegmentInfo]] = Field(
        default=None,
        description="段落列表",
    )
    # Total number of segments; defaults to 0.
    total_segments: int = Field(
        default=0,
        alias="totalSegments",
        description="总段落数",
    )
    # Index of the current segment; defaults to 0.
    current_segment_index: int = Field(
        default=0,
        alias="currentSegmentIndex",
        description="当前段落索引",
    )
@@ -69,6 +87,12 @@ class UpsertAnnotationRequest(BaseModel):
alias="expectedUpdatedAt",
description="乐观锁:若提供则要求与当前记录 updated_at 一致,否则返回 409",
)
# 分段保存支持
segment_index: Optional[int] = Field(
None,
alias="segmentIndex",
description="段落索引(分段模式下必填)",
)
model_config = ConfigDict(populate_by_name=True)