feat(annotation): 优化文本标注分段功能实现

- 新增 getEditorTaskSegmentsUsingGet 接口用于获取任务分段信息
- 移除 SegmentInfo 中的 text、start、end 字段,精简数据结构
- 添加 EditorTaskSegmentsResponse 类型定义用于分段摘要响应
- 实现服务端 get_task_segments 方法,支持分段信息查询
- 重构前端组件缓存机制,使用 segmentSummaryFileRef 管理分段状态
- 优化分段构建逻辑,提取 _build_segment_contexts 公共方法
- 调整后端 _build_text_task 方法中的分段处理流程
- 更新 API 类型定义,统一 RequestParams 和 RequestPayload 类型
This commit is contained in:
2026-02-04 16:59:04 +08:00
parent 394e2bda18
commit cda22a720c
5 changed files with 250 additions and 108 deletions

View File

@@ -79,12 +79,9 @@ class EditorTaskListResponse(BaseModel):
class SegmentInfo(BaseModel):
"""段落信息(用于文本分段标注)"""
"""段落摘要(用于文本分段标注)"""
idx: int = Field(..., description="段落索引")
text: str = Field(..., description="段落文本")
start: int = Field(..., description="在原文中的起始位置")
end: int = Field(..., description="在原文中的结束位置")
has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)")
chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)")
@@ -100,13 +97,22 @@ class EditorTaskResponse(BaseModel):
# 分段相关字段
segmented: bool = Field(False, description="是否启用分段模式")
segments: Optional[List[SegmentInfo]] = Field(None, description="段落列表")
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
model_config = ConfigDict(populate_by_name=True)
class EditorTaskSegmentsResponse(BaseModel):
    """Segment-summary response for the editor.

    Carries the segmented-mode flag, the list of per-segment summaries,
    and the total number of segments. ``total_segments`` is exposed under
    the ``totalSegments`` alias; ``populate_by_name`` is enabled in the
    model config so the field name is accepted as well.
    """

    # Whether segmented mode is enabled; defaults to False.
    segmented: bool = Field(
        False,
        description="是否启用分段模式",
    )
    # Per-segment summaries; a fresh empty list is created per instance.
    segments: List[SegmentInfo] = Field(
        default_factory=list,
        description="段落摘要列表",
    )
    # Total number of segments; defaults to 0.
    total_segments: int = Field(
        0,
        alias="totalSegments",
        description="总段落数",
    )

    model_config = ConfigDict(populate_by_name=True)
class UpsertAnnotationRequest(BaseModel):
"""保存/覆盖最终标注(Label Studio annotation 原始对象)"""