You've already forked DataMate
feat(annotation): 添加文本分段标注功能
- 引入文本分割器实现长文本按200字符自动分段
- 增加分段状态管理和段落导航界面
- 支持按段落保存和加载标注数据
- 实现分段模式下的标注状态跟踪
- 扩展API接口支持段落索引参数
- 添加分段相关的数据模型定义
This commit is contained in:
@@ -51,12 +51,30 @@ class EditorTaskListResponse(BaseModel):
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class SegmentInfo(BaseModel):
    """Segment information (used for segmented text annotation)."""

    # Accept input keyed by the Python field name as well as by the alias.
    model_config = ConfigDict(populate_by_name=True)

    # Index of this segment.
    idx: int = Field(default=..., description="段落索引")

    # Text content of this segment.
    text: str = Field(default=..., description="段落文本")

    # Start position of the segment within the original text.
    start: int = Field(default=..., description="在原文中的起始位置")

    # End position of the segment within the original text.
    # NOTE(review): whether this bound is inclusive or exclusive is not
    # visible here -- confirm against the code that builds the segments.
    end: int = Field(default=..., description="在原文中的结束位置")

    # Whether this segment already has an annotation; defaults to False.
    has_annotation: bool = Field(
        default=False,
        alias="hasAnnotation",
        description="该段落是否已有标注",
    )
|
||||
|
||||
|
||||
class EditorTaskResponse(BaseModel):
    """Editor task detail (a task object that can be fed directly to the Label Studio Editor)."""

    # Accept input keyed by the Python field name as well as by the alias.
    model_config = ConfigDict(populate_by_name=True)

    # Label Studio task object (required).
    task: Dict[str, Any] = Field(default=..., description="Label Studio task 对象")

    # Time the annotation was last updated; None when not provided.
    annotation_updated_at: Optional[datetime] = Field(
        default=None,
        alias="annotationUpdatedAt",
        description="标注更新时间",
    )

    # Segmentation-related fields

    # Whether segmented mode is enabled; defaults to False.
    segmented: bool = Field(default=False, description="是否启用分段模式")

    # List of segments; defaults to None.
    segments: Optional[List[SegmentInfo]] = Field(
        default=None,
        description="段落列表",
    )

    # Total number of segments; defaults to 0.
    total_segments: int = Field(
        default=0,
        alias="totalSegments",
        description="总段落数",
    )

    # Index of the current segment; defaults to 0.
    current_segment_index: int = Field(
        default=0,
        alias="currentSegmentIndex",
        description="当前段落索引",
    )
|
||||
|
||||
|
||||
@@ -69,6 +87,12 @@ class UpsertAnnotationRequest(BaseModel):
|
||||
alias="expectedUpdatedAt",
|
||||
description="乐观锁:若提供则要求与当前记录 updated_at 一致,否则返回 409",
|
||||
)
|
||||
# 分段保存支持
|
||||
segment_index: Optional[int] = Field(
|
||||
None,
|
||||
alias="segmentIndex",
|
||||
description="段落索引(分段模式下必填)",
|
||||
)
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user