You've already forked DataMate
feat(annotation): 添加分段标注统计和进度跟踪功能
- 新增 SegmentStats 类型定义用于分段统计 - 实现分段标注进度计算和缓存机制 - 添加标注任务状态判断逻辑支持分段模式 - 集成分段统计数据显示到任务列表界面 - 实现分段总数自动计算和验证功能 - 扩展标注状态枚举支持进行中标注状态 - 优化任务选择逻辑基于分段完成状态 - 添加分段统计数据预加载和同步机制
This commit is contained in:
@@ -28,6 +28,7 @@ type EditorTaskListItem = {
|
|||||||
hasAnnotation: boolean;
|
hasAnnotation: boolean;
|
||||||
annotationUpdatedAt?: string | null;
|
annotationUpdatedAt?: string | null;
|
||||||
annotationStatus?: AnnotationResultStatus | null;
|
annotationStatus?: AnnotationResultStatus | null;
|
||||||
|
segmentStats?: SegmentStats;
|
||||||
};
|
};
|
||||||
|
|
||||||
type LsfMessage = {
|
type LsfMessage = {
|
||||||
@@ -45,6 +46,11 @@ type SegmentInfo = {
|
|||||||
chunkIndex: number;
|
chunkIndex: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
type SegmentStats = {
|
||||||
|
done: number;
|
||||||
|
total: number;
|
||||||
|
};
|
||||||
|
|
||||||
type ApiResponse<T> = {
|
type ApiResponse<T> = {
|
||||||
code?: number;
|
code?: number;
|
||||||
message?: string;
|
message?: string;
|
||||||
@@ -136,6 +142,16 @@ const isAnnotationResultEmpty = (annotation?: Record<string, unknown>) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const resolveTaskStatusMeta = (item: EditorTaskListItem) => {
|
const resolveTaskStatusMeta = (item: EditorTaskListItem) => {
|
||||||
|
const segmentSummary = resolveSegmentSummary(item);
|
||||||
|
if (segmentSummary) {
|
||||||
|
if (segmentSummary.done >= segmentSummary.total) {
|
||||||
|
return { text: "已标注", type: "success" as const };
|
||||||
|
}
|
||||||
|
if (segmentSummary.done > 0) {
|
||||||
|
return { text: "标注中", type: "warning" as const };
|
||||||
|
}
|
||||||
|
return { text: "未标注", type: "secondary" as const };
|
||||||
|
}
|
||||||
if (!item.hasAnnotation) {
|
if (!item.hasAnnotation) {
|
||||||
return { text: "未标注", type: "secondary" as const };
|
return { text: "未标注", type: "secondary" as const };
|
||||||
}
|
}
|
||||||
@@ -145,6 +161,9 @@ const resolveTaskStatusMeta = (item: EditorTaskListItem) => {
|
|||||||
if (item.annotationStatus === AnnotationResultStatus.NOT_APPLICABLE) {
|
if (item.annotationStatus === AnnotationResultStatus.NOT_APPLICABLE) {
|
||||||
return { text: NOT_APPLICABLE_LABEL, type: "warning" as const };
|
return { text: NOT_APPLICABLE_LABEL, type: "warning" as const };
|
||||||
}
|
}
|
||||||
|
if (item.annotationStatus === AnnotationResultStatus.IN_PROGRESS) {
|
||||||
|
return { text: "标注中", type: "warning" as const };
|
||||||
|
}
|
||||||
return { text: "已标注", type: "success" as const };
|
return { text: "已标注", type: "success" as const };
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -184,6 +203,25 @@ const buildAnnotationSnapshot = (annotation?: Record<string, unknown>) => {
|
|||||||
const buildSnapshotKey = (fileId: string, segmentIndex?: number) =>
|
const buildSnapshotKey = (fileId: string, segmentIndex?: number) =>
|
||||||
`${fileId}::${segmentIndex ?? "full"}`;
|
`${fileId}::${segmentIndex ?? "full"}`;
|
||||||
|
|
||||||
|
/**
 * Summarize a file's segment list as annotated-vs-total counts.
 *
 * @param segmentList Segments reported for a file; may be absent or null.
 * @returns `{ done, total }`, where `total` is the list length and `done` is the
 *   number of entries whose `hasAnnotation` is truthy; `null` when the input is
 *   not a non-empty array.
 */
const buildSegmentStats = (segmentList?: SegmentInfo[] | null): SegmentStats | null => {
  if (!Array.isArray(segmentList) || segmentList.length === 0) {
    return null;
  }
  let annotated = 0;
  for (const segment of segmentList) {
    if (segment.hasAnnotation) {
      annotated += 1;
    }
  }
  return { done: annotated, total: segmentList.length };
};
|
||||||
|
|
||||||
|
/**
 * Validate and clamp segment statistics before they are cached or rendered.
 *
 * @param stats Raw statistics; may be absent or null.
 * @returns `null` when `stats` is missing or `total` is not a finite number
 *   greater than zero; otherwise `{ done, total }` with `done` clamped into
 *   `[0, total]`. A `done` that cannot be read as a number is treated as 0.
 */
const normalizeSegmentStats = (stats?: SegmentStats | null): SegmentStats | null => {
  if (!stats) return null;
  const total = Number(stats.total);
  if (!Number.isFinite(total) || total <= 0) return null;
  const rawDone = Number(stats.done);
  // Math.max/Math.min propagate NaN, so a missing or malformed `done` would
  // otherwise come back as NaN and be rendered as "NaN/total".
  const done = Number.isNaN(rawDone) ? 0 : rawDone;
  return { done: Math.min(Math.max(done, 0), total), total };
};
|
||||||
|
|
||||||
|
/**
 * Return the validated segment statistics attached to a task-list item.
 * Yields `null` when the item carries no statistics that pass
 * `normalizeSegmentStats`.
 */
const resolveSegmentSummary = (item: EditorTaskListItem) => {
  const { segmentStats } = item;
  return normalizeSegmentStats(segmentStats);
};
|
||||||
|
|
||||||
const mergeTaskItems = (base: EditorTaskListItem[], next: EditorTaskListItem[]) => {
|
const mergeTaskItems = (base: EditorTaskListItem[], next: EditorTaskListItem[]) => {
|
||||||
if (next.length === 0) return base;
|
if (next.length === 0) return base;
|
||||||
const seen = new Set(base.map((item) => item.fileId));
|
const seen = new Set(base.map((item) => item.fileId));
|
||||||
@@ -234,6 +272,9 @@ export default function LabelStudioTextEditor() {
|
|||||||
const exportCheckSeqRef = useRef(0);
|
const exportCheckSeqRef = useRef(0);
|
||||||
const savedSnapshotsRef = useRef<Record<string, string>>({});
|
const savedSnapshotsRef = useRef<Record<string, string>>({});
|
||||||
const pendingAutoAdvanceRef = useRef(false);
|
const pendingAutoAdvanceRef = useRef(false);
|
||||||
|
const segmentStatsCacheRef = useRef<Record<string, SegmentStats>>({});
|
||||||
|
const segmentStatsSeqRef = useRef(0);
|
||||||
|
const segmentStatsLoadingRef = useRef<Set<string>>(new Set());
|
||||||
|
|
||||||
const [loadingProject, setLoadingProject] = useState(true);
|
const [loadingProject, setLoadingProject] = useState(true);
|
||||||
const [loadingTasks, setLoadingTasks] = useState(false);
|
const [loadingTasks, setLoadingTasks] = useState(false);
|
||||||
@@ -276,6 +317,70 @@ export default function LabelStudioTextEditor() {
|
|||||||
win.postMessage({ type, payload }, origin);
|
win.postMessage({ type, payload }, origin);
|
||||||
}, [origin]);
|
}, [origin]);
|
||||||
|
|
||||||
|
/**
 * Write normalized segment statistics onto the task-list entry for `fileId`.
 *
 * Passing `null` (or statistics that fail normalization) clears the entry's
 * `segmentStats`. When the entry already holds the same `done`/`total` values,
 * or no entry matches `fileId`, the previous `tasks` array is returned as-is.
 *
 * @param fileId File whose list entry should be updated; empty ids are ignored.
 * @param stats Statistics to apply, or `null` to clear them.
 */
const applySegmentStats = useCallback((fileId: string, stats: SegmentStats | null) => {
  if (!fileId) return;
  const normalized = normalizeSegmentStats(stats);
  setTasks((prev) => {
    let changed = false;
    const next = prev.map((item) => {
      if (item.fileId !== fileId) return item;
      const current = item.segmentStats;
      if (current?.done === normalized?.done && current?.total === normalized?.total) {
        return item;
      }
      changed = true;
      return { ...item, segmentStats: normalized ?? undefined };
    });
    // Handing back `prev` lets React skip the update entirely. `tasks` is a
    // dependency of the effect that calls prefetchSegmentStats, so a fresh
    // array on every no-op call would re-trigger that effect for nothing.
    return changed ? next : prev;
  });
}, []);
|
||||||
|
|
||||||
|
/**
 * Record segment statistics for a file in the ref-backed cache and mirror them
 * onto the task list.
 *
 * Statistics that fail normalization (or `null`) remove the cache entry and
 * clear the list entry's statistics.
 *
 * @param fileId File the statistics belong to; empty ids are ignored.
 * @param stats Statistics to store, or `null` to drop them.
 */
const updateSegmentStatsCache = useCallback((fileId: string, stats: SegmentStats | null) => {
  if (!fileId) return;
  const sanitized = normalizeSegmentStats(stats);
  const cache = segmentStatsCacheRef.current;
  if (sanitized === null) {
    delete cache[fileId];
  } else {
    cache[fileId] = sanitized;
  }
  applySegmentStats(fileId, sanitized);
}, [applySegmentStats]);
|
||||||
|
|
||||||
|
/**
 * Fetch one file's editor task (segment index 0) and, if the file is segmented,
 * cache its segment statistics and apply them to the task list.
 *
 * Skips files that are already cached or currently being fetched. A response is
 * discarded when `segmentStatsSeqRef` no longer equals `seq`, i.e. the prefetch
 * generation changed while the request was in flight. Request errors are logged
 * and not rethrown.
 *
 * @param fileId File to fetch statistics for.
 * @param seq Prefetch generation captured by the caller.
 */
const fetchSegmentStatsForFile = useCallback(async (fileId: string, seq: number) => {
  if (!projectId || !fileId) return;
  // Pin the in-flight set this request registers in. Reset code replaces
  // `segmentStatsLoadingRef.current` with a new Set; cleaning up through the ref
  // afterwards would remove the marker of a newer request for the same file.
  const loading = segmentStatsLoadingRef.current;
  if (segmentStatsCacheRef.current[fileId] || loading.has(fileId)) return;
  loading.add(fileId);
  try {
    const resp = (await getEditorTaskUsingGet(projectId, fileId, {
      segmentIndex: 0,
    })) as ApiResponse<EditorTaskResponse>;
    if (segmentStatsSeqRef.current !== seq) return;
    const data = resp?.data;
    if (!data?.segmented) return;
    const stats = buildSegmentStats(data.segments);
    if (!stats) return;
    segmentStatsCacheRef.current[fileId] = stats;
    applySegmentStats(fileId, stats);
  } catch (e) {
    console.error(e);
  } finally {
    loading.delete(fileId);
  }
}, [applySegmentStats, projectId]);
|
||||||
|
|
||||||
|
/**
 * Load segment statistics in the background for every listed file that has no
 * cached statistics yet.
 *
 * Up to three workers pull file ids from a shared queue and fetch them one at a
 * time. Workers stop as soon as `segmentStatsSeqRef` differs from the value
 * captured when the prefetch started. The call returns immediately; the work is
 * not awaited.
 *
 * @param items Task-list entries to consider.
 */
const prefetchSegmentStats = useCallback((items: EditorTaskListItem[]) => {
  if (!projectId) return;
  const pending = items
    .map(({ fileId }) => fileId)
    .filter((id) => id && !segmentStatsCacheRef.current[id]);
  if (pending.length === 0) return;

  const startedSeq = segmentStatsSeqRef.current;
  const concurrency = Math.min(3, pending.length);
  let nextIndex = 0;

  // Each worker claims the next unclaimed id synchronously before awaiting, so
  // no id is fetched by two workers.
  const drainQueue = async () => {
    while (nextIndex < pending.length && segmentStatsSeqRef.current === startedSeq) {
      const fileId = pending[nextIndex];
      nextIndex += 1;
      await fetchSegmentStatsForFile(fileId, startedSeq);
    }
  };

  const workers: Promise<void>[] = [];
  for (let i = 0; i < concurrency; i += 1) {
    workers.push(drainQueue());
  }
  void Promise.all(workers);
}, [fetchSegmentStatsForFile, projectId]);
|
||||||
|
|
||||||
const confirmEmptyAnnotationStatus = useCallback(() => {
|
const confirmEmptyAnnotationStatus = useCallback(() => {
|
||||||
return new Promise<AnnotationResultStatus | null>((resolve) => {
|
return new Promise<AnnotationResultStatus | null>((resolve) => {
|
||||||
let resolved = false;
|
let resolved = false;
|
||||||
@@ -327,8 +432,13 @@ export default function LabelStudioTextEditor() {
|
|||||||
}, [message, projectId]);
|
}, [message, projectId]);
|
||||||
|
|
||||||
const updateTaskSelection = useCallback((items: EditorTaskListItem[]) => {
|
const updateTaskSelection = useCallback((items: EditorTaskListItem[]) => {
|
||||||
|
const isCompleted = (item: EditorTaskListItem) => {
|
||||||
|
const summary = resolveSegmentSummary(item);
|
||||||
|
if (summary) return summary.done >= summary.total;
|
||||||
|
return item.hasAnnotation;
|
||||||
|
};
|
||||||
const defaultFileId =
|
const defaultFileId =
|
||||||
items.find((item) => !item.hasAnnotation)?.fileId || items[0]?.fileId || "";
|
items.find((item) => !isCompleted(item))?.fileId || items[0]?.fileId || "";
|
||||||
setSelectedFileId((prev) => {
|
setSelectedFileId((prev) => {
|
||||||
if (prev && items.some((item) => item.fileId === prev)) return prev;
|
if (prev && items.some((item) => item.fileId === prev)) return prev;
|
||||||
return defaultFileId;
|
return defaultFileId;
|
||||||
@@ -385,6 +495,9 @@ export default function LabelStudioTextEditor() {
|
|||||||
if (mode === "reset") {
|
if (mode === "reset") {
|
||||||
prefetchSeqRef.current += 1;
|
prefetchSeqRef.current += 1;
|
||||||
setPrefetching(false);
|
setPrefetching(false);
|
||||||
|
segmentStatsSeqRef.current += 1;
|
||||||
|
segmentStatsCacheRef.current = {};
|
||||||
|
segmentStatsLoadingRef.current = new Set();
|
||||||
}
|
}
|
||||||
if (mode === "append") {
|
if (mode === "append") {
|
||||||
setLoadingMore(true);
|
setLoadingMore(true);
|
||||||
@@ -469,13 +582,16 @@ export default function LabelStudioTextEditor() {
|
|||||||
? resolveSegmentIndex(data.currentSegmentIndex) ?? 0
|
? resolveSegmentIndex(data.currentSegmentIndex) ?? 0
|
||||||
: undefined;
|
: undefined;
|
||||||
if (data?.segmented) {
|
if (data?.segmented) {
|
||||||
|
const stats = buildSegmentStats(data.segments);
|
||||||
setSegmented(true);
|
setSegmented(true);
|
||||||
setSegments(data.segments || []);
|
setSegments(data.segments || []);
|
||||||
setCurrentSegmentIndex(segmentIndex ?? 0);
|
setCurrentSegmentIndex(segmentIndex ?? 0);
|
||||||
|
updateSegmentStatsCache(fileId, stats);
|
||||||
} else {
|
} else {
|
||||||
setSegmented(false);
|
setSegmented(false);
|
||||||
setSegments([]);
|
setSegments([]);
|
||||||
setCurrentSegmentIndex(0);
|
setCurrentSegmentIndex(0);
|
||||||
|
updateSegmentStatsCache(fileId, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
const taskData = {
|
const taskData = {
|
||||||
@@ -535,7 +651,7 @@ export default function LabelStudioTextEditor() {
|
|||||||
} finally {
|
} finally {
|
||||||
if (seq === initSeqRef.current) setLoadingTaskDetail(false);
|
if (seq === initSeqRef.current) setLoadingTaskDetail(false);
|
||||||
}
|
}
|
||||||
}, [iframeReady, message, postToIframe, project, projectId]);
|
}, [iframeReady, message, postToIframe, project, projectId, updateSegmentStatsCache]);
|
||||||
|
|
||||||
const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
|
const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
|
||||||
if (!fileId) return;
|
if (!fileId) return;
|
||||||
@@ -643,13 +759,13 @@ export default function LabelStudioTextEditor() {
|
|||||||
|
|
||||||
// 分段模式下更新当前段落的标注状态
|
// 分段模式下更新当前段落的标注状态
|
||||||
if (segmented && segmentIndex !== undefined) {
|
if (segmented && segmentIndex !== undefined) {
|
||||||
setSegments((prev) =>
|
const nextSegments = segments.map((seg) =>
|
||||||
prev.map((seg) =>
|
seg.idx === segmentIndex
|
||||||
seg.idx === segmentIndex
|
? { ...seg, hasAnnotation: true }
|
||||||
? { ...seg, hasAnnotation: true }
|
: seg
|
||||||
: seg
|
|
||||||
)
|
|
||||||
);
|
);
|
||||||
|
setSegments(nextSegments);
|
||||||
|
updateSegmentStatsCache(String(fileId), buildSegmentStats(nextSegments));
|
||||||
}
|
}
|
||||||
if (options?.autoAdvance) {
|
if (options?.autoAdvance) {
|
||||||
await advanceAfterSave(String(fileId), segmentIndex);
|
await advanceAfterSave(String(fileId), segmentIndex);
|
||||||
@@ -669,8 +785,10 @@ export default function LabelStudioTextEditor() {
|
|||||||
message,
|
message,
|
||||||
projectId,
|
projectId,
|
||||||
segmented,
|
segmented,
|
||||||
|
segments,
|
||||||
selectedFileId,
|
selectedFileId,
|
||||||
tasks,
|
tasks,
|
||||||
|
updateSegmentStatsCache,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const requestExportForCheck = useCallback(() => {
|
const requestExportForCheck = useCallback(() => {
|
||||||
@@ -834,6 +952,9 @@ export default function LabelStudioTextEditor() {
|
|||||||
setSegments([]);
|
setSegments([]);
|
||||||
setCurrentSegmentIndex(0);
|
setCurrentSegmentIndex(0);
|
||||||
savedSnapshotsRef.current = {};
|
savedSnapshotsRef.current = {};
|
||||||
|
segmentStatsSeqRef.current += 1;
|
||||||
|
segmentStatsCacheRef.current = {};
|
||||||
|
segmentStatsLoadingRef.current = new Set();
|
||||||
if (exportCheckRef.current?.timer) {
|
if (exportCheckRef.current?.timer) {
|
||||||
window.clearTimeout(exportCheckRef.current.timer);
|
window.clearTimeout(exportCheckRef.current.timer);
|
||||||
}
|
}
|
||||||
@@ -847,6 +968,12 @@ export default function LabelStudioTextEditor() {
|
|||||||
loadTasks({ mode: "reset" });
|
loadTasks({ mode: "reset" });
|
||||||
}, [project?.supported, loadTasks]);
|
}, [project?.supported, loadTasks]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!segmented) return;
|
||||||
|
if (tasks.length === 0) return;
|
||||||
|
prefetchSegmentStats(tasks);
|
||||||
|
}, [prefetchSegmentStats, segmented, tasks]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!selectedFileId) return;
|
if (!selectedFileId) return;
|
||||||
initEditorForFile(selectedFileId);
|
initEditorForFile(selectedFileId);
|
||||||
@@ -1097,6 +1224,7 @@ export default function LabelStudioTextEditor() {
|
|||||||
dataSource={tasks}
|
dataSource={tasks}
|
||||||
loadMore={loadMoreNode}
|
loadMore={loadMoreNode}
|
||||||
renderItem={(item) => {
|
renderItem={(item) => {
|
||||||
|
const segmentSummary = resolveSegmentSummary(item);
|
||||||
const statusMeta = resolveTaskStatusMeta(item);
|
const statusMeta = resolveTaskStatusMeta(item);
|
||||||
return (
|
return (
|
||||||
<List.Item
|
<List.Item
|
||||||
@@ -1110,18 +1238,25 @@ export default function LabelStudioTextEditor() {
|
|||||||
onClick={() => setSelectedFileId(item.fileId)}
|
onClick={() => setSelectedFileId(item.fileId)}
|
||||||
>
|
>
|
||||||
<div className="flex flex-col w-full gap-1">
|
<div className="flex flex-col w-full gap-1">
|
||||||
<Typography.Text ellipsis style={{ fontSize: 13 }}>
|
<Typography.Text ellipsis style={{ fontSize: 13 }}>
|
||||||
{item.fileName}
|
{item.fileName}
|
||||||
</Typography.Text>
|
|
||||||
<div className="flex items-center justify-between">
|
|
||||||
<Typography.Text type={statusMeta.type} style={{ fontSize: 11 }}>
|
|
||||||
{statusMeta.text}
|
|
||||||
</Typography.Text>
|
</Typography.Text>
|
||||||
{item.annotationUpdatedAt && (
|
<div className="flex items-center justify-between">
|
||||||
<Typography.Text type="secondary" style={{ fontSize: 10 }}>
|
<div className="flex items-center gap-2">
|
||||||
{item.annotationUpdatedAt}
|
<Typography.Text type={statusMeta.type} style={{ fontSize: 11 }}>
|
||||||
</Typography.Text>
|
{statusMeta.text}
|
||||||
)}
|
</Typography.Text>
|
||||||
|
{segmentSummary && (
|
||||||
|
<Typography.Text type="secondary" style={{ fontSize: 10 }}>
|
||||||
|
已标注 {segmentSummary.done}/{segmentSummary.total}
|
||||||
|
</Typography.Text>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{item.annotationUpdatedAt && (
|
||||||
|
<Typography.Text type="secondary" style={{ fontSize: 10 }}>
|
||||||
|
{item.annotationUpdatedAt}
|
||||||
|
</Typography.Text>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</List.Item>
|
</List.Item>
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ export enum AnnotationTaskStatus {
|
|||||||
|
|
||||||
export enum AnnotationResultStatus {
|
export enum AnnotationResultStatus {
|
||||||
ANNOTATED = "ANNOTATED",
|
ANNOTATED = "ANNOTATED",
|
||||||
|
IN_PROGRESS = "IN_PROGRESS",
|
||||||
NO_ANNOTATION = "NO_ANNOTATION",
|
NO_ANNOTATION = "NO_ANNOTATION",
|
||||||
NOT_APPLICABLE = "NOT_APPLICABLE",
|
NOT_APPLICABLE = "NOT_APPLICABLE",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,10 +9,17 @@ from app.db.session import Base
|
|||||||
ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
|
ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
|
||||||
ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
|
ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
|
||||||
ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
|
ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
|
||||||
|
ANNOTATION_STATUS_IN_PROGRESS = "IN_PROGRESS"
|
||||||
ANNOTATION_STATUS_VALUES = {
|
ANNOTATION_STATUS_VALUES = {
|
||||||
ANNOTATION_STATUS_ANNOTATED,
|
ANNOTATION_STATUS_ANNOTATED,
|
||||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||||
|
ANNOTATION_STATUS_IN_PROGRESS,
|
||||||
|
}
|
||||||
|
ANNOTATION_STATUS_CLIENT_VALUES = {
|
||||||
|
ANNOTATION_STATUS_ANNOTATED,
|
||||||
|
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||||
|
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||||
}
|
}
|
||||||
|
|
||||||
class AnnotationTemplate(Base):
|
class AnnotationTemplate(Base):
|
||||||
@@ -101,7 +108,7 @@ class AnnotationResult(Base):
|
|||||||
String(32),
|
String(32),
|
||||||
nullable=False,
|
nullable=False,
|
||||||
default=ANNOTATION_STATUS_ANNOTATED,
|
default=ANNOTATION_STATUS_ANNOTATED,
|
||||||
comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE",
|
comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
|
||||||
)
|
)
|
||||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from pydantic import BaseModel, Field, ConfigDict
|
|||||||
|
|
||||||
from app.db.models.annotation_management import (
|
from app.db.models.annotation_management import (
|
||||||
ANNOTATION_STATUS_ANNOTATED,
|
ANNOTATION_STATUS_ANNOTATED,
|
||||||
|
ANNOTATION_STATUS_IN_PROGRESS,
|
||||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||||
)
|
)
|
||||||
@@ -25,6 +26,7 @@ class AnnotationStatus(str, Enum):
|
|||||||
"""标注状态枚举"""
|
"""标注状态枚举"""
|
||||||
|
|
||||||
ANNOTATED = ANNOTATION_STATUS_ANNOTATED
|
ANNOTATED = ANNOTATION_STATUS_ANNOTATED
|
||||||
|
IN_PROGRESS = ANNOTATION_STATUS_IN_PROGRESS
|
||||||
NO_ANNOTATION = ANNOTATION_STATUS_NO_ANNOTATION
|
NO_ANNOTATION = ANNOTATION_STATUS_NO_ANNOTATION
|
||||||
NOT_APPLICABLE = ANNOTATION_STATUS_NOT_APPLICABLE
|
NOT_APPLICABLE = ANNOTATION_STATUS_NOT_APPLICABLE
|
||||||
|
|
||||||
@@ -112,7 +114,7 @@ class UpsertAnnotationRequest(BaseModel):
|
|||||||
annotation_status: Optional[AnnotationStatus] = Field(
|
annotation_status: Optional[AnnotationStatus] = Field(
|
||||||
None,
|
None,
|
||||||
alias="annotationStatus",
|
alias="annotationStatus",
|
||||||
description="标注状态(无标注传 NO_ANNOTATION,不适用传 NOT_APPLICABLE)",
|
description="标注状态(无标注传 NO_ANNOTATION,不适用传 NOT_APPLICABLE,IN_PROGRESS 由后端维护)",
|
||||||
)
|
)
|
||||||
expected_updated_at: Optional[datetime] = Field(
|
expected_updated_at: Optional[datetime] = Field(
|
||||||
None,
|
None,
|
||||||
|
|||||||
@@ -26,9 +26,10 @@ from app.core.logging import get_logger
|
|||||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
|
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
|
||||||
from app.db.models.annotation_management import (
|
from app.db.models.annotation_management import (
|
||||||
ANNOTATION_STATUS_ANNOTATED,
|
ANNOTATION_STATUS_ANNOTATED,
|
||||||
|
ANNOTATION_STATUS_IN_PROGRESS,
|
||||||
|
ANNOTATION_STATUS_CLIENT_VALUES,
|
||||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||||
ANNOTATION_STATUS_VALUES,
|
|
||||||
)
|
)
|
||||||
from app.module.annotation.config import LabelStudioTagConfig
|
from app.module.annotation.config import LabelStudioTagConfig
|
||||||
from app.module.annotation.schema.editor import (
|
from app.module.annotation.schema.editor import (
|
||||||
@@ -61,6 +62,7 @@ SEGMENT_INDEX_KEY = "segment_index"
|
|||||||
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
|
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
|
||||||
SEGMENTED_KEY = "segmented"
|
SEGMENTED_KEY = "segmented"
|
||||||
SEGMENTS_KEY = "segments"
|
SEGMENTS_KEY = "segments"
|
||||||
|
SEGMENT_TOTAL_KEY = "total_segments"
|
||||||
SEGMENT_RESULT_KEY = "result"
|
SEGMENT_RESULT_KEY = "result"
|
||||||
SEGMENT_CREATED_AT_KEY = "created_at"
|
SEGMENT_CREATED_AT_KEY = "created_at"
|
||||||
SEGMENT_UPDATED_AT_KEY = "updated_at"
|
SEGMENT_UPDATED_AT_KEY = "updated_at"
|
||||||
@@ -416,6 +418,76 @@ class AnnotationEditorService:
|
|||||||
result = payload.get(SEGMENT_RESULT_KEY)
|
result = payload.get(SEGMENT_RESULT_KEY)
|
||||||
return isinstance(result, list) and len(result) > 0
|
return isinstance(result, list) and len(result) > 0
|
||||||
|
|
||||||
|
@staticmethod
def _resolve_segment_total(payload: Optional[Dict[str, Any]]) -> Optional[int]:
    """Read a positive segment total from an annotation payload.

    Args:
        payload: Annotation payload; the total is looked up under
            ``SEGMENT_TOTAL_KEY``.

    Returns:
        The total as a positive ``int`` when the stored value is an int, an
        integral float, or a string of decimal digits. ``None`` when the
        payload is empty or not a dict, or the value is missing, a bool,
        non-positive, or of any other form.
    """
    if not payload or not isinstance(payload, dict):
        return None
    value = payload.get(SEGMENT_TOTAL_KEY)
    # bool is a subclass of int; without this guard True would be returned as a total.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value if value > 0 else None
    if isinstance(value, float) and value.is_integer():
        return int(value) if value > 0 else None
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such
    # as "²", which int() rejects with ValueError.
    if isinstance(value, str) and value.isdecimal():
        parsed = int(value)
        return parsed if parsed > 0 else None
    return None
|
||||||
|
|
||||||
|
async def _compute_segment_total(
    self,
    project: LabelingProject,
    file_record: DatasetFiles,
    file_id: str,
) -> Optional[int]:
    """Count how many segments a file is split into for annotation.

    Args:
        project: Labeling project that owns the file.
        file_record: Dataset file row; only its ``file_name`` is read here.
        file_id: Identifier used to download the file's text content.

    Returns:
        The number of segments (at least 1), or ``None`` when the dataset type
        is not ``DATASET_TYPE_TEXT``, segmentation is not enabled for the
        project, the downloaded content is not a string, or the file has a
        single record whose text does not exceed ``SEGMENT_THRESHOLD``.
    """
    raw_dataset_type = await self._get_dataset_type(project.dataset_id)
    if self._normalize_dataset_type(raw_dataset_type) != DATASET_TYPE_TEXT:
        return None
    if not self._resolve_segmentation_enabled(project):
        return None

    text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
    if not isinstance(text_content, str):
        return None

    label_config = await self._resolve_project_label_config(project)
    primary_text_key = self._resolve_primary_text_key(label_config)
    lowered_name = str(getattr(file_record, "file_name", "")).lower()

    # Each record pairs an optional parsed payload with its raw text.
    records: List[Tuple[Optional[Dict[str, Any]], str]] = []
    if lowered_name.endswith(JSONL_EXTENSION):
        records = self._parse_jsonl_records(text_content)
    else:
        parsed_payload = self._try_parse_json_payload(text_content)
        if parsed_payload:
            records = [(parsed_payload, text_content)]
    if not records:
        # Nothing parsed: treat the whole file as one plain-text record.
        records = [(None, text_content)]

    record_texts = [
        self._resolve_primary_text_value(record_payload, record_raw, primary_text_key)
        for record_payload, record_raw in records
    ]
    if not record_texts:
        record_texts = [text_content]

    threshold = self.SEGMENT_THRESHOLD
    # Segmentation applies to multi-record files or to any over-long text.
    if len(records) <= 1 and not any(len(text or "") > threshold for text in record_texts):
        return None

    splitter = AnnotationTextSplitter(max_chars=threshold)
    segment_count = 0
    for text in record_texts:
        content = text or ""
        if len(content) <= threshold:
            segment_count += 1
            continue
        pieces = splitter.split(content)
        # An empty split result still counts as one segment for the record.
        segment_count += len(pieces) if pieces else 1

    return segment_count if segment_count > 0 else 1
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_source_document_filter(cls):
|
def _build_source_document_filter(cls):
|
||||||
file_type_lower = func.lower(DatasetFiles.file_type)
|
file_type_lower = func.lower(DatasetFiles.file_type)
|
||||||
@@ -946,19 +1018,36 @@ class AnnotationEditorService:
|
|||||||
final_payload = annotation_payload
|
final_payload = annotation_payload
|
||||||
|
|
||||||
requested_status = request.annotation_status
|
requested_status = request.annotation_status
|
||||||
if requested_status is not None and requested_status not in ANNOTATION_STATUS_VALUES:
|
if requested_status is not None and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES:
|
||||||
raise HTTPException(status_code=400, detail="annotationStatus 不合法")
|
raise HTTPException(status_code=400, detail="annotationStatus 不合法")
|
||||||
|
|
||||||
has_result = self._has_annotation_result(final_payload)
|
segment_total = None
|
||||||
if has_result:
|
segment_done = None
|
||||||
final_status = ANNOTATION_STATUS_ANNOTATED
|
if request.segment_index is not None:
|
||||||
|
segment_total = self._resolve_segment_total(final_payload)
|
||||||
|
if segment_total is None:
|
||||||
|
segment_total = await self._compute_segment_total(project, file_record, file_id)
|
||||||
|
if segment_total and segment_total > 0:
|
||||||
|
final_payload[SEGMENT_TOTAL_KEY] = segment_total
|
||||||
|
segment_done = len(self._extract_segment_annotations(final_payload))
|
||||||
|
|
||||||
|
if (
|
||||||
|
segment_total is not None
|
||||||
|
and segment_done is not None
|
||||||
|
and segment_done < segment_total
|
||||||
|
):
|
||||||
|
final_status = ANNOTATION_STATUS_IN_PROGRESS
|
||||||
else:
|
else:
|
||||||
if requested_status == ANNOTATION_STATUS_NO_ANNOTATION:
|
has_result = self._has_annotation_result(final_payload)
|
||||||
final_status = ANNOTATION_STATUS_NO_ANNOTATION
|
if has_result:
|
||||||
elif requested_status == ANNOTATION_STATUS_NOT_APPLICABLE:
|
final_status = ANNOTATION_STATUS_ANNOTATED
|
||||||
final_status = ANNOTATION_STATUS_NOT_APPLICABLE
|
|
||||||
else:
|
else:
|
||||||
raise HTTPException(status_code=400, detail="未发现标注内容,请确认无标注/不适用后再保存")
|
if requested_status == ANNOTATION_STATUS_NO_ANNOTATION:
|
||||||
|
final_status = ANNOTATION_STATUS_NO_ANNOTATION
|
||||||
|
elif requested_status == ANNOTATION_STATUS_NOT_APPLICABLE:
|
||||||
|
final_status = ANNOTATION_STATUS_NOT_APPLICABLE
|
||||||
|
else:
|
||||||
|
raise HTTPException(status_code=400, detail="未发现标注内容,请确认无标注/不适用后再保存")
|
||||||
|
|
||||||
if existing:
|
if existing:
|
||||||
if request.expected_updated_at and existing.updated_at:
|
if request.expected_updated_at and existing.updated_at:
|
||||||
|
|||||||
Reference in New Issue
Block a user