Files
DataMate/frontend/src/pages/DataAnnotation/OperatorCreate/hooks/useOperatorOperations.ts
Jerry Yan 49f99527cc feat(auto-annotation): add LLM-based annotation operators
Add three new LLM-powered auto-annotation operators:
- LLMTextClassification: Text classification using LLM
- LLMNamedEntityRecognition: Named entity recognition with type validation
- LLMRelationExtraction: Relation extraction with entity and relation type validation

Key features:
- Load LLM config from t_model_config table via modelId parameter
- Lazy loading of LLM configuration on first execute()
- Result validation with whitelist checking for entity/relation types
- Fault-tolerant: returns empty results on LLM failure instead of throwing
- Fully compatible with existing Worker pipeline

Files added:
- runtime/ops/annotation/_llm_utils.py: Shared LLM utilities
- runtime/ops/annotation/llm_text_classification/: Text classification operator
- runtime/ops/annotation/llm_named_entity_recognition/: NER operator
- runtime/ops/annotation/llm_relation_extraction/: Relation extraction operator

Files modified:
- runtime/ops/annotation/__init__.py: Register 3 new operators
- runtime/python-executor/datamate/auto_annotation_worker.py: Add to Worker whitelist
- frontend/src/pages/DataAnnotation/OperatorCreate/hooks/useOperatorOperations.ts: Add to frontend whitelist
2026-02-10 15:22:23 +08:00

229 lines
6.1 KiB
TypeScript

import { useEffect, useMemo, useState } from "react";
import {
queryCategoryTreeUsingGet,
queryOperatorsUsingPost,
} from "@/pages/OperatorMarket/operator.api";
import { CategoryI, ConfigI, OperatorI } from "@/pages/OperatorMarket/operator.model";
// Parameter configs of one operator, keyed by parameter name. This is the
// shape parseSettings() produces from an operator's JSON `settings` string.
type OperatorConfigMap = Record<string, ConfigI>;
// An operator as returned by the API, plus the default parameter values that
// mapOperator() collects from each config's `defaultVal`.
type OperatorWithDefaults = OperatorI & {
defaultParams?: Record<string, unknown>;
};
// A category enriched with the fields the hook fills in when it flattens the
// category tree: `label` (category name), `value` (category id) and `count`
// (number of annotation operators that list this category).
type CategoryNode = CategoryI & {
label?: string;
value?: string;
count?: number;
};
// One entry of the category tree response as consumed by the hook: a named
// group holding its child categories.
type CategoryGroup = {
name: string;
categories: CategoryNode[];
};
/**
 * Operator ids that are always treated as annotation operators by
 * isAnnotationOperator(), regardless of their name, description or runtime.
 */
const ANNOTATION_OPERATOR_ID_WHITELIST: Set<string> = new Set<string>([
  "ImageObjectDetectionBoundingBox",
  "test_annotation_marker",
  "LLMTextClassification",
  "LLMNamedEntityRecognition",
  "LLMRelationExtraction",
]);
/**
 * Coerces a loosely-typed value into a list of strings.
 *
 * - A string is split on commas; each piece is trimmed and empty pieces are dropped.
 * - An array has every element converted with `String(...)` (no trimming or filtering).
 * - Anything else yields an empty list.
 */
const ensureArray = (value: unknown): string[] => {
  if (typeof value === "string") {
    const pieces: string[] = [];
    for (const rawPiece of value.split(",")) {
      const piece = rawPiece.trim();
      if (piece) {
        pieces.push(piece);
      }
    }
    return pieces;
  }
  return Array.isArray(value) ? value.map((entry) => String(entry)) : [];
};
/**
 * Parses an operator's JSON `settings` string into a parameter-config map.
 *
 * Best-effort by design: a missing/empty string, malformed JSON, or JSON that
 * is not a plain object (primitives, `null`, arrays) all yield an empty map
 * instead of throwing.
 *
 * @param settings Raw JSON string from the operator record, if any.
 * @returns The parsed config map, or `{}` when nothing usable was found.
 */
const parseSettings = (settings?: string): OperatorConfigMap => {
  if (!settings) return {};
  try {
    const parsed: unknown = JSON.parse(settings);
    // `typeof [] === "object"`, so arrays must be rejected explicitly;
    // otherwise their indices would later be treated as parameter names.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
      return {};
    }
    return parsed as OperatorConfigMap;
  } catch {
    // Malformed JSON is treated the same as "no settings".
    return {};
  }
};
/**
 * Normalizes a raw operator record for use by the hook.
 *
 * The result is the original operator with:
 * - `categories` coerced to a string list,
 * - `configs` parsed from the JSON `settings` string,
 * - `defaultParams` holding every config's `defaultVal`, keyed by parameter name.
 *
 * A parameter whose name normalizes to "outputdir" (trimmed, lower-cased,
 * underscores removed) is left out of `defaultParams` when its default is
 * an empty string, `null` or `undefined`.
 */
const mapOperator = (operator: OperatorI): OperatorWithDefaults => {
  const configs = parseSettings(operator.settings);
  const defaultParams: Record<string, unknown> = {};

  for (const [paramKey, paramConfig] of Object.entries(configs)) {
    // Skip entries that are not config objects carrying a default value.
    if (!paramConfig || typeof paramConfig !== "object" || !("defaultVal" in paramConfig)) {
      continue;
    }

    const defaultVal = paramConfig.defaultVal as unknown;
    const isOutputDir = paramKey.trim().toLowerCase().replace(/_/g, "") === "outputdir";
    const isBlank = defaultVal === "" || defaultVal === null || defaultVal === undefined;
    if (isOutputDir && isBlank) {
      continue;
    }

    defaultParams[paramKey] = defaultVal;
  }

  return {
    ...operator,
    categories: ensureArray(operator.categories),
    configs,
    defaultParams,
  };
};
/**
 * Decides whether an operator should be offered as an annotation operator.
 *
 * An operator qualifies when any of the following holds:
 * - its id is in ANNOTATION_OPERATOR_ID_WHITELIST;
 * - its lower-cased `runtime` contains "annotation";
 * - its lower-cased `name` contains "标注" or "annotation";
 * - its lower-cased `description` contains "标注".
 */
const isAnnotationOperator = (operator: OperatorWithDefaults) => {
  if (ANNOTATION_OPERATOR_ID_WHITELIST.has(operator.id)) {
    return true;
  }

  const runtimeText = operator.runtime?.toLowerCase() || "";
  const runtimeMarkers = ["annotation", "/annotation/"];
  if (runtimeMarkers.some((marker) => runtimeText.includes(marker))) {
    return true;
  }

  const nameText = operator.name?.toLowerCase() || "";
  const descText = operator.description?.toLowerCase() || "";
  if (nameText.includes("标注")) {
    return true;
  }
  if (descText.includes("标注")) {
    return true;
  }
  return nameText.includes("annotation");
};
/**
 * Returns a copy of `operator` with `paramKey` set to `value` in its overrides.
 * The first override starts from the operator's `defaultParams` (or an empty
 * object); later overrides build on the existing `overrides`.
 */
const applyParamOverride = (
  operator: OperatorWithDefaults,
  paramKey: string,
  value: unknown
): OperatorWithDefaults => ({
  ...operator,
  overrides: {
    ...(operator.overrides || operator.defaultParams || {}),
    [paramKey]: value,
  },
});

/**
 * State and actions for picking and configuring annotation operators.
 *
 * On mount the hook loads the category tree and the operator list, keeps only
 * the operators accepted by isAnnotationOperator(), and derives the category
 * options that have at least one such operator.
 *
 * @returns
 * - `loading`: true while the initial load is in flight.
 * - `operators`: the available annotation operators.
 * - `selectedOperators` / `setSelectedOperators`: the current selection.
 * - `configOperator` / `setConfigOperator`: the operator whose config is being edited.
 * - `categoryOptions`: categories referenced by at least one available operator.
 * - `handleConfigChange`: records a parameter override on a selected operator.
 * - `toggleOperator`: adds the operator to, or removes it from, the selection.
 * - `removeOperator`: removes an operator from the selection by id.
 */
export function useOperatorOperations() {
  const [operators, setOperators] = useState<OperatorWithDefaults[]>([]);
  const [selectedOperators, setSelectedOperators] = useState<OperatorWithDefaults[]>(
    []
  );
  const [configOperator, setConfigOperator] = useState<OperatorWithDefaults | null>(
    null
  );
  const [categoryOptions, setCategoryOptions] = useState<CategoryI[]>([]);
  const [loading, setLoading] = useState(false);

  const selectedOperatorIds = useMemo(
    () => new Set(selectedOperators.map((operator) => operator.id)),
    [selectedOperators]
  );

  const selectedCategoryOptions = useMemo(
    () =>
      categoryOptions.filter((category) =>
        operators.some((operator) => operator.categories?.includes(category.id))
      ),
    [categoryOptions, operators]
  );

  useEffect(() => {
    // Set by the cleanup function so a late response cannot write state after
    // the component unmounted or the effect was re-run.
    let cancelled = false;

    const initOperators = async () => {
      setLoading(true);
      try {
        const [categoryRes, operatorRes] = await Promise.all([
          queryCategoryTreeUsingGet(),
          queryOperatorsUsingPost({ page: 0, size: 1000 }),
        ]);
        if (cancelled) return;

        const allOperators = (operatorRes?.data?.content || []).map(mapOperator);
        const annotationOperators = allOperators.filter(isAnnotationOperator);
        setOperators(annotationOperators);

        // Flatten the category groups into one list, tagging each category
        // with its group name and the number of operators that reference it.
        const groups = (categoryRes?.data?.content || []) as CategoryGroup[];
        const options: CategoryNode[] = [];
        for (const group of groups) {
          for (const category of group.categories || []) {
            const matchedCount = annotationOperators.filter((operator) =>
              operator.categories?.includes(category.id)
            ).length;
            options.push({
              ...category,
              type: group.name,
              label: category.name,
              value: category.id,
              count: matchedCount,
            });
          }
        }

        setCategoryOptions(
          options.filter((item) => (item.count || 0) > 0) as CategoryI[]
        );
      } catch (error: unknown) {
        // Without this handler a failed request would surface as an unhandled
        // promise rejection; the lists simply stay empty instead.
        console.error("Failed to load annotation operators", error);
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    void initOperators();

    return () => {
      cancelled = true;
    };
  }, []);

  const toggleOperator = (operator: OperatorWithDefaults) => {
    if (selectedOperatorIds.has(operator.id)) {
      setSelectedOperators((prev) => prev.filter((item) => item.id !== operator.id));
      // Deselecting the operator being configured also closes its config.
      if (configOperator?.id === operator.id) {
        setConfigOperator(null);
      }
      return;
    }
    // Store a copy so later overrides never touch the catalogue entry.
    setSelectedOperators((prev) => [...prev, { ...operator }]);
  };

  const removeOperator = (id: string) => {
    setSelectedOperators((prev) => prev.filter((operator) => operator.id !== id));
    if (configOperator?.id === id) {
      setConfigOperator(null);
    }
  };

  const handleConfigChange = (
    operatorId: string,
    paramKey: string,
    value: unknown
  ) => {
    setSelectedOperators((prev) =>
      prev.map((operator) =>
        operator.id === operatorId
          ? applyParamOverride(operator, paramKey, value)
          : operator
      )
    );
    // Keep the operator shown in the config panel in sync with the selection.
    setConfigOperator((prev) =>
      prev && prev.id === operatorId
        ? applyParamOverride(prev, paramKey, value)
        : prev
    );
  };

  return {
    loading,
    operators,
    selectedOperators,
    configOperator,
    categoryOptions: selectedCategoryOptions,
    setConfigOperator,
    setSelectedOperators,
    handleConfigChange,
    toggleOperator,
    removeOperator,
  };
}