You've already forked DataMate
Add three new LLM-powered auto-annotation operators:
- LLMTextClassification: Text classification using an LLM
- LLMNamedEntityRecognition: Named entity recognition with type validation
- LLMRelationExtraction: Relation extraction with entity and relation type validation

Key features:
- Load LLM config from the t_model_config table via the modelId parameter
- Lazy loading of LLM configuration on first execute()
- Result validation with whitelist checking for entity/relation types
- Fault-tolerant: returns empty results on LLM failure instead of throwing
- Fully compatible with the existing Worker pipeline

Files added:
- runtime/ops/annotation/_llm_utils.py: Shared LLM utilities
- runtime/ops/annotation/llm_text_classification/: Text classification operator
- runtime/ops/annotation/llm_named_entity_recognition/: NER operator
- runtime/ops/annotation/llm_relation_extraction/: Relation extraction operator

Files modified:
- runtime/ops/annotation/__init__.py: Register 3 new operators
- runtime/python-executor/datamate/auto_annotation_worker.py: Add to Worker whitelist
- frontend/src/pages/DataAnnotation/OperatorCreate/hooks/useOperatorOperations.ts: Add to frontend whitelist
229 lines
6.1 KiB
TypeScript
229 lines
6.1 KiB
TypeScript
import { useEffect, useMemo, useState } from "react";
|
|
import {
|
|
queryCategoryTreeUsingGet,
|
|
queryOperatorsUsingPost,
|
|
} from "@/pages/OperatorMarket/operator.api";
|
|
import { CategoryI, ConfigI, OperatorI } from "@/pages/OperatorMarket/operator.model";
|
|
|
|
/** Parsed form of an operator's `settings` JSON: one config entry per parameter key. */
type OperatorConfigMap = Record<string, ConfigI>;

/** An operator together with the default parameter values collected from its configs. */
type OperatorWithDefaults = OperatorI & {
  defaultParams?: Record<string, unknown>;
};

/**
 * A category extended with optional `label` / `value` fields and a `count`
 * of operators that reference it.
 */
type CategoryNode = CategoryI & {
  label?: string;
  value?: string;
  count?: number;
};

/** One named group of categories as read from the category tree response. */
type CategoryGroup = {
  name: string;
  categories: CategoryNode[];
};
|
|
|
|
/**
 * Operator ids that always count as annotation operators, independent of
 * what their name, description, or runtime say. Matching is case-sensitive.
 */
const ANNOTATION_OPERATOR_ID_WHITELIST = new Set<string>([
  "ImageObjectDetectionBoundingBox",
  "test_annotation_marker",
  "LLMTextClassification",
  "LLMNamedEntityRecognition",
  "LLMRelationExtraction",
]);
|
|
|
|
/**
 * Coerces a loosely-typed value into a list of strings.
 *
 * - A string is split on commas; each piece is trimmed and empty pieces are dropped.
 * - An array has every element converted with `String(...)`.
 * - Anything else yields an empty list.
 *
 * @param value - The value to normalize.
 * @returns A new array of strings.
 */
const ensureArray = (value: unknown): string[] => {
  if (typeof value === "string") {
    const pieces: string[] = [];
    for (const rawPiece of value.split(",")) {
      const piece = rawPiece.trim();
      if (piece) {
        pieces.push(piece);
      }
    }
    return pieces;
  }

  return Array.isArray(value) ? value.map((entry) => String(entry)) : [];
};
|
|
|
|
/**
 * Parses an operator's `settings` JSON string into a config map.
 *
 * Never throws: missing input, malformed JSON, and any parsed value that is
 * not a plain JSON object (primitives, `null`, arrays) all yield `{}`.
 *
 * @param settings - Raw JSON text, if any.
 * @returns The parsed config map, or an empty object when nothing usable was parsed.
 */
const parseSettings = (settings?: string): OperatorConfigMap => {
  if (!settings) return {};

  try {
    const parsed: unknown = JSON.parse(settings);
    // `typeof [] === "object"`, so arrays must be excluded explicitly;
    // otherwise their indices would be treated as parameter keys.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return {};
    }
    return parsed as OperatorConfigMap;
  } catch {
    // Malformed JSON is treated the same as "no settings".
    return {};
  }
};
|
|
|
|
/**
 * Builds the hook's working representation of an operator.
 *
 * Parses `operator.settings` into `configs`, normalizes `categories` to a
 * string array, and collects `defaultParams` from every config entry that
 * has a `defaultVal` property.
 *
 * @param operator - The operator to convert.
 * @returns A copy of the operator with `categories`, `configs`, and `defaultParams` set.
 */
const mapOperator = (operator: OperatorI): OperatorWithDefaults => {
  const configs = parseSettings(operator.settings);
  const defaultParams: Record<string, unknown> = {};

  for (const [paramKey, paramConfig] of Object.entries(configs)) {
    // Only entries that are objects carrying a `defaultVal` property contribute.
    if (!paramConfig || typeof paramConfig !== "object" || !("defaultVal" in paramConfig)) {
      continue;
    }

    const defaultVal = paramConfig.defaultVal as unknown;

    // Key comparison ignores surrounding whitespace, case, and underscores,
    // so "outputDir", "output_dir", and "OUTPUT_DIR" are all matched.
    const isOutputDir = paramKey.trim().toLowerCase().replace(/_/g, "") === "outputdir";
    const isBlank = defaultVal === "" || defaultVal == null;
    if (isOutputDir && isBlank) {
      // A blank output directory default is left out of the defaults.
      continue;
    }

    defaultParams[paramKey] = defaultVal;
  }

  return {
    ...operator,
    categories: ensureArray(operator.categories),
    configs,
    defaultParams,
  };
};
|
|
|
|
/**
 * Decides whether an operator should be listed as an annotation operator.
 *
 * An operator qualifies when any of the following holds:
 * - its id is in `ANNOTATION_OPERATOR_ID_WHITELIST`;
 * - its runtime contains "annotation" (case-insensitive);
 * - its name contains "标注" or "annotation" (case-insensitive);
 * - its description contains "标注".
 *
 * @param operator - The operator to test.
 * @returns `true` when the operator is considered an annotation operator.
 */
const isAnnotationOperator = (operator: OperatorWithDefaults): boolean => {
  if (ANNOTATION_OPERATOR_ID_WHITELIST.has(operator.id)) {
    return true;
  }

  const name = operator.name?.toLowerCase() ?? "";
  const desc = operator.description?.toLowerCase() ?? "";
  const runtime = operator.runtime?.toLowerCase() ?? "";

  // Any runtime containing "/annotation/" also contains "annotation",
  // so a single substring check covers both forms.
  if (runtime.includes("annotation")) {
    return true;
  }

  return name.includes("标注") || desc.includes("标注") || name.includes("annotation");
};
|
|
|
|
/**
 * Returns a copy of `operator` with `paramKey` set to `value` in its overrides.
 * When the operator has no overrides yet, they start from its `defaultParams`.
 */
const withOverride = (
  operator: OperatorWithDefaults,
  paramKey: string,
  value: unknown
): OperatorWithDefaults => ({
  ...operator,
  overrides: {
    ...(operator.overrides || operator.defaultParams || {}),
    [paramKey]: value,
  },
});

/**
 * React hook that loads the annotation operators and their categories and
 * manages which operators are selected and how they are configured.
 *
 * On mount it requests the category tree and the operator list in parallel,
 * keeps only operators accepted by `isAnnotationOperator`, and keeps only
 * categories referenced by at least one of those operators.
 *
 * @returns
 * - `loading`: whether the initial load is in progress.
 * - `operators`: the available annotation operators.
 * - `selectedOperators` / `setSelectedOperators`: the current selection.
 * - `configOperator` / `setConfigOperator`: the operator currently being configured, if any.
 * - `categoryOptions`: categories referenced by at least one available operator.
 * - `handleConfigChange`: sets one parameter override on a selected operator.
 * - `toggleOperator`: adds the operator to the selection, or removes it if already selected.
 * - `removeOperator`: removes an operator from the selection by id.
 */
export function useOperatorOperations() {
  const [operators, setOperators] = useState<OperatorWithDefaults[]>([]);
  const [selectedOperators, setSelectedOperators] = useState<OperatorWithDefaults[]>([]);
  const [configOperator, setConfigOperator] = useState<OperatorWithDefaults | null>(null);
  const [categoryOptions, setCategoryOptions] = useState<CategoryI[]>([]);
  const [loading, setLoading] = useState(false);

  const selectedOperatorIds = useMemo(
    () => new Set(selectedOperators.map((operator) => operator.id)),
    [selectedOperators]
  );

  const selectedCategoryOptions = useMemo(
    () =>
      categoryOptions.filter((category) =>
        operators.some((operator) => operator.categories?.includes(category.id))
      ),
    [categoryOptions, operators]
  );

  useEffect(() => {
    // Set by the cleanup function so a response that arrives after unmount
    // does not write into state.
    let cancelled = false;

    const initOperators = async (): Promise<void> => {
      setLoading(true);
      try {
        const [categoryRes, operatorRes] = await Promise.all([
          queryCategoryTreeUsingGet(),
          queryOperatorsUsingPost({ page: 0, size: 1000 }),
        ]);
        if (cancelled) return;

        const allOperators = (operatorRes?.data?.content || []).map(mapOperator);
        const annotationOperators = allOperators.filter(isAnnotationOperator);
        setOperators(annotationOperators);

        // Flatten every group's categories into one list, tagging each with
        // its group name and the number of annotation operators that use it.
        const groups = (categoryRes?.data?.content || []) as CategoryGroup[];
        const options: CategoryNode[] = groups.flatMap((group) =>
          (group.categories || []).map((category) => ({
            ...category,
            type: group.name,
            label: category.name,
            value: category.id,
            count: annotationOperators.filter((operator) =>
              operator.categories?.includes(category.id)
            ).length,
          }))
        );

        setCategoryOptions(
          options.filter((item) => (item.count || 0) > 0) as CategoryI[]
        );
      } catch (error: unknown) {
        // Without this handler a failed request would surface as an
        // unhandled promise rejection from the effect.
        if (!cancelled) {
          console.error("Failed to load annotation operators", error);
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    void initOperators();

    return () => {
      cancelled = true;
    };
  }, []);

  const removeOperator = (id: string) => {
    setSelectedOperators((prev) => prev.filter((operator) => operator.id !== id));
    // Close the config panel if it was showing the removed operator.
    if (configOperator?.id === id) {
      setConfigOperator(null);
    }
  };

  const toggleOperator = (operator: OperatorWithDefaults) => {
    if (selectedOperatorIds.has(operator.id)) {
      removeOperator(operator.id);
      return;
    }
    // Store a shallow copy so later overrides do not touch the listed operator.
    setSelectedOperators((prev) => [...prev, { ...operator }]);
  };

  const handleConfigChange = (
    operatorId: string,
    paramKey: string,
    value: unknown
  ) => {
    setSelectedOperators((prev) =>
      prev.map((operator) =>
        operator.id === operatorId ? withOverride(operator, paramKey, value) : operator
      )
    );

    // Keep the operator shown in the config panel in sync with the selection.
    setConfigOperator((prev) =>
      prev && prev.id === operatorId ? withOverride(prev, paramKey, value) : prev
    );
  };

  return {
    loading,
    operators,
    selectedOperators,
    configOperator,
    categoryOptions: selectedCategoryOptions,
    setConfigOperator,
    setSelectedOperators,
    handleConfigChange,
    toggleOperator,
    removeOperator,
  };
}
|