feature: add data-evaluation

* feature: add evaluation task management function

* feature: add evaluation task detail page

* fix: delete duplicate definition for table t_model_config

* refactor: rename package synthesis to ratio

* refactor: add eval file table and refactor related code

* fix: calling large models in parallel during evaluation
This commit is contained in:
hefanli
2025-12-04 09:23:54 +08:00
committed by GitHub
parent 265e284fb8
commit 1d19cd3a62
52 changed files with 2882 additions and 1244 deletions

View File

@@ -1,574 +1,395 @@
import { useState } from "react";
import {
Button,
Card,
Badge,
Input,
Select,
Checkbox,
Form,
Typography,
} from "antd";
import {
PlusOutlined,
ArrowLeftOutlined,
EditOutlined,
SaveOutlined,
DeleteOutlined,
} from "@ant-design/icons";
import {
evaluationTemplates,
presetEvaluationDimensions,
sliceOperators,
} from "@/mock/evaluation";
import { useNavigate } from "react-router";
import React, { useState, useEffect } from 'react';
import { Button, Form, Input, Select, message, Modal, Row, Col, Table, Space } from 'antd';
import { EyeOutlined } from '@ant-design/icons';
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api.ts";
import { mapDataset } from "@/pages/DataManagement/dataset.const.tsx";
import { queryModelListUsingGet } from "@/pages/SettingsPage/settings.apis.ts";
import { ModelI } from "@/pages/SettingsPage/ModelAccess.tsx";
import { createEvaluationTaskUsingPost } from "@/pages/DataEvaluation/evaluation.api.ts";
import { queryPromptTemplatesUsingGet } from "@/pages/DataEvaluation/evaluation.api.ts";
import PreviewPromptModal from "@/pages/DataEvaluation/Create/PreviewPrompt.tsx";
const { Title, Paragraph } = Typography;
const { Option } = Select;
/**
 * Dataset entry as held in the `datasets` state and offered in the
 * "select dataset" dropdown.
 */
interface Dataset {
// Identifier; compared against the form's `datasetId` value on submit.
id: string;
// Display name; shown in the dropdown and sent as `sourceName` when creating a task.
name: string;
// Number of files in the dataset (rendered next to the name in the option label).
fileCount: number;
// Size as a display string — presumably already human-readable; confirm against `mapDataset`.
size: string;
}
const EvaluationTaskCreate = () => {
const navigate = useNavigate();
const [datasets, setDatasets] = useState([]);
const [selectedTemplate, setSelectedTemplate] =
useState<string>("dialogue_text");
const [allDimensions, setAllDimensions] = useState<EvaluationDimension[]>([
...presetEvaluationDimensions,
]);
const [editingDimension, setEditingDimension] = useState<string | null>(null);
const [newDimension, setNewDimension] = useState({
name: "",
description: "",
});
const [createForm, setCreateForm] = useState({
name: "",
datasetId: "",
evaluationType: "model" as "model" | "manual",
dimensions: [] as string[],
customDimensions: [] as EvaluationDimension[],
sliceConfig: {
threshold: 0.8,
sampleCount: 100,
method: "语义分割",
},
modelConfig: {
url: "",
apiKey: "",
prompt: "",
temperature: 0.3,
maxTokens: 2000,
},
/**
 * One evaluation dimension shown in the dimensions table and injected
 * into the evaluation prompt.
 */
interface Dimension {
// Row key for the table (`rowKey="key"`); generated locally as `dim-...`.
key: string;
// Dimension name; used as the heading of its prompt section.
dimension: string;
// Evaluation criteria text printed under the dimension heading in the prompt.
description: string;
}
/**
 * Prompt template returned by the prompt-template query, one per evaluation type.
 */
interface PromptTemplate {
// Evaluation type this template applies to; matched against the current `taskType`.
evalType: string;
// Prompt text; the `{dimensions}` and `{result_example}` placeholders are substituted before preview.
prompt: string;
// Dimensions used to seed the dimensions table when the template matches.
defaultDimensions: Dimension[];
}
/**
 * Props accepted by `CreateTaskModal`.
 */
interface CreateTaskModalProps {
// Controls whether the modal is open; opening also triggers loading of datasets, models and templates.
visible: boolean;
// Invoked when the modal is dismissed, and also after a task has been created successfully.
onCancel: () => void;
// Invoked after the create-task request succeeds.
onSuccess: () => void;
}
// Options for the "task type" select.
const TASK_TYPES = [
{ label: 'QA评估', value: 'QA' },
];
// Options for the "evaluation method" select.
const EVAL_METHODS = [
{ label: '模型自动评估', value: 'AUTO' },
];
// Initial form values; each must match a `value` in the option lists above.
const DEFAULT_EVAL_METHOD = 'AUTO';
const DEFAULT_TASK_TYPE = 'QA';
const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, onSuccess }) => {
const [form] = Form.useForm();
const [loading, setLoading] = useState(false);
const [datasets, setDatasets] = useState<Dataset[]>([]);
const [models, setModels] = useState<ModelI[]>([]);
const [dimensions, setDimensions] = useState<Dimension[]>([]);
const [newDimension, setNewDimension] = useState<Omit<Dimension, 'key'>>({
dimension: '',
description: ''
});
const [taskType, setTaskType] = useState<string>("QA");
const [promptTemplates, setPromptTemplates] = useState<PromptTemplate[]>([]);
const [previewVisible, setPreviewVisible] = useState(false);
const [evaluationPrompt, setEvaluationPrompt] = useState('');
const handleTemplateChange = (templateKey: string) => {
setSelectedTemplate(templateKey);
const template =
evaluationTemplates[templateKey as keyof typeof evaluationTemplates];
if (template) {
const customDimensions = allDimensions.filter((d) => d.isCustom);
setAllDimensions([...template.dimensions, ...customDimensions]);
/**
 * Appends the dimension currently typed into the "new dimension" inputs
 * to the dimensions table and clears the inputs.
 *
 * Shows a warning and does nothing when the name is blank. The name and
 * description are stored trimmed, so the validated value and the stored
 * value are the same text.
 */
const handleAddDimension = () => {
  const name = newDimension.dimension.trim();
  if (!name) {
    message.warning('请输入维度名称');
    return;
  }
  const description = newDimension.description.trim();
  // Functional update: append to the latest state rather than the value
  // captured by this render's closure.
  setDimensions((prev) => [
    ...prev,
    { dimension: name, description, key: `dim-${Date.now()}` },
  ]);
  setNewDimension({ dimension: '', description: '' });
};
/**
 * Removes the dimension with the given row key from the table.
 * Refuses (with a warning) when it would remove the last remaining dimension.
 */
const handleDeleteDimension = (key: string) => {
  const isLastDimension = dimensions.length <= 1;
  if (isLastDimension) {
    message.warning('至少需要保留一个评估维度');
    return;
  }
  const remaining = dimensions.filter((entry) => entry.key !== key);
  setDimensions(remaining);
};
// Load datasets, models and prompt templates each time the modal becomes visible.
useEffect(() => {
  if (!visible) {
    return;
  }
  // Fire-and-forget: each loader handles its own errors.
  void fetchDatasets();
  void fetchModels();
  void fetchPromptTemplates();
}, [visible]);
/**
 * Loads up to 1000 datasets and stores them, mapped through `mapDataset`,
 * in the `datasets` state.
 *
 * A response without a `content` list is treated as "no datasets" rather
 * than as a failure. Request errors are logged and reported to the user.
 */
const fetchDatasets = async () => {
  try {
    const { data } = await queryDatasetsUsingGet({ page: 1, size: 1000 });
    // Default BEFORE mapping: `.map()` always returns an array, so a
    // fallback placed after it can never take effect.
    setDatasets((data?.content ?? []).map(mapDataset));
  } catch (error) {
    console.error('Error fetching datasets:', error);
    message.error('获取数据集列表失败');
  }
};
const handleAddCustomDimension = () => {
if (newDimension.name.trim() && newDimension.description.trim()) {
const customDimension: EvaluationDimension = {
id: `custom_${Date.now()}`,
name: newDimension.name.trim(),
description: newDimension.description.trim(),
category: "custom",
isCustom: true,
isEnabled: true,
};
setAllDimensions([...allDimensions, customDimension]);
setNewDimension({ name: "", description: "" });
/**
 * Loads up to 1000 configured models into the `models` state.
 * Request errors are logged and reported to the user.
 */
const fetchModels = async () => {
  try {
    const response = await queryModelListUsingGet({ page: 0, size: 1000 });
    const modelList = response.data.content || [];
    setModels(modelList);
  } catch (error) {
    console.error('Error fetching models:', error);
    message.error('获取模型列表失败');
  }
};
/** Sets the enabled flag of the dimension with the given id. */
const handleDimensionToggle = (id: string, checked: boolean) => {
  const updated = allDimensions.map((item) => {
    if (item.id !== id) {
      return item;
    }
    return { ...item, isEnabled: checked };
  });
  setAllDimensions(updated);
};
/** Overwrites the name or description of the dimension with the given id. */
const handleEditDimension = (
  id: string,
  field: "name" | "description",
  value: string
) => {
  const updated = allDimensions.map((item) => {
    if (item.id !== id) {
      return item;
    }
    return { ...item, [field]: value };
  });
  setAllDimensions(updated);
};
/** Removes the dimension with the given id from the list entirely. */
const handleDeleteCustomDimension = (id: string) => {
  const remaining = allDimensions.filter((item) => item.id !== id);
  setAllDimensions(remaining);
};
/** "Deletes" a preset dimension by disabling it; the entry stays in the list. */
const handleDeletePresetDimension = (id: string) => {
  const updated = allDimensions.map((item) => {
    if (item.id !== id) {
      return item;
    }
    return { ...item, isEnabled: false };
  });
  setAllDimensions(updated);
};
const handleCreateTask = () => {
const selectedDataset = datasets.find((d) => d.id === createForm.datasetId);
if (!selectedDataset) return;
const enabledDimensions = allDimensions.filter((d) => d.isEnabled);
const presetDimensionIds = enabledDimensions
.filter((d) => !d.isCustom)
.map((d) => d.id);
const customDimensions = enabledDimensions.filter((d) => d.isCustom);
let finalPrompt = createForm.modelConfig.prompt;
if (createForm.evaluationType === "model" && !finalPrompt.trim()) {
finalPrompt = generateDefaultPrompt(selectedDataset.name);
}
const newTask: EvaluationTask = {
id: Date.now().toString(),
name: createForm.name,
datasetId: createForm.datasetId,
datasetName: selectedDataset.name,
evaluationType: createForm.evaluationType,
status: "pending",
progress: 0,
createdAt: new Date().toLocaleString(),
description: `${
createForm.evaluationType === "model" ? "模型自动" : "人工"
}评估${selectedDataset.name}`,
dimensions: presetDimensionIds,
customDimensions: customDimensions,
modelConfig:
createForm.evaluationType === "model"
? {
...createForm.modelConfig,
prompt: finalPrompt,
}
: undefined,
metrics: {
accuracy: 0,
completeness: 0,
consistency: 0,
relevance: 0,
},
issues: [],
};
// 重置表单
setCreateForm({
name: "",
datasetId: "",
evaluationType: "model",
dimensions: [],
customDimensions: [],
modelConfig: {
url: "",
apiKey: "",
prompt: "",
temperature: 0.3,
maxTokens: 2000,
},
/**
 * Renders the dimensions as numbered sections for insertion into the
 * evaluation prompt.
 *
 * Pure formatting helper: it has no side effects (in particular it does
 * not navigate), so it is safe to call whenever a preview is generated.
 *
 * @param dimensions Dimensions to render, in display order.
 * @returns A string starting with a newline, followed by one
 *   "### N. name / criteria" section per dimension; just "\n" when the
 *   list is empty.
 */
const formatDimensionsForPrompt = (dimensions: Dimension[]) => {
  const sections = dimensions.map(
    (dim, index) =>
      `### ${index + 1}. ${dim.dimension}\n**评估标准:**\n${dim.description}\n\n`
  );
  return "\n" + sections.join("");
};
/**
 * Builds the sample-result fragment for the prompt: one `"name": "Y",`
 * line per dimension, each preceded by a newline.
 *
 * @param dimensions Dimensions to list, in display order.
 * @returns The concatenated lines; an empty string for an empty list.
 */
const formatResultExample = (dimensions: Dimension[]) => {
  let example = '';
  for (const { dimension } of dimensions) {
    example += `\n "${dimension}": "Y",`;
  }
  return example;
};
/**
 * Loads the prompt templates and, when one matches the current task type,
 * seeds the dimensions table with that template's default dimensions.
 *
 * A response without a `templates` list is treated as an empty list so
 * the `promptTemplates` state always holds an array. Request errors are
 * logged and reported to the user.
 */
const fetchPromptTemplates = async () => {
  try {
    const response = await queryPromptTemplatesUsingGet();
    // Default to [] so neither the state nor the lookup below ever sees undefined.
    const templates: PromptTemplate[] = response.data?.templates ?? [];
    setPromptTemplates(templates);
    if (taskType) {
      const template = templates.find((t) => t.evalType === taskType);
      if (template) {
        setDimensions(
          template.defaultDimensions.map((dim, index) => ({
            key: `dim-${index}`,
            dimension: dim.dimension,
            description: dim.description,
          }))
        );
      }
    }
  } catch (error) {
    console.error('Error fetching prompt templates:', error);
    message.error('获取评估维度失败');
  }
};
/**
 * Builds the evaluation prompt for the current task type from its template
 * and the configured dimensions, then opens the preview modal.
 *
 * Warns and aborts when there are no dimensions or when no template matches
 * the task type, so the preview never opens with an undefined prompt.
 */
const generateEvaluationPrompt = () => {
  if (dimensions.length === 0) {
    message.warning('请先添加评估维度');
    return;
  }
  const template = promptTemplates.find((t) => t.evalType === taskType);
  if (!template) {
    message.warning('未找到对应的评估提示词模板');
    return;
  }
  // Replacer functions insert the text verbatim; a plain string replacement
  // would interpret `$&`, `$1`, `$$` etc. inside user-entered dimension text.
  const prompt = template.prompt
    .replace("{dimensions}", () => formatDimensionsForPrompt(dimensions))
    .replace('{result_example}', () => formatResultExample(dimensions));
  setEvaluationPrompt(prompt);
  setPreviewVisible(true);
};
// Select options for the evaluation model: only models of type "CHAT",
// labelled "<modelName> (<provider>)" and keyed by model id.
const chatModelOptions: { label: string; value: ModelI["id"] }[] = [];
for (const model of models) {
  if (model.type !== "CHAT") {
    continue;
  }
  chatModelOptions.push({
    label: `${model.modelName} (${model.provider})`,
    value: model.id,
  });
}
/**
 * Form submit handler: builds the create-task payload from the form values
 * and the configured dimensions, and sends it.
 *
 * On success it notifies the user, calls `onSuccess`, resets the form and
 * calls `onCancel`. On failure it logs the error and shows the server's
 * message when available. The loading flag is cleared in either case.
 *
 * @param values Validated form values; `datasetId` and `modelId` are pulled
 *   out and the remaining fields are forwarded unchanged.
 */
const handleSubmit = async (values: any) => {
  if (!dimensions.length) {
    message.warning('请至少添加一个评估维度');
    return;
  }

  try {
    setLoading(true);

    const { datasetId, modelId, ...otherFields } = values;
    const dataset = datasets.find((item) => item.id === datasetId);
    const model = models.find((item) => item.id === modelId);
    // Strip the table-only `key` before sending.
    const evalDimensions = dimensions.map(({ dimension, description }) => ({
      dimension,
      description,
    }));

    await createEvaluationTaskUsingPost({
      ...otherFields,
      sourceType: 'DATASET',
      sourceId: datasetId,
      sourceName: dataset?.name,
      evalConfig: {
        modelId: model?.id,
        dimensions: evalDimensions,
      },
    });

    message.success('评估任务创建成功');
    onSuccess();
    form.resetFields();
    onCancel();
  } catch (error: any) {
    console.error('Error creating task:', error);
    const serverMessage = error.response?.data?.message;
    message.error(serverMessage || '创建评估任务失败');
  } finally {
    setLoading(false);
  }
};
// Column definitions for the evaluation-dimensions table:
// dimension name (30%), description (60%) and a per-row action (10%).
const columns = [
{
title: '维度',
dataIndex: 'dimension',
key: 'dimension',
width: '30%',
},
{
title: '描述',
dataIndex: 'description',
key: 'description',
width: '60%',
},
{
title: '操作',
key: 'action',
width: '10%',
// Renders a link that deletes the row via handleDeleteDimension(record.key).
// The link is greyed out (#ccc) and given the 'disabled-link' class while only
// one dimension remains; the handler itself rejects deleting the last row.
// NOTE(review): the link has no label text in this view — confirm it was not lost.
render: (_: any, record: any) => (
<Space size="middle">
<a
onClick={() => handleDeleteDimension(record.key)}
style={{ color: dimensions.length <= 1 ? '#ccc' : '#ff4d4f' }}
className={dimensions.length <= 1 ? 'disabled-link' : ''}
>
</a>
</Space>
),
},
];
return (
<div className="h-full">
{/* 页面头部 */}
<div className="flex items-center mb-2">
<Button
type="text"
icon={<ArrowLeftOutlined />}
onClick={() => navigate("/data/evaluation")}
></Button>
<div className="text-xl font-bold"></div>
</div>
<Modal
title="创建评估任务"
open={visible}
onCancel={onCancel}
footer={null}
width={800}
destroyOnClose
>
<Form
form={form}
layout="vertical"
onFinish={handleSubmit}
initialValues={{
evalMethod: DEFAULT_EVAL_METHOD,
taskType: DEFAULT_TASK_TYPE,
}}
>
<Row gutter={16}>
<Col span={12}>
<Form.Item
label="任务名称"
name="name"
rules={[{ required: true, message: '请输入任务名称' }]}
>
<Input placeholder="输入任务名称" />
</Form.Item>
</Col>
<Col span={12}>
<Form.Item
label="任务类型"
name="taskType"
rules={[{ required: true, message: '请选择任务类型' }]}
>
<Select options={TASK_TYPES} />
</Form.Item>
</Col>
</Row>
<Form layout="vertical">
{/* 基本信息 */}
<Card title="基本信息" style={{ marginBottom: 24 }}>
<Form.Item label="任务名称" required>
<Input
value={createForm.name}
onChange={(e) =>
setCreateForm({ ...createForm, name: e.target.value })
}
placeholder="输入任务名称"
/>
</Form.Item>
<Form.Item label="选择数据集" required>
<Select
value={createForm.datasetId || undefined}
onChange={(value) =>
setCreateForm({ ...createForm, datasetId: value })
}
placeholder="选择要评估的数据集"
>
{datasets.map((dataset) => (
<Option key={dataset.id} value={dataset.id}>
{dataset.name}{dataset.fileCount} {dataset.size}
</Option>
))}
</Select>
</Form.Item>
<Form.Item label="评估方式" required>
<Select
value={createForm.evaluationType}
onChange={(value: "model" | "manual") =>
setCreateForm({ ...createForm, evaluationType: value })
}
>
<Option value="model"></Option>
<Option value="manual"></Option>
</Select>
</Form.Item>
</Card>
{/* 算子配置 */}
<Card title="切片算子配置" style={{ marginBottom: 24 }}>
<Form.Item label="切片算子">
<Select
value={createForm.sliceConfig.method}
onChange={(value) =>
setCreateForm({
...createForm,
sliceConfig: { ...createForm.sliceConfig, method: value },
})
}
placeholder="选择切片算子"
>
{sliceOperators.map((operator) => (
<Option key={operator.id} value={operator.name}>
{operator.name}{" "}
<Badge style={{ marginLeft: 8 }} count={operator.type} />
</Option>
))}
</Select>
</Form.Item>
<Form.Item label="分隔符">
<Input
placeholder="输入分隔符,如 \\n\\n"
value={createForm.sliceConfig.delimiter}
onChange={(e) =>
setCreateForm({
...createForm,
sliceConfig: {
...createForm.sliceConfig,
delimiter: e.target.value,
},
})
}
/>
</Form.Item>
<Form.Item label="分块大小">
<Input
type="number"
value={createForm.sliceConfig.chunkSize}
onChange={(e) =>
setCreateForm({
...createForm,
sliceConfig: {
...createForm.sliceConfig,
chunkSize: Number(e.target.value),
},
})
}
/>
</Form.Item>
<Form.Item label="重叠长度">
<Input
type="number"
value={createForm.sliceConfig.overlapLength}
onChange={(e) =>
setCreateForm({
...createForm,
sliceConfig: {
...createForm.sliceConfig,
overlapLength: Number(e.target.value),
},
})
}
/>
</Form.Item>
<Form.Item label="抽样比例">
<Input
type="number"
value={createForm.sliceConfig.threshold}
onChange={(e) =>
setCreateForm({
...createForm,
sliceConfig: {
...createForm.sliceConfig,
threshold: Number(e.target.value),
},
})
}
/>
</Form.Item>
</Card>
{/* 评估维度配置 */}
<Card
title={
<div
style={{
display: "flex",
justifyContent: "space-between",
alignItems: "center",
}}
>
<span></span>
<div style={{ display: "flex", alignItems: "center", gap: 8 }}>
<Select
value={selectedTemplate}
onChange={handleTemplateChange}
style={{ width: 160 }}
>
{Object.entries(evaluationTemplates).map(
([key, template]) => (
<Option key={key} value={key}>
{template.name}
</Option>
)
)}
</Select>
<Badge
count={allDimensions.filter((d) => d.isEnabled).length}
style={{ background: "#f0f0f0", color: "#333" }}
/>
</div>
</div>
}
style={{ marginBottom: 24 }}
<Form.Item
label="任务描述"
name="description"
>
{/* 维度表格 */}
<div
style={{
border: "1px solid #f0f0f0",
borderRadius: 6,
marginBottom: 16,
}}
>
<div
style={{
background: "#fafafa",
padding: "8px 12px",
borderBottom: "1px solid #f0f0f0",
fontWeight: 500,
fontSize: 13,
}}
<Input.TextArea placeholder="输入任务描述(可选)" rows={3} />
</Form.Item>
<Row gutter={16}>
<Col span={12}>
<Form.Item
label="选择数据集"
name="datasetId"
rules={[{ required: true, message: '请选择数据集' }]}
>
<div style={{ display: "flex", alignItems: "center" }}>
<div style={{ width: 60 }}></div>
<div style={{ width: 160 }}></div>
<div style={{ flex: 1 }}></div>
<div style={{ width: 120 }}></div>
</div>
</div>
<div style={{ maxHeight: 320, overflowY: "auto" }}>
{allDimensions.map((dimension) => (
<div
key={dimension.id}
style={{
display: "flex",
alignItems: "center",
padding: "8px 12px",
borderBottom: "1px solid #f5f5f5",
}}
>
<div style={{ width: 60 }}>
<Checkbox
checked={dimension.isEnabled}
onChange={(e) =>
handleDimensionToggle(dimension.id, e.target.checked!)
}
/>
</div>
<div style={{ width: 160 }}>
{editingDimension === dimension.id && dimension.isCustom ? (
<Input
value={dimension.name}
onChange={(e) =>
handleEditDimension(
dimension.id,
"name",
e.target.value
)
}
size="small"
/>
) : (
<span style={{ fontWeight: 500 }}>
{dimension.name}
{dimension.isCustom && (
<Badge
style={{
marginLeft: 4,
background: "#f9f0ff",
color: "#722ed1",
}}
count="自定义"
/>
)}
</span>
)}
</div>
<div style={{ flex: 1 }}>
{editingDimension === dimension.id && dimension.isCustom ? (
<Input
value={dimension.description}
onChange={(e) =>
handleEditDimension(
dimension.id,
"description",
e.target.value
)
}
size="small"
/>
) : (
<span style={{ color: "#888" }}>
{dimension.description}
</span>
)}
</div>
<div style={{ width: 120 }}>
{editingDimension === dimension.id && dimension.isCustom ? (
<Button
type="text"
icon={<SaveOutlined />}
size="small"
onClick={() => setEditingDimension(null)}
/>
) : (
dimension.isCustom && (
<Button
type="text"
icon={<EditOutlined />}
size="small"
onClick={() => setEditingDimension(dimension.id)}
/>
)
)}
<Button
type="text"
icon={<DeleteOutlined />}
size="small"
danger
onClick={() =>
dimension.isCustom
? handleDeleteCustomDimension(dimension.id)
: handleDeletePresetDimension(dimension.id)
}
disabled={
allDimensions.filter((d) => d.isEnabled).length <= 1 &&
dimension.isEnabled
}
/>
</div>
<Select
placeholder="请选择要评估的数据集"
showSearch
optionFilterProp="label"
>
{datasets.map((dataset) => (
<Select.Option key={dataset.id} value={dataset.id} label={dataset.name}>
<div className="flex justify-between w-full">
<span>{dataset.name}</span>
<span className="text-gray-500">{dataset.size}</span>
</div>
</Select.Option>
))}
</Select>
</Form.Item>
</Col>
<Col span={12}>
<Form.Item
label="评估方式"
name="evalMethod"
initialValue={DEFAULT_EVAL_METHOD}
>
<Select options={EVAL_METHODS} />
</Form.Item>
</Col>
</Row>
<Form.Item
noStyle
shouldUpdate={(prevValues, currentValues) =>
prevValues.evalMethod !== currentValues.evalMethod
}
>
{({ getFieldValue }) => getFieldValue('evalMethod') === 'AUTO' && (
<>
<Form.Item
label="评估模型"
name="modelId"
rules={[{ required: true, message: '请选择评估模型' }]}
>
<Select
placeholder="请选择模型"
options={chatModelOptions}
showSearch
optionFilterProp="label"
/>
</Form.Item>
<Form.Item label="评估维度">
<Table
columns={columns}
dataSource={dimensions}
pagination={false}
size="small"
rowKey="key"
/>
<div style={{ display: 'flex', gap: 8, marginBottom: 16 }}>
<Input
placeholder="输入维度名称"
value={newDimension.dimension}
onChange={(e) => setNewDimension({...newDimension, dimension: e.target.value})}
style={{ flex: 1 }}
/>
<Input
placeholder="输入维度描述"
value={newDimension.description}
onChange={(e) => setNewDimension({...newDimension, description: e.target.value})}
style={{ flex: 2 }}
/>
<Button
type="primary"
onClick={handleAddDimension}
disabled={!newDimension.dimension.trim()}
>
</Button>
</div>
))}
</div>
</div>
{/* 添加自定义维度 */}
<div style={{ background: "#fafafa", borderRadius: 6, padding: 16 }}>
<div style={{ fontWeight: 500, marginBottom: 8 }}>
</div>
<Input
value={newDimension.name}
onChange={(e) =>
setNewDimension({ ...newDimension, name: e.target.value })
}
placeholder="维度名称"
style={{ width: 180, marginRight: 8 }}
size="small"
/>
<Input
value={newDimension.description}
onChange={(e) =>
setNewDimension({
...newDimension,
description: e.target.value,
})
}
placeholder="维度描述"
style={{ width: 260, marginRight: 8 }}
size="small"
/>
</Form.Item>
</>
)}
</Form.Item>
<Form.Item>
<div style={{ display: 'flex', justifyContent: 'space-between', marginTop: '16px' }}>
<Button
icon={<PlusOutlined />}
onClick={handleAddCustomDimension}
disabled={
!newDimension.name.trim() || !newDimension.description.trim()
}
size="small"
type="link"
icon={<EyeOutlined />}
onClick={generateEvaluationPrompt}
>
</Button>
</div>
</Card>
</Form.Item>
{/* 模型配置(仅在选择模型评估时显示) */}
{createForm.evaluationType === "model" && (
<Card title="模型配置" style={{ marginBottom: 24 }}>
<Form.Item label="模型 URL" required>
<Input
value={createForm.modelConfig.url}
onChange={(e) =>
setCreateForm({
...createForm,
modelConfig: {
...createForm.modelConfig,
url: e.target.value,
},
})
}
placeholder="https://api.openai.com/v1/chat/completions"
/>
</Form.Item>
<Form.Item label="API Key" required>
<Input.Password
value={createForm.modelConfig.apiKey}
onChange={(e) =>
setCreateForm({
...createForm,
modelConfig: {
...createForm.modelConfig,
apiKey: e.target.value,
},
})
}
placeholder="sk-***"
/>
</Form.Item>
</Card>
)}
{/* 操作按钮 */}
<Form.Item>
<div style={{ display: "flex", justifyContent: "flex-end", gap: 12 }}>
<Button onClick={() => navigate("/data/evaluation")}></Button>
<Button
type="primary"
onClick={handleCreateTask}
disabled={
!createForm.name ||
!createForm.datasetId ||
allDimensions.filter((d) => d.isEnabled).length === 0 ||
(createForm.evaluationType === "model" &&
(!createForm.modelConfig.url ||
!createForm.modelConfig.apiKey))
}
>
<div style={{ textAlign: 'right' }}>
<Button onClick={onCancel} style={{ marginRight: 8 }}>
</Button>
<Button type="primary" htmlType="submit" loading={loading}>
</Button>
</div>
</Form.Item>
</Form>
</div>
<PreviewPromptModal
previewVisible={previewVisible}
onCancel={() => setPreviewVisible(false)}
evaluationPrompt={evaluationPrompt}
/>
</Modal>
);
};
export default EvaluationTaskCreate;
export default CreateTaskModal;