You've already forked DataMate
fix: 修复评估时模型输出json格式不对导致读取错误的问题 (#133)
* feature: add cot data evaluation function * fix: added verification to evaluation results * fix: fix the prompt for evaluating * fix: 修复当评估结果为空导致读取失败的问题
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
// TypeScript
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import { Button, Form, Input, Select, message, Modal, Row, Col, Table, Space } from 'antd';
|
||||
import { EyeOutlined } from '@ant-design/icons';
|
||||
@@ -36,6 +37,7 @@ interface CreateTaskModalProps {
|
||||
|
||||
/**
 * Evaluation task types as label/value pairs.
 *
 * NOTE(review): presumably the options of the task-type select in this form;
 * the consuming JSX is outside this excerpt — confirm.
 */
const TASK_TYPES = [
  { label: 'QA评估', value: 'QA' },
  { label: 'COT评估', value: 'COT' },
];
|
||||
|
||||
const EVAL_METHODS = [
|
||||
@@ -55,7 +57,7 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
|
||||
dimension: '',
|
||||
description: ''
|
||||
});
|
||||
const [taskType, setTaskType] = useState<string>("QA");
|
||||
const [taskType, setTaskType] = useState<string>(DEFAULT_TASK_TYPE);
|
||||
const [promptTemplates, setPromptTemplates] = useState<PromptTemplate[]>([]);
|
||||
const [previewVisible, setPreviewVisible] = useState(false);
|
||||
const [evaluationPrompt, setEvaluationPrompt] = useState('');
|
||||
@@ -82,9 +84,24 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
|
||||
fetchDatasets().then();
|
||||
fetchModels().then();
|
||||
fetchPromptTemplates().then();
|
||||
// sync form with local taskType default
|
||||
form.setFieldsValue({ taskType: DEFAULT_TASK_TYPE });
|
||||
}
|
||||
}, [visible]);
|
||||
|
||||
// when promptTemplates or taskType change, switch dimensions to template defaults (COT/QA)
|
||||
useEffect(() => {
|
||||
if (!promptTemplates || promptTemplates.length === 0) return;
|
||||
const template = promptTemplates.find(t => t.evalType === taskType);
|
||||
if (template && template.defaultDimensions) {
|
||||
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
|
||||
key: `dim-${index}`,
|
||||
dimension: dim.dimension,
|
||||
description: dim.description
|
||||
})));
|
||||
}
|
||||
}, [taskType, promptTemplates]);
|
||||
|
||||
const fetchDatasets = async () => {
|
||||
try {
|
||||
const { data } = await queryDatasetsUsingGet({ page: 1, size: 1000 });
|
||||
@@ -106,31 +123,46 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
|
||||
};
|
||||
|
||||
/**
 * Renders the evaluation dimensions as the Markdown fragment that replaces the
 * `{dimensions}` placeholder of a prompt template.
 *
 * Each dimension becomes a numbered `###` heading followed by its criteria.
 * Sections are separated by exactly one blank line, and the result carries no
 * leading or trailing newline, so the surrounding template controls spacing.
 *
 * @param dimensions Dimensions to render, in display order.
 * @returns The Markdown fragment, or an empty string when there are no dimensions.
 */
const formatDimensionsForPrompt = (dimensions: Dimension[]) =>
  dimensions
    .map(
      (dim, index) =>
        `### ${index + 1}. ${dim.dimension}\n**评估标准:**\n${dim.description}`
    )
    // One blank line between sections; nothing before the first or after the last.
    .join("\n\n");
|
||||
|
||||
/**
 * Builds the key/value lines that replace the `{result_example}` placeholder of
 * a prompt template: one `"<dimension>": "Y"` entry per dimension.
 *
 * Entries are comma-separated with no trailing comma, so the example stays
 * valid JSON once the template wraps it in braces. Dimension names are
 * JSON-escaped so a name containing `"` or `\` cannot break the example.
 *
 * @param dimensions Dimensions to list, in display order.
 * @returns The entry lines, or an empty string when there are no dimensions.
 */
const formatResultExample = (dimensions: Dimension[]) =>
  dimensions
    // JSON.stringify adds the surrounding quotes and escapes special characters.
    .map((dim) => `${JSON.stringify(String(dim.dimension))}: "Y"`)
    .join(",\n ");
|
||||
|
||||
const fetchPromptTemplates = async () => {
|
||||
try {
|
||||
const response = await queryPromptTemplatesUsingGet();
|
||||
const templates: PromptTemplate[] = response.data?.templates
|
||||
setPromptTemplates(templates)
|
||||
if (taskType) {
|
||||
const template = templates.find(t => t.evalType === taskType);
|
||||
if (template) {
|
||||
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
|
||||
key: `dim-${index}`,
|
||||
dimension: dim.dimension,
|
||||
description: dim.description
|
||||
})));
|
||||
}
|
||||
const templates: PromptTemplate[] = response.data?.templates || [];
|
||||
setPromptTemplates(templates);
|
||||
// if a template exists for current taskType, initialize dimensions (handled also by useEffect)
|
||||
const template = templates.find(t => t.evalType === taskType);
|
||||
if (template) {
|
||||
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
|
||||
key: `dim-${index}`,
|
||||
dimension: dim.dimension,
|
||||
description: dim.description
|
||||
})));
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error fetching prompt templates:', error);
|
||||
@@ -144,8 +176,11 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
|
||||
return;
|
||||
}
|
||||
const template = promptTemplates.find(t => t.evalType === taskType);
|
||||
setEvaluationPrompt(template?.prompt.replace("{dimensions}", formatDimensionsForPrompt(dimensions))
|
||||
.replace('{result_example}', formatResultExample(dimensions)));
|
||||
const basePrompt = template?.prompt || '';
|
||||
const filled = basePrompt
|
||||
.replace('{dimensions}', formatDimensionsForPrompt(dimensions))
|
||||
.replace('{result_example}', formatResultExample(dimensions));
|
||||
setEvaluationPrompt(filled);
|
||||
setPreviewVisible(true);
|
||||
};
|
||||
|
||||
@@ -243,6 +278,13 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
|
||||
evalMethod: DEFAULT_EVAL_METHOD,
|
||||
taskType: DEFAULT_TASK_TYPE,
|
||||
}}
|
||||
onValuesChange={(changed) => {
|
||||
if (changed.taskType) {
|
||||
setTaskType(changed.taskType);
|
||||
setEvaluationPrompt('');
|
||||
setPreviewVisible(false);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Row gutter={16}>
|
||||
<Col span={12}>
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { Table, Typography, Button, Space, Spin, Empty, message, Tooltip } from 'antd';
|
||||
import { Table, Typography, Button, Space, Empty, Tooltip } from 'antd';
|
||||
import { FolderOpen, FileText, ArrowLeft } from 'lucide-react';
|
||||
import { queryEvaluationFilesUsingGet, queryEvaluationItemsUsingGet } from '../../evaluation.api';
|
||||
import useFetchData from '@/hooks/useFetchData';
|
||||
|
||||
const { Text } = Typography;
|
||||
|
||||
@@ -39,63 +40,52 @@ type EvalItem = {
|
||||
};
|
||||
|
||||
export default function EvaluationItems({ task }: { task: any }) {
|
||||
const [loadingFiles, setLoadingFiles] = useState<boolean>(false);
|
||||
const [files, setFiles] = useState<EvalFile[]>([]);
|
||||
const [filePagination, setFilePagination] = useState({ current: 1, pageSize: 10, total: 0 });
|
||||
|
||||
const [selectedFile, setSelectedFile] = useState<{ fileId: string; fileName: string } | null>(null);
|
||||
const [loadingItems, setLoadingItems] = useState<boolean>(false);
|
||||
const [items, setItems] = useState<EvalItem[]>([]);
|
||||
const [itemPagination, setItemPagination] = useState({ current: 1, pageSize: 10, total: 0 });
|
||||
|
||||
// Fetch files list
|
||||
useEffect(() => {
|
||||
if (!task?.id || selectedFile) return;
|
||||
const fetchFiles = async () => {
|
||||
setLoadingFiles(true);
|
||||
try {
|
||||
const res = await queryEvaluationFilesUsingGet({ taskId: task.id, page: filePagination.current, size: filePagination.pageSize });
|
||||
const data = res?.data;
|
||||
const list: EvalFile[] = data?.content || [];
|
||||
setFiles(list);
|
||||
setFilePagination((p) => ({ ...p, total: data?.totalElements || 0 }));
|
||||
} catch (e) {
|
||||
message.error('加载评估文件失败');
|
||||
console.error(e);
|
||||
} finally {
|
||||
setLoadingFiles(false);
|
||||
}
|
||||
};
|
||||
fetchFiles();
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [task?.id, filePagination.current, filePagination.pageSize, selectedFile]);
|
||||
// 文件列表数据(使用 useFetchData),pageOffset=0 表示后端分页为 1 基
|
||||
const {
|
||||
loading: loadingFiles,
|
||||
tableData: files,
|
||||
pagination: filePagination,
|
||||
setSearchParams: setFileSearchParams,
|
||||
} = useFetchData<EvalFile>(
|
||||
(params) => queryEvaluationFilesUsingGet({ taskId: task?.id, ...params }),
|
||||
(d) => d as unknown as EvalFile,
|
||||
30000,
|
||||
false,
|
||||
[],
|
||||
0
|
||||
);
|
||||
|
||||
// Fetch items of selected file
|
||||
useEffect(() => {
|
||||
if (!task?.id || !selectedFile) return;
|
||||
const fetchItems = async () => {
|
||||
setLoadingItems(true);
|
||||
try {
|
||||
const res = await queryEvaluationItemsUsingGet({
|
||||
taskId: task.id,
|
||||
page: itemPagination.current,
|
||||
size: itemPagination.pageSize,
|
||||
file_id: selectedFile.fileId,
|
||||
});
|
||||
const data = res?.data;
|
||||
const list: EvalItem[] = data?.content || [];
|
||||
setItems(list);
|
||||
setItemPagination((p) => ({ ...p, total: data?.totalElements || 0 }));
|
||||
} catch (e) {
|
||||
message.error('加载评估条目失败');
|
||||
console.error(e);
|
||||
} finally {
|
||||
setLoadingItems(false);
|
||||
// 评估条目数据(使用 useFetchData),依赖选中文件
|
||||
const {
|
||||
loading: loadingItems,
|
||||
tableData: items,
|
||||
pagination: itemPagination,
|
||||
setSearchParams: setItemSearchParams,
|
||||
fetchData: fetchItems,
|
||||
} = useFetchData<EvalItem>(
|
||||
(params) => {
|
||||
if (!task?.id || !selectedFile?.fileId) {
|
||||
return Promise.resolve({ data: { content: [], totalElements: 0 } });
|
||||
}
|
||||
};
|
||||
fetchItems();
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [task?.id, selectedFile?.fileId, itemPagination.current, itemPagination.pageSize]);
|
||||
return queryEvaluationItemsUsingGet({ taskId: task.id, file_id: selectedFile.fileId, ...params });
|
||||
},
|
||||
(d) => d as unknown as EvalItem,
|
||||
30000,
|
||||
false,
|
||||
[],
|
||||
0
|
||||
);
|
||||
|
||||
// 当选择文件变化时,主动触发一次条目查询,避免仅依赖 searchParams 变更导致未触发
|
||||
useEffect(() => {
|
||||
if (task?.id && selectedFile?.fileId) {
|
||||
setItemSearchParams((prev: any) => ({ ...prev, current: 1 }));
|
||||
// 立即拉取一次,保证点击后立刻出现数据
|
||||
fetchItems();
|
||||
}
|
||||
}, [task?.id, selectedFile?.fileId]);
|
||||
|
||||
const fileColumns = [
|
||||
{
|
||||
@@ -228,19 +218,20 @@ export default function EvaluationItems({ task }: { task: any }) {
|
||||
dataSource={files}
|
||||
loading={loadingFiles}
|
||||
size="middle"
|
||||
onRow={(record) => ({ onClick: () => setSelectedFile({ fileId: record.fileId, fileName: record.fileName }) })}
|
||||
pagination={{
|
||||
current: filePagination.current,
|
||||
pageSize: filePagination.pageSize,
|
||||
total: filePagination.total,
|
||||
onChange: (current, pageSize) => setFilePagination({ current, pageSize, total: filePagination.total }),
|
||||
}}
|
||||
onRow={(record) => ({
|
||||
onClick: () => {
|
||||
setSelectedFile({ fileId: record.fileId, fileName: record.fileName });
|
||||
// 切换文件时,重置条目表到第一页
|
||||
setItemSearchParams((prev: any) => ({ ...prev, current: 1 }));
|
||||
},
|
||||
})}
|
||||
pagination={filePagination}
|
||||
/>
|
||||
) : (
|
||||
<div className="flex flex-col gap-3">
|
||||
<div className="sticky top-0 z-10 bg-white py-2" style={{ borderBottom: '1px solid #f0f0f0' }}>
|
||||
<Space wrap>
|
||||
<Button icon={<ArrowLeft size={16} />} onClick={() => { setSelectedFile(null); setItems([]); }}>
|
||||
<Button icon={<ArrowLeft size={16} />} onClick={() => { setSelectedFile(null); }}>
|
||||
返回文件列表
|
||||
</Button>
|
||||
<Space>
|
||||
@@ -257,12 +248,7 @@ export default function EvaluationItems({ task }: { task: any }) {
|
||||
dataSource={items}
|
||||
loading={loadingItems}
|
||||
size="middle"
|
||||
pagination={{
|
||||
current: itemPagination.current,
|
||||
pageSize: itemPagination.pageSize,
|
||||
total: itemPagination.total,
|
||||
onChange: (current, pageSize) => setItemPagination({ current, pageSize, total: itemPagination.total }),
|
||||
}}
|
||||
pagination={itemPagination}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -82,6 +82,7 @@ export default function DataEvaluationPage() {
|
||||
label: '任务类型',
|
||||
options: [
|
||||
{ value: 'QA', label: 'QA评估' },
|
||||
{ value: 'COT', label: 'COT评估' },
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -89,7 +90,6 @@ export default function DataEvaluationPage() {
|
||||
label: '评估方式',
|
||||
options: [
|
||||
{ value: 'AUTO', label: '自动评估' },
|
||||
{ value: 'MANUAL', label: '人工评估' },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user