fix: resolve read errors caused by malformed JSON in model output during evaluation (#133)

* feature: add COT data evaluation functionality

* fix: add verification of evaluation results

* fix: fix the evaluation prompt

* fix: fix read failure when the evaluation result is empty
hefanli
2025-12-04 18:49:50 +08:00
committed by GitHub
parent 31c4966608
commit 744d15ba24
14 changed files with 373 additions and 219 deletions
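The commit title and the last message bullet describe hardening the evaluation flow against model output that is not valid JSON and against empty evaluation results. Only the frontend portion of the 14 changed files is shown below; as a rough sketch of the kind of defensive parsing such a fix implies, here is a minimal TypeScript example. It is not code from this commit: the function name parseEvaluationResult and the empty-object fallback are assumptions for illustration only.

// Hypothetical sketch, not from this commit: defensively read a model's evaluation output.
type EvaluationResult = Record<string, 'Y' | 'N'>;

function parseEvaluationResult(raw: string | null | undefined): EvaluationResult {
  // Guard the empty-result case before attempting to parse.
  if (!raw || raw.trim() === '') {
    return {};
  }
  // Model output often wraps JSON in fences or extra prose; isolate the first {...} block.
  const match = raw.match(/\{[\s\S]*\}/);
  const candidate = match ? match[0] : raw;
  try {
    const parsed = JSON.parse(candidate);
    return parsed && typeof parsed === 'object' ? (parsed as EvaluationResult) : {};
  } catch {
    // Malformed JSON: fall back to an empty result instead of failing the read.
    return {};
  }
}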

View File

@@ -1,3 +1,4 @@
import React, { useState, useEffect } from 'react';
import { Button, Form, Input, Select, message, Modal, Row, Col, Table, Space } from 'antd';
import { EyeOutlined } from '@ant-design/icons';
@@ -36,6 +37,7 @@ interface CreateTaskModalProps {
const TASK_TYPES = [
{ label: 'QA评估', value: 'QA' },
{ label: 'COT评估', value: 'COT' },
];
const EVAL_METHODS = [
@@ -55,7 +57,7 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
dimension: '',
description: ''
});
const [taskType, setTaskType] = useState<string>("QA");
const [taskType, setTaskType] = useState<string>(DEFAULT_TASK_TYPE);
const [promptTemplates, setPromptTemplates] = useState<PromptTemplate[]>([]);
const [previewVisible, setPreviewVisible] = useState(false);
const [evaluationPrompt, setEvaluationPrompt] = useState('');
@@ -82,9 +84,24 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
fetchDatasets().then();
fetchModels().then();
fetchPromptTemplates().then();
// sync form with local taskType default
form.setFieldsValue({ taskType: DEFAULT_TASK_TYPE });
}
}, [visible]);
// when promptTemplates or taskType change, switch dimensions to template defaults (COT/QA)
useEffect(() => {
if (!promptTemplates || promptTemplates.length === 0) return;
const template = promptTemplates.find(t => t.evalType === taskType);
if (template && template.defaultDimensions) {
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
key: `dim-${index}`,
dimension: dim.dimension,
description: dim.description
})));
}
}, [taskType, promptTemplates]);
const fetchDatasets = async () => {
try {
const { data } = await queryDatasetsUsingGet({ page: 1, size: 1000 });
@@ -106,31 +123,46 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
};
const formatDimensionsForPrompt = (dimensions: Dimension[]) => {
let result = "\n";
let result = "";
dimensions.forEach((dim, index) => {
result += `### ${index + 1}. ${dim.dimension}\n**评估标准:**\n${dim.description}\n\n`;
if (index > 0) {
result += "\n";
}
result += `### ${index + 1}. ${dim.dimension}\n**评估标准:**\n${dim.description}`;
if (index < dimensions.length - 1) {
result += "\n";
}
});
return result;
};
const formatResultExample = (dimensions: Dimension[]) => {
return dimensions.map(dim => `\n "${dim.dimension}": "Y",`).join('');
let result = "";
dimensions.forEach((dim, index) => {
if (index > 0) {
result += "\n ";
}
result += `"${dim.dimension}": "Y"`;
if (index < dimensions.length - 1) {
result += ",";
}
});
return result;
};
const fetchPromptTemplates = async () => {
try {
const response = await queryPromptTemplatesUsingGet();
const templates: PromptTemplate[] = response.data?.templates
setPromptTemplates(templates)
if (taskType) {
const template = templates.find(t => t.evalType === taskType);
if (template) {
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
key: `dim-${index}`,
dimension: dim.dimension,
description: dim.description
})));
}
const templates: PromptTemplate[] = response.data?.templates || [];
setPromptTemplates(templates);
// if a template exists for current taskType, initialize dimensions (handled also by useEffect)
const template = templates.find(t => t.evalType === taskType);
if (template) {
setDimensions(template.defaultDimensions.map((dim: any, index: number) => ({
key: `dim-${index}`,
dimension: dim.dimension,
description: dim.description
})));
}
} catch (error) {
console.error('Error fetching prompt templates:', error);
@@ -144,8 +176,11 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
return;
}
const template = promptTemplates.find(t => t.evalType === taskType);
setEvaluationPrompt(template?.prompt.replace("{dimensions}", formatDimensionsForPrompt(dimensions))
.replace('{result_example}', formatResultExample(dimensions)));
const basePrompt = template?.prompt || '';
const filled = basePrompt
.replace('{dimensions}', formatDimensionsForPrompt(dimensions))
.replace('{result_example}', formatResultExample(dimensions));
setEvaluationPrompt(filled);
setPreviewVisible(true);
};
@@ -243,6 +278,13 @@ const CreateTaskModal: React.FC<CreateTaskModalProps> = ({ visible, onCancel, on
evalMethod: DEFAULT_EVAL_METHOD,
taskType: DEFAULT_TASK_TYPE,
}}
onValuesChange={(changed) => {
if (changed.taskType) {
setTaskType(changed.taskType);
setEvaluationPrompt('');
setPreviewVisible(false);
}
}}
>
<Row gutter={16}>
<Col span={12}>
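
In the CreateTaskModal diff above, formatDimensionsForPrompt and formatResultExample are rewritten so the text substituted into the {dimensions} and {result_example} placeholders carries no stray leading newline and, more importantly, no trailing comma after the last entry; a trailing comma makes the example object invalid JSON, which is one plausible way the model ends up emitting output the evaluator cannot parse. A small standalone illustration of the new result-example shape, with invented dimension names:

// Illustrative only: the dimension names below are invented, not from the repository.
interface Dimension {
  key: string;
  dimension: string;
  description: string;
}

const sampleDimensions: Dimension[] = [
  { key: 'dim-0', dimension: 'Accuracy', description: 'Answer agrees with the reference content' },
  { key: 'dim-1', dimension: 'Completeness', description: 'Reasoning covers the key information' },
];

// Same idea as the updated formatResultExample: only non-final entries get a comma,
// leaving a valid JSON object body.
const resultExample = sampleDimensions
  .map((dim) => `"${dim.dimension}": "Y"`)
  .join(',\n  ');

// resultExample:
//   "Accuracy": "Y",
//   "Completeness": "Y"
// The {result_example} placeholder in the prompt template is then replaced with this text.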

View File

@@ -1,7 +1,8 @@
import { useEffect, useState } from 'react';
import { Table, Typography, Button, Space, Spin, Empty, message, Tooltip } from 'antd';
import { Table, Typography, Button, Space, Empty, Tooltip } from 'antd';
import { FolderOpen, FileText, ArrowLeft } from 'lucide-react';
import { queryEvaluationFilesUsingGet, queryEvaluationItemsUsingGet } from '../../evaluation.api';
import useFetchData from '@/hooks/useFetchData';
const { Text } = Typography;
@@ -39,63 +40,52 @@ type EvalItem = {
};
export default function EvaluationItems({ task }: { task: any }) {
const [loadingFiles, setLoadingFiles] = useState<boolean>(false);
const [files, setFiles] = useState<EvalFile[]>([]);
const [filePagination, setFilePagination] = useState({ current: 1, pageSize: 10, total: 0 });
const [selectedFile, setSelectedFile] = useState<{ fileId: string; fileName: string } | null>(null);
const [loadingItems, setLoadingItems] = useState<boolean>(false);
const [items, setItems] = useState<EvalItem[]>([]);
const [itemPagination, setItemPagination] = useState({ current: 1, pageSize: 10, total: 0 });
// Fetch files list
useEffect(() => {
if (!task?.id || selectedFile) return;
const fetchFiles = async () => {
setLoadingFiles(true);
try {
const res = await queryEvaluationFilesUsingGet({ taskId: task.id, page: filePagination.current, size: filePagination.pageSize });
const data = res?.data;
const list: EvalFile[] = data?.content || [];
setFiles(list);
setFilePagination((p) => ({ ...p, total: data?.totalElements || 0 }));
} catch (e) {
message.error('加载评估文件失败');
console.error(e);
} finally {
setLoadingFiles(false);
}
};
fetchFiles();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [task?.id, filePagination.current, filePagination.pageSize, selectedFile]);
// File list data (via useFetchData); pageOffset = 0 means the backend pagination is 1-based
const {
loading: loadingFiles,
tableData: files,
pagination: filePagination,
setSearchParams: setFileSearchParams,
} = useFetchData<EvalFile>(
(params) => queryEvaluationFilesUsingGet({ taskId: task?.id, ...params }),
(d) => d as unknown as EvalFile,
30000,
false,
[],
0
);
// Fetch items of selected file
useEffect(() => {
if (!task?.id || !selectedFile) return;
const fetchItems = async () => {
setLoadingItems(true);
try {
const res = await queryEvaluationItemsUsingGet({
taskId: task.id,
page: itemPagination.current,
size: itemPagination.pageSize,
file_id: selectedFile.fileId,
});
const data = res?.data;
const list: EvalItem[] = data?.content || [];
setItems(list);
setItemPagination((p) => ({ ...p, total: data?.totalElements || 0 }));
} catch (e) {
message.error('加载评估条目失败');
console.error(e);
} finally {
setLoadingItems(false);
// Evaluation item data (via useFetchData), depends on the selected file
const {
loading: loadingItems,
tableData: items,
pagination: itemPagination,
setSearchParams: setItemSearchParams,
fetchData: fetchItems,
} = useFetchData<EvalItem>(
(params) => {
if (!task?.id || !selectedFile?.fileId) {
return Promise.resolve({ data: { content: [], totalElements: 0 } });
}
};
fetchItems();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [task?.id, selectedFile?.fileId, itemPagination.current, itemPagination.pageSize]);
return queryEvaluationItemsUsingGet({ taskId: task.id, file_id: selectedFile.fileId, ...params });
},
(d) => d as unknown as EvalItem,
30000,
false,
[],
0
);
// When the selected file changes, trigger an item query explicitly, so it does not rely solely on a searchParams change that may never fire
useEffect(() => {
if (task?.id && selectedFile?.fileId) {
setItemSearchParams((prev: any) => ({ ...prev, current: 1 }));
// Fetch once immediately so data appears right after the click
fetchItems();
}
}, [task?.id, selectedFile?.fileId]);
const fileColumns = [
{
@@ -228,19 +218,20 @@ export default function EvaluationItems({ task }: { task: any }) {
dataSource={files}
loading={loadingFiles}
size="middle"
onRow={(record) => ({ onClick: () => setSelectedFile({ fileId: record.fileId, fileName: record.fileName }) })}
pagination={{
current: filePagination.current,
pageSize: filePagination.pageSize,
total: filePagination.total,
onChange: (current, pageSize) => setFilePagination({ current, pageSize, total: filePagination.total }),
}}
onRow={(record) => ({
onClick: () => {
setSelectedFile({ fileId: record.fileId, fileName: record.fileName });
// When switching files, reset the item table to the first page
setItemSearchParams((prev: any) => ({ ...prev, current: 1 }));
},
})}
pagination={filePagination}
/>
) : (
<div className="flex flex-col gap-3">
<div className="sticky top-0 z-10 bg-white py-2" style={{ borderBottom: '1px solid #f0f0f0' }}>
<Space wrap>
<Button icon={<ArrowLeft size={16} />} onClick={() => { setSelectedFile(null); setItems([]); }}>
<Button icon={<ArrowLeft size={16} />} onClick={() => { setSelectedFile(null); }}>
</Button>
<Space>
@@ -257,12 +248,7 @@ export default function EvaluationItems({ task }: { task: any }) {
dataSource={items}
loading={loadingItems}
size="middle"
pagination={{
current: itemPagination.current,
pageSize: itemPagination.pageSize,
total: itemPagination.total,
onChange: (current, pageSize) => setItemPagination({ current, pageSize, total: itemPagination.total }),
}}
pagination={itemPagination}
/>
</div>
)}
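
The EvaluationItems refactor above drops the two hand-written fetch effects and the local pagination state in favour of the shared useFetchData hook, passing a fetcher, a row mapper, a 30000 ms interval, a boolean flag, an empty initial list, and a pageOffset of 0. The hook's real signature is not part of this diff, so the declaration below is only an assumption reconstructed from those call sites, to make the positional arguments easier to read:

// Assumed shape of useFetchData, inferred solely from the call sites in this diff;
// the actual hook in '@/hooks/useFetchData' may differ.
declare function useFetchData<T>(
  fetcher: (params: Record<string, unknown>) => Promise<{ data?: { content?: unknown[]; totalElements?: number } }>,
  mapRow: (row: unknown) => T,   // maps each backend row onto the table row type
  intervalMs?: number,           // 30000 at both call sites, presumably a refresh interval
  flag?: boolean,                // passed as false here; its exact meaning is not visible in the diff
  initialData?: T[],             // initial table data, [] at both call sites
  pageOffset?: number            // 0 here; per the comment above, the backend pagination is 1-based
): {
  loading: boolean;
  tableData: T[];
  pagination: { current: number; pageSize: number; total: number; onChange?: (page: number, pageSize: number) => void };
  setSearchParams: (updater: (prev: Record<string, unknown>) => Record<string, unknown>) => void;
  fetchData: () => Promise<void>;
};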

View File

@@ -82,6 +82,7 @@ export default function DataEvaluationPage() {
label: '任务类型',
options: [
{ value: 'QA', label: 'QA评估' },
{ value: 'COT', label: 'COT评估' },
],
},
{
@@ -89,7 +90,6 @@ export default function DataEvaluationPage() {
label: '评估方式',
options: [
{ value: 'AUTO', label: '自动评估' },
{ value: 'MANUAL', label: '人工评估' },
],
},
];