feature: add data-evaluation

* feature: add evaluation task management function

* feature: add evaluation task detail page

* fix: delete duplicate definition for table t_model_config

* refactor: rename package synthesis to ratio

* refactor: add eval file table and refactor related code

* fix: calling large models in parallel during evaluation
This commit is contained in:
hefanli
2025-12-04 09:23:54 +08:00
committed by GitHub
parent 265e284fb8
commit 1d19cd3a62
52 changed files with 2882 additions and 1244 deletions

View File

@@ -1,4 +1,4 @@
import { useMemo, useState } from "react";
import { useState } from "react";
import { Button, Form, message } from "antd";
import { ArrowLeft, ChevronRight } from "lucide-react";
import { createRatioTaskUsingPost } from "@/pages/RatioTask/ratio.api.ts";

View File

@@ -2,14 +2,12 @@ import React, { useMemo, useState, useEffect, FC } from "react";
import {
Badge,
Card,
Progress,
Button,
Select,
Table,
InputNumber,
Space,
} from "antd";
import { BarChart3, Filter } from "lucide-react";
import { BarChart3 } from "lucide-react";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
const TIME_RANGE_OPTIONS = [
@@ -20,6 +18,11 @@ const TIME_RANGE_OPTIONS = [
{ label: '最近30天', value: 30 },
];
/**
 * A label/value pair used as the `labelFilter` of a ratio config row.
 */
interface LabelFilter {
// Label name picked in the label selector.
label: string;
// Value picked for that label. The selectors in this file write "" here
// when the label was just changed or the value was cleared.
value: string;
}
interface RatioConfigItem {
id: string;
name: string;
@@ -27,7 +30,7 @@ interface RatioConfigItem {
quantity: number;
percentage: number;
source: string; // dataset id
labelFilter?: string;
labelFilter?: LabelFilter;
dateRange?: number;
}
@@ -36,7 +39,8 @@ interface RatioConfigProps {
selectedDatasets: string[];
datasets: Dataset[];
totalTargetCount: number;
distributions: Record<string, Record<string, number>>;
// distributions now: { datasetId: { labelName: { labelValue: count } } }
distributions: Record<string, Record<string, Record<string, number>>>;
onChange?: (configs: RatioConfigItem[]) => void;
}
@@ -63,6 +67,10 @@ const RatioConfig: FC<RatioConfigProps> = ({
return Object.keys(dist);
};
/**
 * Lists the values recorded for `label` in the distribution of the given dataset.
 *
 * @param datasetId - dataset id; coerced with `String()` before the lookup
 * @param label - label name to look up inside that dataset's distribution
 * @returns the keys of the label's value map, or an empty array when the
 *          dataset or the label has no entry
 */
const getLabelValues = (datasetId: string, label: string): string[] => {
  const valueCounts = distributions[String(datasetId)]?.[label];
  return valueCounts ? Object.keys(valueCounts) : [];
};
const addConfig = (datasetId: string) => {
const dataset = datasets.find((d) => String(d.id) === datasetId);
const newConfig: RatioConfigItem = {
@@ -208,46 +216,85 @@ const RatioConfig: FC<RatioConfigProps> = ({
);
const labels = getDatasetLabels(datasetId);
const usedLabels = datasetConfigs
.map((c) => c.labelFilter)
.filter(Boolean) as string[];
/**
 * Collects the label values already taken for `label` among `datasetConfigs`.
 *
 * @param label - label name whose selected values are gathered
 * @param excludeId - optional config id to leave out of the scan
 * @returns a Set of the non-empty values selected for `label`
 */
const getUsedValuesForLabel = (label: string, excludeId?: string) => {
  const taken = new Set<string>();
  for (const cfg of datasetConfigs) {
    if (cfg.id === excludeId) continue;
    const filter = cfg.labelFilter;
    if (!filter || filter.label !== label) continue;
    // Falsy values (e.g. "" for a row with no value chosen yet) are not counted as used.
    if (filter.value) taken.add(filter.value);
  }
  return taken;
};
const columns = [
{
title: "配比项",
dataIndex: "id",
key: "id",
render: (_: any, record: RatioConfigItem) => (
<Space>
<Filter size={14} className="text-gray-400" />
<span className="text-sm">{record.name}</span>
</Space>
),
},
{
title: "标签筛选",
title: "标签",
dataIndex: "labelFilter",
key: "labelFilter",
render: (_: any, record: RatioConfigItem) => {
const availableLabels = labels
.map((l) => ({ label: l, value: l }))
.filter(
(opt) =>
opt.value === record.labelFilter ||
!usedLabels.includes(opt.value)
);
.map((l) => ({
label: l,
value: l,
disabled: getLabelValues(datasetId, l).every((v) => getUsedValuesForLabel(l, record.id).has(v)),
}))
return (
<Select
style={{ width: "160px" }}
placeholder="选择标签"
value={record.labelFilter}
value={record.labelFilter?.label}
options={availableLabels}
allowClear
onChange={(value) =>
onChange={(value) => {
if (!value) {
updateConfig(record.id, { labelFilter: undefined });
} else {
// reset value when label changes
updateConfig(record.id, {
labelFilter: { label: value, value: "" },
});
}
}}
/>
);
},
},
{
title: "标签值",
dataIndex: "labelValue",
key: "labelValue",
render: (_: any, record: RatioConfigItem) => {
const selectedLabel = record.labelFilter?.label;
const options = selectedLabel
? getLabelValues(datasetId, selectedLabel).map((v) => ({
label: v,
value: v,
disabled: datasetConfigs.some(
(c) =>
c.id !== record.id &&
c.labelFilter?.label === selectedLabel &&
c.labelFilter?.value === v
),
}))
: [];
return (
<Select
style={{ width: "180px" }}
placeholder="选择标签值"
value={record.labelFilter?.value || undefined}
options={options}
allowClear
disabled={!selectedLabel}
onChange={(value) => {
if (!selectedLabel) return;
updateConfig(record.id, {
labelFilter: value || undefined,
})
}
labelFilter: {
label: selectedLabel,
value: value || "",
},
});
}}
/>
);
},
@@ -286,23 +333,6 @@ const RatioConfig: FC<RatioConfigProps> = ({
/>
),
},
{
title: "占比",
dataIndex: "percentage",
key: "percentage",
render: (_: any, record: RatioConfigItem) => (
<div style={{ minWidth: 140 }}>
<div className="text-xs mb-1">
{record.percentage ?? 0}%
</div>
<Progress
percent={record.percentage ?? 0}
size="small"
showInfo={false}
/>
</div>
),
},
{
title: "操作",
dataIndex: "actions",

View File

@@ -1,3 +1,4 @@
// typescript
import React, { useEffect, useState } from "react";
import { Badge, Button, Card, Checkbox, Input, Pagination } from "antd";
import { Search as SearchIcon } from "lucide-react";
@@ -5,32 +6,48 @@ import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
import {
queryDatasetsUsingGet,
queryDatasetByIdUsingGet,
queryDatasetStatisticsByIdUsingGet,
} from "@/pages/DataManagement/dataset.api.ts";
interface SelectDatasetProps {
selectedDatasets: string[];
onSelectedDatasetsChange: (next: string[]) => void;
// distributions now: { datasetId: { labelName: { labelValue: count } } }
onDistributionsChange?: (
next: Record<string, Record<string, number>>
next: Record<string, Record<string, Record<string, number>>>
) => void;
onDatasetsChange?: (list: Dataset[]) => void;
}
const SelectDataset: React.FC<SelectDatasetProps> = ({
selectedDatasets,
onSelectedDatasetsChange,
onDistributionsChange,
onDatasetsChange,
}) => {
selectedDatasets,
onSelectedDatasetsChange,
onDistributionsChange,
onDatasetsChange,
}) => {
const [datasets, setDatasets] = useState<Dataset[]>([]);
const [loading, setLoading] = useState(false);
const [searchQuery, setSearchQuery] = useState("");
const [pagination, setPagination] = useState({ page: 1, size: 10, total: 0 });
const [distributions, setDistributions] = useState<
Record<string, Record<string, number>>
Record<string, Record<string, Record<string, number>>>
>({});
/**
 * Flattens a nested distribution `{ label: { value: count } }` into a list of
 * `{ label, value, count }` rows, keeping the entry order of the input.
 *
 * @param dist - nested distribution for one dataset; may be undefined
 * @returns one row per (label, value) pair; an empty array when `dist` is
 *          missing. Labels whose entry is falsy or not an object are skipped.
 */
const flattenDistribution = (
  dist?: Record<string, Record<string, number>>
): Array<{ label: string; value: string; count: number }> => {
  if (!dist) return [];
  return Object.entries(dist).flatMap(([label, values]) =>
    values && typeof values === "object"
      ? Object.entries(values).map(([value, count]) => ({ label, value, count }))
      : []
  );
};
// Fetch dataset list
useEffect(() => {
const fetchDatasets = async () => {
@@ -52,10 +69,10 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
setLoading(false);
}
};
fetchDatasets();
fetchDatasets().then(() => {});
}, [pagination.page, pagination.size, searchQuery]);
// Fetch label distributions when in label mode
// Fetch label distributions when datasets change
useEffect(() => {
const fetchDistributions = async () => {
if (!datasets?.length) return;
@@ -64,74 +81,25 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
.filter((id) => !distributions[id]);
if (!idsToFetch.length) return;
try {
const results = await Promise.all(
idsToFetch.map(async (id) => {
try {
const statRes = await queryDatasetStatisticsByIdUsingGet(id);
return { id, stats: statRes?.data };
} catch {
return { id, stats: null };
}
})
);
const next: Record<string, Record<string, number>> = {
...distributions,
};
for (const { id, stats } of results) {
let dist: Record<string, number> | undefined = undefined;
if (stats) {
const candidates: any[] = [
(stats as any).labelDistribution,
(stats as any).tagDistribution,
(stats as any).label_stats,
(stats as any).labels,
(stats as any).distribution,
];
let picked = candidates.find(
(c) => c && (typeof c === "object" || Array.isArray(c))
);
if (Array.isArray(picked)) {
const obj: Record<string, number> = {};
picked.forEach((it: any) => {
const key = it?.label ?? it?.name ?? it?.tag ?? it?.key;
const val = it?.count ?? it?.value ?? it?.num ?? it?.total;
if (key != null && typeof val === "number")
obj[String(key)] = val;
});
dist = obj;
} else if (picked && typeof picked === "object") {
dist = picked as Record<string, number>;
}
}
if (!dist) {
try {
const detRes = await queryDatasetByIdUsingGet(id);
const det = detRes?.data;
if (det) {
let picked =
(det as any).distribution ||
(det as any).labelDistribution ||
(det as any).tagDistribution ||
(det as any).label_stats ||
(det as any).labels ||
undefined;
if (Array.isArray(picked)) {
const obj: Record<string, number> = {};
picked.forEach((it: any) => {
const key = it?.label ?? it?.name ?? it?.tag ?? it?.key;
const val = it?.count ?? it?.value ?? it?.num ?? it?.total;
if (key != null && typeof val === "number")
obj[String(key)] = val;
});
dist = obj;
} else if (picked && typeof picked === "object") {
dist = picked as Record<string, number>;
}
const next: Record<
string,
Record<string, Record<string, number>>
> = { ...distributions };
for (const id of idsToFetch) {
let dist: Record<string, Record<string, number>> | undefined =
undefined;
try {
const detRes = await queryDatasetByIdUsingGet(id);
const det = detRes?.data;
if (det) {
const picked = det?.distribution;
if (picked && typeof picked === "object") {
// Assume picked is now { labelName: { labelValue: count } }
dist = picked as Record<string, Record<string, number>>;
}
} catch {
dist = undefined;
}
} catch {
dist = undefined;
}
next[String(id)] = dist || {};
}
@@ -141,7 +109,7 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
// ignore
}
};
fetchDistributions();
fetchDistributions().then(() => {});
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [datasets]);
@@ -195,6 +163,8 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
datasets.map((dataset) => {
const idStr = String(dataset.id);
const checked = selectedDatasets.includes(idStr);
const distFor = distributions[idStr];
const flat = flattenDistribution(distFor);
return (
<Card
key={dataset.id}
@@ -224,17 +194,15 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
<span>{dataset.size}</span>
</div>
<div className="mt-2">
{distributions[idStr] ? (
Object.entries(distributions[idStr]).length > 0 ? (
{distFor ? (
flat.length > 0 ? (
<div className="flex flex-wrap gap-2 text-xs">
{Object.entries(distributions[idStr])
.slice(0, 8)
.map(([tag, count]) => (
<Badge
key={tag}
color="gray"
>{`${tag}: ${count}`}</Badge>
))}
{flat.slice(0, 8).map((it) => (
<Badge
key={`${it.label}_${it.value}`}
color="gray"
>{`${it.label}/${it.value}: ${it.count}`}</Badge>
))}
</div>
) : (
<div className="text-xs text-gray-400">

View File

@@ -45,9 +45,7 @@ export function mapRatioTask(task: Partial<RatioTaskItem>): RatioTaskItem {
status: ratioTaskStatusMap[task.status || RatioStatus.PENDING],
createdAt: formatDate(task.created_at),
updatedAt: formatDate(task.updated_at),
description:
task.description ||
(task.ratio_method === "DATASET" ? "按数据集配比" : "按标签配比"),
description: task.description,
icon: <BarChart3 />,
iconColor: task.ratio_method === "DATASET" ? "bg-blue-100" : "bg-green-100",
statistics: [
@@ -73,16 +71,5 @@ export function mapRatioTask(task: Partial<RatioTaskItem>): RatioTaskItem {
value: task.created_at || "-",
},
],
type: task.ratio_method === "DATASET" ? "数据集配比" : "标签配比",
// progress: 100,
// sourceDatasets: ["sentiment_dataset", "news_classification"],
// targetRatio: { 正面: 33, 负面: 33, 中性: 34 },
// currentRatio: { 正面: 33, 负面: 33, 中性: 34 },
// totalRecords: 15000,
// processedRecords: 15000,
// estimatedTime: "已完成",
// quality: 95,
// strategy: "随机下采样",
// outputPath: "/data/balanced/sentiment_balanced_20250120",
};
}