You've already forked DataMate
feature: add data-evaluation
* feature: add evaluation task management function
* feature: add evaluation task detail page
* fix: delete duplicate definition for table t_model_config
* refactor: rename package synthesis to ratio
* refactor: add eval file table and refactor related code
* fix: calling large models in parallel during evaluation
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { useMemo, useState } from "react";
|
||||
import { useState } from "react";
|
||||
import { Button, Form, message } from "antd";
|
||||
import { ArrowLeft, ChevronRight } from "lucide-react";
|
||||
import { createRatioTaskUsingPost } from "@/pages/RatioTask/ratio.api.ts";
|
||||
|
||||
@@ -2,14 +2,12 @@ import React, { useMemo, useState, useEffect, FC } from "react";
|
||||
import {
|
||||
Badge,
|
||||
Card,
|
||||
Progress,
|
||||
Button,
|
||||
Select,
|
||||
Table,
|
||||
InputNumber,
|
||||
Space,
|
||||
} from "antd";
|
||||
import { BarChart3, Filter } from "lucide-react";
|
||||
import { BarChart3 } from "lucide-react";
|
||||
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||
|
||||
const TIME_RANGE_OPTIONS = [
|
||||
@@ -20,6 +18,11 @@ const TIME_RANGE_OPTIONS = [
|
||||
{ label: '最近30天', value: 30 },
|
||||
];
|
||||
|
||||
/**
 * One label-based filter attached to a ratio config row: a label name
 * together with the value chosen for that label.
 */
interface LabelFilter {
  /** Name of the label being filtered on. */
  label: string;
  /** Value picked for the label; an empty string while no value is chosen yet. */
  value: string;
}
|
||||
|
||||
interface RatioConfigItem {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -27,7 +30,7 @@ interface RatioConfigItem {
|
||||
quantity: number;
|
||||
percentage: number;
|
||||
source: string; // dataset id
|
||||
labelFilter?: string;
|
||||
labelFilter?: LabelFilter;
|
||||
dateRange?: number;
|
||||
}
|
||||
|
||||
@@ -36,7 +39,8 @@ interface RatioConfigProps {
|
||||
selectedDatasets: string[];
|
||||
datasets: Dataset[];
|
||||
totalTargetCount: number;
|
||||
distributions: Record<string, Record<string, number>>;
|
||||
// distributions now: { datasetId: { labelName: { labelValue: count } } }
|
||||
distributions: Record<string, Record<string, Record<string, number>>>;
|
||||
onChange?: (configs: RatioConfigItem[]) => void;
|
||||
}
|
||||
|
||||
@@ -63,6 +67,10 @@ const RatioConfig: FC<RatioConfigProps> = ({
|
||||
return Object.keys(dist);
|
||||
};
|
||||
|
||||
/**
 * Lists the values recorded for one label of a dataset.
 *
 * @param datasetId - Id of the dataset to look up in `distributions`.
 * @param label - Label name whose values are wanted.
 * @returns The value names for that label, or an empty array when the
 *   dataset or the label has no distribution entry.
 */
const getLabelValues = (datasetId: string, label: string): string[] => {
  const perLabel = distributions[String(datasetId)];
  const valueCounts = perLabel?.[label] || {};
  return Object.keys(valueCounts);
};
|
||||
|
||||
const addConfig = (datasetId: string) => {
|
||||
const dataset = datasets.find((d) => String(d.id) === datasetId);
|
||||
const newConfig: RatioConfigItem = {
|
||||
@@ -208,46 +216,85 @@ const RatioConfig: FC<RatioConfigProps> = ({
|
||||
);
|
||||
|
||||
const labels = getDatasetLabels(datasetId);
|
||||
const usedLabels = datasetConfigs
|
||||
.map((c) => c.labelFilter)
|
||||
.filter(Boolean) as string[];
|
||||
|
||||
/**
 * Collects the values of `label` that are already selected by this dataset's
 * config rows.
 *
 * @param label - Label name to inspect.
 * @param excludeId - Optional row id to leave out, so a row does not count
 *   its own selection as "used".
 * @returns A set of the non-empty values currently chosen for `label`.
 */
const getUsedValuesForLabel = (label: string, excludeId?: string) => {
  const taken = new Set<string>();
  for (const cfg of datasetConfigs) {
    if (cfg.id === excludeId) continue;
    if (cfg.labelFilter?.label !== label) continue;
    const chosen = cfg.labelFilter?.value;
    // Rows whose value is still empty (label picked, value not yet) are ignored.
    if (chosen) taken.add(chosen);
  }
  return taken;
};
|
||||
|
||||
const columns = [
|
||||
{
|
||||
title: "配比项",
|
||||
dataIndex: "id",
|
||||
key: "id",
|
||||
render: (_: any, record: RatioConfigItem) => (
|
||||
<Space>
|
||||
<Filter size={14} className="text-gray-400" />
|
||||
<span className="text-sm">{record.name}</span>
|
||||
</Space>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "标签筛选",
|
||||
title: "标签",
|
||||
dataIndex: "labelFilter",
|
||||
key: "labelFilter",
|
||||
render: (_: any, record: RatioConfigItem) => {
|
||||
const availableLabels = labels
|
||||
.map((l) => ({ label: l, value: l }))
|
||||
.filter(
|
||||
(opt) =>
|
||||
opt.value === record.labelFilter ||
|
||||
!usedLabels.includes(opt.value)
|
||||
);
|
||||
.map((l) => ({
|
||||
label: l,
|
||||
value: l,
|
||||
disabled: getLabelValues(datasetId, l).every((v) => getUsedValuesForLabel(l, record.id).has(v)),
|
||||
}))
|
||||
return (
|
||||
<Select
|
||||
style={{ width: "160px" }}
|
||||
placeholder="选择标签"
|
||||
value={record.labelFilter}
|
||||
value={record.labelFilter?.label}
|
||||
options={availableLabels}
|
||||
allowClear
|
||||
onChange={(value) =>
|
||||
onChange={(value) => {
|
||||
if (!value) {
|
||||
updateConfig(record.id, { labelFilter: undefined });
|
||||
} else {
|
||||
// reset value when label changes
|
||||
updateConfig(record.id, {
|
||||
labelFilter: { label: value, value: "" },
|
||||
});
|
||||
}
|
||||
}}
|
||||
/>
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "标签值",
|
||||
dataIndex: "labelValue",
|
||||
key: "labelValue",
|
||||
render: (_: any, record: RatioConfigItem) => {
|
||||
const selectedLabel = record.labelFilter?.label;
|
||||
const options = selectedLabel
|
||||
? getLabelValues(datasetId, selectedLabel).map((v) => ({
|
||||
label: v,
|
||||
value: v,
|
||||
disabled: datasetConfigs.some(
|
||||
(c) =>
|
||||
c.id !== record.id &&
|
||||
c.labelFilter?.label === selectedLabel &&
|
||||
c.labelFilter?.value === v
|
||||
),
|
||||
}))
|
||||
: [];
|
||||
return (
|
||||
<Select
|
||||
style={{ width: "180px" }}
|
||||
placeholder="选择标签值"
|
||||
value={record.labelFilter?.value || undefined}
|
||||
options={options}
|
||||
allowClear
|
||||
disabled={!selectedLabel}
|
||||
onChange={(value) => {
|
||||
if (!selectedLabel) return;
|
||||
updateConfig(record.id, {
|
||||
labelFilter: value || undefined,
|
||||
})
|
||||
}
|
||||
labelFilter: {
|
||||
label: selectedLabel,
|
||||
value: value || "",
|
||||
},
|
||||
});
|
||||
}}
|
||||
/>
|
||||
);
|
||||
},
|
||||
@@ -286,23 +333,6 @@ const RatioConfig: FC<RatioConfigProps> = ({
|
||||
/>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "占比",
|
||||
dataIndex: "percentage",
|
||||
key: "percentage",
|
||||
render: (_: any, record: RatioConfigItem) => (
|
||||
<div style={{ minWidth: 140 }}>
|
||||
<div className="text-xs mb-1">
|
||||
{record.percentage ?? 0}%
|
||||
</div>
|
||||
<Progress
|
||||
percent={record.percentage ?? 0}
|
||||
size="small"
|
||||
showInfo={false}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "操作",
|
||||
dataIndex: "actions",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// typescript
|
||||
import React, { useEffect, useState } from "react";
|
||||
import { Badge, Button, Card, Checkbox, Input, Pagination } from "antd";
|
||||
import { Search as SearchIcon } from "lucide-react";
|
||||
@@ -5,32 +6,48 @@ import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||
import {
|
||||
queryDatasetsUsingGet,
|
||||
queryDatasetByIdUsingGet,
|
||||
queryDatasetStatisticsByIdUsingGet,
|
||||
} from "@/pages/DataManagement/dataset.api.ts";
|
||||
|
||||
interface SelectDatasetProps {
|
||||
selectedDatasets: string[];
|
||||
onSelectedDatasetsChange: (next: string[]) => void;
|
||||
// distributions now: { datasetId: { labelName: { labelValue: count } } }
|
||||
onDistributionsChange?: (
|
||||
next: Record<string, Record<string, number>>
|
||||
next: Record<string, Record<string, Record<string, number>>>
|
||||
) => void;
|
||||
onDatasetsChange?: (list: Dataset[]) => void;
|
||||
}
|
||||
|
||||
const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
selectedDatasets,
|
||||
onSelectedDatasetsChange,
|
||||
onDistributionsChange,
|
||||
onDatasetsChange,
|
||||
}) => {
|
||||
selectedDatasets,
|
||||
onSelectedDatasetsChange,
|
||||
onDistributionsChange,
|
||||
onDatasetsChange,
|
||||
}) => {
|
||||
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [pagination, setPagination] = useState({ page: 1, size: 10, total: 0 });
|
||||
const [distributions, setDistributions] = useState<
|
||||
Record<string, Record<string, number>>
|
||||
Record<string, Record<string, Record<string, number>>>
|
||||
>({});
|
||||
|
||||
/**
 * Flattens a nested label distribution into rows for preview and filter logic.
 *
 * @param dist - Distribution of a single dataset, shaped
 *   `{ labelName: { labelValue: count } }`. May be undefined.
 * @returns One `{ label, value, count }` entry per label/value pair, in the
 *   object's own key order; an empty array when `dist` is missing.
 */
const flattenDistribution = (
  dist?: Record<string, Record<string, number>>
): Array<{ label: string; value: string; count: number }> => {
  if (!dist) return [];
  const items: Array<{ label: string; value: string; count: number }> = [];
  for (const [label, values] of Object.entries(dist)) {
    // The distribution is cast from a response payload without per-entry
    // validation, so skip anything that is not a plain label -> count map
    // (null, primitives, arrays) instead of emitting bogus rows.
    if (!values || typeof values !== "object" || Array.isArray(values)) continue;
    for (const [value, count] of Object.entries(values)) {
      // Only finite numeric counts are meaningful to display.
      if (typeof count !== "number" || !Number.isFinite(count)) continue;
      items.push({ label, value, count });
    }
  }
  return items;
};
|
||||
|
||||
// Fetch dataset list
|
||||
useEffect(() => {
|
||||
const fetchDatasets = async () => {
|
||||
@@ -52,10 +69,10 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
fetchDatasets();
|
||||
fetchDatasets().then(() => {});
|
||||
}, [pagination.page, pagination.size, searchQuery]);
|
||||
|
||||
// Fetch label distributions when in label mode
|
||||
// Fetch label distributions when datasets change
|
||||
useEffect(() => {
|
||||
const fetchDistributions = async () => {
|
||||
if (!datasets?.length) return;
|
||||
@@ -64,74 +81,25 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
.filter((id) => !distributions[id]);
|
||||
if (!idsToFetch.length) return;
|
||||
try {
|
||||
const results = await Promise.all(
|
||||
idsToFetch.map(async (id) => {
|
||||
try {
|
||||
const statRes = await queryDatasetStatisticsByIdUsingGet(id);
|
||||
return { id, stats: statRes?.data };
|
||||
} catch {
|
||||
return { id, stats: null };
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
const next: Record<string, Record<string, number>> = {
|
||||
...distributions,
|
||||
};
|
||||
for (const { id, stats } of results) {
|
||||
let dist: Record<string, number> | undefined = undefined;
|
||||
if (stats) {
|
||||
const candidates: any[] = [
|
||||
(stats as any).labelDistribution,
|
||||
(stats as any).tagDistribution,
|
||||
(stats as any).label_stats,
|
||||
(stats as any).labels,
|
||||
(stats as any).distribution,
|
||||
];
|
||||
let picked = candidates.find(
|
||||
(c) => c && (typeof c === "object" || Array.isArray(c))
|
||||
);
|
||||
if (Array.isArray(picked)) {
|
||||
const obj: Record<string, number> = {};
|
||||
picked.forEach((it: any) => {
|
||||
const key = it?.label ?? it?.name ?? it?.tag ?? it?.key;
|
||||
const val = it?.count ?? it?.value ?? it?.num ?? it?.total;
|
||||
if (key != null && typeof val === "number")
|
||||
obj[String(key)] = val;
|
||||
});
|
||||
dist = obj;
|
||||
} else if (picked && typeof picked === "object") {
|
||||
dist = picked as Record<string, number>;
|
||||
}
|
||||
}
|
||||
if (!dist) {
|
||||
try {
|
||||
const detRes = await queryDatasetByIdUsingGet(id);
|
||||
const det = detRes?.data;
|
||||
if (det) {
|
||||
let picked =
|
||||
(det as any).distribution ||
|
||||
(det as any).labelDistribution ||
|
||||
(det as any).tagDistribution ||
|
||||
(det as any).label_stats ||
|
||||
(det as any).labels ||
|
||||
undefined;
|
||||
if (Array.isArray(picked)) {
|
||||
const obj: Record<string, number> = {};
|
||||
picked.forEach((it: any) => {
|
||||
const key = it?.label ?? it?.name ?? it?.tag ?? it?.key;
|
||||
const val = it?.count ?? it?.value ?? it?.num ?? it?.total;
|
||||
if (key != null && typeof val === "number")
|
||||
obj[String(key)] = val;
|
||||
});
|
||||
dist = obj;
|
||||
} else if (picked && typeof picked === "object") {
|
||||
dist = picked as Record<string, number>;
|
||||
}
|
||||
const next: Record<
|
||||
string,
|
||||
Record<string, Record<string, number>>
|
||||
> = { ...distributions };
|
||||
for (const id of idsToFetch) {
|
||||
let dist: Record<string, Record<string, number>> | undefined =
|
||||
undefined;
|
||||
try {
|
||||
const detRes = await queryDatasetByIdUsingGet(id);
|
||||
const det = detRes?.data;
|
||||
if (det) {
|
||||
const picked = det?.distribution;
|
||||
if (picked && typeof picked === "object") {
|
||||
// Assume picked is now { labelName: { labelValue: count } }
|
||||
dist = picked as Record<string, Record<string, number>>;
|
||||
}
|
||||
} catch {
|
||||
dist = undefined;
|
||||
}
|
||||
} catch {
|
||||
dist = undefined;
|
||||
}
|
||||
next[String(id)] = dist || {};
|
||||
}
|
||||
@@ -141,7 +109,7 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
// ignore
|
||||
}
|
||||
};
|
||||
fetchDistributions();
|
||||
fetchDistributions().then(() => {});
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [datasets]);
|
||||
|
||||
@@ -195,6 +163,8 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
datasets.map((dataset) => {
|
||||
const idStr = String(dataset.id);
|
||||
const checked = selectedDatasets.includes(idStr);
|
||||
const distFor = distributions[idStr];
|
||||
const flat = flattenDistribution(distFor);
|
||||
return (
|
||||
<Card
|
||||
key={dataset.id}
|
||||
@@ -224,17 +194,15 @@ const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
<span>{dataset.size}</span>
|
||||
</div>
|
||||
<div className="mt-2">
|
||||
{distributions[idStr] ? (
|
||||
Object.entries(distributions[idStr]).length > 0 ? (
|
||||
{distFor ? (
|
||||
flat.length > 0 ? (
|
||||
<div className="flex flex-wrap gap-2 text-xs">
|
||||
{Object.entries(distributions[idStr])
|
||||
.slice(0, 8)
|
||||
.map(([tag, count]) => (
|
||||
<Badge
|
||||
key={tag}
|
||||
color="gray"
|
||||
>{`${tag}: ${count}`}</Badge>
|
||||
))}
|
||||
{flat.slice(0, 8).map((it) => (
|
||||
<Badge
|
||||
key={`${it.label}_${it.value}`}
|
||||
color="gray"
|
||||
>{`${it.label}/${it.value}: ${it.count}`}</Badge>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-xs text-gray-400">
|
||||
|
||||
@@ -45,9 +45,7 @@ export function mapRatioTask(task: Partial<RatioTaskItem>): RatioTaskItem {
|
||||
status: ratioTaskStatusMap[task.status || RatioStatus.PENDING],
|
||||
createdAt: formatDate(task.created_at),
|
||||
updatedAt: formatDate(task.updated_at),
|
||||
description:
|
||||
task.description ||
|
||||
(task.ratio_method === "DATASET" ? "按数据集配比" : "按标签配比"),
|
||||
description: task.description,
|
||||
icon: <BarChart3 />,
|
||||
iconColor: task.ratio_method === "DATASET" ? "bg-blue-100" : "bg-green-100",
|
||||
statistics: [
|
||||
@@ -73,16 +71,5 @@ export function mapRatioTask(task: Partial<RatioTaskItem>): RatioTaskItem {
|
||||
value: task.created_at || "-",
|
||||
},
|
||||
],
|
||||
type: task.ratio_method === "DATASET" ? "数据集配比" : "标签配比",
|
||||
// progress: 100,
|
||||
// sourceDatasets: ["sentiment_dataset", "news_classification"],
|
||||
// targetRatio: { 正面: 33, 负面: 33, 中性: 34 },
|
||||
// currentRatio: { 正面: 33, 负面: 33, 中性: 34 },
|
||||
// totalRecords: 15000,
|
||||
// processedRecords: 15000,
|
||||
// estimatedTime: "已完成",
|
||||
// quality: 95,
|
||||
// strategy: "随机下采样",
|
||||
// outputPath: "/data/balanced/sentiment_balanced_20250120",
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user