feature:增加数据配比功能 (#52)

* refactor: 修改调整数据归集实现,删除无用代码,优化代码结构

* feature: 每天凌晨00:00扫描所有数据集,检查数据集是否超过了预设的保留天数,超出保留天数的数据集调用删除接口进行删除

* fix: 修改删除数据集文件的逻辑,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录

* fix: 增加参数校验和接口定义,删除不使用的接口

* fix: 数据集统计数据默认为0

* feature: 数据集状态增加流转,创建时为草稿状态,上传文件或者归集文件后修改为活动状态

* refactor: 修改分页查询归集任务的代码

* fix: 更新后重新执行;归集任务执行增加事务控制

* feature: 创建归集任务时能够同步创建数据集,更新归集任务时能更新到指定数据集

* fix: 创建归集任务不需要创建数据集时不应该报错

* fix: 修复删除文件时数据集的统计数据不变动

* feature: 查询数据集详情时能够获取到文件标签分布

* fix: tags为空时不进行分析

* fix: 状态修改为ACTIVE

* fix: 修改解析tag的方法

* feature: 实现创建、分页查询、删除配比任务

* feature: 实现创建、分页查询、删除配比任务的前端交互

* fix: 修复进度计算异常导致的页面报错
This commit is contained in:
hefanli
2025-11-03 10:17:39 +08:00
committed by GitHub
parent 07edf16044
commit 08bd4eca5c
32 changed files with 1894 additions and 1028 deletions

View File

@@ -10,6 +10,7 @@ import com.datamate.common.interfaces.PagedResponse;
import com.datamate.datamanagement.application.DatasetApplicationService;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import jakarta.validation.Valid;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -35,11 +36,13 @@ public class CollectionTaskController{
// Creates a collection task and, when the request carries a dataset definition,
// creates that dataset as well and returns it inside the task response.
//
// The dataset is created exactly once: the converted response object is reused both
// to obtain the id handed to the task service and to populate the returned payload.
// When the request has no dataset, the task is created with a null dataset id and the
// response's dataset stays null; no dataset lookup is attempted in that case.
public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
    CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
    String datasetId = null;
    DatasetResponse dataset = null;
    if (Objects.nonNull(request.getDataset())) {
        // Single creation call; a second call here would persist a duplicate dataset.
        dataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
        datasetId = dataset.getId();
    }
    CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task, datasetId));
    // Reuse the already-converted dataset instead of re-fetching it by an id that may be null.
    response.setDataset(dataset);
    return ResponseEntity.ok().body(response);
}

View File

@@ -119,6 +119,8 @@ public class DatasetApplicationService {
// Loads a dataset by id together with all of its file records.
// Fails with DATASET_NOT_FOUND (via BusinessAssert) when no dataset exists for the id.
public Dataset getDataset(String datasetId) {
    Dataset found = datasetRepository.getById(datasetId);
    BusinessAssert.notNull(found, DataManagementErrorCode.DATASET_NOT_FOUND);
    // Attach the file records so callers can inspect them (e.g. to derive tag statistics).
    found.setFiles(datasetFileRepository.findAllByDatasetId(datasetId));
    return found;
}

View File

@@ -102,6 +102,10 @@ public class DatasetFileApplicationService {
public void deleteDatasetFile(String datasetId, String fileId) {
DatasetFile file = getDatasetFile(datasetId, fileId);
Dataset dataset = datasetRepository.getById(datasetId);
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
datasetFileRepository.removeById(fileId);
dataset.removeFile(file);
datasetRepository.updateById(dataset);
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
if (file.getFilePath().startsWith(dataset.getPath())) {
try {
@@ -111,9 +115,6 @@ public class DatasetFileApplicationService {
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
}
}
datasetFileRepository.removeById(fileId);
dataset.removeFile(file);
datasetRepository.updateById(dataset);
}
/**

View File

@@ -110,7 +110,7 @@ public class FileMetadataService {
.fileType(fileType)
.uploadTime(LocalDateTime.now())
.lastAccessTime(LocalDateTime.now())
.status("UPLOADED")
.status("ACTIVE")
.build();
}

View File

@@ -2,9 +2,13 @@ package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.*;
import org.apache.commons.lang3.StringUtils;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
@@ -25,11 +29,25 @@ public class DatasetFile {
private String fileType; // JPG/PNG/DCM/TXT
private Long fileSize; // bytes
private String checkSum;
private List<String> tags;
private String tags;
private String metadata;
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
private LocalDateTime uploadTime;
private LocalDateTime lastAccessTime;
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
/** Shared JSON mapper for tag parsing; created once instead of once per call. */
private static final ObjectMapper TAG_MAPPER = new ObjectMapper();

/**
 * Parses the {@code tags} field, which is read as a JSON array, into a list of tag strings.
 *
 * <p>Parsing is best-effort: a null or blank value, malformed JSON, a JSON {@code null}
 * or content that cannot be bound to a list of strings all yield an empty list rather
 * than an exception, so a single bad record cannot break aggregate computations.
 *
 * @return the parsed tags, never {@code null}
 */
public List<String> analyzeTag() {
    if (StringUtils.isBlank(tags)) {
        // Nothing to parse; avoid relying on an exception for the common "no tags" case.
        return Collections.emptyList();
    }
    try {
        // Bind to List<String> explicitly so that non-string elements are coerced or
        // rejected here instead of surfacing later as a ClassCastException in callers.
        List<String> parsed = TAG_MAPPER.readValue(
                tags, TAG_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class));
        return parsed == null ? Collections.emptyList() : parsed;
    } catch (Exception e) {
        // Deliberately tolerant: unparseable tag content is treated as "no tags".
        return Collections.emptyList();
    }
}
}

View File

@@ -1,33 +0,0 @@
package com.datamate.datamanagement.domain.model.dataset;
/**
 * Status constants holder - a single place for all status string values.
 */
public final class StatusConstants {

    /** Not instantiable: this type only groups constants. */
    private StatusConstants() {}

    /**
     * Dataset statuses.
     */
    public static final class DatasetStatuses {

        /** Not instantiable: this type only groups constants. */
        private DatasetStatuses() {}

        public static final String DRAFT = "DRAFT";
        public static final String ACTIVE = "ACTIVE";
        public static final String ARCHIVED = "ARCHIVED";
        public static final String PROCESSING = "PROCESSING";
    }

    /**
     * Dataset file statuses.
     */
    public static final class DatasetFileStatuses {

        /** Not instantiable: this type only groups constants. */
        private DatasetFileStatuses() {}

        public static final String UPLOADED = "UPLOADED";
        public static final String PROCESSING = "PROCESSING";
        public static final String COMPLETED = "COMPLETED";
        public static final String ERROR = "ERROR";
    }
}

View File

@@ -1,5 +1,7 @@
package com.datamate.datamanagement.interfaces.converter;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
@@ -7,11 +9,16 @@ import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.common.domain.model.ChunkUploadRequest;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.collections4.CollectionUtils;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* 数据集文件转换器
@@ -26,6 +33,7 @@ public interface DatasetConverter {
*/
@Mapping(source = "sizeBytes", target = "totalSize")
@Mapping(source = "path", target = "targetLocation")
@Mapping(source = "files", target = "distribution", qualifiedByName = "getDistribution")
DatasetResponse convertToResponse(Dataset dataset);
/**
@@ -49,4 +57,28 @@ public interface DatasetConverter {
* 将数据集文件转换为响应
*/
DatasetFileResponse convertToResponse(DatasetFile datasetFile);
/**
 * Builds the tag distribution of a set of dataset files: for every tag returned by
 * {@link DatasetFile#analyzeTag()}, the number of times it occurs across the files.
 *
 * @param datasetFiles dataset files to inspect; may be null or empty
 * @return a mutable map from tag to occurrence count, empty when there is nothing to count
 */
@Named("getDistribution")
default Map<String, Long> getDistribution(List<DatasetFile> datasetFiles) {
    Map<String, Long> tagCounts = new HashMap<>();
    if (CollectionUtils.isNotEmpty(datasetFiles)) {
        for (DatasetFile file : datasetFiles) {
            List<String> fileTags = file.analyzeTag();
            // Files without any tags contribute nothing to the distribution.
            if (CollectionUtils.isNotEmpty(fileTags)) {
                for (String tag : fileTags) {
                    tagCounts.merge(tag, 1L, Long::sum);
                }
            }
        }
    }
    return tagCounts;
}
}

View File

@@ -5,6 +5,7 @@ import lombok.Setter;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
/**
* 数据集响应DTO
@@ -42,8 +43,8 @@ public class DatasetResponse {
private LocalDateTime updatedAt;
/** 创建者 */
private String createdBy;
/**
* 更新者
*/
/** 更新者 */
private String updatedBy;
/** 分布 */
private Map<String, Long> distribution ;
}

View File

@@ -1,4 +1,4 @@
import type { RatioTask } from "@/pages/RatioTask/ratio";
import type { RatioTask } from "@/pages/RatioTask/ratio.model.ts";
export const mockRatioTasks: RatioTask[] = [
{

View File

@@ -49,11 +49,13 @@ export interface Dataset {
status: DatasetStatus;
size?: string;
itemCount?: number;
fileCount?: number;
createdBy: string;
createdAt: string;
updatedAt: string;
tags: string[];
targetLocation?: string;
distribution?: Record<string, number>;
}
export interface TagItem {

View File

@@ -0,0 +1,314 @@
import { useState } from "react";
import { Button, Card, Form, Divider, message } from "antd";
import { ArrowLeft, Play, BarChart3, Shuffle, PieChart } from "lucide-react";
import { createRatioTaskUsingPost } from "@/pages/RatioTask/ratio.api.ts";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
import { useNavigate } from "react-router";
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
export default function CreateRatioTask() {
const navigate = useNavigate();
const [form] = Form.useForm();
// 配比任务相关状态
const [ratioTaskForm, setRatioTaskForm] = useState({
name: "",
description: "",
ratioType: "dataset" as "dataset" | "label",
selectedDatasets: [] as string[],
ratioConfigs: [] as any[],
totalTargetCount: 10000,
autoStart: true,
});
const [datasets, setDatasets] = useState<Dataset[]>([]);
const [creating, setCreating] = useState(false);
const [distributions, setDistributions] = useState<Record<string, Record<string, number>>>({});
/**
 * Validates the form, converts the configured ratio items into the request payload
 * and submits the ratio task. On success the user is sent back to the task list.
 *
 * Validation failures are left to the form's own inline messages; any other failure
 * (for example a rejected API call) is reported instead of being silently dropped.
 */
const handleCreateRatioTask = async () => {
  try {
    const values = await form.validateFields();
    if (!ratioTaskForm.ratioConfigs.length) {
      message.error("请配置配比项");
      return;
    }
    // Build request payload
    const ratio_method = ratioTaskForm.ratioType === "dataset" ? "DATASET" : "TAG";
    const totals = String(values.totalTargetCount);
    const config = ratioTaskForm.ratioConfigs.map((c) => {
      if (ratio_method === "DATASET") {
        return {
          datasetId: String(c.source),
          counts: String(c.quantity ?? 0),
          filter_conditions: "",
        };
      }
      // TAG mode: source key like `${datasetId}_${label}`; split on the first underscore.
      const source = String(c.source || "");
      const idx = source.indexOf("_");
      const datasetId = idx > 0 ? source.slice(0, idx) : source;
      const label = idx > 0 ? source.slice(idx + 1) : "";
      return {
        datasetId,
        counts: String(c.quantity ?? 0),
        filter_conditions: label ? JSON.stringify({ label }) : "",
      };
    });

    setCreating(true);
    await createRatioTaskUsingPost({
      name: values.name,
      description: values.description,
      totals,
      ratio_method,
      config,
    });
    message.success("配比任务创建成功");
    navigate("/data/synthesis/ratio-task");
  } catch (error) {
    // antd rejects validateFields() with an object carrying `errorFields`; those errors
    // are already rendered next to the inputs. Everything else is a real failure.
    const isValidationError = Array.isArray((error as any)?.errorFields);
    if (!isValidationError) {
      console.error("Failed to create ratio task", error);
      message.error("配比任务创建失败");
    }
  } finally {
    setCreating(false);
  }
};
// dataset selection is handled inside SelectDataset via onSelectedDatasetsChange
/**
 * Dataset mode: sets the quantity taken from one source, inserting or replacing its
 * ratio config entry.
 *
 * The quantity is clamped to the range [0, remaining budget], where the remaining
 * budget is the target total minus what the other entries already use. The percentage
 * is derived from the clamped quantity so the two values always agree.
 *
 * @param source   key of the config entry (the dataset id in dataset mode)
 * @param quantity requested quantity for that source
 */
const updateRatioConfig = (source: string, quantity: number) => {
  setRatioTaskForm((prev) => {
    const existingIndex = prev.ratioConfigs.findIndex(
      (config) => config.source === source
    );
    const totalOtherQuantity = prev.ratioConfigs
      .filter((config) => config.source !== source)
      .reduce((sum, config) => sum + config.quantity, 0);

    const cappedQuantity = Math.max(
      0,
      Math.min(quantity, prev.totalTargetCount - totalOtherQuantity)
    );

    const newConfig = {
      id: source,
      name: source,
      type: prev.ratioType,
      quantity: cappedQuantity,
      percentage: Math.round((cappedQuantity / prev.totalTargetCount) * 100),
      source,
    };

    if (existingIndex >= 0) {
      const newConfigs = [...prev.ratioConfigs];
      newConfigs[existingIndex] = newConfig;
      return { ...prev, ratioConfigs: newConfigs };
    } else {
      return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
    }
  });
};
/**
 * Splits the target total evenly across the selected datasets and replaces all
 * existing ratio configs with the result. When the total does not divide evenly,
 * the first datasets in selection order each receive one extra item.
 *
 * NOTE(review): the generated entries are keyed by dataset id regardless of the
 * current ratio type - confirm this is the intended behaviour in label mode.
 */
const generateAutoRatio = () => {
  const { selectedDatasets, totalTargetCount, ratioType } = ratioTaskForm;
  const datasetCount = selectedDatasets.length;
  if (datasetCount === 0) return;

  const evenShare = Math.floor(totalTargetCount / datasetCount);
  const leftover = totalTargetCount % datasetCount;

  const newConfigs = selectedDatasets.map((datasetId, position) => {
    // The first `leftover` datasets absorb the remainder, one item each.
    const quantity = evenShare + (position < leftover ? 1 : 0);
    const percentage = Math.round((quantity / totalTargetCount) * 100);
    return {
      id: datasetId,
      name: datasetId,
      type: ratioType,
      quantity,
      percentage,
      source: datasetId,
    };
  });

  setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: newConfigs }));
};
/**
 * Label mode: sets the quantity for one label of one dataset, inserting or replacing
 * the entry keyed by `${datasetId}_${label}`.
 *
 * The quantity is clamped to [0, min(remaining budget, number of items carrying the
 * label)]; when the label's count is unknown it is treated as unbounded.
 */
const updateLabelRatioConfig = (datasetId: string, label: string, quantity: number) => {
  const sourceKey = `${datasetId}_${label}`;
  setRatioTaskForm((prev) => {
    const position = prev.ratioConfigs.findIndex((entry) => entry.source === sourceKey);

    // Budget already consumed by every entry other than the one being edited.
    const usedByOthers = prev.ratioConfigs.reduce(
      (sum, entry) => (entry.source !== sourceKey ? sum + entry.quantity : sum),
      0
    );

    const labelCounts = distributions[datasetId] || {};
    const available = labelCounts[label] ?? Infinity;
    const upperBound = Math.min(quantity, prev.totalTargetCount - usedByOthers, available);
    const cappedQuantity = Math.max(0, upperBound);

    const newConfig = {
      id: sourceKey,
      name: label,
      type: "label",
      quantity: cappedQuantity,
      percentage: Math.round((cappedQuantity / prev.totalTargetCount) * 100),
      source: sourceKey,
    };

    const nextConfigs = [...prev.ratioConfigs];
    if (position < 0) {
      nextConfigs.push(newConfig);
    } else {
      nextConfigs[position] = newConfig;
    }
    return { ...prev, ratioConfigs: nextConfigs };
  });
};
/**
 * Mirrors form field changes into the local task state.
 *
 * Uses a functional state update so that changes made by other handlers in the same
 * tick (selected datasets, ratio configs) are not overwritten by a stale snapshot.
 * The target total arrives from `<Input type="number">` as a string, so it is
 * converted back to a number; an unparseable value keeps the previous total.
 */
const handleValuesChange = (_: unknown, allValues: any) => {
  setRatioTaskForm((prev) => {
    const parsedTotal = Number(allValues?.totalTargetCount);
    return {
      ...prev,
      ...allValues,
      totalTargetCount: Number.isFinite(parsedTotal) ? parsedTotal : prev.totalTargetCount,
    };
  });
};
return (
<div className="min-h-screen">
{/* Header */}
<div className="flex items-center justify-between mb-2">
<div className="flex items-center">
<Button
type="text"
onClick={() => navigate("/data/synthesis/ratio-task")}
>
<ArrowLeft className="w-4 h-4 mr-1" />
</Button>
<h1 className="text-xl font-bold bg-clip-text"></h1>
</div>
</div>
<Card className="overflow-y-auto p-2">
<Form
form={form}
initialValues={ratioTaskForm}
onValuesChange={handleValuesChange}
layout="vertical"
>
<div className="grid grid-cols-12 gap-6">
{/* 左侧:数据集选择 */}
<SelectDataset
selectedDatasets={ratioTaskForm.selectedDatasets}
ratioType={ratioTaskForm.ratioType}
onRatioTypeChange={(value) => setRatioTaskForm({ ...ratioTaskForm, ratioType: value, ratioConfigs: [] })}
onSelectedDatasetsChange={(next) => {
setRatioTaskForm((prev) => ({
...prev,
selectedDatasets: next,
ratioConfigs: prev.ratioConfigs.filter((c) => {
const id = String(c.source);
// keep only items whose dataset id remains selected
const dsId = id.includes("_") ? id.split("_")[0] : id;
return next.includes(dsId);
}),
}));
}}
onDistributionsChange={(next) => setDistributions(next)}
onDatasetsChange={(list) => setDatasets(list)}
/>
{/* 右侧:配比配置 */}
<div className="col-span-7">
<h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
<PieChart className="w-5 h-5" />
</h2>
<Card>
<div className="flex items-center justify-between mb-4">
<div>
<span className="flex items-center gap-2 font-semibold">
<BarChart3 className="w-5 h-5" />
</span>
<div className="text-gray-500 text-xs">
</div>
</div>
<Button
icon={<Shuffle />}
size="small"
onClick={generateAutoRatio}
disabled={ratioTaskForm.selectedDatasets.length === 0}
>
</Button>
</div>
<BasicInformation totalTargetCount={ratioTaskForm.totalTargetCount} />
<RatioConfig
ratioType={ratioTaskForm.ratioType}
selectedDatasets={ratioTaskForm.selectedDatasets}
datasets={datasets}
ratioConfigs={ratioTaskForm.ratioConfigs as any}
totalTargetCount={ratioTaskForm.totalTargetCount}
distributions={distributions}
onUpdateDatasetQuantity={(datasetId, quantity) => updateRatioConfig(datasetId, quantity)}
onUpdateLabelQuantity={(datasetId, label, quantity) => updateLabelRatioConfig(datasetId, label, quantity)}
/>
{/* 配比预览 */}
{ratioTaskForm.ratioConfigs.length > 0 && (
<div className="mb-4">
<span className="text-sm font-medium"></span>
<div className="p-3 bg-gray-50 rounded-lg">
<div className="grid grid-cols-2 gap-4 text-sm">
<div>
<span className="text-gray-500">:</span>
<span className="ml-2 font-medium">
{ratioTaskForm.ratioConfigs
.reduce((sum, config) => sum + config.quantity, 0)
.toLocaleString()}
</span>
</div>
<div>
<span className="text-gray-500">:</span>
<span className="ml-2 font-medium">
{ratioTaskForm.totalTargetCount.toLocaleString()}
</span>
</div>
<div>
<span className="text-gray-500">:</span>
<span className="ml-2 font-medium">
{ratioTaskForm.ratioConfigs.length}
</span>
</div>
</div>
</div>
</div>
)}
<Divider />
<div className="flex justify-end gap-2">
<Button
onClick={() => navigate("/data/synthesis/ratio-task")}
>
</Button>
<Button
type="primary"
onClick={handleCreateRatioTask}
loading={creating}
disabled={
!ratioTaskForm.name ||
ratioTaskForm.ratioConfigs.length === 0
}
>
<Play className="w-4 h-4 mr-2" />
</Button>
</div>
</Card>
</div>
</div>
</Form>
</Card>
</div>
);
}

View File

@@ -0,0 +1,34 @@
import React from "react";
import { Form, Input } from "antd";
const { TextArea } = Input;
/** Props accepted by BasicInformation. */
interface BasicInformationProps {
  /**
   * Current target total held by the parent. Accepted for interface compatibility;
   * the component itself does not read it - the field value is bound through the
   * `totalTargetCount` Form.Item name.
   */
  totalTargetCount: number;
}

/**
 * Form items for a ratio task's basic data: name (required), target total (required)
 * and an optional description. The items carry `name` bindings, so they are meant to
 * be rendered inside an antd Form that owns the values.
 */
const BasicInformation: React.FC<BasicInformationProps> = () => {
  return (
    <div className="grid grid-cols-2 gap-4 mb-4">
      <Form.Item
        label="任务名称"
        name="name"
        rules={[{ required: true, message: "请输入配比任务名称" }]}
      >
        <Input placeholder="输入配比任务名称" />
      </Form.Item>
      <Form.Item
        label="目标总数量"
        name="totalTargetCount"
        rules={[{ required: true, message: "请输入目标总数量" }]}
      >
        <Input type="number" placeholder="目标总数量" min={1} />
      </Form.Item>
      <Form.Item label="任务描述" name="description" className="col-span-2">
        <TextArea placeholder="描述配比任务的目的和要求(可选)" rows={2} />
      </Form.Item>
    </div>
  );
};
export default BasicInformation;

View File

@@ -0,0 +1,132 @@
import React from "react";
import { Badge, Card, Input, Progress } from "antd";
import { BarChart3 } from "lucide-react";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
/** One configured ratio entry as rendered by RatioConfig. */
interface RatioConfigItem {
id: string;
name: string;
type: "dataset" | "label";
/** Number of items to take for this entry. */
quantity: number;
/** Display percentage for this entry. */
percentage: number;
/** Lookup key: the dataset id in dataset mode, `${datasetId}_${label}` in label mode. */
source: string;
}
/** Props of the RatioConfig component. */
interface RatioConfigProps {
/** Selects which editor is rendered per dataset: one quantity input, or one input per label. */
ratioType: "dataset" | "label";
/** Ids of the selected datasets; one card is rendered per id. */
selectedDatasets: string[];
/** Dataset records used to resolve each selected id to its name and file count. */
datasets: Dataset[];
ratioConfigs: RatioConfigItem[];
totalTargetCount: number;
/** Label -> count map per dataset id; a missing entry is rendered differently from an empty one. */
distributions: Record<string, Record<string, number>>;
/** Called with the dataset id and the numeric input value when a dataset quantity is edited. */
onUpdateDatasetQuantity: (datasetId: string, quantity: number) => void;
/** Called with the dataset id, label and numeric input value when a label quantity is edited. */
onUpdateLabelQuantity: (datasetId: string, label: string, quantity: number) => void;
}
/**
 * Renders the quantity editors for every selected dataset, plus a running
 * "configured / target" summary. The component holds no state of its own: all edits
 * are reported through the two onUpdate* callbacks.
 */
const RatioConfig: React.FC<RatioConfigProps> = ({
ratioType,
selectedDatasets,
datasets,
ratioConfigs,
totalTargetCount,
distributions,
onUpdateDatasetQuantity,
onUpdateLabelQuantity,
}) => {
// Sum of all configured quantities; a missing or falsy quantity counts as 0.
const totalConfigured = ratioConfigs.reduce((sum, c) => sum + (c.quantity || 0), 0);
return (
<div className="mb-4">
<div className="flex items-center justify-between">
<span className="text-sm font-medium"></span>
<span className="text-xs text-gray-500">
: {totalConfigured} / {totalTargetCount}
</span>
</div>
{selectedDatasets.length === 0 ? (
<div className="text-center py-8 text-gray-500">
<BarChart3 className="w-12 h-12 mx-auto mb-2 text-gray-300" />
<p className="text-sm"></p>
</div>
) : (
<div style={{ maxHeight: 500, overflowY: "auto" }}>
{selectedDatasets.map((datasetId) => {
// Resolve the selected id against the provided dataset records; ids are compared as strings.
const dataset = datasets.find((d) => String(d.id) === datasetId);
// Dataset-level entry (keyed by the dataset id); drives the header percentage and the dataset-mode input.
const config = ratioConfigs.find((c) => c.source === datasetId);
const currentQuantity = config?.quantity || 0;
// A selected id with no matching record in `datasets` renders nothing.
// NOTE(review): if `datasets` only holds the currently loaded page, selections from other pages would be hidden here - confirm against the caller.
if (!dataset) return null;
return (
<Card key={datasetId} size="small" className="mb-2">
<div className="flex items-center justify-between mb-3">
<div className="flex items-center gap-2">
<span className="font-medium text-sm">{dataset.name}</span>
<Badge color="gray">{dataset.fileCount}</Badge>
</div>
<div className="text-xs text-gray-500">{config?.percentage || 0}%</div>
</div>
{ratioType === "dataset" ? (
<div>
<div className="flex items-center gap-2 mb-2">
<span className="text-xs">:</span>
<Input
type="number"
value={currentQuantity}
onChange={(e) => onUpdateDatasetQuantity(datasetId, Number(e.target.value))}
style={{ width: 80 }}
min={0}
max={Math.min(dataset.fileCount || 0, totalTargetCount)}
/>
<span className="text-xs text-gray-500"></span>
</div>
<Progress
percent={Math.round((currentQuantity / totalTargetCount) * 100)}
size="small"
/>
</div>
) : (
<div>
{!distributions[String(dataset.id)] ? (
<div className="text-xs text-gray-400">...</div>
) : Object.entries(distributions[String(dataset.id)]).length === 0 ? (
<div className="text-xs text-gray-400"></div>
) : (
<div className="flex flex-col gap-2">
{Object.entries(distributions[String(dataset.id)]).map(([label, count]) => {
// Label-level entries are keyed by `${datasetId}_${label}`.
const sourceKey = `${datasetId}_${label}`;
const labelConfig = ratioConfigs.find((c) => c.source === sourceKey);
const labelQuantity = labelConfig?.quantity || 0;
return (
<div key={label} className="flex items-center justify-between gap-2">
<div className="flex items-center gap-2">
<Badge color="gray">{label}</Badge>
<span className="text-xs text-gray-500">{count}</span>
</div>
<div className="flex items-center gap-2">
<span className="text-xs">:</span>
<Input
type="number"
value={labelQuantity}
onChange={(e) => onUpdateLabelQuantity(datasetId, label, Number(e.target.value))}
style={{ width: 80 }}
min={0}
max={Math.min(Number(count) || 0, totalTargetCount)}
/>
<span className="text-xs text-gray-500"></span>
</div>
</div>
);
})}
</div>
)}
</div>
)}
</Card>
);
})}
</div>
)}
</div>
);
};
export default RatioConfig;

View File

@@ -0,0 +1,250 @@
import React, { useEffect, useState } from "react";
import { Badge, Button, Card, Checkbox, Input, Pagination, Select } from "antd";
import { Database, Search as SearchIcon } from "lucide-react";
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
import { queryDatasetsUsingGet, queryDatasetByIdUsingGet, queryDatasetStatisticsByIdUsingGet } from "@/pages/DataManagement/dataset.api.ts";
/** Props of the SelectDataset component. */
interface SelectDatasetProps {
/** Ids (as strings) of the currently selected datasets; the selection is owned by the parent. */
selectedDatasets: string[];
/** Current ratio mode; label distributions are only fetched and shown in "label" mode. */
ratioType: "dataset" | "label";
/** Called with the new mode when the user changes the mode selector. */
onRatioTypeChange: (val: "dataset" | "label") => void;
/** Called with the complete next selection whenever a dataset is toggled or the selection is cleared. */
onSelectedDatasetsChange: (next: string[]) => void;
/** Called with the full label -> count cache (keyed by dataset id) after distributions are fetched. */
onDistributionsChange?: (next: Record<string, Record<string, number>>) => void;
/** Called with the dataset list of the page that was just loaded. */
onDatasetsChange?: (list: Dataset[]) => void;
}
const SelectDataset: React.FC<SelectDatasetProps> = ({
selectedDatasets,
ratioType,
onRatioTypeChange,
onSelectedDatasetsChange,
onDistributionsChange,
onDatasetsChange,
}) => {
const [datasets, setDatasets] = useState<Dataset[]>([]);
const [loading, setLoading] = useState(false);
const [searchQuery, setSearchQuery] = useState("");
const [pagination, setPagination] = useState({ page: 1, size: 10, total: 0 });
const [distributions, setDistributions] = useState<Record<string, Record<string, number>>>({});
// Fetch the dataset list whenever the page, page size or search keyword changes.
useEffect(() => {
  // Set by the cleanup below once this effect run is superseded, so that an older,
  // slower response can neither overwrite newer results nor clear the loading flag.
  let cancelled = false;

  const fetchDatasets = async () => {
    try {
      setLoading(true);
      const { data } = await queryDatasetsUsingGet({
        page: pagination.page,
        size: pagination.size,
        keyword: searchQuery?.trim() || undefined,
      });
      if (cancelled) return;
      // The list and total are read from either of two response shapes.
      const list = data?.content || data?.data || [];
      setDatasets(list);
      onDatasetsChange?.(list);
      setPagination((prev) => ({ ...prev, total: data?.totalElements ?? data?.total ?? 0 }));
    } catch (error) {
      // Keep the previously shown list; report instead of leaving an unhandled rejection.
      if (!cancelled) {
        console.error("Failed to load datasets", error);
      }
    } finally {
      if (!cancelled) {
        setLoading(false);
      }
    }
  };

  fetchDatasets();
  return () => {
    cancelled = true;
  };
}, [pagination.page, pagination.size, searchQuery]);
// In label mode, load the label distribution of every listed dataset that is not cached yet.
useEffect(() => {
  // Turns a raw payload into a label -> count record: arrays of entries are folded
  // into a record, plain objects are used as-is, anything else yields undefined.
  const toDistribution = (raw: any): Record<string, number> | undefined => {
    if (Array.isArray(raw)) {
      const counts: Record<string, number> = {};
      raw.forEach((entry: any) => {
        const key = entry?.label ?? entry?.name ?? entry?.tag ?? entry?.key;
        const val = entry?.count ?? entry?.value ?? entry?.num ?? entry?.total;
        if (key != null && typeof val === "number") counts[String(key)] = val;
      });
      return counts;
    }
    if (raw && typeof raw === "object") {
      return raw as Record<string, number>;
    }
    return undefined;
  };

  // Statistics lookup for one dataset; a failed request is reported as `stats: null`.
  const loadStatistics = async (id: string) => {
    try {
      const statRes = await queryDatasetStatisticsByIdUsingGet(id);
      return { id, stats: statRes?.data };
    } catch {
      return { id, stats: null };
    }
  };

  // Fallback: read the distribution from the dataset detail response.
  const loadFromDetail = async (id: string): Promise<Record<string, number> | undefined> => {
    try {
      const detRes = await queryDatasetByIdUsingGet(id);
      const det = detRes?.data;
      if (!det) return undefined;
      const raw =
        (det as any).distribution ||
        (det as any).labelDistribution ||
        (det as any).tagDistribution ||
        (det as any).label_stats ||
        (det as any).labels ||
        undefined;
      return toDistribution(raw);
    } catch {
      return undefined;
    }
  };

  const fetchDistributions = async () => {
    const inLabelMode = ratioType === "label";
    if (!inLabelMode || !datasets?.length) return;

    const idsToFetch = datasets
      .map((d) => String(d.id))
      .filter((id) => !distributions[id]);
    if (!idsToFetch.length) return;

    try {
      const results = await Promise.all(idsToFetch.map((id) => loadStatistics(id)));
      const next: Record<string, Record<string, number>> = { ...distributions };

      for (const { id, stats } of results) {
        let dist: Record<string, number> | undefined = undefined;
        if (stats) {
          // Take the first statistics field that holds an object or array.
          const candidates: any[] = [
            (stats as any).labelDistribution,
            (stats as any).tagDistribution,
            (stats as any).label_stats,
            (stats as any).labels,
            (stats as any).distribution,
          ];
          const picked = candidates.find((c) => c && (typeof c === "object" || Array.isArray(c)));
          dist = toDistribution(picked);
        }
        if (!dist) {
          dist = await loadFromDetail(id);
        }
        // An empty record marks "fetched, no labels" so the id is not requested again.
        next[String(id)] = dist || {};
      }

      setDistributions(next);
      onDistributionsChange?.(next);
    } catch {
      // ignore
    }
  };

  fetchDistributions();
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [ratioType, datasets]);
/**
 * Adds the dataset to, or removes it from, the selection and reports the complete
 * next selection to the parent. Adding an id that is already selected does not
 * create a duplicate.
 */
const onToggleDataset = (datasetId: string, checked: boolean) => {
  const next = checked
    ? Array.from(new Set([...selectedDatasets, datasetId]))
    : selectedDatasets.filter((id) => id !== datasetId);
  onSelectedDatasetsChange(next);
};
/** Reports an empty selection to the parent, deselecting every dataset. */
const onClearSelection = () => {
  const nothingSelected: string[] = [];
  onSelectedDatasetsChange(nothingSelected);
};
return (
<div className="col-span-5">
<h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
<Database className="w-5 h-5" />
</h2>
<Card>
<div className="flex items-center gap-4 mb-4">
<span className="text-sm">:</span>
<Select
style={{ width: 120 }}
value={ratioType}
onChange={(v) => onRatioTypeChange(v)}
options={[
{ label: "按数据集", value: "dataset" },
{ label: "按标签", value: "label" },
]}
/>
</div>
<Input
prefix={<SearchIcon className="text-gray-400" />}
placeholder="搜索数据集"
value={searchQuery}
onChange={(e) => {
setSearchQuery(e.target.value);
setPagination((p) => ({ ...p, page: 1 }));
}}
/>
<div style={{ maxHeight: 500, overflowY: "auto" }}>
{loading && (
<div className="text-center text-gray-500 py-8">...</div>
)}
{!loading &&
datasets.map((dataset) => {
const idStr = String(dataset.id);
const checked = selectedDatasets.includes(idStr);
return (
<Card
key={dataset.id}
size="small"
className={`mb-2 cursor-pointer ${checked ? "border-blue-500" : "hover:border-blue-200"}`}
onClick={() => onToggleDataset(idStr, !checked)}
>
<div className="flex items-start gap-3">
<Checkbox
checked={checked}
onChange={(e) => onToggleDataset(idStr, e.target.checked)}
/>
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2">
<span className="font-medium text-sm truncate">{dataset.name}</span>
<Badge color="blue">{dataset.datasetType}</Badge>
</div>
<div className="text-xs text-gray-500 mt-1">{dataset.description}</div>
<div className="flex items-center gap-4 mt-2 text-xs text-gray-500">
<span>{dataset.fileCount}</span>
<span>{dataset.size}</span>
</div>
{ratioType === "label" && (
<div className="mt-2">
{distributions[idStr] ? (
Object.entries(distributions[idStr]).length > 0 ? (
<div className="flex flex-wrap gap-2 text-xs">
{Object.entries(distributions[idStr])
.slice(0, 8)
.map(([tag, count]) => (
<Badge key={tag} color="gray">{`${tag}: ${count}`}</Badge>
))}
</div>
) : (
<div className="text-xs text-gray-400"></div>
)
) : (
<div className="text-xs text-gray-400">...</div>
)}
</div>
)}
</div>
</div>
</Card>
);
})}
</div>
<div className="flex justify-between mt-3 items-center">
<span className="text-sm text-gray-600"> {selectedDatasets.length} </span>
<div className="flex items-center gap-3">
<Button size="small" onClick={onClearSelection}>
</Button>
<Pagination
size="small"
current={pagination.page}
pageSize={pagination.size}
total={pagination.total}
showSizeChanger
onChange={(p, ps) => setPagination((prev) => ({ ...prev, page: p, size: ps }))}
/>
</div>
</div>
</Card>
</div>
);
};
export default SelectDataset;

View File

@@ -1,571 +0,0 @@
import { useState } from "react";
import {
Button,
Card,
Input,
Select,
Badge,
Progress,
Checkbox,
Switch,
Form,
Divider,
message,
} from "antd";
import {
ArrowLeft,
Play,
Search as SearchIcon,
Database,
BarChart3,
Shuffle,
PieChart,
} from "lucide-react";
import type { RatioConfig, RatioTask } from "@/pages/RatioTask/ratio";
import { mockRatioTasks } from "@/mock/ratio";
import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { useNavigate } from "react-router";
import DevelopmentInProgress from "@/components/DevelopmentInProgress";
const { TextArea } = Input;
const { Option } = Select;
export default function CreateRatioTask() {
return <DevelopmentInProgress showTime="2025.11.30" />;
const navigate = useNavigate();
const [form] = Form.useForm();
// 配比任务相关状态
const [ratioTaskForm, setRatioTaskForm] = useState({
name: "",
description: "",
ratioType: "dataset" as "dataset" | "label",
selectedDatasets: [] as string[],
ratioConfigs: [] as RatioConfig[],
totalTargetCount: 10000,
autoStart: true,
});
const [tasks, setTasks] = useState<RatioTask[]>(mockRatioTasks);
const [datasets] = useState<Dataset[]>([]);
const handleCreateRatioTask = async () => {
try {
const values = await form.validateFields();
if (!ratioTaskForm.ratioConfigs.length) {
message.error("请配置配比项");
return;
}
const newTask: RatioTask = {
id: Date.now(),
name: values.name,
status: ratioTaskForm.autoStart ? "pending" : "paused",
progress: 0,
sourceDatasets: ratioTaskForm.selectedDatasets,
targetCount: values.totalTargetCount,
generatedCount: 0,
createdAt: new Date().toISOString().split("T")[0],
ratioType: ratioTaskForm.ratioType,
estimatedTime: "预计 20 分钟",
ratioConfigs: ratioTaskForm.ratioConfigs,
};
setTasks([newTask, ...tasks]);
setRatioTaskForm({
name: "",
description: "",
ratioType: "dataset",
selectedDatasets: [],
ratioConfigs: [],
totalTargetCount: 10000,
autoStart: true,
});
form.resetFields();
message.success("配比任务创建成功");
navigate("/data/ratio-task");
} catch {
// 校验失败
}
};
const handleDatasetSelection = (datasetId: string, checked: boolean) => {
if (checked) {
setRatioTaskForm((prev) => ({
...prev,
selectedDatasets: [...prev.selectedDatasets, datasetId],
}));
} else {
setRatioTaskForm((prev) => ({
...prev,
selectedDatasets: prev.selectedDatasets.filter(
(id) => id !== datasetId
),
ratioConfigs: prev.ratioConfigs.filter(
(config) => config.source !== datasetId
),
}));
}
};
const updateRatioConfig = (source: string, quantity: number) => {
setRatioTaskForm((prev) => {
const existingIndex = prev.ratioConfigs.findIndex(
(config) => config.source === source
);
const totalOtherQuantity = prev.ratioConfigs
.filter((config) => config.source !== source)
.reduce((sum, config) => sum + config.quantity, 0);
const newConfig: RatioConfig = {
id: source,
name: source,
type: prev.ratioType,
quantity: Math.min(
quantity,
prev.totalTargetCount - totalOtherQuantity
),
percentage: Math.round((quantity / prev.totalTargetCount) * 100),
source,
};
if (existingIndex >= 0) {
const newConfigs = [...prev.ratioConfigs];
newConfigs[existingIndex] = newConfig;
return { ...prev, ratioConfigs: newConfigs };
} else {
return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
}
});
};
const generateAutoRatio = () => {
const selectedCount = ratioTaskForm.selectedDatasets.length;
if (selectedCount === 0) return;
const baseQuantity = Math.floor(
ratioTaskForm.totalTargetCount / selectedCount
);
const remainder = ratioTaskForm.totalTargetCount % selectedCount;
const newConfigs: RatioConfig[] = ratioTaskForm.selectedDatasets.map(
(datasetId, index) => {
const quantity = baseQuantity + (index < remainder ? 1 : 0);
return {
id: datasetId,
name: datasetId,
type: ratioTaskForm.ratioType,
quantity,
percentage: Math.round(
(quantity / ratioTaskForm.totalTargetCount) * 100
),
source: datasetId,
};
}
);
setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: newConfigs }));
};
/**
 * Mirrors the form's current field values into the ratio task state.
 *
 * Uses a functional update so the values are merged onto the latest state
 * rather than onto the snapshot captured by this render; otherwise another
 * state update queued in the same event could be overwritten.
 *
 * @param _         the changed values (unused)
 * @param allValues all current form values
 */
const handleValuesChange = (_, allValues) => {
  setRatioTaskForm((prev) => ({ ...prev, ...allValues }));
};
// Target total normalised to a number (the number input can deliver a string).
const safeTargetTotal = Number(ratioTaskForm.totalTargetCount) || 0;
// Share of the target total as a whole percentage; 0 when no valid total is
// set, so the progress bars never receive NaN or Infinity.
const toTargetPercent = (quantity: number) =>
  safeTargetTotal > 0 ? Math.round((quantity / safeTargetTotal) * 100) : 0;
// Layout: a header with a back button, then a two-column form with dataset
// selection on the left and ratio configuration plus actions on the right.
return (
  <div className="min-h-screen">
    {/* Header */}
    <div className="flex items-center justify-between mb-2">
      <div className="flex items-center">
        <Button
          type="text"
          onClick={() => navigate("/data/synthesis/ratio-task")}
        >
          <ArrowLeft className="w-4 h-4 mr-1" />
        </Button>
        <h1 className="text-xl font-bold bg-clip-text"></h1>
      </div>
    </div>
    <Card className="overflow-y-auto p-2">
      <Form
        form={form}
        initialValues={ratioTaskForm}
        onValuesChange={handleValuesChange}
        layout="vertical"
      >
        <div className="grid grid-cols-12 gap-6">
          {/* Left: dataset selection */}
          <div className="col-span-5">
            <h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
              <Database className="w-5 h-5" />
            </h2>
            <Card>
              <div className="flex items-center gap-4 mb-4">
                <div className="flex items-center gap-2">
                  <span className="text-sm">:</span>
                  <Form.Item name="ratioType" noStyle>
                    <Select
                      style={{ width: 120 }}
                      onChange={(value: "dataset" | "label") =>
                        // Switching the ratio type invalidates existing configs.
                        setRatioTaskForm((prev) => ({
                          ...prev,
                          ratioType: value,
                          ratioConfigs: [],
                        }))
                      }
                    >
                      <Option value="dataset"></Option>
                      <Option value="label"></Option>
                    </Select>
                  </Form.Item>
                </div>
                <Input
                  prefix={<SearchIcon className="text-gray-400" />}
                  placeholder="搜索数据集"
                  style={{ width: 180 }}
                  // search logic can be added here
                />
              </div>
              <div style={{ maxHeight: 500, overflowY: "auto" }}>
                {datasets.map((dataset) => (
                  <Card
                    key={dataset.id}
                    size="small"
                    className={`mb-2 cursor-pointer ${
                      ratioTaskForm.selectedDatasets.includes(dataset.id)
                        ? "border-blue-500"
                        : "hover:border-blue-200"
                    }`}
                    onClick={() =>
                      handleDatasetSelection(
                        dataset.id,
                        !ratioTaskForm.selectedDatasets.includes(dataset.id)
                      )
                    }
                  >
                    <div className="flex items-start gap-3">
                      <Checkbox
                        checked={ratioTaskForm.selectedDatasets.includes(
                          dataset.id
                        )}
                        onChange={(e) =>
                          handleDatasetSelection(dataset.id, e.target.checked)
                        }
                      />
                      <div className="flex-1 min-w-0">
                        <div className="flex items-center gap-2">
                          <span className="font-medium text-sm truncate">
                            {dataset.name}
                          </span>
                          <Badge color="blue">{dataset.type}</Badge>
                        </div>
                        <div className="text-xs text-gray-500 mt-1">
                          {dataset.description}
                        </div>
                        <div className="flex items-center gap-4 mt-2 text-xs text-gray-500">
                          <span>{dataset.records?.toLocaleString()}</span>
                          <span>{dataset.size}</span>
                          <span>{dataset.format}</span>
                        </div>
                        {ratioTaskForm.ratioType === "label" &&
                          dataset.labels && (
                            <div className="flex flex-wrap gap-1 mt-2">
                              {dataset.labels.map((label, index) => (
                                <Badge key={index} color="gray">
                                  {label}
                                </Badge>
                              ))}
                            </div>
                          )}
                      </div>
                    </div>
                  </Card>
                ))}
              </div>
              <div className="flex items-center justify-between p-3 bg-gray-50 rounded-lg mt-4">
                <span className="text-sm text-gray-600">
                  {ratioTaskForm.selectedDatasets.length}
                </span>
                <Button
                  size="small"
                  onClick={() =>
                    // Clear the selection together with every derived config.
                    setRatioTaskForm((prev) => ({
                      ...prev,
                      selectedDatasets: [],
                      ratioConfigs: [],
                    }))
                  }
                >
                </Button>
              </div>
            </Card>
          </div>
          {/* Right: ratio configuration */}
          <div className="col-span-7">
            <h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
              <PieChart className="w-5 h-5" />
            </h2>
            <Card>
              <div className="flex items-center justify-between mb-4">
                <div>
                  <span className="flex items-center gap-2 font-semibold">
                    <BarChart3 className="w-5 h-5" />
                  </span>
                  <div className="text-gray-500 text-xs">
                  </div>
                </div>
                <Button
                  icon={<Shuffle />}
                  size="small"
                  onClick={generateAutoRatio}
                  disabled={ratioTaskForm.selectedDatasets.length === 0}
                >
                </Button>
              </div>
              <div className="grid grid-cols-2 gap-4 mb-4">
                <Form.Item
                  label="任务名称"
                  name="name"
                  rules={[{ required: true, message: "请输入配比任务名称" }]}
                >
                  <Input
                    placeholder="输入配比任务名称"
                    value={ratioTaskForm.name}
                  />
                </Form.Item>
                <Form.Item
                  label="目标总数量"
                  name="totalTargetCount"
                  rules={[{ required: true, message: "请输入目标总数量" }]}
                >
                  <Input
                    type="number"
                    placeholder="目标总数量"
                    min={1}
                    value={ratioTaskForm.totalTargetCount}
                  />
                </Form.Item>
              </div>
              <Form.Item label="任务描述" name="description">
                <TextArea
                  placeholder="描述配比任务的目的和要求(可选)"
                  rows={2}
                  value={ratioTaskForm.description}
                />
              </Form.Item>
              <div className="mb-4">
                <div className="flex items-center justify-between">
                  <span className="text-sm font-medium"></span>
                  <span className="text-xs text-gray-500">
                    :{" "}
                    {ratioTaskForm.ratioConfigs.reduce(
                      (sum, config) => sum + config.quantity,
                      0
                    )}{" "}
                    / {ratioTaskForm.totalTargetCount}
                  </span>
                </div>
                {ratioTaskForm.selectedDatasets.length === 0 ? (
                  <div className="text-center py-8 text-gray-500">
                    <BarChart3 className="w-12 h-12 mx-auto mb-2 text-gray-300" />
                    <p className="text-sm"></p>
                  </div>
                ) : (
                  <div style={{ maxHeight: 500, overflowY: "auto" }}>
                    {ratioTaskForm.selectedDatasets.map((datasetId) => {
                      const dataset = datasets.find(
                        (d) => d.id === datasetId
                      );
                      const config = ratioTaskForm.ratioConfigs.find(
                        (c) => c.source === datasetId
                      );
                      const currentQuantity = config?.quantity || 0;
                      // A selected id with no matching dataset renders nothing.
                      if (!dataset) return null;
                      return (
                        <Card key={datasetId} size="small" className="mb-2">
                          <div className="flex items-center justify-between mb-3">
                            <div className="flex items-center gap-2">
                              <span className="font-medium text-sm">
                                {dataset.name}
                              </span>
                              <Badge color="gray">
                                {dataset.records?.toLocaleString()}
                              </Badge>
                            </div>
                            <div className="text-xs text-gray-500">
                              {config?.percentage || 0}%
                            </div>
                          </div>
                          {ratioTaskForm.ratioType === "dataset" ? (
                            <div>
                              <div className="flex items-center gap-2 mb-2">
                                <span className="text-xs">:</span>
                                <Input
                                  type="number"
                                  value={currentQuantity}
                                  onChange={(e) =>
                                    updateRatioConfig(
                                      datasetId,
                                      Number(e.target.value)
                                    )
                                  }
                                  style={{ width: 80 }}
                                  min={0}
                                  max={ratioTaskForm.totalTargetCount}
                                />
                                <span className="text-xs text-gray-500">
                                </span>
                              </div>
                              <Progress
                                percent={toTargetPercent(currentQuantity)}
                                size="small"
                              />
                            </div>
                          ) : (
                            <div>
                              {dataset.labels?.map((label, index) => {
                                // Label-mode configs are keyed by dataset id + label.
                                const labelConfig =
                                  ratioTaskForm.ratioConfigs.find(
                                    (c) =>
                                      c.source === `${datasetId}_${label}`
                                  );
                                const labelQuantity =
                                  labelConfig?.quantity || 0;
                                return (
                                  <div
                                    key={index}
                                    className="flex items-center gap-2 mb-2"
                                  >
                                    <Badge color="gray">{label}</Badge>
                                    <Input
                                      type="number"
                                      value={labelQuantity}
                                      onChange={(e) =>
                                        updateRatioConfig(
                                          `${datasetId}_${label}`,
                                          Number(e.target.value)
                                        )
                                      }
                                      style={{ width: 70 }}
                                      min={0}
                                    />
                                    <span className="text-xs text-gray-500">
                                    </span>
                                    <Progress
                                      percent={toTargetPercent(labelQuantity)}
                                      size="small"
                                      style={{ width: 80 }}
                                    />
                                    <span className="text-xs text-gray-500 min-w-8">
                                      {toTargetPercent(labelQuantity)}
                                      %
                                    </span>
                                  </div>
                                );
                              })}
                            </div>
                          )}
                        </Card>
                      );
                    })}
                  </div>
                )}
              </div>
              {/* Ratio preview */}
              {ratioTaskForm.ratioConfigs.length > 0 && (
                <div className="mb-4">
                  <span className="text-sm font-medium"></span>
                  <div className="p-3 bg-gray-50 rounded-lg">
                    <div className="grid grid-cols-2 gap-4 text-sm">
                      <div>
                        <span className="text-gray-500">:</span>
                        <span className="ml-2 font-medium">
                          {ratioTaskForm.ratioConfigs
                            .reduce((sum, config) => sum + config.quantity, 0)
                            .toLocaleString()}
                        </span>
                      </div>
                      <div>
                        <span className="text-gray-500">:</span>
                        <span className="ml-2 font-medium">
                          {safeTargetTotal.toLocaleString()}
                        </span>
                      </div>
                      <div>
                        <span className="text-gray-500">:</span>
                        <span className="ml-2 font-medium">
                          {ratioTaskForm.ratioConfigs.length}
                        </span>
                      </div>
                      <div>
                        <span className="text-gray-500">:</span>
                        <span className="ml-2 font-medium"> 20 </span>
                      </div>
                    </div>
                  </div>
                </div>
              )}
              <div className="flex items-center justify-between p-3 border rounded-lg mb-4">
                <div>
                  <span className="text-sm font-medium"></span>
                  <div className="text-xs text-gray-500 mt-1">
                  </div>
                </div>
                <Form.Item name="autoStart" valuePropName="checked" noStyle>
                  <Switch
                    checked={ratioTaskForm.autoStart}
                    onChange={(checked) =>
                      setRatioTaskForm((prev) => ({
                        ...prev,
                        autoStart: checked,
                      }))
                    }
                  />
                </Form.Item>
              </div>
              <Divider />
              <div className="flex justify-end gap-2">
                <Button
                  onClick={() => navigate("/data/synthesis/ratio-task")}
                >
                </Button>
                <Button
                  type="primary"
                  onClick={handleCreateRatioTask}
                  disabled={
                    !ratioTaskForm.name ||
                    ratioTaskForm.ratioConfigs.length === 0
                  }
                >
                  <Play className="w-4 h-4 mr-2" />
                </Button>
              </div>
            </Card>
          </div>
        </div>
      </Form>
    </Card>
  </div>
);
}

View File

@@ -0,0 +1,246 @@
import { useState } from "react";
import { Button, Card, Table, Tooltip, App } from "antd";
import { Plus, Clock, Play, CheckCircle, AlertCircle, Pause, BarChart3 } from "lucide-react";
import { DeleteOutlined } from "@ant-design/icons";
import type { RatioTaskItem } from "@/pages/RatioTask/ratio.model.ts";
import { useNavigate } from "react-router";
import CardView from "@/components/CardView.tsx";
import { SearchControls } from "@/components/SearchControls.tsx";
import { queryRatioTasksUsingGet, deleteRatioTasksUsingDelete } from "@/pages/RatioTask/ratio.api.ts";
import useFetchData from "@/hooks/useFetchData";
/**
 * Ratio task list page.
 *
 * Shows the ratio tasks returned by `queryRatioTasksUsingGet` either as cards
 * or as a table, with keyword search, a status filter and a delete action.
 */
export default function RatioTasksPage() {
  const navigate = useNavigate();
  const [viewMode, setViewMode] = useState<"card" | "list">("card");
  const { message } = App.useApp();
  const { loading, tableData, pagination, searchParams, setSearchParams, handleFiltersChange, fetchData } =
    useFetchData<RatioTaskItem>(queryRatioTasksUsingGet, (d) => d as RatioTaskItem, 30000, true, [], 0);

  // Deletes a single task, then reloads the list.
  const handleDelete = async (id: string) => {
    await deleteRatioTasksUsingDelete([id]);
    message.success("删除成功");
    await fetchData();
  };

  // Maps a task status (case-insensitive) to its label, colour and icon.
  // Unknown or missing statuses fall back to the PENDING presentation.
  const getStatusBadge = (status: string) => {
    const s = (status || "").toUpperCase();
    const completed = {
      label: "已完成",
      color: "#28a745",
      icon: <CheckCircle className="w-4 h-4 inline mr-1" />,
    };
    const statusConfig = {
      PENDING: {
        label: "等待中",
        color: "#f09e10ff",
        icon: <Clock className="w-4 h-4 inline mr-1" />,
      },
      RUNNING: {
        label: "运行中",
        color: "#007bff",
        icon: <Play className="w-4 h-4 inline mr-1" />,
      },
      SUCCESS: completed,
      // The RatioStatus model type names the finished state COMPLETED;
      // accept both spellings so a finished task is never shown as pending.
      COMPLETED: completed,
      FAILED: {
        label: "失败",
        color: "#dc3545",
        icon: <AlertCircle className="w-4 h-4 inline mr-1" />,
      },
      PAUSED: {
        label: "已暂停",
        color: "#6c757d",
        icon: <Pause className="w-4 h-4 inline mr-1" />,
      },
    };
    return statusConfig[s as keyof typeof statusConfig] || statusConfig.PENDING;
  };

  // Row/card operations. Every onClick receives the whole task item.
  const operations = [
    {
      key: "delete",
      label: "删除",
      danger: true,
      confirm: {
        title: "确认删除该数据集?",
        description: "删除后该数据集将无法恢复,请谨慎操作。",
        okText: "删除",
        cancelText: "取消",
        okType: "danger",
      },
      icon: <DeleteOutlined />,
      onClick: (item) => handleDelete(String(item.id)),
    }
  ];

  const columns = [
    {
      title: "任务名称",
      dataIndex: "name",
      key: "name",
    },
    {
      title: "状态",
      dataIndex: "status",
      key: "status",
      render: (v: string) => getStatusBadge(v).label,
    },
    {
      title: "配比方式",
      dataIndex: "ratio_method",
      key: "ratio_method",
    },
    {
      title: "目标数量",
      dataIndex: "totals",
      key: "totals",
    },
    {
      title: "目标数据集",
      dataIndex: "target_dataset_name",
      key: "target_dataset_name",
    },
    {
      title: "创建时间",
      dataIndex: "created_at",
      key: "created_at",
    },
    {
      title: "操作",
      key: "actions",
      render: (_: any, task: RatioTaskItem) => (
        <div className="flex items-center gap-2">
          {operations.map((op) => (
            <Tooltip key={op.key} title={op.label}>
              <Button
                type="text"
                icon={op.icon}
                // Pass the task itself: the operation handlers read `item.id`,
                // so passing only the id would resolve to "undefined".
                onClick={() => op.onClick(task)}
              />
            </Tooltip>
          ))}
        </div>
      ),
    },
  ];

  const renderTableView = () => (
    <Card>
      <Table
        columns={columns}
        dataSource={tableData}
        rowKey="id"
        loading={loading}
        pagination={pagination}
        scroll={{ x: "max-content" }}
        locale={{
          emptyText: (
            <div className="text-center py-8">
              <BarChart3 className="w-12 h-12 text-gray-400 mx-auto mb-4" />
              <h3 className="text-lg font-medium text-gray-900 mb-2">
              </h3>
              <p className="text-gray-500 mb-4">
                {searchParams.keyword || (searchParams.filter?.status?.[0] && searchParams.filter?.status?.[0] !== "all")
                  ? "没有找到匹配的任务"
                  : "开始创建您的第一个配比任务"}
              </p>
              {!searchParams.keyword && (!searchParams.filter?.status?.length || searchParams.filter?.status?.[0] === "all") && (
                <Button
                  onClick={() =>
                    navigate("/data/synthesis/ratio-task/create")
                  }
                  type="primary"
                >
                  <Plus className="w-4 h-4 mr-2" />
                </Button>
              )}
            </div>
          ),
        }}
      />
    </Card>
  );

  const renderCardView = () => (
    <CardView
      loading={loading}
      data={tableData.map((task) => ({
        ...task,
        description: task.ratio_method === "DATASET" ? "按数据集配比" : "按标签配比",
        icon: <BarChart3 className="w-6 h-6" />,
        iconColor: task.ratio_method === "DATASET" ? "bg-blue-100" : "bg-green-100",
        statistics: [
          {
            label: "目标数量",
            value: (task.totals ?? 0).toLocaleString(),
          },
          {
            label: "创建时间",
            value: task.created_at || "-",
          },
        ],
        status: getStatusBadge(task.status),
      }))}
      pagination={pagination}
      operations={operations}
    />
  );

  // Search, filter and view-mode controls
  const searchFilters = [
    {
      key: "status",
      label: "状态筛选",
      options: [
        { label: "全部状态", value: "all" },
        { label: "等待中", value: "PENDING" },
        { label: "运行中", value: "RUNNING" },
        { label: "已完成", value: "SUCCESS" },
        { label: "失败", value: "FAILED" },
        { label: "已暂停", value: "PAUSED" },
      ],
    },
  ];

  // Forwards filter changes from SearchControls to the data hook.
  const handleSearchControlsFiltersChange = (
    filters: Record<string, string[]>
  ) => {
    handleFiltersChange(filters);
  };

  // Switches between the card and the list (table) view.
  const handleViewModeChange = (mode: "card" | "list") => {
    setViewMode(mode === "card" ? "card" : "list");
  };

  return (
    <div className="">
      <div className="flex items-center justify-between">
        <h2 className="text-xl font-bold"></h2>
        <Button
          type="primary"
          onClick={() => navigate("/data/synthesis/ratio-task/create")}
          icon={<Plus className="w-4 h-4" />}
        >
        </Button>
      </div>
      <>
        {/* Search, filter and view-mode controls */}
        <SearchControls
          searchTerm={searchParams.keyword}
          onSearchChange={(keyword) => setSearchParams({ ...searchParams, keyword })}
          searchPlaceholder="搜索任务名称"
          filters={searchFilters}
          onFiltersChange={handleSearchControlsFiltersChange}
          onClearFilters={() => setSearchParams({ ...searchParams, filter: {} })}
          viewMode={viewMode}
          onViewModeChange={handleViewModeChange}
          showViewToggle={true}
        />
        {/* Task list */}
        {viewMode === "list" ? renderTableView() : renderCardView()}
      </>
    </div>
  );
}

View File

@@ -1,382 +0,0 @@
import { useState } from "react";
import {
Button,
Card,
Input,
Select,
Badge,
Progress,
Table,
Alert,
} from "antd";
import {
Plus,
Eye,
Clock,
Play,
CheckCircle,
AlertCircle,
Pause,
Download as DownloadIcon,
BarChart3,
} from "lucide-react";
import type { RatioTask } from "@/pages/RatioTask/ratio";
import { mockRatioTasks } from "@/mock/ratio";
import { useNavigate } from "react-router";
import CardView from "@/components/CardView";
import { SearchControls } from "@/components/SearchControls";
import DevelopmentInProgress from "@/components/DevelopmentInProgress";
export default function RatioTasksPage() {
return <DevelopmentInProgress showTime="2025.11.30" />;
const navigate = useNavigate();
const [searchQuery, setSearchQuery] = useState("");
const [filterStatus, setFilterStatus] = useState("all");
const [filterType, setFilterType] = useState("all");
const [sortBy, setSortBy] = useState("createdAt");
const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc");
const [viewMode, setViewMode] = useState<"card" | "list">("card");
const [tasks, setTasks] = useState<RatioTask[]>(mockRatioTasks);
// 过滤和排序任务
const filteredAndSortedTasks = tasks
.filter((task) => {
const matchesSearch = task.name
.toLowerCase()
.includes(searchQuery.toLowerCase());
const matchesStatus =
filterStatus === "all" || task.status === filterStatus;
const matchesType = filterType === "all" || task.ratioType === filterType;
return matchesSearch && matchesStatus && matchesType;
})
.sort((a, b) => {
let aValue: any, bValue: any;
switch (sortBy) {
case "name":
aValue = a.name.toLowerCase();
bValue = b.name.toLowerCase();
break;
case "targetCount":
aValue = a.targetCount;
bValue = b.targetCount;
break;
case "generatedCount":
aValue = a.generatedCount;
bValue = b.generatedCount;
break;
case "progress":
aValue = a.progress;
bValue = b.progress;
break;
case "createdAt":
default:
aValue = new Date(a.createdAt).getTime();
bValue = new Date(b.createdAt).getTime();
break;
}
if (sortOrder === "asc") {
return aValue > bValue ? 1 : -1;
} else {
return aValue < bValue ? 1 : -1;
}
});
const getStatusBadge = (status: string) => {
const statusConfig = {
pending: {
label: "等待中",
color: "#f09e10ff",
icon: <Clock className="w-4 h-4 inline mr-1" />,
},
running: {
label: "运行中",
color: "#007bff",
icon: <Play className="w-4 h-4 inline mr-1" />,
},
completed: {
label: "已完成",
color: "#28a745",
icon: <CheckCircle className="w-4 h-4 inline mr-1" />,
},
failed: {
label: "失败",
color: "#dc3545",
icon: <AlertCircle className="w-4 h-4 inline mr-1" />,
},
paused: {
label: "已暂停",
color: "#6c757d",
icon: <Pause className="w-4 h-4 inline mr-1" />,
},
};
return (
statusConfig[status as keyof typeof statusConfig] || statusConfig.pending
);
};
const handleTaskAction = (taskId: number, action: string) => {
setTasks((prev) =>
prev.map((task) => {
if (task.id === taskId) {
switch (action) {
case "pause":
return { ...task, status: "paused" as const };
case "resume":
return { ...task, status: "running" as const };
case "stop":
return {
...task,
status: "failed" as const,
progress: task.progress,
};
default:
return task;
}
}
return task;
})
);
};
const columns = [
{
title: "任务名称",
dataIndex: "name",
key: "name",
},
{
title: "状态",
dataIndex: "status",
key: "status",
},
{
title: "配比方式",
dataIndex: "ratioType",
key: "ratioType",
},
{
title: "进度",
dataIndex: "progress",
key: "progress",
},
{
title: "目标数量",
dataIndex: "targetCount",
key: "targetCount",
},
{
title: "已生成",
dataIndex: "generatedCount",
key: "generatedCount",
},
{
title: "数据源",
dataIndex: "sourceDatasets",
key: "sourceDatasets",
},
{
title: "创建时间",
dataIndex: "createdAt",
key: "createdAt",
},
{
title: "操作",
key: "actions",
render: (_: any, task: RatioTask) => (
<div className="flex items-center gap-1 justify-end">
{task.status === "running" && (
<Button
type="link"
size="small"
onClick={() => handleTaskAction(task.id, "pause")}
>
</Button>
)}
{task.status === "paused" && (
<Button
size="small"
type="link"
onClick={() => handleTaskAction(task.id, "resume")}
>
</Button>
)}
<Button type="link" size="small">
</Button>
</div>
),
},
];
const renderTableView = () => (
<Card>
<Table
columns={columns}
dataSource={filteredAndSortedTasks}
rowKey="id"
scroll={{ x: "max-content" }}
locale={{
emptyText: (
<div className="text-center py-8">
<BarChart3 className="w-12 h-12 text-gray-400 mx-auto mb-4" />
<h3 className="text-lg font-medium text-gray-900 mb-2">
</h3>
<p className="text-gray-500 mb-4">
{searchQuery || filterStatus !== "all" || filterType !== "all"
? "没有找到匹配的任务"
: "开始创建您的第一个配比任务"}
</p>
{!searchQuery &&
filterStatus === "all" &&
filterType === "all" && (
<Button
onClick={() =>
navigate("/data/synthesis/ratio-task/create")
}
type="primary"
>
<Plus className="w-4 h-4 mr-2" />
</Button>
)}
</div>
),
}}
/>
</Card>
);
const renderCardView = () => (
<CardView
data={filteredAndSortedTasks.map((task) => ({
...task,
description:
task.ratioType === "dataset" ? "按数据集配比" : "按标签配比",
icon: <BarChart3 className="w-6 h-6" />,
iconColor:
task.ratioType === "dataset" ? "bg-blue-100" : "bg-green-100",
statistics: [
{
label: "目标数量",
value: task.targetCount.toLocaleString(),
},
{
label: "已生成",
value: task.generatedCount.toLocaleString(),
},
{
label: "进度",
value: `${Math.round(task.progress)}%`,
},
],
status: getStatusBadge(task.status),
}))}
operations={[
{
key: "view",
label: "查看",
onClick: (item) => navigate(`/data/synthesis/ratio-task/${item.id}`),
},
{
key: "download",
label: "下载",
onClick: (item) => console.log("下载", item.name),
},
]}
/>
);
// 搜索、筛选和视图控制相关
const searchFilters = [
{
key: "status",
label: "状态筛选",
options: [
{ label: "全部状态", value: "all" },
{ label: "等待中", value: "pending" },
{ label: "运行中", value: "running" },
{ label: "已完成", value: "completed" },
{ label: "失败", value: "failed" },
{ label: "已暂停", value: "paused" },
],
},
{
key: "type",
label: "类型筛选",
options: [
{ label: "全部类型", value: "all" },
{ label: "按数据集", value: "dataset" },
{ label: "按标签", value: "label" },
],
},
{
key: "sortBy",
label: "排序方式",
options: [
{ label: "创建时间", value: "createdAt" },
{ label: "任务名称", value: "name" },
{ label: "目标数量", value: "targetCount" },
{ label: "已生成", value: "generatedCount" },
{ label: "进度", value: "progress" },
],
},
{
key: "sortOrder",
label: "排序顺序",
options: [
{ label: "升序", value: "asc" },
{ label: "降序", value: "desc" },
],
},
];
// 处理 SearchControls 的筛选变化
const handleSearchControlsFiltersChange = (
filters: Record<string, string[]>
) => {
setFilterStatus(filters.status?.[0] || "all");
setFilterType(filters.type?.[0] || "all");
setSortBy(filters.sortBy?.[0] || "createdAt");
setSortOrder((filters.sortOrder?.[0] as "asc" | "desc") || "desc");
};
// 处理视图切换
const handleViewModeChange = (mode: "card" | "list") => {
setViewMode(mode === "card" ? "card" : "list");
};
return (
<div className="">
<div className="flex items-center justify-between">
<h2 className="text-xl font-bold"></h2>
<Button
type="primary"
onClick={() => navigate("/data/synthesis/ratio-task/create")}
icon={<Plus className="w-4 h-4" />}
>
</Button>
</div>
<>
{/* 搜索、筛选和视图控制 */}
<SearchControls
searchTerm={searchQuery}
onSearchChange={setSearchQuery}
searchPlaceholder="搜索任务名称"
filters={searchFilters}
onFiltersChange={handleSearchControlsFiltersChange}
viewMode={viewMode === "card" ? "card" : "list"}
onViewModeChange={handleViewModeChange}
showViewToggle={true}
/>
{/* 任务列表 */}
{viewMode === "list" ? renderTableView() : renderCardView()}
</>
</div>
);
}

View File

@@ -0,0 +1,18 @@
import { get, post, put, del, download } from "@/utils/request";
/**
 * Queries the ratio task list (paged).
 *
 * @param params optional query parameters forwarded unchanged to the request
 * @returns whatever the shared `get` helper returns for this endpoint
 */
export function queryRatioTasksUsingGet(params?: any) {
  const endpoint = "/api/synthesis/ratio-task";
  return get(endpoint, params);
}
/**
 * Creates a ratio task.
 *
 * @param data request body forwarded unchanged to the request
 * @returns whatever the shared `post` helper returns for this endpoint
 */
export function createRatioTaskUsingPost(data: any) {
  const endpoint = "/api/synthesis/ratio-task";
  return post(endpoint, data);
}
/**
 * Deletes ratio tasks (supports batch deletion).
 *
 * Every id is sent as a repeated, URL-encoded `ids` query parameter. When no
 * ids are given the request goes to the bare endpoint without a query string.
 *
 * @param ids ids of the ratio tasks to delete
 * @returns whatever the shared `del` helper returns for this endpoint
 */
export function deleteRatioTasksUsingDelete(ids: string[]) {
  const endpoint = "/api/synthesis/ratio-task";
  const queryParts: string[] = [];
  for (const id of ids || []) {
    queryParts.push(`ids=${encodeURIComponent(id)}`);
  }
  if (queryParts.length === 0) {
    return del(endpoint);
  }
  return del(`${endpoint}?${queryParts.join("&")}`);
}

View File

@@ -1,24 +0,0 @@
export interface RatioTask {
id: number
name: string
status: "pending" | "running" | "completed" | "failed" | "paused"
progress: number
sourceDatasets: string[]
targetCount: number
generatedCount: number
createdAt: string
ratioType: "dataset" | "label"
estimatedTime?: string
quality?: number
errorMessage?: string
ratioConfigs: RatioConfig[]
}
export interface RatioConfig {
id: string
name: string
type: "dataset" | "label"
quantity: number
percentage: number
source: string
}

View File

@@ -0,0 +1,82 @@
// Ratio module models aligned with scripts/db/data-ratio-init.sql
// enums
/** How a ratio task selects its data: by tag ("TAG") or by dataset ("DATASET"). */
export type RatioMethod = "TAG" | "DATASET"
/**
 * Lifecycle states of a ratio task.
 * NOTE(review): the list page's status map and filter use "SUCCESS" for the
 * finished state while this type declares "COMPLETED" — confirm which value
 * the backend actually emits.
 */
export type RatioStatus = "PENDING" | "RUNNING" | "COMPLETED" | "FAILED" | "PAUSED"
// t_st_ratio_instances
/**
 * One row of the `t_st_ratio_instances` table, with camelCase field names.
 * Only `id` and `name` are required; every other field is optional.
 */
export interface RatioInstance {
id: string
name: string
description?: string
targetDatasetId?: string
ratioMethod?: RatioMethod
// Free-form ratio parameters; the shape is not constrained by this type.
ratioParameters?: any
mergeMethod?: string
// Accepts the known RatioStatus values as well as any other string.
status?: RatioStatus | string
totals?: number
createdAt?: string
updatedAt?: string
createdBy?: string
updatedBy?: string
}
// t_st_ratio_relations
/**
 * One row of the `t_st_ratio_relations` table, with camelCase field names.
 * Each relation belongs to the ratio instance referenced by `ratioInstanceId`.
 */
export interface RatioRelation {
id: string
ratioInstanceId: string
sourceDatasetId?: string
ratioValue?: string
counts?: number
filterConditions?: string
createdAt?: string
updatedAt?: string
createdBy?: string
updatedBy?: string
}
// API DTOs
/**
 * One source entry in a ratio task's `config` list.
 * NOTE(review): `datasetId` is camelCase while `filter_conditions` is
 * snake_case, and the create endpoint reads `item.dataset_id` — confirm the
 * request schema accepts `datasetId` (e.g. through an alias).
 */
export interface RatioConfigItem {
datasetId: string
// Typed as a string here; the create endpoint converts it with int().
counts: string
filter_conditions: string
}
/** Request body for creating a ratio task. */
export interface CreateRatioTaskRequest {
name: string
description?: string
// Typed as a string here; the create endpoint converts it with int().
totals: string
ratio_method: RatioMethod
config: RatioConfigItem[]
}
/** Summary of the target dataset returned with a newly created ratio task. */
export interface TargetDatasetInfo {
id: string
name: string
datasetType: string
status: string
}
/** Response payload of the create-ratio-task endpoint. */
export interface CreateRatioTaskResponse {
id: string
name: string
description?: string
totals: number
ratio_method: RatioMethod
status: string
config: RatioConfigItem[]
targetDataset: TargetDatasetInfo
}
/** One entry of the paged ratio task list; field names are snake_case. */
export interface RatioTaskItem {
id: string
name: string
description?: string
status?: string
totals?: number
ratio_method?: RatioMethod
target_dataset_id?: string
target_dataset_name?: string
created_at?: string
updated_at?: string
}

View File

@@ -38,8 +38,8 @@ import KnowledgeBaseFileDetailPage from "@/pages/KnowledgeBase/FileDetail/Knowle
import OperatorMarketPage from "@/pages/OperatorMarket/Home/OperatorMarket";
import OperatorPluginCreate from "@/pages/OperatorMarket/Create/OperatorPluginCreate";
import OperatorPluginDetail from "@/pages/OperatorMarket/Detail/OperatorPluginDetail";
import RatioTasksPage from "@/pages/RatioTask/RatioTask";
import CreateRatioTask from "@/pages/RatioTask/CreateRatioTask";
import RatioTasksPage from "@/pages/RatioTask/Home/RatioTask.tsx";
import CreateRatioTask from "@/pages/RatioTask/Create/CreateRatioTask.tsx";
import OrchestrationPage from "@/pages/Orchestration/Orchestration";
import WorkflowEditor from "@/pages/Orchestration/WorkflowEditor";
import SettingsPage from "@/pages/SettingsPage/SettingsPage";

View File

@@ -0,0 +1,71 @@
"""
Tables for Ratio (Data Synthesis Ratio) module
Derived from scripts/db/data-ratio-init.sql
- t_st_ratio_instances
- t_st_ratio_relations
"""
import uuid
from sqlalchemy import Column, String, Text, BigInteger, TIMESTAMP, JSON, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.db.session import Base
class RatioInstance(Base):
    """Ratio instance table (UUID primary key) -> t_st_ratio_instances

    Columns per data-ratio-init.sql:
    id, name, description, target_dataset_id, ratio_method, ratio_parameters,
    merge_method, status, totals, created_at, updated_at, created_by, updated_by
    """
    __tablename__ = "t_st_ratio_instances"
    # Primary key: a UUID4 string generated on the Python side when not supplied.
    id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(64), nullable=True, comment="名称")
    description = Column(Text, nullable=True, comment="描述")
    # NOTE(review): the column comment says "template dataset ID" although the
    # column is named target_dataset_id — confirm which wording is intended.
    target_dataset_id = Column(String(64), nullable=True, comment="模板数据集ID")
    # Ratio method: by tag (TAG) or by dataset (DATASET).
    ratio_method = Column(String(50), nullable=True, comment="配比方式,按标签(TAG),按数据集(DATASET)")
    ratio_parameters = Column(JSON, nullable=True, comment="配比参数")
    merge_method = Column(String(50), nullable=True, comment="合并方式")
    status = Column(String(20), nullable=True, comment="状态")
    totals = Column(BigInteger, nullable=True, comment="总数")
    # Both timestamps default to the database's current timestamp;
    # updated_at is additionally refreshed on every UPDATE.
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column( TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self) -> str:
        """Return a short debug representation with id, name, method and status."""
        return f"<RatioInstance(id={self.id}, name={self.name}, method={self.ratio_method}, status={self.status})>"
class RatioRelation(Base):
    """Ratio relation table (UUID primary key) -> t_st_ratio_relations

    Columns per data-ratio-init.sql:
    id, ratio_instance_id, source_dataset_id, ratio_value, counts, filter_conditions,
    created_at, updated_at, created_by, updated_by
    """
    __tablename__ = "t_st_ratio_relations"
    # Primary key: a UUID4 string generated on the Python side when not supplied.
    id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    # Id of the owning ratio instance; declared as a plain string column,
    # with no ForeignKey constraint at the ORM level.
    ratio_instance_id = Column(String(64), nullable=False, comment="配比实例ID")
    source_dataset_id = Column(String(64), nullable=True, comment="源数据集ID")
    ratio_value = Column(String(256), nullable=True)
    counts = Column(BigInteger, nullable=True, comment="条数")
    filter_conditions = Column(Text, nullable=True, comment="过滤条件")
    # Both timestamps default to the database's current timestamp;
    # updated_at is additionally refreshed on every UPDATE.
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self) -> str:
        """Return a short debug representation with the ids and the record count."""
        return (
            f"<RatioRelation(id={self.id}, ratio_instance_id={self.ratio_instance_id}, "
            f"source_dataset_id={self.source_dataset_id}, counts={self.counts})>"
        )

View File

@@ -41,6 +41,10 @@ async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
# 自定义异常处理器:RequestValidationError
async def validation_exception_handler(request: Request, exc: RequestValidationError):
"""将请求验证错误转换为标准响应格式"""
# 仅返回每个错误的简要 detail 文本(来自 Pydantic 错误的 `msg` 字段),不返回整个错误对象
raw_errors = exc.errors() or []
errors = [err.get("msg", "Validation error") for err in raw_errors]
return JSONResponse(
status_code=422,
content={
@@ -48,9 +52,9 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
"message": "error",
"data": {
"detail": "Validation error",
"errors": exc.errors()
}
}
"errors": errors,
},
},
)
# 自定义异常处理器:未捕获的异常
@@ -94,4 +98,4 @@ class DMServiceClientError(LabelStudioAdapterException):
class SyncServiceError(LabelStudioAdapterException):
"""同步服务错误"""
pass
pass

View File

@@ -2,6 +2,7 @@ from fastapi import APIRouter
from .system.interface import router as system_router
from .annotation.interface import router as annotation_router
from .synthesis.interface import router as ratio_router
router = APIRouter(
prefix="/api"
@@ -9,5 +10,6 @@ router = APIRouter(
router.include_router(system_router)
router.include_router(annotation_router)
router.include_router(ratio_router)
__all__ = ["router"]
__all__ = ["router"]

View File

@@ -0,0 +1,11 @@
from fastapi import APIRouter
# Parent router of the synthesis module; every route below it shares the
# "/synthesis" prefix and the "synthesis" tag.
router = APIRouter(prefix="/synthesis", tags=["synthesis"])

# Include sub-routers. The import stays below the router definition, matching
# the original statement order.
from .ratio_task import router as ratio_task_router

router.include_router(ratio_task_router)

View File

@@ -0,0 +1,253 @@
import asyncio
from typing import Set
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field, field_validator
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import or_, func, delete, select
from app.core.logging import get_logger
from app.db.models import Dataset
from app.db.session import get_db
from app.module.dataset import DatasetManagementService
from app.module.shared.schema import StandardResponse
from app.module.synthesis.schema.ratio_task import (
CreateRatioTaskResponse,
CreateRatioTaskRequest,
PagedRatioTaskResponse,
RatioTaskItem,
TargetDatasetInfo,
)
from app.module.synthesis.service.ratio_task import RatioTaskService
from app.db.models.ratio_task import RatioInstance, RatioRelation
# Module logger.
logger = get_logger(__name__)

# Router for the ratio task endpoints, registered under the "/ratio-task" prefix.
router = APIRouter(prefix="/ratio-task", tags=["synthesis/ratio-task"])
# Strong references to in-flight background ratio jobs. The event loop keeps
# only weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes. Each task removes itself when done.
_background_ratio_tasks: set[asyncio.Task] = set()


@router.post("", response_model=StandardResponse[CreateRatioTaskResponse], status_code=200)
async def create_ratio_task(
    req: CreateRatioTaskRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Create a ratio task.

    Path: /api/synthesis/ratio-task

    Steps:
      1. Resolve the source dataset types and check the request
         (``get_dataset_types`` / ``valid_exists``; presumably these verify that
         the referenced datasets exist — confirm against their definitions).
      2. Create a DRAFT target dataset named
         ``<task name>-配比生成-<timestamp>`` and flush to obtain its id.
      3. Persist the ratio task through ``RatioTaskService.create_task``.
      4. Schedule the ratio execution as a background asyncio task.

    Args:
        req: The create request (name, description, totals, ratio method, config).
        db: The request-scoped async database session.

    Returns:
        A ``StandardResponse`` wrapping the created task and its target dataset.

    Raises:
        HTTPException: Re-raised unchanged when raised by a step above; any other
            failure is logged and reported as a 500. The session is rolled back
            in both cases.
    """
    try:
        # Validate the datasets referenced by the config before creating anything.
        dm_service = DatasetManagementService(db)
        source_types = await get_dataset_types(dm_service, req)
        await valid_exists(db, req)

        # Create the target dataset, named "<task name>-配比生成-<timestamp>".
        target_dataset_name = f"{req.name}-配比生成-{datetime.now().strftime('%Y%m%d%H%M%S')}"
        target_type = get_target_dataset_type(source_types)
        target_dataset = Dataset(
            name=target_dataset_name,
            description=req.description or "",
            dataset_type=target_type,
            status="DRAFT",
        )
        db.add(target_dataset)
        await db.flush()  # populates target_dataset.id

        service = RatioTaskService(db)
        instance = await service.create_task(
            name=req.name,
            description=req.description,
            totals=int(req.totals),
            ratio_method=req.ratio_method,
            config=[
                {
                    "dataset_id": item.dataset_id,
                    "counts": int(item.counts),
                    "filter_conditions": item.filter_conditions,
                }
                for item in req.config
            ],
            target_dataset_id=target_dataset.id,
        )

        # Run the ratio task in the background (supports DATASET / TAG). Keep a
        # reference until it completes so it cannot be collected mid-execution.
        background_task = asyncio.create_task(
            RatioTaskService.execute_dataset_ratio_task(instance.id)
        )
        _background_ratio_tasks.add(background_task)
        background_task.add_done_callback(_background_ratio_tasks.discard)

        return StandardResponse(
            code=200,
            message="success",
            data=CreateRatioTaskResponse(
                id=instance.id,
                name=instance.name,
                description=instance.description,
                totals=instance.totals or 0,
                ratio_method=instance.ratio_method or req.ratio_method,
                status=instance.status or "PENDING",
                config=req.config,
                targetDataset=TargetDatasetInfo(
                    id=str(target_dataset.id),
                    name=str(target_dataset.name),
                    datasetType=str(target_dataset.dataset_type),
                    status=str(target_dataset.status),
                )
            )
        )
    except HTTPException:
        await db.rollback()
        raise
    except Exception as e:
        await db.rollback()
        logger.error(f"Failed to create ratio task: {e}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail="Internal server error") from e
@router.get("", response_model=StandardResponse[PagedRatioTaskResponse], status_code=200)
async def list_ratio_tasks(
    page: int = 1,
    size: int = 10,
    name: str | None = None,
    status: str | None = None,
    db: AsyncSession = Depends(get_db),
):
    """List ratio tasks page by page, optionally filtered by name and status.

    Args:
        page: 1-based page number; values below 1 are treated as the first page.
        size: Page size.
        name: Optional substring that the task name must contain. It is matched
            literally: ``%`` and ``_`` in the input are escaped and do not act
            as SQL wildcards.
        status: Optional exact status to match.
        db: The request-scoped async database session.

    Returns:
        A ``StandardResponse`` wrapping the page content (newest first), the
        total element count, the total page count and the echoed page/size.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        query = select(RatioInstance)
        # filters
        if name:
            # Literal "contains" match; autoescape neutralises LIKE wildcards
            # present in the user-supplied value.
            query = query.where(RatioInstance.name.contains(name, autoescape=True))
        if status:
            query = query.where(RatioInstance.status == status)

        # count the filtered rows before paging is applied
        count_q = select(func.count()).select_from(query.subquery())
        total = (await db.execute(count_q)).scalar_one()

        # page (1-based)
        page_index = max(page, 1) - 1
        query = query.order_by(RatioInstance.created_at.desc()).offset(page_index * size).limit(size)
        result = await db.execute(query)
        items = result.scalars().all()

        # Preload the target datasets of this page in one query so that each
        # item can be given its dataset name.
        ds_ids = {i.target_dataset_id for i in items if i.target_dataset_id}
        ds_map = {}
        if ds_ids:
            ds_res = await db.execute(select(Dataset).where(Dataset.id.in_(list(ds_ids))))
            for d in ds_res.scalars().all():
                ds_map[d.id] = d

        # map to DTOs and attach the dataset name
        content: list[RatioTaskItem] = []
        for i in items:
            ds = ds_map.get(i.target_dataset_id) if i.target_dataset_id else None
            content.append(
                RatioTaskItem(
                    id=i.id,
                    name=i.name or "",
                    description=i.description,
                    status=i.status,
                    totals=i.totals,
                    ratio_method=i.ratio_method,
                    target_dataset_id=i.target_dataset_id,
                    target_dataset_name=(ds.name if ds else None),
                    created_at=str(i.created_at) if getattr(i, "created_at", None) else None,
                    updated_at=str(i.updated_at) if getattr(i, "updated_at", None) else None,
                )
            )

        # Ceiling division; a non-positive size yields zero pages.
        total_pages = (total + size - 1) // size if size > 0 else 0
        return StandardResponse(
            code=200,
            message="success",
            data=PagedRatioTaskResponse(
                content=content,
                totalElements=total,
                totalPages=total_pages,
                page=page,
                size=size,
            ),
        )
    except Exception as e:
        logger.error(f"Failed to list ratio tasks: {e}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail="Internal server error") from e
@router.delete("", response_model=StandardResponse[str], status_code=200)
async def delete_ratio_tasks(
    ids: list[str] = Query(..., description="要删除的配比任务ID列表"),
    db: AsyncSession = Depends(get_db),
):
    """Delete ratio tasks by id and return a simple result string.

    Relations are removed before their instances, and both deletes are committed
    together.

    Args:
        ids: Ids of the ratio tasks to delete.
        db: Request-scoped async database session.

    Returns:
        StandardResponse whose data is the string "success".

    Raises:
        HTTPException: 400 when ``ids`` is empty; 500 for any other failure. The
            session is rolled back in both cases.
    """
    try:
        if not ids:
            raise HTTPException(status_code=400, detail="ids is required")
        # Delete the relations first
        await db.execute(
            delete(RatioRelation).where(RatioRelation.ratio_instance_id.in_(ids))
        )
        # Then delete the instances
        await db.execute(
            delete(RatioInstance).where(RatioInstance.id.in_(ids))
        )
        await db.commit()
        return StandardResponse(code=200, message="success", data="success")
    except HTTPException:
        await db.rollback()
        raise
    except Exception as e:
        await db.rollback()
        # Keep the details (with traceback) in the server log only: the raw
        # exception text may expose SQL/driver internals, and the sibling
        # endpoints answer with the same generic message.
        logger.exception(f"Failed to delete ratio tasks: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
async def valid_exists(db, req: CreateRatioTaskRequest):
    """Reject the request when a ratio task with the same name already exists.

    Args:
        db: Async database session used for the lookup.
        req: Create request whose ``name`` must be unique.

    Raises:
        HTTPException: 400 when at least one ratio task already uses ``req.name``.
    """
    # Ratio task names must be unique
    exist_task_q = await db.execute(
        select(RatioInstance).where(RatioInstance.name == req.name)
    )
    # The previous check relied on scalar_one_or_none() raising, which it does not
    # do when exactly one row matches, so duplicates slipped through. Test the
    # fetched row instead; first() also tolerates several pre-existing duplicates.
    if exist_task_q.scalars().first() is not None:
        logger.error(f"create ratio task failed: ratio task {req.name} already exists")
        raise HTTPException(status_code=400, detail=f"ratio task {req.name} already exists")
async def get_dataset_types(dm_service: DatasetManagementService, req: CreateRatioTaskRequest) -> Set[str]:
    """Collect the upper-cased dataset types of every dataset referenced by ``req.config``.

    Each config item's dataset is looked up through ``dm_service``; the type is read
    from ``dataset_type`` and, failing that, from ``datasetType``.

    Raises:
        HTTPException: 400 as soon as one referenced dataset cannot be found.
    """
    collected: Set[str] = set()
    for entry in req.config:
        found = await dm_service.get_dataset(entry.dataset_id)
        # Guard clause: stop at the first unknown dataset
        if not found:
            raise HTTPException(status_code=400, detail=f"dataset_id not found: {entry.dataset_id}")
        raw_type = getattr(found, "dataset_type", None)
        if not raw_type:
            raw_type = getattr(found, "datasetType", None)
        collected.add(str(raw_type).upper())
    return collected
def get_target_dataset_type(source_types: Set[str]) -> str:
    """Derive the target dataset type from the set of source dataset types.

    Rules:
        1) every source is TEXT -> "TEXT"
        2) the sources consist of exactly one media type (IMAGE / AUDIO / VIDEO)
           and nothing else -> that media type
        3) anything else, including an empty set -> "OTHER"
    """
    if source_types == {"TEXT"}:
        return "TEXT"

    media_hits = source_types & {"IMAGE", "AUDIO", "VIDEO"}
    # Exactly one media type, and it is the only type present
    if len(media_hits) == 1 and media_hits == source_types:
        (only_type,) = media_hits
        return only_type

    return "OTHER"

View File

@@ -0,0 +1,86 @@
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator
# One entry of a ratio task's config: which source dataset to draw from, how many
# items to take and which filter to apply.
class RatioConfigItem(BaseModel):
    dataset_id: str = Field(..., alias="datasetId", description="数据集id")
    counts: str = Field(..., description="数量")
    filter_conditions: str = Field(..., description="过滤条件")

    @field_validator("counts")
    @classmethod
    def validate_counts(cls, v: str) -> str:
        # The value stays a string; it only has to be parseable as an integer.
        try:
            int(v)
        except Exception:
            raise ValueError("counts must be a numeric string")
        else:
            return v
# Request body for creating a ratio task: task metadata, the ratio method and the
# per-dataset config list.
class CreateRatioTaskRequest(BaseModel):
    name: str = Field(..., description="名称")
    description: Optional[str] = Field(None, description="描述")
    totals: str = Field(..., description="目标数量")
    ratio_method: str = Field(..., description="配比方式", alias="ratio_method")
    config: List[RatioConfigItem] = Field(..., description="配比设置列表")

    @field_validator("ratio_method")
    @classmethod
    def validate_ratio_method(cls, v: str) -> str:
        # Only the two ratio methods named here are accepted.
        allowed = {"TAG", "DATASET"}
        if v not in allowed:
            raise ValueError(f"ratio_method must be one of {allowed}")
        return v

    @field_validator("totals")
    @classmethod
    def validate_totals(cls, v: str) -> str:
        # Parse first, range-check afterwards. Previously the ">= 0" error was
        # raised inside the try block and swallowed by the broad except, so a
        # negative value was reported as "must be a numeric string".
        try:
            iv = int(v)
        except (TypeError, ValueError):
            raise ValueError("totals must be a numeric string")
        if iv < 0:
            raise ValueError("totals must be >= 0")
        return v
# Summary of a target dataset, embedded in CreateRatioTaskResponse.targetDataset.
class TargetDatasetInfo(BaseModel):
    id: str  # dataset id
    name: str  # dataset name
    datasetType: str  # dataset type
    status: str  # dataset status
# Response payload for a created ratio task: the task's own fields, the config
# list and the target dataset summary.
class CreateRatioTaskResponse(BaseModel):
    # task info
    id: str
    name: str
    description: Optional[str] = None
    totals: int
    ratio_method: str
    status: str
    # echoed config
    config: List[RatioConfigItem]
    # created dataset
    targetDataset: TargetDatasetInfo
# One row of the ratio task list. Everything except id and name is optional.
class RatioTaskItem(BaseModel):
    id: str
    name: str
    description: Optional[str] = None
    status: Optional[str] = None
    totals: Optional[int] = None
    ratio_method: Optional[str] = None
    target_dataset_id: Optional[str] = None
    target_dataset_name: Optional[str] = None
    # Timestamps are carried as strings
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
# Paged list of ratio tasks.
class PagedRatioTaskResponse(BaseModel):
    content: List[RatioTaskItem]  # items on the current page
    totalElements: int  # number of tasks matching the filters
    totalPages: int  # number of pages at the requested size
    page: int  # requested page number
    size: int  # requested page size

View File

@@ -0,0 +1,282 @@
from typing import List, Optional, Dict, Any
import random
import os
import shutil
import asyncio
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models.ratio_task import RatioInstance, RatioRelation
from app.db.models import Dataset, DatasetFiles
from app.db.session import AsyncSessionLocal
logger = get_logger(__name__)
class RatioTaskService:
    """Service for Ratio Task DB operations and background execution."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def create_task(
        self,
        *,
        name: str,
        description: Optional[str],
        totals: int,
        ratio_method: str,
        config: List[Dict[str, Any]],
        target_dataset_id: Optional[str] = None,
    ) -> RatioInstance:
        """Create a ratio task instance and its relations, then commit.

        config item format: {"dataset_id": str, "counts": int, "filter_conditions": str}

        Args:
            name: Task name.
            description: Optional task description.
            totals: Target total stored on the instance.
            ratio_method: Ratio method stored on the instance.
            config: One dict per source dataset; a missing or empty "counts" is
                stored as 0.
            target_dataset_id: Id of the dataset that receives the selected files.

        Returns:
            The persisted instance, refreshed after the commit. Its status is
            "PENDING".
        """
        logger.info(f"Creating ratio task: name={name}, method={ratio_method}, totals={totals}, items={len(config or [])}")

        instance = RatioInstance(
            name=name,
            description=description,
            ratio_method=ratio_method,
            totals=totals,
            target_dataset_id=target_dataset_id,
            status="PENDING",
        )
        self.db.add(instance)
        await self.db.flush()  # populate instance.id

        for item in config or []:
            relation = RatioRelation(
                ratio_instance_id=instance.id,
                source_dataset_id=item.get("dataset_id"),
                # "or 0" also covers an explicit None, which int() would reject
                counts=int(item.get("counts") or 0),
                filter_conditions=item.get("filter_conditions"),
            )
            self.db.add(relation)

        await self.db.commit()
        await self.db.refresh(instance)
        logger.info(f"Ratio task created: {instance.id}")
        return instance

    # ========================= Execution (Background) ========================= #

    @staticmethod
    async def execute_dataset_ratio_task(instance_id: str) -> None:
        """Execute a ratio task in background, using a session of its own.

        Supported ratio_method:
        - DATASET: randomly select counts files from each source dataset
        - TAG: randomly select counts files matching relation.filter_conditions tags

        Steps:
        - Mark instance RUNNING (committed immediately)
        - For each relation: fetch ACTIVE files, optionally filter by tags
        - Copy selected files into target dataset
        - Update dataset statistics and mark instance SUCCESS/FAILED

        Exceptions are logged and recorded as status FAILED; none is propagated to
        the caller.
        """
        async with AsyncSessionLocal() as session:  # type: AsyncSession
            try:
                await RatioTaskService._run_ratio_task(session, instance_id)
                await session.commit()
            except Exception as e:
                logger.exception(f"Dataset ratio execution failed for {instance_id}: {e}")
                await RatioTaskService._mark_failed(session, instance_id)

    @staticmethod
    async def _run_ratio_task(session: AsyncSession, instance_id: str) -> None:
        """Select and copy the files for one ratio instance; the caller commits the result."""
        inst_res = await session.execute(select(RatioInstance).where(RatioInstance.id == instance_id))
        instance: Optional[RatioInstance] = inst_res.scalar_one_or_none()
        if not instance:
            logger.error(f"Ratio instance not found: {instance_id}")
            return
        logger.info(f"start execute ratio task: {instance_id}")

        # Commit RUNNING right away so the state is visible while files are being
        # copied. The refresh reloads the attributes in case the session expires
        # objects on commit.
        instance.status = "RUNNING"
        await session.commit()
        await session.refresh(instance)

        if instance.ratio_method not in {"DATASET", "TAG"}:
            logger.info(f"Instance {instance_id} ratio_method={instance.ratio_method} not supported yet")
            instance.status = "SUCCESS"
            return

        rel_res = await session.execute(
            select(RatioRelation).where(RatioRelation.ratio_instance_id == instance_id)
        )
        relations: List[RatioRelation] = list(rel_res.scalars().all())

        # Load target dataset
        ds_res = await session.execute(select(Dataset).where(Dataset.id == instance.target_dataset_id))
        target_ds: Optional[Dataset] = ds_res.scalar_one_or_none()
        if not target_ds:
            logger.error(f"Target dataset not found for instance {instance_id}")
            instance.status = "FAILED"
            return

        # Preload existing target file paths for deduplication
        existing_path_rows = await session.execute(
            select(DatasetFiles.file_path).where(DatasetFiles.dataset_id == target_ds.id)
        )
        existing_paths = {p for p in existing_path_rows.scalars().all() if p}

        added_count = 0
        added_size = 0

        for rel in relations:
            if not rel.source_dataset_id or not rel.counts or rel.counts <= 0:
                continue

            # Fetch all files for the source dataset (ACTIVE only)
            files_res = await session.execute(
                select(DatasetFiles).where(
                    DatasetFiles.dataset_id == rel.source_dataset_id,
                    DatasetFiles.status == "ACTIVE",
                )
            )
            files = list(files_res.scalars().all())

            # TAG mode: filter by tags according to relation.filter_conditions
            if instance.ratio_method == "TAG":
                required_tags = RatioTaskService._parse_required_tags(rel.filter_conditions)
                if required_tags:
                    files = [f for f in files if RatioTaskService._file_contains_tags(f, required_tags)]

            if not files:
                continue

            # Take everything when fewer files exist than requested, else sample
            pick_n = min(rel.counts or 0, len(files))
            chosen = random.sample(files, pick_n) if pick_n < len(files) else files

            for f in chosen:
                new_path, needs_copy = RatioTaskService._resolve_target_path(
                    f.file_path, rel.source_dataset_id, target_ds.id
                )
                # De-dup by target path
                if new_path in existing_paths:
                    continue
                # Perform copy only when needed
                if needs_copy:
                    await RatioTaskService._copy_file(f.file_path, new_path)

                session.add(RatioTaskService._clone_file_record(f, target_ds, new_path))
                existing_paths.add(new_path)
                added_count += 1
                added_size += int(f.file_size or 0)

            # Flush once per relation so pending rows do not pile up in the session
            await session.flush()

        # Update target dataset statistics
        target_ds.file_count = (target_ds.file_count or 0) + added_count  # type: ignore
        target_ds.size_bytes = (target_ds.size_bytes or 0) + added_size  # type: ignore

        # If target dataset has files, mark it ACTIVE
        if (target_ds.file_count or 0) > 0:  # type: ignore
            target_ds.status = "ACTIVE"

        # Done
        instance.status = "SUCCESS"
        logger.info(f"Dataset ratio execution completed: instance={instance_id}, files={added_count}, size={added_size}")

    @staticmethod
    async def _mark_failed(session: AsyncSession, instance_id: str) -> None:
        """Discard pending work and record status FAILED on the instance (best effort)."""
        try:
            # Roll back first. This makes the session usable again after a database
            # error and discards file rows that were flushed before the failure, so
            # they are not committed without the matching dataset statistics.
            # Files already copied on disk are NOT removed here.
            await session.rollback()
            inst_res = await session.execute(select(RatioInstance).where(RatioInstance.id == instance_id))
            instance = inst_res.scalar_one_or_none()
            if instance:
                instance.status = "FAILED"
                await session.commit()
        except Exception as e:
            logger.exception(f"Failed to mark ratio instance {instance_id} as FAILED: {e}")

    # ------------------------- helpers for file copying ------------------------- #

    @staticmethod
    def _resolve_target_path(src_path, source_dataset_id, target_dataset_id) -> tuple:
        """Map a source file path into the target dataset.

        Returns:
            ``(new_path, needs_copy)``. A string path starting with
            ``/dataset/<source_dataset_id>`` has that prefix replaced by
            ``/dataset/<target_dataset_id>`` and needs a copy; any other value is
            returned unchanged and needs none.
        """
        src_prefix = f"/dataset/{source_dataset_id}"
        if isinstance(src_path, str) and src_path.startswith(src_prefix):
            dst_prefix = f"/dataset/{target_dataset_id}"
            return src_path.replace(src_prefix, dst_prefix, 1), True
        return src_path, False

    @staticmethod
    async def _copy_file(src_path: str, dst_path: str) -> None:
        """Copy one file, creating the destination directory, off the event loop."""
        dst_dir = os.path.dirname(dst_path)
        # Run the blocking filesystem calls in a thread to avoid blocking the event loop
        await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True)
        await asyncio.to_thread(shutil.copy2, src_path, dst_path)

    @staticmethod
    def _clone_file_record(f, target_ds: Dataset, new_path) -> DatasetFiles:
        """Build an ACTIVE DatasetFiles row in the target dataset mirroring ``f``."""
        return DatasetFiles(
            dataset_id=target_ds.id,  # type: ignore
            file_name=f.file_name,
            file_path=new_path,
            file_type=f.file_type,
            file_size=f.file_size,
            check_sum=f.check_sum,
            tags=f.tags,
            dataset_filemetadata=f.dataset_filemetadata,
            status="ACTIVE",
        )

    # ------------------------- helpers for TAG filtering ------------------------- #

    @staticmethod
    def _parse_required_tags(conditions: Optional[str]) -> set[str]:
        """Parse filter_conditions into a set of required tag strings.

        Tags may be separated by commas, semicolons or any whitespace.
        Empty/None -> empty set.
        """
        if not conditions:
            return set()
        normalized = conditions
        for sep in (",", ";"):
            normalized = normalized.replace(sep, " ")
        # str.split() without arguments splits on whitespace runs and drops empties
        return set(normalized.split())

    @staticmethod
    def _file_contains_tags(f: DatasetFiles, required: set[str]) -> bool:
        """Return True when the file carries every tag in ``required``.

        ``f.tags`` may be a list of strings, a list of dicts (the tag is read from
        "name", "label" or "tag") or a dict whose keys are the tags. Any other
        shape, missing tags or an unexpected error yields False. An empty
        ``required`` always yields True.
        """
        if not required:
            return True
        tags = f.tags
        if not tags:
            return False
        try:
            # tags could be a list of strings or list of objects with 'name'
            tag_names = set()
            if isinstance(tags, list):
                for item in tags:
                    if isinstance(item, str):
                        tag_names.add(item)
                    elif isinstance(item, dict):
                        name = item.get("name") or item.get("label") or item.get("tag")
                        if isinstance(name, str):
                            tag_names.add(name)
            elif isinstance(tags, dict):
                # flat dict of name->... treat keys as tags
                tag_names = set(map(str, tags.keys()))
            else:
                return False
            # Per-file trace; kept at debug level so it does not flood the info log
            logger.debug(f">>>>>{tags}>>>>>{required}, {tag_names}")
            return required.issubset(tag_names)
        except Exception:
            # Best effort: a malformed tags payload simply does not match
            return False

    @staticmethod
    async def get_new_file(f, rel: RatioRelation, target_ds: Dataset) -> DatasetFiles:
        """Copy ``f`` into the target dataset when its path lies under the source
        dataset directory, and return the (unsaved) DatasetFiles row for it.

        Unlike the background execution, this performs no de-duplication against
        files already present in the target dataset.
        """
        new_path, needs_copy = RatioTaskService._resolve_target_path(
            f.file_path, rel.source_dataset_id, target_ds.id
        )
        if needs_copy:
            await RatioTaskService._copy_file(f.file_path, new_path)
        return RatioTaskService._clone_file_record(f, target_ds, new_path)

View File

@@ -0,0 +1,32 @@
USE datamate;
-- Ratio task instances: one row per ratio task.
-- target_dataset_id references the dataset that receives the selected files, so
-- its column comment now reads "target" (目标) instead of "template" (模板).
CREATE TABLE IF NOT EXISTS t_st_ratio_instances
(
    id                varchar(64) primary key COMMENT 'UUID',
    name              varchar(64) COMMENT '名称',
    description       TEXT COMMENT '描述',
    target_dataset_id varchar(64) COMMENT '目标数据集ID',
    ratio_method      varchar(50) COMMENT '配比方式,按标签(TAG),按数据集(DATASET)',
    ratio_parameters  JSON COMMENT '配比参数',
    merge_method      varchar(50) COMMENT '合并方式',
    status            varchar(20) COMMENT '状态',
    totals            BIGINT COMMENT '总数',
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    created_by        VARCHAR(255) COMMENT '创建者',
    updated_by        VARCHAR(255) COMMENT '更新者'
) COMMENT='配比实例表(UUID 主键)';
-- Ratio relations: one row per (ratio instance, source dataset) pair, holding the
-- requested item count and the filter conditions for that source.
CREATE TABLE IF NOT EXISTS t_st_ratio_relations
(
    id                VARCHAR(64) PRIMARY KEY COMMENT 'UUID',
    ratio_instance_id VARCHAR(64) COMMENT '配比实例ID',
    source_dataset_id VARCHAR(64) COMMENT '源数据集ID',
    ratio_value       VARCHAR(256),
    counts            BIGINT COMMENT '条数',
    filter_conditions TEXT COMMENT '过滤条件',
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    created_by        VARCHAR(255) COMMENT '创建者',
    updated_by        VARCHAR(255) COMMENT '更新者'
) COMMENT='配比关系表(UUID 主键)';