You've already forked DataMate
feature:增加数据配比功能 (#52)
* refactor: 修改调整数据归集实现,删除无用代码,优化代码结构 * feature: 每天凌晨00:00扫描所有数据集,检查数据集是否超过了预设的保留天数,超出保留天数的数据集调用删除接口进行删除 * fix: 修改删除数据集文件的逻辑,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 * fix: 增加参数校验和接口定义,删除不使用的接口 * fix: 数据集统计数据默认为0 * feature: 数据集状态增加流转,创建时为草稿状态,上传文件或者归集文件后修改为活动状态 * refactor: 修改分页查询归集任务的代码 * fix: 更新后重新执行;归集任务执行增加事务控制 * feature: 创建归集任务时能够同步创建数据集,更新归集任务时能更新到指定数据集 * fix: 创建归集任务不需要创建数据集时不应该报错 * fix: 修复删除文件时数据集的统计数据不变动 * feature: 查询数据集详情时能够获取到文件标签分布 * fix: tags为空时不进行分析 * fix: 状态修改为ACTIVE * fix: 修改解析tag的方法 * feature: 实现创建、分页查询、删除配比任务 * feature: 实现创建、分页查询、删除配比任务的前端交互 * fix: 修复进度计算异常导致的页面报错
This commit is contained in:
@@ -10,6 +10,7 @@ import com.datamate.common.interfaces.PagedResponse;
|
||||
import com.datamate.datamanagement.application.DatasetApplicationService;
|
||||
import com.datamate.datamanagement.domain.model.dataset.Dataset;
|
||||
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
|
||||
import jakarta.validation.Valid;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@@ -35,11 +36,13 @@ public class CollectionTaskController{
|
||||
public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
|
||||
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
|
||||
String datasetId = null;
|
||||
DatasetResponse dataset = null;
|
||||
if (Objects.nonNull(request.getDataset())) {
|
||||
datasetId = datasetService.createDataset(request.getDataset()).getId();
|
||||
dataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
|
||||
datasetId = dataset.getId();
|
||||
}
|
||||
CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task, datasetId));
|
||||
response.setDataset(DatasetConverter.INSTANCE.convertToResponse(datasetService.getDataset(datasetId)));
|
||||
response.setDataset(dataset);
|
||||
return ResponseEntity.ok().body(response);
|
||||
}
|
||||
|
||||
|
||||
@@ -119,6 +119,8 @@ public class DatasetApplicationService {
|
||||
public Dataset getDataset(String datasetId) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
|
||||
List<DatasetFile> datasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
dataset.setFiles(datasetFiles);
|
||||
return dataset;
|
||||
}
|
||||
|
||||
|
||||
@@ -102,6 +102,10 @@ public class DatasetFileApplicationService {
|
||||
public void deleteDatasetFile(String datasetId, String fileId) {
|
||||
DatasetFile file = getDatasetFile(datasetId, fileId);
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
|
||||
datasetFileRepository.removeById(fileId);
|
||||
dataset.removeFile(file);
|
||||
datasetRepository.updateById(dataset);
|
||||
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
|
||||
if (file.getFilePath().startsWith(dataset.getPath())) {
|
||||
try {
|
||||
@@ -111,9 +115,6 @@ public class DatasetFileApplicationService {
|
||||
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
|
||||
}
|
||||
}
|
||||
datasetFileRepository.removeById(fileId);
|
||||
dataset.removeFile(file);
|
||||
datasetRepository.updateById(dataset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -110,7 +110,7 @@ public class FileMetadataService {
|
||||
.fileType(fileType)
|
||||
.uploadTime(LocalDateTime.now())
|
||||
.lastAccessTime(LocalDateTime.now())
|
||||
.status("UPLOADED")
|
||||
.status("ACTIVE")
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@@ -2,9 +2,13 @@ package com.datamate.datamanagement.domain.model.dataset;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
@@ -25,11 +29,25 @@ public class DatasetFile {
|
||||
private String fileType; // JPG/PNG/DCM/TXT
|
||||
private Long fileSize; // bytes
|
||||
private String checkSum;
|
||||
private List<String> tags;
|
||||
private String tags;
|
||||
private String metadata;
|
||||
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
|
||||
private LocalDateTime uploadTime;
|
||||
private LocalDateTime lastAccessTime;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
|
||||
/**
|
||||
* 解析标签
|
||||
*
|
||||
* @return 标签列表
|
||||
*/
|
||||
public List<String> analyzeTag() {
|
||||
try {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper.readValue(tags, List.class);
|
||||
} catch (Exception e) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
package com.datamate.datamanagement.domain.model.dataset;
|
||||
|
||||
/**
|
||||
* Status constants - single place that holds all status values as strings
|
||||
*/
|
||||
public final class StatusConstants {
|
||||
|
||||
/**
|
||||
* Dataset statuses
|
||||
|
||||
*/
|
||||
public static final class DatasetStatuses {
|
||||
public static final String DRAFT = "DRAFT";
|
||||
public static final String ACTIVE = "ACTIVE";
|
||||
public static final String ARCHIVED = "ARCHIVED";
|
||||
public static final String PROCESSING = "PROCESSING";
|
||||
|
||||
private DatasetStatuses() {}
|
||||
}
|
||||
|
||||
/**
|
||||
* Dataset file statuses
|
||||
|
||||
*/
|
||||
public static final class DatasetFileStatuses {
|
||||
public static final String UPLOADED = "UPLOADED";
|
||||
public static final String PROCESSING = "PROCESSING";
|
||||
public static final String COMPLETED = "COMPLETED";
|
||||
public static final String ERROR = "ERROR";
|
||||
|
||||
private DatasetFileStatuses() {}
|
||||
}
|
||||
|
||||
private StatusConstants() {}
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.datamate.datamanagement.interfaces.converter;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
|
||||
@@ -7,11 +9,16 @@ import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
|
||||
import com.datamate.common.domain.model.ChunkUploadRequest;
|
||||
import com.datamate.datamanagement.domain.model.dataset.Dataset;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.mapstruct.Mapper;
|
||||
import org.mapstruct.Mapping;
|
||||
import org.mapstruct.Named;
|
||||
import org.mapstruct.factory.Mappers;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 数据集文件转换器
|
||||
@@ -26,6 +33,7 @@ public interface DatasetConverter {
|
||||
*/
|
||||
@Mapping(source = "sizeBytes", target = "totalSize")
|
||||
@Mapping(source = "path", target = "targetLocation")
|
||||
@Mapping(source = "files", target = "distribution", qualifiedByName = "getDistribution")
|
||||
DatasetResponse convertToResponse(Dataset dataset);
|
||||
|
||||
/**
|
||||
@@ -49,4 +57,28 @@ public interface DatasetConverter {
|
||||
* 将数据集文件转换为响应
|
||||
*/
|
||||
DatasetFileResponse convertToResponse(DatasetFile datasetFile);
|
||||
|
||||
/**
|
||||
* 获取数据文件的标签分布
|
||||
*
|
||||
* @param datasetFiles 数据集文件
|
||||
* @return 标签分布
|
||||
*/
|
||||
@Named("getDistribution")
|
||||
default Map<String, Long> getDistribution(List<DatasetFile> datasetFiles) {
|
||||
Map<String, Long> distribution = new HashMap<>();
|
||||
if (CollectionUtils.isEmpty(datasetFiles)) {
|
||||
return distribution;
|
||||
}
|
||||
for (DatasetFile datasetFile : datasetFiles) {
|
||||
List<String> tags = datasetFile.analyzeTag();
|
||||
if (CollectionUtils.isEmpty(tags)) {
|
||||
continue;
|
||||
}
|
||||
for (String tag : tags) {
|
||||
distribution.put(tag, distribution.getOrDefault(tag, 0L) + 1);
|
||||
}
|
||||
}
|
||||
return distribution;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 数据集响应DTO
|
||||
@@ -42,8 +43,8 @@ public class DatasetResponse {
|
||||
private LocalDateTime updatedAt;
|
||||
/** 创建者 */
|
||||
private String createdBy;
|
||||
/**
|
||||
* 更新者
|
||||
*/
|
||||
/** 更新者 */
|
||||
private String updatedBy;
|
||||
/** 分布 */
|
||||
private Map<String, Long> distribution ;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { RatioTask } from "@/pages/RatioTask/ratio";
|
||||
import type { RatioTask } from "@/pages/RatioTask/ratio.model.ts";
|
||||
|
||||
export const mockRatioTasks: RatioTask[] = [
|
||||
{
|
||||
|
||||
@@ -49,11 +49,13 @@ export interface Dataset {
|
||||
status: DatasetStatus;
|
||||
size?: string;
|
||||
itemCount?: number;
|
||||
fileCount?: number;
|
||||
createdBy: string;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
tags: string[];
|
||||
targetLocation?: string;
|
||||
distribution?: Record<string, number>;
|
||||
}
|
||||
|
||||
export interface TagItem {
|
||||
|
||||
314
frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx
Normal file
314
frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx
Normal file
@@ -0,0 +1,314 @@
|
||||
import { useState } from "react";
|
||||
import { Button, Card, Form, Divider, message } from "antd";
|
||||
import { ArrowLeft, Play, BarChart3, Shuffle, PieChart } from "lucide-react";
|
||||
import { createRatioTaskUsingPost } from "@/pages/RatioTask/ratio.api.ts";
|
||||
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||
import { useNavigate } from "react-router";
|
||||
import SelectDataset from "@/pages/RatioTask/Create/components/SelectDataset.tsx";
|
||||
import BasicInformation from "@/pages/RatioTask/Create/components/BasicInformation.tsx";
|
||||
import RatioConfig from "@/pages/RatioTask/Create/components/RatioConfig.tsx";
|
||||
|
||||
export default function CreateRatioTask() {
|
||||
|
||||
const navigate = useNavigate();
|
||||
const [form] = Form.useForm();
|
||||
// 配比任务相关状态
|
||||
const [ratioTaskForm, setRatioTaskForm] = useState({
|
||||
name: "",
|
||||
description: "",
|
||||
ratioType: "dataset" as "dataset" | "label",
|
||||
selectedDatasets: [] as string[],
|
||||
ratioConfigs: [] as any[],
|
||||
totalTargetCount: 10000,
|
||||
autoStart: true,
|
||||
});
|
||||
|
||||
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
||||
const [creating, setCreating] = useState(false);
|
||||
const [distributions, setDistributions] = useState<Record<string, Record<string, number>>>({});
|
||||
|
||||
|
||||
// Validates the form, builds the ratio-task request payload and submits it.
// Validation failures and request failures are handled separately so that a failed
// request is reported to the user instead of being silently dropped.
const handleCreateRatioTask = async () => {
  let values;
  try {
    values = await form.validateFields();
  } catch {
    // Validation failed: the form already renders the field-level errors.
    return;
  }

  if (!ratioTaskForm.ratioConfigs.length) {
    message.error("请配置配比项");
    return;
  }

  // Build request payload
  const ratio_method = ratioTaskForm.ratioType === "dataset" ? "DATASET" : "TAG";
  const totals = String(values.totalTargetCount);
  const config = ratioTaskForm.ratioConfigs.map((c) => {
    if (ratio_method === "DATASET") {
      return {
        datasetId: String(c.source),
        counts: String(c.quantity ?? 0),
        filter_conditions: "",
      };
    }
    // TAG mode: source key looks like `${datasetId}_${label}`; split on the first "_"
    // so that labels containing underscores stay intact.
    const source = String(c.source || "");
    const idx = source.indexOf("_");
    const datasetId = idx > 0 ? source.slice(0, idx) : source;
    const label = idx > 0 ? source.slice(idx + 1) : "";
    return {
      datasetId,
      counts: String(c.quantity ?? 0),
      filter_conditions: label ? JSON.stringify({ label }) : "",
    };
  });

  setCreating(true);
  try {
    await createRatioTaskUsingPost({
      name: values.name,
      description: values.description,
      totals,
      ratio_method,
      config,
    });
    message.success("配比任务创建成功");
    navigate("/data/synthesis/ratio-task");
  } catch {
    // The request itself failed; tell the user instead of failing silently.
    message.error("配比任务创建失败");
  } finally {
    setCreating(false);
  }
};
|
||||
|
||||
// dataset selection is handled inside SelectDataset via onSelectedDatasetsChange
|
||||
|
||||
// Dataset mode: sets the quantity for one source, capped so that the sum of all
// configured quantities never exceeds the target total and never drops below 0.
const updateRatioConfig = (source: string, quantity: number) => {
  setRatioTaskForm((prev) => {
    const targetTotal = Number(prev.totalTargetCount) || 0;
    const existingIndex = prev.ratioConfigs.findIndex(
      (config) => config.source === source
    );
    const totalOtherQuantity = prev.ratioConfigs
      .filter((config) => config.source !== source)
      .reduce((sum, config) => sum + (Number(config.quantity) || 0), 0);

    const requested = Number.isFinite(quantity) ? quantity : 0;
    const cappedQuantity = Math.max(
      0,
      Math.min(requested, targetTotal - totalOtherQuantity)
    );

    const newConfig = {
      id: source,
      name: source,
      type: prev.ratioType,
      quantity: cappedQuantity,
      // Derive the percentage from the capped value so it matches the stored quantity;
      // a zero target yields 0 instead of NaN/Infinity.
      percentage:
        targetTotal > 0 ? Math.round((cappedQuantity / targetTotal) * 100) : 0,
      source,
    };

    if (existingIndex >= 0) {
      const newConfigs = [...prev.ratioConfigs];
      newConfigs[existingIndex] = newConfig;
      return { ...prev, ratioConfigs: newConfigs };
    }
    return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
  });
};
|
||||
|
||||
// Splits the target total evenly across the selected datasets; the first
// `leftover` datasets each receive one extra item so the parts sum to the total.
const generateAutoRatio = () => {
  const { selectedDatasets, totalTargetCount, ratioType } = ratioTaskForm;
  const selectedCount = selectedDatasets.length;
  if (selectedCount === 0) return;

  const share = Math.floor(totalTargetCount / selectedCount);
  const leftover = totalTargetCount % selectedCount;

  const evenConfigs = selectedDatasets.map((datasetId, position) => {
    const quantity = share + (position < leftover ? 1 : 0);
    const percentage = Math.round((quantity / totalTargetCount) * 100);
    return {
      id: datasetId,
      name: datasetId,
      type: ratioType,
      quantity,
      percentage,
      source: datasetId,
    };
  });

  setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: evenConfigs }));
};
|
||||
|
||||
// Label mode: sets the quantity for one label of one dataset. The value is capped by
// the remaining target budget and by the label's count in the dataset's distribution.
const updateLabelRatioConfig = (datasetId: string, label: string, quantity: number) => {
  const sourceKey = `${datasetId}_${label}`;
  setRatioTaskForm((prev) => {
    const existingIndex = prev.ratioConfigs.findIndex((c) => c.source === sourceKey);

    let totalOtherQuantity = 0;
    for (const c of prev.ratioConfigs) {
      if (c.source !== sourceKey) {
        totalOtherQuantity = totalOtherQuantity + c.quantity;
      }
    }

    // An unknown label has no upper bound of its own.
    const labelMax = (distributions[datasetId] || {})[label] ?? Infinity;
    const remainingBudget = prev.totalTargetCount - totalOtherQuantity;
    const cappedQuantity = Math.max(0, Math.min(quantity, remainingBudget, labelMax));

    const newConfig = {
      id: sourceKey,
      name: label,
      type: "label",
      quantity: cappedQuantity,
      percentage: Math.round((cappedQuantity / prev.totalTargetCount) * 100),
      source: sourceKey,
    };

    const ratioConfigs =
      existingIndex >= 0
        ? prev.ratioConfigs.map((c, i) => (i === existingIndex ? newConfig : c))
        : [...prev.ratioConfigs, newConfig];
    return { ...prev, ratioConfigs };
  });
};
|
||||
|
||||
// Mirrors the antd form values into local state. Uses a functional update so that
// concurrent state changes are not overwritten by a stale snapshot, and keeps
// `totalTargetCount` numeric (the number input reports its value as a string).
const handleValuesChange = (_: unknown, allValues: Record<string, any>) => {
  setRatioTaskForm((prev) => {
    const parsedTotal = Number(allValues?.totalTargetCount);
    return {
      ...prev,
      ...allValues,
      totalTargetCount: Number.isFinite(parsedTotal)
        ? parsedTotal
        : prev.totalTargetCount,
    };
  });
};
|
||||
|
||||
return (
|
||||
<div className="min-h-screen">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<div className="flex items-center">
|
||||
<Button
|
||||
type="text"
|
||||
onClick={() => navigate("/data/synthesis/ratio-task")}
|
||||
>
|
||||
<ArrowLeft className="w-4 h-4 mr-1" />
|
||||
</Button>
|
||||
<h1 className="text-xl font-bold bg-clip-text">创建配比任务</h1>
|
||||
</div>
|
||||
</div>
|
||||
<Card className="overflow-y-auto p-2">
|
||||
<Form
|
||||
form={form}
|
||||
initialValues={ratioTaskForm}
|
||||
onValuesChange={handleValuesChange}
|
||||
layout="vertical"
|
||||
>
|
||||
<div className="grid grid-cols-12 gap-6">
|
||||
{/* 左侧:数据集选择 */}
|
||||
<SelectDataset
|
||||
selectedDatasets={ratioTaskForm.selectedDatasets}
|
||||
ratioType={ratioTaskForm.ratioType}
|
||||
onRatioTypeChange={(value) => setRatioTaskForm({ ...ratioTaskForm, ratioType: value, ratioConfigs: [] })}
|
||||
onSelectedDatasetsChange={(next) => {
|
||||
setRatioTaskForm((prev) => ({
|
||||
...prev,
|
||||
selectedDatasets: next,
|
||||
ratioConfigs: prev.ratioConfigs.filter((c) => {
|
||||
const id = String(c.source);
|
||||
// keep only items whose dataset id remains selected
|
||||
const dsId = id.includes("_") ? id.split("_")[0] : id;
|
||||
return next.includes(dsId);
|
||||
}),
|
||||
}));
|
||||
}}
|
||||
onDistributionsChange={(next) => setDistributions(next)}
|
||||
onDatasetsChange={(list) => setDatasets(list)}
|
||||
/>
|
||||
{/* 右侧:配比配置 */}
|
||||
<div className="col-span-7">
|
||||
<h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
|
||||
<PieChart className="w-5 h-5" />
|
||||
配比配置
|
||||
</h2>
|
||||
<Card>
|
||||
<div className="flex items-center justify-between mb-4">
|
||||
<div>
|
||||
<span className="flex items-center gap-2 font-semibold">
|
||||
<BarChart3 className="w-5 h-5" />
|
||||
配比设置
|
||||
</span>
|
||||
<div className="text-gray-500 text-xs">
|
||||
设置每个数据集的配比数量
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
icon={<Shuffle />}
|
||||
size="small"
|
||||
onClick={generateAutoRatio}
|
||||
disabled={ratioTaskForm.selectedDatasets.length === 0}
|
||||
>
|
||||
平均分配
|
||||
</Button>
|
||||
</div>
|
||||
<BasicInformation totalTargetCount={ratioTaskForm.totalTargetCount} />
|
||||
<RatioConfig
|
||||
ratioType={ratioTaskForm.ratioType}
|
||||
selectedDatasets={ratioTaskForm.selectedDatasets}
|
||||
datasets={datasets}
|
||||
ratioConfigs={ratioTaskForm.ratioConfigs as any}
|
||||
totalTargetCount={ratioTaskForm.totalTargetCount}
|
||||
distributions={distributions}
|
||||
onUpdateDatasetQuantity={(datasetId, quantity) => updateRatioConfig(datasetId, quantity)}
|
||||
onUpdateLabelQuantity={(datasetId, label, quantity) => updateLabelRatioConfig(datasetId, label, quantity)}
|
||||
/>
|
||||
{/* 配比预览 */}
|
||||
{ratioTaskForm.ratioConfigs.length > 0 && (
|
||||
<div className="mb-4">
|
||||
<span className="text-sm font-medium">配比预览</span>
|
||||
<div className="p-3 bg-gray-50 rounded-lg">
|
||||
<div className="grid grid-cols-2 gap-4 text-sm">
|
||||
<div>
|
||||
<span className="text-gray-500">总配比数量:</span>
|
||||
<span className="ml-2 font-medium">
|
||||
{ratioTaskForm.ratioConfigs
|
||||
.reduce((sum, config) => sum + config.quantity, 0)
|
||||
.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-gray-500">目标数量:</span>
|
||||
<span className="ml-2 font-medium">
|
||||
{ratioTaskForm.totalTargetCount.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-gray-500">配比项目:</span>
|
||||
<span className="ml-2 font-medium">
|
||||
{ratioTaskForm.ratioConfigs.length}个
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<Divider />
|
||||
<div className="flex justify-end gap-2">
|
||||
<Button
|
||||
onClick={() => navigate("/data/synthesis/ratio-task")}
|
||||
>
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
type="primary"
|
||||
onClick={handleCreateRatioTask}
|
||||
loading={creating}
|
||||
disabled={
|
||||
!ratioTaskForm.name ||
|
||||
ratioTaskForm.ratioConfigs.length === 0
|
||||
}
|
||||
>
|
||||
<Play className="w-4 h-4 mr-2" />
|
||||
创建任务
|
||||
</Button>
|
||||
</div>
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
</Form>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
import React from "react";
|
||||
import { Form, Input } from "antd";
|
||||
|
||||
const { TextArea } = Input;
|
||||
|
||||
interface BasicInformationProps {
|
||||
totalTargetCount: number;
|
||||
}
|
||||
|
||||
// Form fields for the basic task information: name, target total and description.
// The fields bind to the enclosing antd Form through their `name` props.
const BasicInformation: React.FC<BasicInformationProps> = () => (
  <div className="grid grid-cols-2 gap-4 mb-4">
    <Form.Item
      name="name"
      label="任务名称"
      rules={[{ required: true, message: "请输入配比任务名称" }]}
    >
      <Input placeholder="输入配比任务名称" />
    </Form.Item>

    <Form.Item
      name="totalTargetCount"
      label="目标总数量"
      rules={[{ required: true, message: "请输入目标总数量" }]}
    >
      <Input type="number" placeholder="目标总数量" min={1} />
    </Form.Item>

    <Form.Item name="description" label="任务描述" className="col-span-2">
      <TextArea placeholder="描述配比任务的目的和要求(可选)" rows={2} />
    </Form.Item>
  </div>
);
|
||||
|
||||
export default BasicInformation;
|
||||
132
frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx
Normal file
132
frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx
Normal file
@@ -0,0 +1,132 @@
|
||||
import React from "react";
|
||||
import { Badge, Card, Input, Progress } from "antd";
|
||||
import { BarChart3 } from "lucide-react";
|
||||
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||
|
||||
interface RatioConfigItem {
|
||||
id: string;
|
||||
name: string;
|
||||
type: "dataset" | "label";
|
||||
quantity: number;
|
||||
percentage: number;
|
||||
source: string;
|
||||
}
|
||||
|
||||
interface RatioConfigProps {
|
||||
ratioType: "dataset" | "label";
|
||||
selectedDatasets: string[];
|
||||
datasets: Dataset[];
|
||||
ratioConfigs: RatioConfigItem[];
|
||||
totalTargetCount: number;
|
||||
distributions: Record<string, Record<string, number>>;
|
||||
onUpdateDatasetQuantity: (datasetId: string, quantity: number) => void;
|
||||
onUpdateLabelQuantity: (datasetId: string, label: string, quantity: number) => void;
|
||||
}
|
||||
|
||||
// Renders one card per selected dataset. In dataset mode the card holds a single
// quantity input; in label mode it holds one quantity input per label of the
// dataset's distribution. Quantity changes are reported through the callbacks.
const RatioConfig: React.FC<RatioConfigProps> = ({
  ratioType,
  selectedDatasets,
  datasets,
  ratioConfigs,
  totalTargetCount,
  distributions,
  onUpdateDatasetQuantity,
  onUpdateLabelQuantity,
}) => {
  const totalConfigured = ratioConfigs.reduce((sum, c) => sum + (c.quantity || 0), 0);

  // Guarded percentage: yields 0 instead of NaN/Infinity when the target is 0 or empty.
  const toPercent = (quantity: number) =>
    totalTargetCount > 0 ? Math.round((quantity / totalTargetCount) * 100) : 0;

  return (
    <div className="mb-4">
      <div className="flex items-center justify-between">
        <span className="text-sm font-medium">配比设置</span>
        <span className="text-xs text-gray-500">
          已配置: {totalConfigured} / {totalTargetCount}
        </span>
      </div>
      {selectedDatasets.length === 0 ? (
        <div className="text-center py-8 text-gray-500">
          <BarChart3 className="w-12 h-12 mx-auto mb-2 text-gray-300" />
          <p className="text-sm">请先选择数据集</p>
        </div>
      ) : (
        <div style={{ maxHeight: 500, overflowY: "auto" }}>
          {selectedDatasets.map((datasetId) => {
            const dataset = datasets.find((d) => String(d.id) === datasetId);
            if (!dataset) return null;

            const config = ratioConfigs.find((c) => c.source === datasetId);
            const currentQuantity = config?.quantity || 0;
            const labelDistribution = distributions[String(dataset.id)];

            // Label-mode configs are keyed `${datasetId}_${label}`, so no config matches the
            // bare dataset id; sum the dataset's label entries for the header percentage.
            const labelPrefix = `${datasetId}_`;
            const labelTotal = ratioConfigs
              .filter((c) => String(c.source).startsWith(labelPrefix))
              .reduce((sum, c) => sum + (c.quantity || 0), 0);
            const headerPercentage =
              ratioType === "dataset" ? config?.percentage || 0 : toPercent(labelTotal);

            return (
              <Card key={datasetId} size="small" className="mb-2">
                <div className="flex items-center justify-between mb-3">
                  <div className="flex items-center gap-2">
                    <span className="font-medium text-sm">{dataset.name}</span>
                    <Badge color="gray">{dataset.fileCount}条</Badge>
                  </div>
                  <div className="text-xs text-gray-500">{headerPercentage}%</div>
                </div>
                {ratioType === "dataset" ? (
                  <div>
                    <div className="flex items-center gap-2 mb-2">
                      <span className="text-xs">数量:</span>
                      <Input
                        type="number"
                        value={currentQuantity}
                        onChange={(e) => onUpdateDatasetQuantity(datasetId, Number(e.target.value))}
                        style={{ width: 80 }}
                        min={0}
                        max={Math.min(dataset.fileCount || 0, totalTargetCount)}
                      />
                      <span className="text-xs text-gray-500">条</span>
                    </div>
                    <Progress percent={toPercent(currentQuantity)} size="small" />
                  </div>
                ) : (
                  <div>
                    {!labelDistribution ? (
                      <div className="text-xs text-gray-400">加载标签分布...</div>
                    ) : Object.entries(labelDistribution).length === 0 ? (
                      <div className="text-xs text-gray-400">该数据集暂无标签</div>
                    ) : (
                      <div className="flex flex-col gap-2">
                        {Object.entries(labelDistribution).map(([label, count]) => {
                          const sourceKey = `${datasetId}_${label}`;
                          const labelConfig = ratioConfigs.find((c) => c.source === sourceKey);
                          const labelQuantity = labelConfig?.quantity || 0;
                          return (
                            <div key={label} className="flex items-center justify-between gap-2">
                              <div className="flex items-center gap-2">
                                <Badge color="gray">{label}</Badge>
                                <span className="text-xs text-gray-500">{count}条</span>
                              </div>
                              <div className="flex items-center gap-2">
                                <span className="text-xs">数量:</span>
                                <Input
                                  type="number"
                                  value={labelQuantity}
                                  onChange={(e) => onUpdateLabelQuantity(datasetId, label, Number(e.target.value))}
                                  style={{ width: 80 }}
                                  min={0}
                                  max={Math.min(Number(count) || 0, totalTargetCount)}
                                />
                                <span className="text-xs text-gray-500">条</span>
                              </div>
                            </div>
                          );
                        })}
                      </div>
                    )}
                  </div>
                )}
              </Card>
            );
          })}
        </div>
      )}
    </div>
  );
};
|
||||
|
||||
export default RatioConfig;
|
||||
250
frontend/src/pages/RatioTask/Create/components/SelectDataset.tsx
Normal file
250
frontend/src/pages/RatioTask/Create/components/SelectDataset.tsx
Normal file
@@ -0,0 +1,250 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import { Badge, Button, Card, Checkbox, Input, Pagination, Select } from "antd";
|
||||
import { Database, Search as SearchIcon } from "lucide-react";
|
||||
import type { Dataset } from "@/pages/DataManagement/dataset.model.ts";
|
||||
import { queryDatasetsUsingGet, queryDatasetByIdUsingGet, queryDatasetStatisticsByIdUsingGet } from "@/pages/DataManagement/dataset.api.ts";
|
||||
|
||||
interface SelectDatasetProps {
|
||||
selectedDatasets: string[];
|
||||
ratioType: "dataset" | "label";
|
||||
onRatioTypeChange: (val: "dataset" | "label") => void;
|
||||
onSelectedDatasetsChange: (next: string[]) => void;
|
||||
onDistributionsChange?: (next: Record<string, Record<string, number>>) => void;
|
||||
onDatasetsChange?: (list: Dataset[]) => void;
|
||||
}
|
||||
|
||||
const SelectDataset: React.FC<SelectDatasetProps> = ({
|
||||
selectedDatasets,
|
||||
ratioType,
|
||||
onRatioTypeChange,
|
||||
onSelectedDatasetsChange,
|
||||
onDistributionsChange,
|
||||
onDatasetsChange,
|
||||
}) => {
|
||||
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [pagination, setPagination] = useState({ page: 1, size: 10, total: 0 });
|
||||
const [distributions, setDistributions] = useState<Record<string, Record<string, number>>>({});
|
||||
|
||||
// Fetch dataset list
|
||||
useEffect(() => {
|
||||
// Loads one page of datasets for the current page/size/keyword and publishes the
// list to the parent through onDatasetsChange.
const fetchDatasets = async () => {
  setLoading(true);
  try {
    const { data } = await queryDatasetsUsingGet({
      page: pagination.page,
      size: pagination.size,
      keyword: searchQuery?.trim() || undefined,
    });
    const list = data?.content || data?.data || [];
    setDatasets(list);
    onDatasetsChange?.(list);
    setPagination((prev) => ({ ...prev, total: data?.totalElements ?? data?.total ?? 0 }));
  } catch (error) {
    // The effect calls this function without awaiting it, so an uncaught failure would
    // surface as an unhandled promise rejection. Keep the previous list and log instead.
    console.error("Failed to load datasets", error);
  } finally {
    setLoading(false);
  }
};
|
||||
fetchDatasets();
|
||||
}, [pagination.page, pagination.size, searchQuery]);
|
||||
|
||||
// Fetch label distributions when in label mode
|
||||
useEffect(() => {
|
||||
// Normalizes a raw distribution payload into a label -> count record.
// Arrays are read as entries carrying a label-like key and a numeric count-like value;
// plain objects are used as-is; anything else yields undefined.
const toDistributionRecord = (raw: any): Record<string, number> | undefined => {
  if (Array.isArray(raw)) {
    const record: Record<string, number> = {};
    raw.forEach((it: any) => {
      const key = it?.label ?? it?.name ?? it?.tag ?? it?.key;
      const val = it?.count ?? it?.value ?? it?.num ?? it?.total;
      if (key != null && typeof val === "number") record[String(key)] = val;
    });
    return record;
  }
  if (raw && typeof raw === "object") {
    return raw as Record<string, number>;
  }
  return undefined;
};

// Resolves the label distribution of one dataset: statistics endpoint first, dataset
// detail endpoint as fallback. Never rejects; failures resolve to an empty record.
const resolveDistribution = async (id: string): Promise<Record<string, number>> => {
  let dist: Record<string, number> | undefined = undefined;

  try {
    const statRes = await queryDatasetStatisticsByIdUsingGet(id);
    const stats: any = statRes?.data;
    if (stats) {
      const candidates: any[] = [
        stats.labelDistribution,
        stats.tagDistribution,
        stats.label_stats,
        stats.labels,
        stats.distribution,
      ];
      // The first object-like candidate wins (arrays are objects too).
      dist = toDistributionRecord(candidates.find((c) => c && typeof c === "object"));
    }
  } catch {
    dist = undefined;
  }

  if (!dist) {
    try {
      const detRes = await queryDatasetByIdUsingGet(id);
      const det: any = detRes?.data;
      if (det) {
        // The first truthy field wins, matching the statistics field names.
        const picked =
          det.distribution ||
          det.labelDistribution ||
          det.tagDistribution ||
          det.label_stats ||
          det.labels ||
          undefined;
        dist = toDistributionRecord(picked);
      }
    } catch {
      dist = undefined;
    }
  }

  return dist || {};
};

// Fetches the label distributions of all listed datasets that are not cached yet
// (label mode only) and publishes the merged cache through onDistributionsChange.
const fetchDistributions = async () => {
  if (ratioType !== "label" || !datasets?.length) return;
  const idsToFetch = datasets.map((d) => String(d.id)).filter((id) => !distributions[id]);
  if (!idsToFetch.length) return;
  try {
    // Each dataset (including its fallback request) is resolved in parallel.
    const results = await Promise.all(
      idsToFetch.map(async (id) => ({ id, dist: await resolveDistribution(id) }))
    );

    const next: Record<string, Record<string, number>> = { ...distributions };
    for (const { id, dist } of results) {
      next[String(id)] = dist;
    }
    setDistributions(next);
    onDistributionsChange?.(next);
  } catch {
    // ignore
  }
};
|
||||
fetchDistributions();
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [ratioType, datasets]);
|
||||
|
||||
// Adds the dataset to the selection (without duplicates) or removes it, and reports
// the resulting id list to the parent.
const onToggleDataset = (datasetId: string, checked: boolean) => {
  if (!checked) {
    onSelectedDatasetsChange(selectedDatasets.filter((id) => id !== datasetId));
    return;
  }
  const uniqueIds = new Set([...selectedDatasets, datasetId]);
  onSelectedDatasetsChange(Array.from(uniqueIds));
};
|
||||
|
||||
/** Reports an empty selection to the parent, deselecting every dataset. */
const onClearSelection = (): void => {
  const nothingSelected: string[] = [];
  onSelectedDatasetsChange(nothingSelected);
};
|
||||
|
||||
return (
|
||||
<div className="col-span-5">
|
||||
<h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
|
||||
<Database className="w-5 h-5" />
|
||||
数据集选择
|
||||
</h2>
|
||||
<Card>
|
||||
<div className="flex items-center gap-4 mb-4">
|
||||
<span className="text-sm">配比方式:</span>
|
||||
<Select
|
||||
style={{ width: 120 }}
|
||||
value={ratioType}
|
||||
onChange={(v) => onRatioTypeChange(v)}
|
||||
options={[
|
||||
{ label: "按数据集", value: "dataset" },
|
||||
{ label: "按标签", value: "label" },
|
||||
]}
|
||||
/>
|
||||
</div>
|
||||
<Input
|
||||
prefix={<SearchIcon className="text-gray-400" />}
|
||||
placeholder="搜索数据集"
|
||||
value={searchQuery}
|
||||
onChange={(e) => {
|
||||
setSearchQuery(e.target.value);
|
||||
setPagination((p) => ({ ...p, page: 1 }));
|
||||
}}
|
||||
/>
|
||||
<div style={{ maxHeight: 500, overflowY: "auto" }}>
|
||||
{loading && (
|
||||
<div className="text-center text-gray-500 py-8">正在加载数据集...</div>
|
||||
)}
|
||||
{!loading &&
|
||||
datasets.map((dataset) => {
|
||||
const idStr = String(dataset.id);
|
||||
const checked = selectedDatasets.includes(idStr);
|
||||
return (
|
||||
<Card
|
||||
key={dataset.id}
|
||||
size="small"
|
||||
className={`mb-2 cursor-pointer ${checked ? "border-blue-500" : "hover:border-blue-200"}`}
|
||||
onClick={() => onToggleDataset(idStr, !checked)}
|
||||
>
|
||||
<div className="flex items-start gap-3">
|
||||
<Checkbox
|
||||
checked={checked}
|
||||
onChange={(e) => onToggleDataset(idStr, e.target.checked)}
|
||||
/>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="font-medium text-sm truncate">{dataset.name}</span>
|
||||
<Badge color="blue">{dataset.datasetType}</Badge>
|
||||
</div>
|
||||
<div className="text-xs text-gray-500 mt-1">{dataset.description}</div>
|
||||
<div className="flex items-center gap-4 mt-2 text-xs text-gray-500">
|
||||
<span>{dataset.fileCount}条</span>
|
||||
<span>{dataset.size}</span>
|
||||
</div>
|
||||
{ratioType === "label" && (
|
||||
<div className="mt-2">
|
||||
{distributions[idStr] ? (
|
||||
Object.entries(distributions[idStr]).length > 0 ? (
|
||||
<div className="flex flex-wrap gap-2 text-xs">
|
||||
{Object.entries(distributions[idStr])
|
||||
.slice(0, 8)
|
||||
.map(([tag, count]) => (
|
||||
<Badge key={tag} color="gray">{`${tag}: ${count}`}</Badge>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-xs text-gray-400">未检测到标签分布</div>
|
||||
)
|
||||
) : (
|
||||
<div className="text-xs text-gray-400">加载标签分布...</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
<div className="flex justify-between mt-3 items-center">
|
||||
<span className="text-sm text-gray-600">已选择 {selectedDatasets.length} 个数据集</span>
|
||||
<div className="flex items-center gap-3">
|
||||
<Button size="small" onClick={onClearSelection}>
|
||||
清空选择
|
||||
</Button>
|
||||
<Pagination
|
||||
size="small"
|
||||
current={pagination.page}
|
||||
pageSize={pagination.size}
|
||||
total={pagination.total}
|
||||
showSizeChanger
|
||||
onChange={(p, ps) => setPagination((prev) => ({ ...prev, page: p, size: ps }))}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default SelectDataset;
|
||||
@@ -1,571 +0,0 @@
|
||||
import { useState } from "react";
|
||||
import {
|
||||
Button,
|
||||
Card,
|
||||
Input,
|
||||
Select,
|
||||
Badge,
|
||||
Progress,
|
||||
Checkbox,
|
||||
Switch,
|
||||
Form,
|
||||
Divider,
|
||||
message,
|
||||
} from "antd";
|
||||
import {
|
||||
ArrowLeft,
|
||||
Play,
|
||||
Search as SearchIcon,
|
||||
Database,
|
||||
BarChart3,
|
||||
Shuffle,
|
||||
PieChart,
|
||||
} from "lucide-react";
|
||||
import type { RatioConfig, RatioTask } from "@/pages/RatioTask/ratio";
|
||||
import { mockRatioTasks } from "@/mock/ratio";
|
||||
import type { Dataset } from "@/pages/DataManagement/dataset.model";
|
||||
import { useNavigate } from "react-router";
|
||||
import DevelopmentInProgress from "@/components/DevelopmentInProgress";
|
||||
|
||||
const { TextArea } = Input;
|
||||
const { Option } = Select;
|
||||
|
||||
export default function CreateRatioTask() {
|
||||
return <DevelopmentInProgress showTime="2025.11.30" />;
|
||||
|
||||
const navigate = useNavigate();
|
||||
const [form] = Form.useForm();
|
||||
// 配比任务相关状态
|
||||
const [ratioTaskForm, setRatioTaskForm] = useState({
|
||||
name: "",
|
||||
description: "",
|
||||
ratioType: "dataset" as "dataset" | "label",
|
||||
selectedDatasets: [] as string[],
|
||||
ratioConfigs: [] as RatioConfig[],
|
||||
totalTargetCount: 10000,
|
||||
autoStart: true,
|
||||
});
|
||||
|
||||
const [tasks, setTasks] = useState<RatioTask[]>(mockRatioTasks);
|
||||
const [datasets] = useState<Dataset[]>([]);
|
||||
|
||||
/**
 * Validates the form and, when at least one ratio item is configured, prepends a
 * new task to the local task list, resets the form state and navigates back to
 * the ratio task list.
 *
 * Shows an error message and stops when no ratio item has been configured.
 */
const handleCreateRatioTask = async () => {
  try {
    const values = await form.validateFields();
    if (!ratioTaskForm.ratioConfigs.length) {
      message.error("请配置配比项");
      return;
    }

    const newTask: RatioTask = {
      id: Date.now(),
      name: values.name,
      status: ratioTaskForm.autoStart ? "pending" : "paused",
      progress: 0,
      sourceDatasets: ratioTaskForm.selectedDatasets,
      targetCount: values.totalTargetCount,
      generatedCount: 0,
      // Date part only (YYYY-MM-DD) of the ISO timestamp.
      createdAt: new Date().toISOString().split("T")[0],
      ratioType: ratioTaskForm.ratioType,
      estimatedTime: "预计 20 分钟",
      ratioConfigs: ratioTaskForm.ratioConfigs,
    };

    // Updater form so the new task is prepended to the latest list rather than
    // to the snapshot captured when this handler was created.
    setTasks((prev) => [newTask, ...prev]);
    setRatioTaskForm({
      name: "",
      description: "",
      ratioType: "dataset",
      selectedDatasets: [],
      ratioConfigs: [],
      totalTargetCount: 10000,
      autoStart: true,
    });
    form.resetFields();
    message.success("配比任务创建成功");
    // Same list route that the back and cancel buttons of this page navigate to.
    navigate("/data/synthesis/ratio-task");
  } catch {
    // Validation failed. NOTE(review): this also silences any other error thrown above.
  }
};
|
||||
|
||||
/**
 * Selects or deselects a dataset in the ratio task form state.
 *
 * Selecting is idempotent: both the dataset Card's `onClick` and the Checkbox's
 * `onChange` inside it call this handler, so a single click on the checkbox can
 * invoke it twice with `checked = true`; the id must not be appended twice.
 * Deselecting also drops the ratio config whose `source` equals the dataset id.
 *
 * @param datasetId id of the dataset being toggled
 * @param checked   true to select, false to deselect
 */
const handleDatasetSelection = (datasetId: string, checked: boolean) => {
  setRatioTaskForm((prev) => {
    if (checked) {
      // Already selected: keep the previous state object untouched.
      if (prev.selectedDatasets.includes(datasetId)) {
        return prev;
      }
      return {
        ...prev,
        selectedDatasets: [...prev.selectedDatasets, datasetId],
      };
    }
    // NOTE(review): label-mode configs use `${datasetId}_${label}` as source and
    // are not removed here — confirm whether they should be.
    return {
      ...prev,
      selectedDatasets: prev.selectedDatasets.filter((id) => id !== datasetId),
      ratioConfigs: prev.ratioConfigs.filter(
        (config) => config.source !== datasetId
      ),
    };
  });
};
|
||||
|
||||
/**
 * Creates or replaces the ratio config identified by `source`.
 *
 * The stored quantity is limited to the range [0, total target count minus the
 * quantities of all other configs], and the percentage is derived from that
 * stored quantity so the two values always agree.
 *
 * @param source   config key (a dataset id, or `${datasetId}_${label}` in label mode)
 * @param quantity requested number of records for this source
 */
const updateRatioConfig = (source: string, quantity: number) => {
  setRatioTaskForm((prev) => {
    const existingIndex = prev.ratioConfigs.findIndex(
      (config) => config.source === source
    );
    const totalOtherQuantity = prev.ratioConfigs
      .filter((config) => config.source !== source)
      .reduce((sum, config) => sum + config.quantity, 0);

    // The total can arrive as a string from the number input; normalise it once.
    const total = Number(prev.totalTargetCount);
    const remaining = total - totalOtherQuantity;
    const requested = Number.isFinite(quantity) ? quantity : 0;
    // Never exceed what is left of the total, and never go below zero when the
    // other configs already use up (or exceed) the total.
    const clamped = Math.max(0, Math.min(requested, remaining));
    // Guard against a zero/invalid total, which would yield NaN or Infinity.
    const percentage = total > 0 ? Math.round((clamped / total) * 100) : 0;

    const newConfig: RatioConfig = {
      id: source,
      name: source,
      type: prev.ratioType,
      quantity: clamped,
      percentage,
      source,
    };

    if (existingIndex >= 0) {
      const newConfigs = [...prev.ratioConfigs];
      newConfigs[existingIndex] = newConfig;
      return { ...prev, ratioConfigs: newConfigs };
    }
    return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
  });
};
|
||||
|
||||
/**
 * Splits the total target count evenly across the selected datasets and replaces
 * all ratio configs with the result. When the total does not divide evenly, the
 * first `remainder` datasets each receive one extra record. Does nothing when no
 * dataset is selected.
 */
const generateAutoRatio = () => {
  const { selectedDatasets, totalTargetCount, ratioType } = ratioTaskForm;
  const selectedCount = selectedDatasets.length;
  if (selectedCount === 0) {
    return;
  }

  const baseQuantity = Math.floor(totalTargetCount / selectedCount);
  const remainder = totalTargetCount % selectedCount;

  const newConfigs: RatioConfig[] = [];
  selectedDatasets.forEach((datasetId, index) => {
    // Hand out the leftover records one by one to the leading datasets.
    const extra = index < remainder ? 1 : 0;
    const quantity = baseQuantity + extra;
    const percentage = Math.round((quantity / totalTargetCount) * 100);
    newConfigs.push({
      id: datasetId,
      name: datasetId,
      type: ratioType,
      quantity,
      percentage,
      source: datasetId,
    });
  });

  setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: newConfigs }));
};
|
||||
|
||||
/**
 * Mirrors the current form values into `ratioTaskForm`.
 *
 * Uses the updater form of the state setter so the merge is applied on top of
 * the latest state; merging into the captured `ratioTaskForm` snapshot could
 * discard another update to the same state queued during the same event.
 *
 * @param _         changed values (unused)
 * @param allValues all current form values
 */
const handleValuesChange = (_: unknown, allValues: Partial<typeof ratioTaskForm>) => {
  setRatioTaskForm((prev) => ({ ...prev, ...allValues }));
};
|
||||
|
||||
return (
|
||||
<div className="min-h-screen">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<div className="flex items-center">
|
||||
<Button
|
||||
type="text"
|
||||
onClick={() => navigate("/data/synthesis/ratio-task")}
|
||||
>
|
||||
<ArrowLeft className="w-4 h-4 mr-1" />
|
||||
</Button>
|
||||
<h1 className="text-xl font-bold bg-clip-text">创建配比任务</h1>
|
||||
</div>
|
||||
</div>
|
||||
<Card className="overflow-y-auto p-2">
|
||||
<Form
|
||||
form={form}
|
||||
initialValues={ratioTaskForm}
|
||||
onValuesChange={handleValuesChange}
|
||||
layout="vertical"
|
||||
>
|
||||
<div className="grid grid-cols-12 gap-6">
|
||||
{/* 左侧:数据集选择 */}
|
||||
<div className="col-span-5">
|
||||
<h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
|
||||
<Database className="w-5 h-5" />
|
||||
数据集选择
|
||||
</h2>
|
||||
<Card>
|
||||
<div className="flex items-center gap-4 mb-4">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm">配比方式:</span>
|
||||
<Form.Item name="ratioType" noStyle>
|
||||
<Select
|
||||
style={{ width: 120 }}
|
||||
onChange={(value: "dataset" | "label") =>
|
||||
setRatioTaskForm({
|
||||
...ratioTaskForm,
|
||||
ratioType: value,
|
||||
ratioConfigs: [],
|
||||
})
|
||||
}
|
||||
>
|
||||
<Option value="dataset">按数据集</Option>
|
||||
<Option value="label">按标签</Option>
|
||||
</Select>
|
||||
</Form.Item>
|
||||
</div>
|
||||
<Input
|
||||
prefix={<SearchIcon className="text-gray-400" />}
|
||||
placeholder="搜索数据集"
|
||||
style={{ width: 180 }}
|
||||
// 可加搜索逻辑
|
||||
/>
|
||||
</div>
|
||||
<div style={{ maxHeight: 500, overflowY: "auto" }}>
|
||||
{datasets.map((dataset) => (
|
||||
<Card
|
||||
key={dataset.id}
|
||||
size="small"
|
||||
className={`mb-2 cursor-pointer ${
|
||||
ratioTaskForm.selectedDatasets.includes(dataset.id)
|
||||
? "border-blue-500"
|
||||
: "hover:border-blue-200"
|
||||
}`}
|
||||
onClick={() =>
|
||||
handleDatasetSelection(
|
||||
dataset.id,
|
||||
!ratioTaskForm.selectedDatasets.includes(dataset.id)
|
||||
)
|
||||
}
|
||||
>
|
||||
<div className="flex items-start gap-3">
|
||||
<Checkbox
|
||||
checked={ratioTaskForm.selectedDatasets.includes(
|
||||
dataset.id
|
||||
)}
|
||||
onChange={(e) =>
|
||||
handleDatasetSelection(dataset.id, e.target.checked)
|
||||
}
|
||||
/>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="font-medium text-sm truncate">
|
||||
{dataset.name}
|
||||
</span>
|
||||
<Badge color="blue">{dataset.type}</Badge>
|
||||
</div>
|
||||
<div className="text-xs text-gray-500 mt-1">
|
||||
{dataset.description}
|
||||
</div>
|
||||
<div className="flex items-center gap-4 mt-2 text-xs text-gray-500">
|
||||
<span>{dataset.records?.toLocaleString()}条</span>
|
||||
<span>{dataset.size}</span>
|
||||
<span>{dataset.format}</span>
|
||||
</div>
|
||||
{ratioTaskForm.ratioType === "label" &&
|
||||
dataset.labels && (
|
||||
<div className="flex flex-wrap gap-1 mt-2">
|
||||
{dataset.labels.map((label, index) => (
|
||||
<Badge key={index} color="gray">
|
||||
{label}
|
||||
</Badge>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
))}
|
||||
</div>
|
||||
<div className="flex items-center justify-between p-3 bg-gray-50 rounded-lg mt-4">
|
||||
<span className="text-sm text-gray-600">
|
||||
已选择 {ratioTaskForm.selectedDatasets.length} 个数据集
|
||||
</span>
|
||||
<Button
|
||||
size="small"
|
||||
onClick={() =>
|
||||
setRatioTaskForm({
|
||||
...ratioTaskForm,
|
||||
selectedDatasets: [],
|
||||
ratioConfigs: [],
|
||||
})
|
||||
}
|
||||
>
|
||||
清空选择
|
||||
</Button>
|
||||
</div>
|
||||
</Card>
|
||||
</div>
|
||||
{/* 右侧:配比配置 */}
|
||||
<div className="col-span-7">
|
||||
<h2 className="font-medium text-gray-900 text-lg mb-2 flex items-center gap-2">
|
||||
<PieChart className="w-5 h-5" />
|
||||
配比配置
|
||||
</h2>
|
||||
<Card>
|
||||
<div className="flex items-center justify-between mb-4">
|
||||
<div>
|
||||
<span className="flex items-center gap-2 font-semibold">
|
||||
<BarChart3 className="w-5 h-5" />
|
||||
配比设置
|
||||
</span>
|
||||
<div className="text-gray-500 text-xs">
|
||||
设置每个数据集的配比数量
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
icon={<Shuffle />}
|
||||
size="small"
|
||||
onClick={generateAutoRatio}
|
||||
disabled={ratioTaskForm.selectedDatasets.length === 0}
|
||||
>
|
||||
平均分配
|
||||
</Button>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-4 mb-4">
|
||||
<Form.Item
|
||||
label="任务名称"
|
||||
name="name"
|
||||
rules={[{ required: true, message: "请输入配比任务名称" }]}
|
||||
>
|
||||
<Input
|
||||
placeholder="输入配比任务名称"
|
||||
value={ratioTaskForm.name}
|
||||
/>
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
label="目标总数量"
|
||||
name="totalTargetCount"
|
||||
rules={[{ required: true, message: "请输入目标总数量" }]}
|
||||
>
|
||||
<Input
|
||||
type="number"
|
||||
placeholder="目标总数量"
|
||||
min={1}
|
||||
value={ratioTaskForm.totalTargetCount}
|
||||
/>
|
||||
</Form.Item>
|
||||
</div>
|
||||
<Form.Item label="任务描述" name="description">
|
||||
<TextArea
|
||||
placeholder="描述配比任务的目的和要求(可选)"
|
||||
rows={2}
|
||||
value={ratioTaskForm.description}
|
||||
/>
|
||||
</Form.Item>
|
||||
<div className="mb-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-sm font-medium">配比设置</span>
|
||||
<span className="text-xs text-gray-500">
|
||||
已配置:{" "}
|
||||
{ratioTaskForm.ratioConfigs.reduce(
|
||||
(sum, config) => sum + config.quantity,
|
||||
0
|
||||
)}{" "}
|
||||
/ {ratioTaskForm.totalTargetCount}
|
||||
</span>
|
||||
</div>
|
||||
{ratioTaskForm.selectedDatasets.length === 0 ? (
|
||||
<div className="text-center py-8 text-gray-500">
|
||||
<BarChart3 className="w-12 h-12 mx-auto mb-2 text-gray-300" />
|
||||
<p className="text-sm">请先选择数据集</p>
|
||||
</div>
|
||||
) : (
|
||||
<div style={{ maxHeight: 500, overflowY: "auto" }}>
|
||||
{ratioTaskForm.selectedDatasets.map((datasetId) => {
|
||||
const dataset = datasets.find(
|
||||
(d) => d.id === datasetId
|
||||
);
|
||||
const config = ratioTaskForm.ratioConfigs.find(
|
||||
(c) => c.source === datasetId
|
||||
);
|
||||
const currentQuantity = config?.quantity || 0;
|
||||
if (!dataset) return null;
|
||||
return (
|
||||
<Card key={datasetId} size="small" className="mb-2">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="font-medium text-sm">
|
||||
{dataset.name}
|
||||
</span>
|
||||
<Badge color="gray">
|
||||
{dataset.records.toLocaleString()}条
|
||||
</Badge>
|
||||
</div>
|
||||
<div className="text-xs text-gray-500">
|
||||
{config?.percentage || 0}%
|
||||
</div>
|
||||
</div>
|
||||
{ratioTaskForm.ratioType === "dataset" ? (
|
||||
<div>
|
||||
<div className="flex items-center gap-2 mb-2">
|
||||
<span className="text-xs">数量:</span>
|
||||
<Input
|
||||
type="number"
|
||||
value={currentQuantity}
|
||||
onChange={(e) =>
|
||||
updateRatioConfig(
|
||||
datasetId,
|
||||
Number(e.target.value)
|
||||
)
|
||||
}
|
||||
style={{ width: 80 }}
|
||||
min={0}
|
||||
max={ratioTaskForm.totalTargetCount}
|
||||
/>
|
||||
<span className="text-xs text-gray-500">
|
||||
条
|
||||
</span>
|
||||
</div>
|
||||
<Progress
|
||||
percent={Math.round(
|
||||
(currentQuantity /
|
||||
ratioTaskForm.totalTargetCount) *
|
||||
100
|
||||
)}
|
||||
size="small"
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div>
|
||||
{dataset.labels?.map((label, index) => {
|
||||
const labelConfig =
|
||||
ratioTaskForm.ratioConfigs.find(
|
||||
(c) =>
|
||||
c.source === `${datasetId}_${label}`
|
||||
);
|
||||
const labelQuantity =
|
||||
labelConfig?.quantity || 0;
|
||||
return (
|
||||
<div
|
||||
key={index}
|
||||
className="flex items-center gap-2 mb-2"
|
||||
>
|
||||
<Badge color="gray">{label}</Badge>
|
||||
<Input
|
||||
type="number"
|
||||
value={labelQuantity}
|
||||
onChange={(e) =>
|
||||
updateRatioConfig(
|
||||
`${datasetId}_${label}`,
|
||||
Number(e.target.value)
|
||||
)
|
||||
}
|
||||
style={{ width: 70 }}
|
||||
min={0}
|
||||
/>
|
||||
<span className="text-xs text-gray-500">
|
||||
条
|
||||
</span>
|
||||
<Progress
|
||||
percent={Math.round(
|
||||
(labelQuantity /
|
||||
ratioTaskForm.totalTargetCount) *
|
||||
100
|
||||
)}
|
||||
size="small"
|
||||
style={{ width: 80 }}
|
||||
/>
|
||||
<span className="text-xs text-gray-500 min-w-8">
|
||||
{Math.round(
|
||||
(labelQuantity /
|
||||
ratioTaskForm.totalTargetCount) *
|
||||
100
|
||||
)}
|
||||
%
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{/* 配比预览 */}
|
||||
{ratioTaskForm.ratioConfigs.length > 0 && (
|
||||
<div className="mb-4">
|
||||
<span className="text-sm font-medium">配比预览</span>
|
||||
<div className="p-3 bg-gray-50 rounded-lg">
|
||||
<div className="grid grid-cols-2 gap-4 text-sm">
|
||||
<div>
|
||||
<span className="text-gray-500">总配比数量:</span>
|
||||
<span className="ml-2 font-medium">
|
||||
{ratioTaskForm.ratioConfigs
|
||||
.reduce((sum, config) => sum + config.quantity, 0)
|
||||
.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-gray-500">目标数量:</span>
|
||||
<span className="ml-2 font-medium">
|
||||
{ratioTaskForm.totalTargetCount.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-gray-500">配比项目:</span>
|
||||
<span className="ml-2 font-medium">
|
||||
{ratioTaskForm.ratioConfigs.length}个
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-gray-500">预计时间:</span>
|
||||
<span className="ml-2 font-medium">约 20 分钟</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-center justify-between p-3 border rounded-lg mb-4">
|
||||
<div>
|
||||
<span className="text-sm font-medium">创建后自动开始</span>
|
||||
<div className="text-xs text-gray-500 mt-1">
|
||||
任务创建完成后立即开始执行
|
||||
</div>
|
||||
</div>
|
||||
<Form.Item name="autoStart" valuePropName="checked" noStyle>
|
||||
<Switch
|
||||
checked={ratioTaskForm.autoStart}
|
||||
onChange={(checked) =>
|
||||
setRatioTaskForm({
|
||||
...ratioTaskForm,
|
||||
autoStart: checked,
|
||||
})
|
||||
}
|
||||
/>
|
||||
</Form.Item>
|
||||
</div>
|
||||
<Divider />
|
||||
<div className="flex justify-end gap-2">
|
||||
<Button
|
||||
onClick={() => navigate("/data/synthesis/ratio-task")}
|
||||
>
|
||||
取消
|
||||
</Button>
|
||||
<Button
|
||||
type="primary"
|
||||
onClick={handleCreateRatioTask}
|
||||
disabled={
|
||||
!ratioTaskForm.name ||
|
||||
ratioTaskForm.ratioConfigs.length === 0
|
||||
}
|
||||
>
|
||||
<Play className="w-4 h-4 mr-2" />
|
||||
创建任务
|
||||
</Button>
|
||||
</div>
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
</Form>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
246
frontend/src/pages/RatioTask/Home/RatioTask.tsx
Normal file
246
frontend/src/pages/RatioTask/Home/RatioTask.tsx
Normal file
@@ -0,0 +1,246 @@
|
||||
import { useState } from "react";
|
||||
import { Button, Card, Table, Tooltip, App } from "antd";
|
||||
import { Plus, Clock, Play, CheckCircle, AlertCircle, Pause, BarChart3 } from "lucide-react";
|
||||
import { DeleteOutlined } from "@ant-design/icons";
|
||||
import type { RatioTaskItem } from "@/pages/RatioTask/ratio.model.ts";
|
||||
import { useNavigate } from "react-router";
|
||||
import CardView from "@/components/CardView.tsx";
|
||||
import { SearchControls } from "@/components/SearchControls.tsx";
|
||||
import { queryRatioTasksUsingGet, deleteRatioTasksUsingDelete } from "@/pages/RatioTask/ratio.api.ts";
|
||||
import useFetchData from "@/hooks/useFetchData";
|
||||
|
||||
/**
 * Ratio task list page: a header with a create button, search/filter/view
 * controls, and the task list rendered either as cards or as a table.
 */
export default function RatioTasksPage() {
  const navigate = useNavigate();
  const [viewMode, setViewMode] = useState<"card" | "list">("card");
  const { message } = App.useApp();

  const { loading, tableData, pagination, searchParams, setSearchParams, handleFiltersChange, fetchData } =
    useFetchData<RatioTaskItem>(queryRatioTasksUsingGet, (d) => d as RatioTaskItem, 30000, true, [], 0);

  /** Deletes one task by id, shows a success message and reloads the list. */
  const handleDelete = async (id: string) => {
    await deleteRatioTasksUsingDelete([id]);
    message.success("删除成功");
    await fetchData();
  };

  /**
   * Maps a task status (compared in upper case) to its label, colour and icon.
   * Empty or unknown statuses fall back to the PENDING entry.
   */
  const getStatusBadge = (status: string) => {
    const s = (status || "").toUpperCase();
    const statusConfig = {
      PENDING: {
        label: "等待中",
        color: "#f09e10ff",
        icon: <Clock className="w-4 h-4 inline mr-1" />,
      },
      RUNNING: {
        label: "运行中",
        color: "#007bff",
        icon: <Play className="w-4 h-4 inline mr-1" />,
      },
      SUCCESS: {
        label: "已完成",
        color: "#28a745",
        icon: <CheckCircle className="w-4 h-4 inline mr-1" />,
      },
      FAILED: {
        label: "失败",
        color: "#dc3545",
        icon: <AlertCircle className="w-4 h-4 inline mr-1" />,
      },
      PAUSED: {
        label: "已暂停",
        color: "#6c757d",
        icon: <Pause className="w-4 h-4 inline mr-1" />,
      },
    };
    return statusConfig[s as keyof typeof statusConfig] || statusConfig.PENDING;
  };

  // Actions shared by the card view and the table view. `onClick` receives the
  // whole task item and reads its `id`.
  // NOTE(review): the confirm copy below talks about a dataset although the
  // action deletes a ratio task — confirm the intended wording.
  const operations = [
    {
      key: "delete",
      label: "删除",
      danger: true,
      confirm: {
        title: "确认删除该数据集?",
        description: "删除后该数据集将无法恢复,请谨慎操作。",
        okText: "删除",
        cancelText: "取消",
        okType: "danger",
      },
      icon: <DeleteOutlined />,
      onClick: (item) => handleDelete(String(item.id)),
    },
  ];

  const columns = [
    {
      title: "任务名称",
      dataIndex: "name",
      key: "name",
    },
    {
      title: "状态",
      dataIndex: "status",
      key: "status",
      render: (v: string) => getStatusBadge(v).label,
    },
    {
      title: "配比方式",
      dataIndex: "ratio_method",
      key: "ratio_method",
    },
    {
      title: "目标数量",
      dataIndex: "totals",
      key: "totals",
    },
    {
      title: "目标数据集",
      dataIndex: "target_dataset_name",
      key: "target_dataset_name",
    },
    {
      title: "创建时间",
      dataIndex: "created_at",
      key: "created_at",
    },
    {
      title: "操作",
      key: "actions",
      render: (_: any, task: RatioTaskItem) => (
        <div className="flex items-center gap-2">
          {operations.map((op) => (
            <Tooltip key={op.key} title={op.label}>
              <Button
                type="text"
                icon={op.icon}
                // Pass the row itself: the operation handlers read `item.id`,
                // so handing over the bare id would resolve to "undefined".
                onClick={() => op.onClick(task)}
              />
            </Tooltip>
          ))}
        </div>
      ),
    },
  ];

  /** Table presentation of the task list, including the empty-state placeholder. */
  const renderTableView = () => {
    const statusFilter = searchParams.filter?.status;
    // A keyword or a concrete (non-"all") status means the empty list is a "no match" result.
    const isFiltered =
      searchParams.keyword || (statusFilter?.[0] && statusFilter?.[0] !== "all");
    // The create shortcut is only offered when neither a keyword nor a status filter is active.
    const showCreateButton =
      !searchParams.keyword && (!statusFilter?.length || statusFilter?.[0] === "all");

    return (
      <Card>
        <Table
          columns={columns}
          dataSource={tableData}
          rowKey="id"
          loading={loading}
          pagination={pagination}
          scroll={{ x: "max-content" }}
          locale={{
            emptyText: (
              <div className="text-center py-8">
                <BarChart3 className="w-12 h-12 text-gray-400 mx-auto mb-4" />
                <h3 className="text-lg font-medium text-gray-900 mb-2">
                  暂无配比任务
                </h3>
                <p className="text-gray-500 mb-4">
                  {isFiltered ? "没有找到匹配的任务" : "开始创建您的第一个配比任务"}
                </p>
                {showCreateButton && (
                  <Button
                    onClick={() => navigate("/data/synthesis/ratio-task/create")}
                    type="primary"
                  >
                    <Plus className="w-4 h-4 mr-2" />
                    创建配比任务
                  </Button>
                )}
              </div>
            ),
          }}
        />
      </Card>
    );
  };

  /** Card presentation: each task is extended with the display fields passed to CardView. */
  const renderCardView = () => (
    <CardView
      loading={loading}
      data={tableData.map((task) => ({
        ...task,
        description: task.ratio_method === "DATASET" ? "按数据集配比" : "按标签配比",
        icon: <BarChart3 className="w-6 h-6" />,
        iconColor: task.ratio_method === "DATASET" ? "bg-blue-100" : "bg-green-100",
        statistics: [
          {
            label: "目标数量",
            value: (task.totals ?? 0).toLocaleString(),
          },
          {
            label: "创建时间",
            value: task.created_at || "-",
          },
        ],
        status: getStatusBadge(task.status),
      }))}
      pagination={pagination}
      operations={operations}
    />
  );

  // Search, filter and view-toggle configuration.
  const searchFilters = [
    {
      key: "status",
      label: "状态筛选",
      options: [
        { label: "全部状态", value: "all" },
        { label: "等待中", value: "PENDING" },
        { label: "运行中", value: "RUNNING" },
        { label: "已完成", value: "SUCCESS" },
        { label: "失败", value: "FAILED" },
        { label: "已暂停", value: "PAUSED" },
      ],
    },
  ];

  // Forwards filter changes from SearchControls to the data hook.
  const handleSearchControlsFiltersChange = (
    filters: Record<string, string[]>
  ) => {
    handleFiltersChange(filters);
  };

  // Switches between card and list view; anything other than "card" selects the list.
  const handleViewModeChange = (mode: "card" | "list") => {
    setViewMode(mode === "card" ? "card" : "list");
  };

  return (
    <div className="">
      <div className="flex items-center justify-between">
        <h2 className="text-xl font-bold">配比任务</h2>
        <Button
          type="primary"
          onClick={() => navigate("/data/synthesis/ratio-task/create")}
          icon={<Plus className="w-4 h-4" />}
        >
          创建配比任务
        </Button>
      </div>
      <>
        {/* Search, filter and view controls */}
        <SearchControls
          searchTerm={searchParams.keyword}
          onSearchChange={(keyword) => setSearchParams({ ...searchParams, keyword })}
          searchPlaceholder="搜索任务名称"
          filters={searchFilters}
          onFiltersChange={handleSearchControlsFiltersChange}
          onClearFilters={() => setSearchParams({ ...searchParams, filter: {} })}
          viewMode={viewMode === "card" ? "card" : "list"}
          onViewModeChange={handleViewModeChange}
          showViewToggle={true}
        />
        {/* Task list */}
        {viewMode === "list" ? renderTableView() : renderCardView()}
      </>
    </div>
  );
}
|
||||
@@ -1,382 +0,0 @@
|
||||
import { useState } from "react";
|
||||
import {
|
||||
Button,
|
||||
Card,
|
||||
Input,
|
||||
Select,
|
||||
Badge,
|
||||
Progress,
|
||||
Table,
|
||||
Alert,
|
||||
} from "antd";
|
||||
import {
|
||||
Plus,
|
||||
Eye,
|
||||
Clock,
|
||||
Play,
|
||||
CheckCircle,
|
||||
AlertCircle,
|
||||
Pause,
|
||||
Download as DownloadIcon,
|
||||
BarChart3,
|
||||
} from "lucide-react";
|
||||
import type { RatioTask } from "@/pages/RatioTask/ratio";
|
||||
import { mockRatioTasks } from "@/mock/ratio";
|
||||
import { useNavigate } from "react-router";
|
||||
import CardView from "@/components/CardView";
|
||||
import { SearchControls } from "@/components/SearchControls";
|
||||
import DevelopmentInProgress from "@/components/DevelopmentInProgress";
|
||||
|
||||
export default function RatioTasksPage() {
|
||||
return <DevelopmentInProgress showTime="2025.11.30" />;
|
||||
const navigate = useNavigate();
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [filterStatus, setFilterStatus] = useState("all");
|
||||
const [filterType, setFilterType] = useState("all");
|
||||
const [sortBy, setSortBy] = useState("createdAt");
|
||||
const [sortOrder, setSortOrder] = useState<"asc" | "desc">("desc");
|
||||
const [viewMode, setViewMode] = useState<"card" | "list">("card");
|
||||
|
||||
const [tasks, setTasks] = useState<RatioTask[]>(mockRatioTasks);
|
||||
|
||||
// 过滤和排序任务
|
||||
const filteredAndSortedTasks = tasks
|
||||
.filter((task) => {
|
||||
const matchesSearch = task.name
|
||||
.toLowerCase()
|
||||
.includes(searchQuery.toLowerCase());
|
||||
const matchesStatus =
|
||||
filterStatus === "all" || task.status === filterStatus;
|
||||
const matchesType = filterType === "all" || task.ratioType === filterType;
|
||||
return matchesSearch && matchesStatus && matchesType;
|
||||
})
|
||||
.sort((a, b) => {
|
||||
let aValue: any, bValue: any;
|
||||
|
||||
switch (sortBy) {
|
||||
case "name":
|
||||
aValue = a.name.toLowerCase();
|
||||
bValue = b.name.toLowerCase();
|
||||
break;
|
||||
case "targetCount":
|
||||
aValue = a.targetCount;
|
||||
bValue = b.targetCount;
|
||||
break;
|
||||
case "generatedCount":
|
||||
aValue = a.generatedCount;
|
||||
bValue = b.generatedCount;
|
||||
break;
|
||||
case "progress":
|
||||
aValue = a.progress;
|
||||
bValue = b.progress;
|
||||
break;
|
||||
case "createdAt":
|
||||
default:
|
||||
aValue = new Date(a.createdAt).getTime();
|
||||
bValue = new Date(b.createdAt).getTime();
|
||||
break;
|
||||
}
|
||||
|
||||
if (sortOrder === "asc") {
|
||||
return aValue > bValue ? 1 : -1;
|
||||
} else {
|
||||
return aValue < bValue ? 1 : -1;
|
||||
}
|
||||
});
|
||||
|
||||
const getStatusBadge = (status: string) => {
|
||||
const statusConfig = {
|
||||
pending: {
|
||||
label: "等待中",
|
||||
color: "#f09e10ff",
|
||||
icon: <Clock className="w-4 h-4 inline mr-1" />,
|
||||
},
|
||||
running: {
|
||||
label: "运行中",
|
||||
color: "#007bff",
|
||||
icon: <Play className="w-4 h-4 inline mr-1" />,
|
||||
},
|
||||
completed: {
|
||||
label: "已完成",
|
||||
color: "#28a745",
|
||||
icon: <CheckCircle className="w-4 h-4 inline mr-1" />,
|
||||
},
|
||||
failed: {
|
||||
label: "失败",
|
||||
color: "#dc3545",
|
||||
icon: <AlertCircle className="w-4 h-4 inline mr-1" />,
|
||||
},
|
||||
paused: {
|
||||
label: "已暂停",
|
||||
color: "#6c757d",
|
||||
icon: <Pause className="w-4 h-4 inline mr-1" />,
|
||||
},
|
||||
};
|
||||
return (
|
||||
statusConfig[status as keyof typeof statusConfig] || statusConfig.pending
|
||||
);
|
||||
};
|
||||
|
||||
const handleTaskAction = (taskId: number, action: string) => {
|
||||
setTasks((prev) =>
|
||||
prev.map((task) => {
|
||||
if (task.id === taskId) {
|
||||
switch (action) {
|
||||
case "pause":
|
||||
return { ...task, status: "paused" as const };
|
||||
case "resume":
|
||||
return { ...task, status: "running" as const };
|
||||
case "stop":
|
||||
return {
|
||||
...task,
|
||||
status: "failed" as const,
|
||||
progress: task.progress,
|
||||
};
|
||||
default:
|
||||
return task;
|
||||
}
|
||||
}
|
||||
return task;
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
const columns = [
|
||||
{
|
||||
title: "任务名称",
|
||||
dataIndex: "name",
|
||||
key: "name",
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "status",
|
||||
key: "status",
|
||||
},
|
||||
{
|
||||
title: "配比方式",
|
||||
dataIndex: "ratioType",
|
||||
key: "ratioType",
|
||||
},
|
||||
{
|
||||
title: "进度",
|
||||
dataIndex: "progress",
|
||||
key: "progress",
|
||||
},
|
||||
{
|
||||
title: "目标数量",
|
||||
dataIndex: "targetCount",
|
||||
key: "targetCount",
|
||||
},
|
||||
{
|
||||
title: "已生成",
|
||||
dataIndex: "generatedCount",
|
||||
key: "generatedCount",
|
||||
},
|
||||
{
|
||||
title: "数据源",
|
||||
dataIndex: "sourceDatasets",
|
||||
key: "sourceDatasets",
|
||||
},
|
||||
{
|
||||
title: "创建时间",
|
||||
dataIndex: "createdAt",
|
||||
key: "createdAt",
|
||||
},
|
||||
{
|
||||
title: "操作",
|
||||
key: "actions",
|
||||
render: (_: any, task: RatioTask) => (
|
||||
<div className="flex items-center gap-1 justify-end">
|
||||
{task.status === "running" && (
|
||||
<Button
|
||||
type="link"
|
||||
size="small"
|
||||
onClick={() => handleTaskAction(task.id, "pause")}
|
||||
>
|
||||
停止
|
||||
</Button>
|
||||
)}
|
||||
{task.status === "paused" && (
|
||||
<Button
|
||||
size="small"
|
||||
type="link"
|
||||
onClick={() => handleTaskAction(task.id, "resume")}
|
||||
>
|
||||
开始
|
||||
</Button>
|
||||
)}
|
||||
<Button type="link" size="small">
|
||||
下载
|
||||
</Button>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
];
|
||||
|
||||
// List view: a Table inside a Card, with a custom placeholder shown when there are no rows.
const renderTableView = () => {
  // True only when neither a search term nor a status/type filter is active.
  const noFiltersApplied =
    !searchQuery && filterStatus === "all" && filterType === "all";

  const emptyState = (
    <div className="text-center py-8">
      <BarChart3 className="w-12 h-12 text-gray-400 mx-auto mb-4" />
      <h3 className="text-lg font-medium text-gray-900 mb-2">暂无配比任务</h3>
      <p className="text-gray-500 mb-4">
        {noFiltersApplied ? "开始创建您的第一个配比任务" : "没有找到匹配的任务"}
      </p>
      {/* The create shortcut is offered only when the list is empty without any filtering. */}
      {noFiltersApplied && (
        <Button
          type="primary"
          onClick={() => navigate("/data/synthesis/ratio-task/create")}
        >
          <Plus className="w-4 h-4 mr-2" />
          创建配比任务
        </Button>
      )}
    </div>
  );

  return (
    <Card>
      <Table
        columns={columns}
        dataSource={filteredAndSortedTasks}
        rowKey="id"
        scroll={{ x: "max-content" }}
        locale={{ emptyText: emptyState }}
      />
    </Card>
  );
};
|
||||
// Card view: decorates each task with the display fields CardView receives and wires two operations.
const renderCardView = () => {
  const cardItems = filteredAndSortedTasks.map((task) => {
    const byDataset = task.ratioType === "dataset";
    return {
      ...task,
      description: byDataset ? "按数据集配比" : "按标签配比",
      icon: <BarChart3 className="w-6 h-6" />,
      iconColor: byDataset ? "bg-blue-100" : "bg-green-100",
      statistics: [
        { label: "目标数量", value: task.targetCount.toLocaleString() },
        { label: "已生成", value: task.generatedCount.toLocaleString() },
        { label: "进度", value: `${Math.round(task.progress)}%` },
      ],
      // Overrides the raw status string with whatever getStatusBadge returns for it.
      status: getStatusBadge(task.status),
    };
  });

  return (
    <CardView
      data={cardItems}
      operations={[
        {
          key: "view",
          label: "查看",
          onClick: (item) => navigate(`/data/synthesis/ratio-task/${item.id}`),
        },
        {
          key: "download",
          label: "下载",
          // Only logs to the console; no download is triggered here.
          onClick: (item) => console.log("下载", item.name),
        },
      ]}
    />
  );
};
|
||||
|
||||
// 搜索、筛选和视图控制相关
|
||||
// Filter groups passed to SearchControls, written as [key, label, [optionLabel, optionValue][]] rows.
const searchFilters = (
  [
    [
      "status",
      "状态筛选",
      [
        ["全部状态", "all"],
        ["等待中", "pending"],
        ["运行中", "running"],
        ["已完成", "completed"],
        ["失败", "failed"],
        ["已暂停", "paused"],
      ],
    ],
    [
      "type",
      "类型筛选",
      [
        ["全部类型", "all"],
        ["按数据集", "dataset"],
        ["按标签", "label"],
      ],
    ],
    [
      "sortBy",
      "排序方式",
      [
        ["创建时间", "createdAt"],
        ["任务名称", "name"],
        ["目标数量", "targetCount"],
        ["已生成", "generatedCount"],
        ["进度", "progress"],
      ],
    ],
    [
      "sortOrder",
      "排序顺序",
      [
        ["升序", "asc"],
        ["降序", "desc"],
      ],
    ],
  ] as [string, string, [string, string][]][]
).map(([key, label, pairs]) => ({
  key,
  label,
  options: pairs.map(([optionLabel, value]) => ({ label: optionLabel, value })),
}));
|
||||
|
||||
// 处理 SearchControls 的筛选变化
|
||||
const handleSearchControlsFiltersChange = (
  filters: Record<string, string[]>
) => {
  // First selected value of a filter group, or the fallback when the group is missing or empty.
  const firstOr = (key: string, fallback: string) =>
    filters[key]?.[0] || fallback;

  setFilterStatus(firstOr("status", "all"));
  setFilterType(firstOr("type", "all"));
  setSortBy(firstOr("sortBy", "createdAt"));
  setSortOrder(firstOr("sortOrder", "desc") as "asc" | "desc");
};
|
||||
|
||||
// 处理视图切换
|
||||
const handleViewModeChange = (mode: "card" | "list") => {
  // Anything other than "card" falls back to the list view.
  if (mode === "card") {
    setViewMode("card");
    return;
  }
  setViewMode("list");
};
|
||||
|
||||
return (
|
||||
<div className="">
|
||||
<div className="flex items-center justify-between">
|
||||
<h2 className="text-xl font-bold">配比任务</h2>
|
||||
<Button
|
||||
type="primary"
|
||||
onClick={() => navigate("/data/synthesis/ratio-task/create")}
|
||||
icon={<Plus className="w-4 h-4" />}
|
||||
>
|
||||
创建配比任务
|
||||
</Button>
|
||||
</div>
|
||||
<>
|
||||
{/* 搜索、筛选和视图控制 */}
|
||||
<SearchControls
|
||||
searchTerm={searchQuery}
|
||||
onSearchChange={setSearchQuery}
|
||||
searchPlaceholder="搜索任务名称"
|
||||
filters={searchFilters}
|
||||
onFiltersChange={handleSearchControlsFiltersChange}
|
||||
viewMode={viewMode === "card" ? "card" : "list"}
|
||||
onViewModeChange={handleViewModeChange}
|
||||
showViewToggle={true}
|
||||
/>
|
||||
{/* 任务列表 */}
|
||||
{viewMode === "list" ? renderTableView() : renderCardView()}
|
||||
</>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
18
frontend/src/pages/RatioTask/ratio.api.ts
Normal file
18
frontend/src/pages/RatioTask/ratio.api.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { get, post, put, del, download } from "@/utils/request";
|
||||
|
||||
/**
 * Query the (paged) list of ratio tasks.
 *
 * Issues a GET to /api/synthesis/ratio-task, forwarding `params` unchanged to the `get` helper.
 */
export function queryRatioTasksUsingGet(params?: any) {
  return get("/api/synthesis/ratio-task", params);
}
|
||||
|
||||
/**
 * Create a ratio task.
 *
 * Issues a POST to /api/synthesis/ratio-task with `data` as the request payload.
 */
export function createRatioTaskUsingPost(data: any) {
  return post("/api/synthesis/ratio-task", data);
}
|
||||
|
||||
/**
 * Delete ratio tasks (batch deletion supported).
 *
 * The ids are sent as repeated `ids=<value>` query parameters, each value URI-encoded.
 * With no ids the bare endpoint URL is requested.
 */
export function deleteRatioTasksUsingDelete(ids: string[]) {
  const endpoint = "/api/synthesis/ratio-task";
  const query = (ids || [])
    .map((id) => `ids=${encodeURIComponent(id)}`)
    .join("&");
  if (!query) {
    return del(endpoint);
  }
  return del(`/api/synthesis/ratio-task?${query}`);
}
|
||||
24
frontend/src/pages/RatioTask/ratio.d.ts
vendored
24
frontend/src/pages/RatioTask/ratio.d.ts
vendored
@@ -1,24 +0,0 @@
|
||||
export interface RatioTask {
|
||||
id: number
|
||||
name: string
|
||||
status: "pending" | "running" | "completed" | "failed" | "paused"
|
||||
progress: number
|
||||
sourceDatasets: string[]
|
||||
targetCount: number
|
||||
generatedCount: number
|
||||
createdAt: string
|
||||
ratioType: "dataset" | "label"
|
||||
estimatedTime?: string
|
||||
quality?: number
|
||||
errorMessage?: string
|
||||
ratioConfigs: RatioConfig[]
|
||||
}
|
||||
|
||||
export interface RatioConfig {
|
||||
id: string
|
||||
name: string
|
||||
type: "dataset" | "label"
|
||||
quantity: number
|
||||
percentage: number
|
||||
source: string
|
||||
}
|
||||
82
frontend/src/pages/RatioTask/ratio.model.ts
Normal file
82
frontend/src/pages/RatioTask/ratio.model.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
// Ratio module models aligned with scripts/db/data-ratio-init.sql
|
||||
|
||||
// enums
|
||||
export type RatioMethod = "TAG" | "DATASET"
|
||||
// NOTE(review): the Python executor (RatioTaskService.execute_dataset_ratio_task) writes
// the status "SUCCESS", which is not part of this union — confirm whether "COMPLETED"
// is ever produced by the backend or whether the two sides need to be aligned.
export type RatioStatus = "PENDING" | "RUNNING" | "COMPLETED" | "FAILED" | "PAUSED"
|
||||
|
||||
// t_st_ratio_instances
|
||||
/**
 * One ratio task instance, with camelCase counterparts of the t_st_ratio_instances columns.
 * Only `id` and `name` are required; every other field is optional.
 */
export interface RatioInstance {
  id: string
  name: string
  description?: string
  targetDatasetId?: string
  ratioMethod?: RatioMethod
  // Untyped payload; its structure is not defined in this file.
  ratioParameters?: any
  mergeMethod?: string
  // Accepts any string in addition to the RatioStatus literals.
  status?: RatioStatus | string
  totals?: number
  createdAt?: string
  updatedAt?: string
  createdBy?: string
  updatedBy?: string
}
|
||||
|
||||
// t_st_ratio_relations
|
||||
/**
 * One relation row linking a ratio instance to a source dataset,
 * with camelCase counterparts of the t_st_ratio_relations columns.
 */
export interface RatioRelation {
  id: string
  // Id of the owning RatioInstance.
  ratioInstanceId: string
  sourceDatasetId?: string
  ratioValue?: string
  counts?: number
  filterConditions?: string
  createdAt?: string
  updatedAt?: string
  createdBy?: string
  updatedBy?: string
}
|
||||
|
||||
// API DTOs
|
||||
/**
 * One entry of a ratio task's `config` list.
 *
 * NOTE(review): the mixed casing (`datasetId` vs `filter_conditions`) presumably mirrors
 * the field names the backend request schema accepts — confirm before renaming either.
 */
export interface RatioConfigItem {
  datasetId: string
  // Carried as a string rather than a number.
  counts: string
  filter_conditions: string
}
|
||||
|
||||
/** Request payload for creating a ratio task. */
export interface CreateRatioTaskRequest {
  name: string
  description?: string
  // Carried as a string rather than a number.
  totals: string
  ratio_method: RatioMethod
  config: RatioConfigItem[]
}
|
||||
|
||||
/** Summary of a target dataset, as embedded in CreateRatioTaskResponse.targetDataset. */
export interface TargetDatasetInfo {
  id: string
  name: string
  datasetType: string
  status: string
}
|
||||
|
||||
/** Response payload returned after creating a ratio task. */
export interface CreateRatioTaskResponse {
  id: string
  name: string
  description?: string
  // A number here, whereas CreateRatioTaskRequest.totals is a string.
  totals: number
  ratio_method: RatioMethod
  status: string
  config: RatioConfigItem[]
  targetDataset: TargetDatasetInfo
}
|
||||
|
||||
/**
 * One row of the ratio task list. Field names are snake_case,
 * unlike the camelCase RatioInstance interface in this file.
 */
export interface RatioTaskItem {
  id: string
  name: string
  description?: string
  status?: string
  totals?: number
  ratio_method?: RatioMethod
  target_dataset_id?: string
  target_dataset_name?: string
  created_at?: string
  updated_at?: string
}
|
||||
@@ -38,8 +38,8 @@ import KnowledgeBaseFileDetailPage from "@/pages/KnowledgeBase/FileDetail/Knowle
|
||||
import OperatorMarketPage from "@/pages/OperatorMarket/Home/OperatorMarket";
|
||||
import OperatorPluginCreate from "@/pages/OperatorMarket/Create/OperatorPluginCreate";
|
||||
import OperatorPluginDetail from "@/pages/OperatorMarket/Detail/OperatorPluginDetail";
|
||||
import RatioTasksPage from "@/pages/RatioTask/RatioTask";
|
||||
import CreateRatioTask from "@/pages/RatioTask/CreateRatioTask";
|
||||
import RatioTasksPage from "@/pages/RatioTask/Home/RatioTask.tsx";
|
||||
import CreateRatioTask from "@/pages/RatioTask/Create/CreateRatioTask.tsx";
|
||||
import OrchestrationPage from "@/pages/Orchestration/Orchestration";
|
||||
import WorkflowEditor from "@/pages/Orchestration/WorkflowEditor";
|
||||
import SettingsPage from "@/pages/SettingsPage/SettingsPage";
|
||||
|
||||
71
runtime/datamate-python/app/db/models/ratio_task.py
Normal file
71
runtime/datamate-python/app/db/models/ratio_task.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Tables for Ratio (Data Synthesis Ratio) module
|
||||
|
||||
Derived from scripts/db/data-ratio-init.sql
|
||||
- t_st_ratio_instances
|
||||
- t_st_ratio_relations
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from sqlalchemy import Column, String, Text, BigInteger, TIMESTAMP, JSON, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.db.session import Base
|
||||
|
||||
|
||||
class RatioInstance(Base):
    """Ratio instance table (UUID primary key) -> t_st_ratio_instances

    Columns per data-ratio-init.sql:
    id, name, description, target_dataset_id, ratio_method, ratio_parameters,
    merge_method, status, totals, created_at, updated_at, created_by, updated_by
    """

    __tablename__ = "t_st_ratio_instances"

    # Primary key: a uuid4 string generated in Python when no id is supplied.
    id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(64), nullable=True, comment="名称")
    description = Column(Text, nullable=True, comment="描述")
    # Stored as a plain string column; no ForeignKey constraint is declared here.
    target_dataset_id = Column(String(64), nullable=True, comment="模板数据集ID")
    ratio_method = Column(String(50), nullable=True, comment="配比方式,按标签(TAG),按数据集(DATASET)")
    ratio_parameters = Column(JSON, nullable=True, comment="配比参数")
    merge_method = Column(String(50), nullable=True, comment="合并方式")
    status = Column(String(20), nullable=True, comment="状态")
    totals = Column(BigInteger, nullable=True, comment="总数")
    # Both timestamps default to the database's current timestamp; updated_at is also refreshed on UPDATE.
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column( TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self) -> str:
        # Short debug representation limited to the identifying fields.
        return f"<RatioInstance(id={self.id}, name={self.name}, method={self.ratio_method}, status={self.status})>"
|
||||
|
||||
|
||||
class RatioRelation(Base):
    """Ratio relation table (UUID primary key) -> t_st_ratio_relations

    Columns per data-ratio-init.sql:
    id, ratio_instance_id, source_dataset_id, ratio_value, counts, filter_conditions,
    created_at, updated_at, created_by, updated_by
    """

    __tablename__ = "t_st_ratio_relations"

    # Primary key: a uuid4 string generated in Python when no id is supplied.
    id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    # Owning ratio instance; required, but declared without a ForeignKey constraint.
    ratio_instance_id = Column(String(64), nullable=False, comment="配比实例ID")
    source_dataset_id = Column(String(64), nullable=True, comment="源数据集ID")
    ratio_value = Column(String(256), nullable=True)
    counts = Column(BigInteger, nullable=True, comment="条数")
    filter_conditions = Column(Text, nullable=True, comment="过滤条件")
    # Both timestamps default to the database's current timestamp; updated_at is also refreshed on UPDATE.
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    created_by = Column(String(255), nullable=True, comment="创建者")
    updated_by = Column(String(255), nullable=True, comment="更新者")

    def __repr__(self) -> str:
        # Short debug representation limited to the identifying fields.
        return (
            f"<RatioRelation(id={self.id}, ratio_instance_id={self.ratio_instance_id}, "
            f"source_dataset_id={self.source_dataset_id}, counts={self.counts})>"
        )
|
||||
|
||||
@@ -41,6 +41,10 @@ async def fastapi_http_exception_handler(request: Request, exc: HTTPException):
|
||||
# 自定义异常处理器:RequestValidationError
|
||||
async def validation_exception_handler(request: Request, exc: RequestValidationError):
|
||||
"""将请求验证错误转换为标准响应格式"""
|
||||
# 仅返回每个错误的简要 detail 文本(来自 Pydantic 错误的 `msg` 字段),不返回整个错误对象
|
||||
raw_errors = exc.errors() or []
|
||||
errors = [err.get("msg", "Validation error") for err in raw_errors]
|
||||
|
||||
return JSONResponse(
|
||||
status_code=422,
|
||||
content={
|
||||
@@ -48,9 +52,9 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
|
||||
"message": "error",
|
||||
"data": {
|
||||
"detail": "Validation error",
|
||||
"errors": exc.errors()
|
||||
}
|
||||
}
|
||||
"errors": errors,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
# 自定义异常处理器:未捕获的异常
|
||||
@@ -94,4 +98,4 @@ class DMServiceClientError(LabelStudioAdapterException):
|
||||
|
||||
class SyncServiceError(LabelStudioAdapterException):
|
||||
"""同步服务错误"""
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -2,6 +2,7 @@ from fastapi import APIRouter
|
||||
|
||||
from .system.interface import router as system_router
|
||||
from .annotation.interface import router as annotation_router
|
||||
from .synthesis.interface import router as ratio_router
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api"
|
||||
@@ -9,5 +10,6 @@ router = APIRouter(
|
||||
|
||||
router.include_router(system_router)
|
||||
router.include_router(annotation_router)
|
||||
router.include_router(ratio_router)
|
||||
|
||||
__all__ = ["router"]
|
||||
__all__ = ["router"]
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/synthesis",
|
||||
tags = ["synthesis"]
|
||||
)
|
||||
|
||||
# Include sub-routers
|
||||
from .ratio_task import router as ratio_task_router
|
||||
|
||||
router.include_router(ratio_task_router)
|
||||
@@ -0,0 +1,253 @@
|
||||
import asyncio
|
||||
from typing import Set
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import or_, func, delete, select
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import Dataset
|
||||
from app.db.session import get_db
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.module.shared.schema import StandardResponse
|
||||
from app.module.synthesis.schema.ratio_task import (
|
||||
CreateRatioTaskResponse,
|
||||
CreateRatioTaskRequest,
|
||||
PagedRatioTaskResponse,
|
||||
RatioTaskItem,
|
||||
TargetDatasetInfo,
|
||||
)
|
||||
from app.module.synthesis.service.ratio_task import RatioTaskService
|
||||
from app.db.models.ratio_task import RatioInstance, RatioRelation
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/ratio-task",
|
||||
tags=["synthesis/ratio-task"],
|
||||
)
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
|
||||
# Strong references to in-flight background executions. The event loop keeps only weak
# references to tasks, so a task that nothing else references can be garbage-collected
# before it finishes.
_background_tasks: set[asyncio.Task] = set()


@router.post("", response_model=StandardResponse[CreateRatioTaskResponse], status_code=200)
async def create_ratio_task(
    req: CreateRatioTaskRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Create a ratio task.

    Path: /api/synthesis/ratio-task

    Validates the source datasets and the task name, creates a DRAFT target dataset,
    persists the task with its relations, then schedules the execution as a background
    asyncio task and returns immediately.

    Raises:
        HTTPException: re-raised as-is when raised by a validation helper; any other
            error is rolled back and reported as a 500.
    """
    try:
        # Verify every dataset_id in config exists and collect the source dataset types.
        dm_service = DatasetManagementService(db)
        source_types = await get_dataset_types(dm_service, req)

        await valid_exists(db, req)

        # Target dataset name: "<task name>-配比生成-<timestamp>".
        target_dataset_name = f"{req.name}-配比生成-{datetime.now().strftime('%Y%m%d%H%M%S')}"

        target_type = get_target_dataset_type(source_types)

        target_dataset = Dataset(
            name=target_dataset_name,
            description=req.description or "",
            dataset_type=target_type,
            status="DRAFT",
        )
        db.add(target_dataset)
        await db.flush()  # populate target_dataset.id

        service = RatioTaskService(db)
        instance = await service.create_task(
            name=req.name,
            description=req.description,
            totals=int(req.totals),
            ratio_method=req.ratio_method,
            config=[
                {
                    "dataset_id": item.dataset_id,
                    "counts": int(item.counts),
                    "filter_conditions": item.filter_conditions,
                }
                for item in req.config
            ],
            target_dataset_id=target_dataset.id,
        )

        # Run the ratio task in the background (DATASET / TAG). The handle is kept in
        # _background_tasks until the task completes so it cannot be collected mid-run.
        background = asyncio.create_task(RatioTaskService.execute_dataset_ratio_task(instance.id))
        _background_tasks.add(background)
        background.add_done_callback(_background_tasks.discard)

        return StandardResponse(
            code=200,
            message="success",
            data=CreateRatioTaskResponse(
                id=instance.id,
                name=instance.name,
                description=instance.description,
                totals=instance.totals or 0,
                ratio_method=instance.ratio_method or req.ratio_method,
                status=instance.status or "PENDING",
                config=req.config,
                targetDataset=TargetDatasetInfo(
                    id=str(target_dataset.id),
                    name=str(target_dataset.name),
                    datasetType=str(target_dataset.dataset_type),
                    status=str(target_dataset.status),
                )
            )
        )
    except HTTPException:
        await db.rollback()
        raise
    except Exception as e:
        await db.rollback()
        # logger.exception keeps the traceback, which the generic 500 response hides.
        logger.exception(f"Failed to create ratio task: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("", response_model=StandardResponse[PagedRatioTaskResponse], status_code=200)
async def list_ratio_tasks(
    page: int = 1,
    size: int = 10,
    name: str | None = None,
    status: str | None = None,
    db: AsyncSession = Depends(get_db),
):
    """List ratio tasks page by page, optionally filtered by name (substring) and status (exact).

    Args:
        page: 1-based page number; values below 1 are treated as 1.
        size: page size; values below 0 are treated as 0 (empty page).
        name: substring to look for in the task name, matched literally.
        status: exact status value to match.

    Raises:
        HTTPException: 500 on any failure.
    """
    try:
        query = select(RatioInstance)
        if name:
            # autoescape makes '%' and '_' in the user-supplied text match literally
            # instead of acting as LIKE wildcards.
            query = query.where(RatioInstance.name.contains(name, autoescape=True))
        if status:
            query = query.where(RatioInstance.status == status)

        # Total number of matching rows, before pagination.
        count_q = select(func.count()).select_from(query.subquery())
        total = (await db.execute(count_q)).scalar_one()

        # Page is 1-based; a negative size must not reach OFFSET/LIMIT.
        page_index = max(page, 1) - 1
        page_size = max(size, 0)
        query = (
            query.order_by(RatioInstance.created_at.desc())
            .offset(page_index * page_size)
            .limit(page_size)
        )
        result = await db.execute(query)
        items = result.scalars().all()

        # Load all referenced target datasets in one query so their names can be attached.
        ds_ids = {i.target_dataset_id for i in items if i.target_dataset_id}
        ds_map = {}
        if ds_ids:
            ds_res = await db.execute(select(Dataset).where(Dataset.id.in_(list(ds_ids))))
            for d in ds_res.scalars().all():
                ds_map[d.id] = d

        content: list[RatioTaskItem] = []
        for i in items:
            ds = ds_map.get(i.target_dataset_id) if i.target_dataset_id else None
            content.append(
                RatioTaskItem(
                    id=i.id,
                    name=i.name or "",
                    description=i.description,
                    status=i.status,
                    totals=i.totals,
                    ratio_method=i.ratio_method,
                    target_dataset_id=i.target_dataset_id,
                    target_dataset_name=(ds.name if ds else None),
                    created_at=str(i.created_at) if getattr(i, "created_at", None) else None,
                    updated_at=str(i.updated_at) if getattr(i, "updated_at", None) else None,
                )
            )

        # Ceiling division; zero pages when the page size is zero.
        total_pages = (total + page_size - 1) // page_size if page_size > 0 else 0
        return StandardResponse(
            code=200,
            message="success",
            data=PagedRatioTaskResponse(
                content=content,
                totalElements=total,
                totalPages=total_pages,
                page=page,
                size=size,
            ),
        )
    except Exception as e:
        # logger.exception keeps the traceback, which the generic 500 response hides.
        logger.exception(f"Failed to list ratio tasks: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.delete("", response_model=StandardResponse[str], status_code=200)
async def delete_ratio_tasks(
    ids: list[str] = Query(..., description="要删除的配比任务ID列表"),
    db: AsyncSession = Depends(get_db),
):
    """Delete ratio tasks by id and return a simple result string.

    Relation rows are removed before the instances they belong to, and both
    deletions are committed together.

    Raises:
        HTTPException: 400 when ``ids`` is empty, 500 on any other failure
            (after rolling the session back).
    """
    try:
        if not ids:
            raise HTTPException(status_code=400, detail="ids is required")

        # Order matters: relations first, then the instances themselves.
        purge_statements = (
            delete(RatioRelation).where(RatioRelation.ratio_instance_id.in_(ids)),
            delete(RatioInstance).where(RatioInstance.id.in_(ids)),
        )
        for statement in purge_statements:
            await db.execute(statement)
        await db.commit()

        return StandardResponse(code=200, message="success", data="success")
    except HTTPException:
        await db.rollback()
        raise
    except Exception as exc:
        await db.rollback()
        logger.error(f"Failed to delete ratio tasks: {exc}")
        raise HTTPException(status_code=500, detail=f"Fail to delete ratio task: {exc}")
|
||||
|
||||
|
||||
async def valid_exists(db, req: CreateRatioTaskRequest):
    """Reject the request when a ratio task with the same name already exists.

    The previous implementation relied on ``scalar_one_or_none()`` raising, which only
    happens when several rows match; a single existing task was silently accepted.
    Here any matching row counts as a duplicate.

    Args:
        db: async database session used for the lookup.
        req: the create request whose ``name`` is checked.

    Raises:
        HTTPException: 400 when a task named ``req.name`` is already stored.
    """
    # Only existence matters, so fetch at most one id instead of whole rows.
    existing = await db.execute(
        select(RatioInstance.id).where(RatioInstance.name == req.name).limit(1)
    )
    if existing.first() is not None:
        logger.error(f"create ratio task failed: ratio task {req.name} already exists")
        raise HTTPException(status_code=400, detail=f"ratio task {req.name} already exists")
|
||||
|
||||
|
||||
async def get_dataset_types(dm_service: DatasetManagementService, req: CreateRatioTaskRequest) -> Set[str]:
    """Collect the upper-cased type of every source dataset named in ``req.config``.

    Raises:
        HTTPException: 400 as soon as one ``dataset_id`` cannot be resolved.
    """
    collected: Set[str] = set()
    for entry in req.config:
        found = await dm_service.get_dataset(entry.dataset_id)
        if not found:
            raise HTTPException(status_code=400, detail=f"dataset_id not found: {entry.dataset_id}")
        # The type may be exposed under either attribute name; a missing type becomes "NONE".
        raw_type = getattr(found, "dataset_type", None) or getattr(found, "datasetType", None)
        collected.add(str(raw_type).upper())
    return collected
|
||||
|
||||
|
||||
def get_target_dataset_type(source_types: Set[str]) -> str:
    """Derive the target dataset type from the set of source dataset types.

    Rules:
      1) only TEXT sources                        -> "TEXT"
      2) exactly one of IMAGE / AUDIO / VIDEO and
         nothing else                             -> that media type
      3) anything else (mixed, unknown, empty)    -> "OTHER"
    """
    if source_types == {"TEXT"}:
        return "TEXT"
    for modality in ("IMAGE", "AUDIO", "VIDEO"):
        # A homogeneous single-media source set keeps its media type.
        if source_types == {modality}:
            return modality
    return "OTHER"
|
||||
@@ -0,0 +1,86 @@
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
class RatioConfigItem(BaseModel):
    # One source entry of a ratio task: which dataset, how many items, and a filter expression.
    dataset_id: str = Field(..., alias="datasetId", description="数据集id")
    counts: str = Field(..., description="数量")
    filter_conditions: str = Field(..., description="过滤条件")

    @field_validator("counts")
    @classmethod
    def validate_counts(cls, v: str) -> str:
        # The value stays a string; it only has to be parseable as an integer.
        try:
            int(v)
        except Exception:
            raise ValueError("counts must be a numeric string")
        else:
            return v
|
||||
|
||||
|
||||
class CreateRatioTaskRequest(BaseModel):
    # Request body for creating a ratio task.
    name: str = Field(..., description="名称")
    description: Optional[str] = Field(None, description="描述")
    totals: str = Field(..., description="目标数量")
    ratio_method: str = Field(..., description="配比方式", alias="ratio_method")
    config: List[RatioConfigItem] = Field(..., description="配比设置列表")

    @field_validator("ratio_method")
    @classmethod
    def validate_ratio_method(cls, v: str) -> str:
        # Only the two known ratio methods are accepted.
        allowed = {"TAG", "DATASET"}
        if v not in allowed:
            raise ValueError(f"ratio_method must be one of {allowed}")
        return v

    @field_validator("totals")
    @classmethod
    def validate_totals(cls, v: str) -> str:
        # Parse first, range-check afterwards: raising the range error inside the try
        # block let the broad except replace it with the "numeric string" message.
        try:
            iv = int(v)
        except (TypeError, ValueError):
            raise ValueError("totals must be a numeric string")
        if iv < 0:
            raise ValueError("totals must be >= 0")
        return v
|
||||
|
||||
|
||||
class TargetDatasetInfo(BaseModel):
    # Summary of a dataset, used as the type of CreateRatioTaskResponse.targetDataset.
    # Field names are camelCase on purpose of the wire format (no aliases are declared).
    id: str
    name: str
    datasetType: str
    status: str
|
||||
|
||||
|
||||
class CreateRatioTaskResponse(BaseModel):
    # Response body returned after a ratio task has been created.
    # task info
    id: str
    name: str
    description: Optional[str] = None
    # An int here, whereas CreateRatioTaskRequest.totals is a numeric string.
    totals: int
    ratio_method: str
    status: str
    # echoed config
    config: List[RatioConfigItem]
    # created dataset
    targetDataset: TargetDatasetInfo
|
||||
|
||||
|
||||
class RatioTaskItem(BaseModel):
    # One entry of PagedRatioTaskResponse.content; only id and name are required.
    id: str
    name: str
    description: Optional[str] = None
    status: Optional[str] = None
    totals: Optional[int] = None
    ratio_method: Optional[str] = None
    target_dataset_id: Optional[str] = None
    target_dataset_name: Optional[str] = None
    # Timestamps are carried as strings, not datetime objects.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
|
||||
|
||||
|
||||
class PagedRatioTaskResponse(BaseModel):
    # One page of ratio tasks plus paging metadata (camelCase field names, no aliases).
    content: List[RatioTaskItem]
    totalElements: int
    totalPages: int
    page: int
    size: int
|
||||
@@ -0,0 +1,282 @@
|
||||
from typing import List, Optional, Dict, Any
|
||||
import random
|
||||
import os
|
||||
import shutil
|
||||
import asyncio
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models.ratio_task import RatioInstance, RatioRelation
|
||||
from app.db.models import Dataset, DatasetFiles
|
||||
from app.db.session import AsyncSessionLocal
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class RatioTaskService:
|
||||
"""Service for Ratio Task DB operations."""
|
||||
|
||||
def __init__(self, db: AsyncSession):
    # Keep the caller-provided session; create_task() adds to it and commits it.
    self.db = db
|
||||
|
||||
async def create_task(
    self,
    *,
    name: str,
    description: Optional[str],
    totals: int,
    ratio_method: str,
    config: List[Dict[str, Any]],
    target_dataset_id: Optional[str] = None,
) -> RatioInstance:
    """Persist a new ratio task together with one relation row per config entry.

    Each config entry has the form
    {"dataset_id": str, "counts": int, "filter_conditions": str}.

    The instance is created with status "PENDING"; the session is committed and the
    instance refreshed before it is returned.
    """
    entries = config or []
    logger.info(f"Creating ratio task: name={name}, method={ratio_method}, totals={totals}, items={len(entries)}")

    task_fields = dict(
        name=name,
        description=description,
        ratio_method=ratio_method,
        totals=totals,
        target_dataset_id=target_dataset_id,
        status="PENDING",
    )
    instance = RatioInstance(**task_fields)
    self.db.add(instance)
    # Flush so instance.id is available for the relation rows below.
    await self.db.flush()

    for entry in entries:
        self.db.add(
            RatioRelation(
                ratio_instance_id=instance.id,
                source_dataset_id=entry.get("dataset_id"),
                counts=int(entry.get("counts", 0)),
                filter_conditions=entry.get("filter_conditions"),
            )
        )

    await self.db.commit()
    await self.db.refresh(instance)
    logger.info(f"Ratio task created: {instance.id}")
    return instance
|
||||
|
||||
# ========================= Execution (Background) ========================= #
|
||||
|
||||
@staticmethod
async def execute_dataset_ratio_task(instance_id: str) -> None:
    """Execute a ratio task in background.

    Supported ratio_method:
    - DATASET: randomly select counts files from each source dataset
    - TAG: randomly select counts files matching relation.filter_conditions tags

    Steps:
    - Mark instance RUNNING
    - For each relation: fetch ACTIVE files, optionally filter by tags
    - Copy selected files into target dataset
    - Update dataset statistics and mark instance SUCCESS/FAILED

    Runs in its own session (not the request's). On any error the unit of work is
    rolled back before the instance is marked FAILED, so no partially-added file rows
    are committed and the session is usable again even after a failed flush.
    """
    async with AsyncSessionLocal() as session:  # type: AsyncSession
        try:
            # Load instance and relations
            inst_res = await session.execute(select(RatioInstance).where(RatioInstance.id == instance_id))
            instance: Optional[RatioInstance] = inst_res.scalar_one_or_none()
            if not instance:
                logger.error(f"Ratio instance not found: {instance_id}")
                return
            logger.info(f"start execute ratio task: {instance_id}")

            rel_res = await session.execute(
                select(RatioRelation).where(RatioRelation.ratio_instance_id == instance_id)
            )
            relations: List[RatioRelation] = list(rel_res.scalars().all())

            # Mark running (persisted together with the final commit)
            instance.status = "RUNNING"

            if instance.ratio_method not in {"DATASET", "TAG"}:
                logger.info(f"Instance {instance_id} ratio_method={instance.ratio_method} not supported yet")
                instance.status = "SUCCESS"
                return

            # Load target dataset
            ds_res = await session.execute(select(Dataset).where(Dataset.id == instance.target_dataset_id))
            target_ds: Optional[Dataset] = ds_res.scalar_one_or_none()
            if not target_ds:
                logger.error(f"Target dataset not found for instance {instance_id}")
                instance.status = "FAILED"
                return

            # Preload existing target file paths for deduplication
            existing_path_rows = await session.execute(
                select(DatasetFiles.file_path).where(DatasetFiles.dataset_id == target_ds.id)
            )
            existing_paths = set(p for p in existing_path_rows.scalars().all() if p)

            added_count = 0
            added_size = 0

            for rel in relations:
                # Relations without a source dataset or a positive count contribute nothing.
                if not rel.source_dataset_id or not rel.counts or rel.counts <= 0:
                    continue

                # Fetch all files for the source dataset (ACTIVE only)
                files_res = await session.execute(
                    select(DatasetFiles).where(
                        DatasetFiles.dataset_id == rel.source_dataset_id,
                        DatasetFiles.status == "ACTIVE",
                    )
                )
                files = list(files_res.scalars().all())

                # TAG mode: filter by tags according to relation.filter_conditions
                if instance.ratio_method == "TAG":
                    required_tags = RatioTaskService._parse_required_tags(rel.filter_conditions)
                    if required_tags:
                        files = [f for f in files if RatioTaskService._file_contains_tags(f, required_tags)]

                if not files:
                    continue

                # Take a random sample, or everything when fewer files exist than requested.
                pick_n = min(rel.counts or 0, len(files))
                chosen = random.sample(files, pick_n) if pick_n < len(files) else files

                # Copy into target dataset with de-dup by target path
                for f in chosen:
                    src_path = f.file_path
                    new_path = src_path
                    needs_copy = False
                    src_prefix = f"/dataset/{rel.source_dataset_id}"
                    # Files under the source dataset's directory are re-homed under the
                    # target dataset's directory; any other path is referenced in place.
                    if isinstance(src_path, str) and src_path.startswith(src_prefix):
                        dst_prefix = f"/dataset/{target_ds.id}"
                        new_path = src_path.replace(src_prefix, dst_prefix, 1)
                        needs_copy = True

                    # De-dup by target path
                    if new_path in existing_paths:
                        continue

                    # Perform copy only when needed; run blocking file I/O off the event loop
                    if needs_copy:
                        dst_dir = os.path.dirname(new_path)
                        await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True)
                        await asyncio.to_thread(shutil.copy2, src_path, new_path)

                    new_file = DatasetFiles(
                        dataset_id=target_ds.id,  # type: ignore
                        file_name=f.file_name,
                        file_path=new_path,
                        file_type=f.file_type,
                        file_size=f.file_size,
                        check_sum=f.check_sum,
                        tags=f.tags,
                        dataset_filemetadata=f.dataset_filemetadata,
                        status="ACTIVE",
                    )
                    session.add(new_file)
                    existing_paths.add(new_path)
                    added_count += 1
                    added_size += int(f.file_size or 0)

                # Flush once per relation to avoid accumulating a huge pending set
                await session.flush()

            # Update target dataset statistics
            target_ds.file_count = (target_ds.file_count or 0) + added_count  # type: ignore
            target_ds.size_bytes = (target_ds.size_bytes or 0) + added_size  # type: ignore
            # If target dataset has files, mark it ACTIVE
            if (target_ds.file_count or 0) > 0:  # type: ignore
                target_ds.status = "ACTIVE"

            # Done
            instance.status = "SUCCESS"
            logger.info(f"Dataset ratio execution completed: instance={instance_id}, files={added_count}, size={added_size}")

        except Exception as e:
            logger.exception(f"Dataset ratio execution failed for {instance_id}: {e}")
            try:
                # Discard the half-built unit of work first: without the rollback the
                # final commit would persist partially-added file rows, and after a
                # failed flush the session refuses further statements until rolled back.
                await session.rollback()
                inst_res = await session.execute(select(RatioInstance).where(RatioInstance.id == instance_id))
                instance = inst_res.scalar_one_or_none()
                if instance:
                    instance.status = "FAILED"
            except Exception as mark_err:
                # Best effort only: the original failure has already been logged above.
                logger.exception(f"Could not mark ratio instance {instance_id} as FAILED: {mark_err}")
        finally:
            # Persists whichever terminal status was set on any of the paths above.
            await session.commit()
|
||||
|
||||
# ------------------------- helpers for TAG filtering ------------------------- #
|
||||
|
||||
@staticmethod
|
||||
def _parse_required_tags(conditions: Optional[str]) -> set[str]:
|
||||
"""Parse filter_conditions into a set of required tag strings.
|
||||
|
||||
Supports simple separators: comma, semicolon, space. Empty/None -> empty set.
|
||||
"""
|
||||
if not conditions:
|
||||
return set()
|
||||
raw = conditions.replace("\n", " ")
|
||||
seps = [",", ";", " "]
|
||||
tokens = [raw]
|
||||
for sep in seps:
|
||||
nxt = []
|
||||
for t in tokens:
|
||||
nxt.extend(t.split(sep))
|
||||
tokens = nxt
|
||||
return {t.strip() for t in tokens if t and t.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _file_contains_tags(f: DatasetFiles, required: set[str]) -> bool:
|
||||
if not required:
|
||||
return True
|
||||
tags = f.tags
|
||||
if not tags:
|
||||
return False
|
||||
try:
|
||||
# tags could be a list of strings or list of objects with 'name'
|
||||
tag_names = set()
|
||||
if isinstance(tags, list):
|
||||
for item in tags:
|
||||
if isinstance(item, str):
|
||||
tag_names.add(item)
|
||||
elif isinstance(item, dict):
|
||||
name = item.get("name") or item.get("label") or item.get("tag")
|
||||
if isinstance(name, str):
|
||||
tag_names.add(name)
|
||||
elif isinstance(tags, dict):
|
||||
# flat dict of name->... treat keys as tags
|
||||
tag_names = set(map(str, tags.keys()))
|
||||
else:
|
||||
return False
|
||||
logger.info(f">>>>>{tags}>>>>>{required}, {tag_names}")
|
||||
return required.issubset(tag_names)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@staticmethod
async def get_new_file(f, rel: RatioRelation, target_ds: Dataset) -> DatasetFiles:
    """Build the ``DatasetFiles`` row that places source file ``f`` in ``target_ds``.

    When ``f.file_path`` is a string starting with
    ``/dataset/<rel.source_dataset_id>``, that prefix is rewritten (first
    occurrence only) to ``/dataset/<target_ds.id>``, the destination
    directory is created, and the file is copied there with
    ``shutil.copy2``. Both filesystem calls run through
    ``asyncio.to_thread`` so the event loop is not blocked. Otherwise the
    original path is reused and nothing is copied.

    Args:
        f: Source file record whose attributes are copied to the new row.
        rel: Ratio relation supplying ``source_dataset_id``.
        target_ds: Dataset supplying the ``id`` for the new row.

    Returns:
        A new ``DatasetFiles`` object with status ``"ACTIVE"``. It is only
        constructed here, not added to any session.
    """
    source_path = f.file_path
    target_path = source_path
    source_root = f"/dataset/{rel.source_dataset_id}"

    if isinstance(source_path, str) and source_path.startswith(source_root):
        target_root = f"/dataset/{target_ds.id}"
        target_path = source_path.replace(source_root, target_root, 1)
        # Create the destination directory first, then copy off the event loop.
        await asyncio.to_thread(os.makedirs, os.path.dirname(target_path), exist_ok=True)
        await asyncio.to_thread(shutil.copy2, source_path, target_path)

    # Attributes carried over unchanged from the source record.
    carried_over = {
        attr: getattr(f, attr)
        for attr in (
            "file_name",
            "file_type",
            "file_size",
            "check_sum",
            "tags",
            "dataset_filemetadata",
        )
    }
    return DatasetFiles(
        dataset_id=target_ds.id,  # type: ignore
        file_path=target_path,
        status="ACTIVE",
        **carried_over,
    )
|
||||
32
scripts/db/data-ratio-init.sql
Normal file
32
scripts/db/data-ratio-init.sql
Normal file
@@ -0,0 +1,32 @@
|
||||
USE datamate;
|
||||
|
||||
-- Ratio instance table (UUID primary key).
-- ratio_method is documented as either TAG (by tag) or DATASET (by dataset).
CREATE TABLE IF NOT EXISTS t_st_ratio_instances
(
    id                varchar(64) primary key COMMENT 'UUID',
    name              varchar(64) COMMENT '名称',                -- name
    description       TEXT COMMENT '描述',                       -- description
    -- Column comment corrected from "template dataset ID" to "target dataset ID".
    target_dataset_id varchar(64) COMMENT '目标数据集ID',
    ratio_method      varchar(50) COMMENT '配比方式,按标签(TAG),按数据集(DATASET)',
    ratio_parameters  JSON COMMENT '配比参数',                   -- ratio parameters
    merge_method      varchar(50) COMMENT '合并方式',            -- merge method
    status            varchar(20) COMMENT '状态',                -- status
    totals            BIGINT COMMENT '总数',                     -- total count
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    created_by        VARCHAR(255) COMMENT '创建者',
    updated_by        VARCHAR(255) COMMENT '更新者'
) COMMENT='配比实例表(UUID 主键)';
|
||||
|
||||
-- Ratio relation table (UUID primary key): each row references a ratio
-- instance and a source dataset.
CREATE TABLE IF NOT EXISTS t_st_ratio_relations
(
    id                varchar(64) primary key COMMENT 'UUID',
    ratio_instance_id varchar(64) COMMENT '配比实例ID',          -- ratio instance ID
    source_dataset_id varchar(64) COMMENT '源数据集ID',          -- source dataset ID
    -- COMMENT added so this column is documented like every other one.
    ratio_value       varchar(256) COMMENT '配比值',
    counts            BIGINT COMMENT '条数',                     -- number of records
    filter_conditions text COMMENT '过滤条件',                   -- filter conditions
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    created_by        VARCHAR(255) COMMENT '创建者',
    updated_by        VARCHAR(255) COMMENT '更新者'
) COMMENT='配比关系表(UUID 主键)';
|
||||
Reference in New Issue
Block a user