diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/FileTag.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/FileTag.java index c8ebecd..292aa08 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/FileTag.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/FileTag.java @@ -20,7 +20,7 @@ import java.util.Map; @JsonIgnoreProperties(ignoreUnknown = true) @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) public class FileTag { - private Map value; + private Map values; private String type; @@ -30,7 +30,7 @@ public class FileTag { public List getTags() { List tags = new ArrayList<>(); - Object tagValues = value.get(type); + Object tagValues = values.get(type); if (tagValues instanceof List) { for (Object tag : (List) tagValues) { if (tag instanceof String) { diff --git a/frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx b/frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx index e27b4aa..de1a64b 100644 --- a/frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx +++ b/frontend/src/pages/RatioTask/Create/CreateRatioTask.tsx @@ -38,7 +38,7 @@ export default function CreateRatioTask() { const totals = String(values.totalTargetCount); const config = ratioTaskForm.ratioConfigs.map((c) => { return { - datasetId: c.id, + datasetId: c.source, counts: String(c.quantity ?? 0), filterConditions: { label: c.labelFilter, dateRange: String(c.dateRange ?? 0)}, }; diff --git a/frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx b/frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx index 98e5f78..7fefcbd 100644 --- a/frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx +++ b/frontend/src/pages/RatioTask/Create/components/RatioConfig.tsx @@ -1,11 +1,16 @@ -import React, { useMemo, useState } from "react"; -import { Badge, Card, Input, Progress, Button, DatePicker, Select } from "antd"; -import { BarChart3, Filter, Clock } from "lucide-react"; +import React, { useMemo, useState, useEffect, FC } from "react"; +import { + Badge, + Card, + Progress, + Button, + Select, + Table, + InputNumber, + Space, +} from "antd"; +import { BarChart3, Filter } from "lucide-react"; import type { Dataset } from "@/pages/DataManagement/dataset.model.ts"; -import dayjs from 'dayjs'; - -const { RangePicker } = DatePicker; -const { Option } = Select; const TIME_RANGE_OPTIONS = [ { label: '最近1天', value: 1 }, @@ -21,9 +26,9 @@ interface RatioConfigItem { type: "dataset" | "label"; quantity: number; percentage: number; - source: string; + source: string; // dataset id labelFilter?: string; - dateRange?: string; + dateRange?: number; } interface RatioConfigProps { @@ -35,169 +40,113 @@ interface RatioConfigProps { onChange?: (configs: RatioConfigItem[]) => void; } -const RatioConfig: React.FC = ({ - ratioType, - selectedDatasets, - datasets, - totalTargetCount, - distributions, - onChange, - }) => { - const [ratioConfigs, setRatioConfigs] = useState([]); - const [datasetFilters, setDatasetFilters] = useState>({}); +const genId = (datasetId: string) => + `${datasetId}-${Math.random().toString(36).slice(2, 9)}`; + +const RatioConfig: FC = ({ + ratioType, + selectedDatasets, + datasets, + totalTargetCount, + distributions, + onChange, + }) => { + const [ratioConfigs, setRatioConfigs] = useState([]); - // 配比项总数 const totalConfigured = useMemo( () => ratioConfigs.reduce((sum, c) => sum + (c.quantity || 0), 0), [ratioConfigs] ); - // 获取数据集的标签列表 const getDatasetLabels = (datasetId: string): string[] => { const dist = distributions[String(datasetId)] || {}; return Object.keys(dist); }; - // 自动平均分配 + const addConfig = (datasetId: string) => { + const dataset = datasets.find((d) => String(d.id) === datasetId); + const newConfig: RatioConfigItem = { + id: genId(datasetId), + name: dataset?.name || datasetId, + type: ratioType, + quantity: 0, + percentage: 0, + source: datasetId, + }; + const newConfigs = [...ratioConfigs, newConfig]; + setRatioConfigs(newConfigs); + onChange?.(newConfigs); + }; + + const removeConfig = (configId: string) => { + const newConfigs = ratioConfigs.filter((c) => c.id !== configId); + const adjusted = recomputePercentages(newConfigs); + setRatioConfigs(adjusted); + onChange?.(adjusted); + }; + + const updateConfig = ( + configId: string, + updates: Partial< + Pick + > + ) => { + const newConfigs = ratioConfigs.map((c) => + c.id === configId ? { ...c, ...updates } : c + ); + const adjusted = recomputePercentages(newConfigs); + setRatioConfigs(adjusted); + onChange?.(adjusted); + }; + + const recomputePercentages = (configs: RatioConfigItem[]) => { + return configs.map((c) => ({ + ...c, + percentage: + totalTargetCount > 0 + ? Math.round((c.quantity / totalTargetCount) * 100) + : 0, + })); + }; + const generateAutoRatio = () => { const selectedCount = selectedDatasets.length; if (selectedCount === 0) return; const baseQuantity = Math.floor(totalTargetCount / selectedCount); const remainder = totalTargetCount % selectedCount; - const newConfigs = selectedDatasets.map((datasetId, index) => { + + let newConfigs: RatioConfigItem[] = ratioConfigs.filter( + (c) => !selectedDatasets.includes(c.source) + ); + + selectedDatasets.forEach((datasetId, index) => { const dataset = datasets.find((d) => String(d.id) === datasetId); const quantity = baseQuantity + (index < remainder ? 1 : 0); - return { - id: datasetId, + const config: RatioConfigItem = { + id: genId(datasetId), name: dataset?.name || datasetId, type: ratioType, quantity, percentage: Math.round((quantity / totalTargetCount) * 100), source: datasetId, - labelFilter: datasetFilters[datasetId]?.labelFilter, - dateRange: datasetFilters[datasetId]?.dateRange, }; + newConfigs.push(config); }); + setRatioConfigs(newConfigs); onChange?.(newConfigs); }; - // 更新数据集配比项 - const updateDatasetQuantity = (datasetId: string, quantity: number) => { - setRatioConfigs((prev) => { - const existingIndex = prev.findIndex( - (config) => config.source === datasetId - ); - const totalOtherQuantity = prev - .filter((config) => config.source !== datasetId) - .reduce((sum, config) => sum + config.quantity, 0); - - const dataset = datasets.find((d) => String(d.id) === datasetId); - const newConfig: RatioConfigItem = { - id: datasetId, - name: dataset?.name || datasetId, - type: ratioType, - quantity: Math.min(quantity, totalTargetCount - totalOtherQuantity), - percentage: Math.round((quantity / totalTargetCount) * 100), - source: datasetId, - labelFilter: datasetFilters[datasetId]?.labelFilter, - dateRange: datasetFilters[datasetId]?.dateRange, - }; - - let newConfigs; - if (existingIndex >= 0) { - newConfigs = [...prev]; - newConfigs[existingIndex] = newConfig; - } else { - newConfigs = [...prev, newConfig]; - } - onChange?.(newConfigs); - return newConfigs; - }); - }; - - // 更新筛选条件 - const updateFilters = (datasetId: string, updates: { - labelFilter?: string; - dateRange?: [string, string]; - }) => { - setDatasetFilters(prev => ({ - ...prev, - [datasetId]: { - ...prev[datasetId], - ...updates, - } - })); - }; - - // 渲染筛选器 - const renderFilters = (datasetId: string) => { - const labels = getDatasetLabels(datasetId); - const config = ratioConfigs.find(c => c.source === datasetId); - const filters = datasetFilters[datasetId] || {}; - - return ( -
-
- - 筛选条件 -
- -
-
-
标签筛选
- -
- -
-
标签更新时间
- -
-
-
+ useEffect(() => { + const keep = ratioConfigs.filter((c) => + selectedDatasets.includes(c.source) ); - }; - - // 选中数据集变化时,初始化筛选条件 - React.useEffect(() => { - const initialFilters: Record = {}; - selectedDatasets.forEach(datasetId => { - const config = ratioConfigs.find(c => c.source === datasetId); - if (config) { - initialFilters[datasetId] = { - labelFilter: config.labelFilter, - dateRange: config.dateRange, - }; - } - }); - setDatasetFilters(prev => ({ ...prev, ...initialFilters })); + if (keep.length !== ratioConfigs.length) { + const adjusted = recomputePercentages(keep); + setRatioConfigs(adjusted); + onChange?.(adjusted); + } + // eslint-disable-next-line react-hooks/exhaustive-deps }, [selectedDatasets]); return ( @@ -209,15 +158,18 @@ const RatioConfig: React.FC = ({ (已配置:{totalConfigured}/{totalTargetCount}条) - +
+ +
+ {selectedDatasets.length === 0 ? (
@@ -225,7 +177,6 @@ const RatioConfig: React.FC = ({
) : (
- {/* 配比预览 */} {ratioConfigs.length > 0 && (
@@ -250,54 +201,146 @@ const RatioConfig: React.FC = ({
{selectedDatasets.map((datasetId) => { const dataset = datasets.find((d) => String(d.id) === datasetId); - const config = ratioConfigs.find((c) => c.source === datasetId); - const currentQuantity = config?.quantity || 0; - if (!dataset) return null; + const datasetConfigs = ratioConfigs.filter( + (c) => c.source === datasetId + ); + + const labels = getDatasetLabels(datasetId); + const usedLabels = datasetConfigs + .map((c) => c.labelFilter) + .filter(Boolean) as string[]; + + const columns = [ + { + title: "配比项", + dataIndex: "id", + key: "id", + render: (_: any, record: RatioConfigItem) => ( + + + {record.name} + + ), + }, + { + title: "标签筛选", + dataIndex: "labelFilter", + key: "labelFilter", + render: (_: any, record: RatioConfigItem) => { + const availableLabels = labels + .map((l) => ({ label: l, value: l })) + .filter( + (opt) => + opt.value === record.labelFilter || + !usedLabels.includes(opt.value) + ); + return ( + + updateConfig(record.id, { + dateRange: value || undefined, + }) + } + /> + ), + }, + { + title: "数量", + dataIndex: "quantity", + key: "quantity", + render: (_: any, record: RatioConfigItem) => ( + + updateConfig(record.id, { quantity: Number(v || 0) }) + } + /> + ), + }, + { + title: "占比", + dataIndex: "percentage", + key: "percentage", + render: (_: any, record: RatioConfigItem) => ( +
+
+ {record.percentage ?? 0}% +
+ +
+ ), + }, + { + title: "操作", + dataIndex: "actions", + key: "actions", + render: (_: any, record: RatioConfigItem) => ( + + ), + }, + ]; + return (
- - {dataset.name} - + {dataset.name} {dataset.fileCount}条
- {config?.percentage || 0}% + {datasetConfigs.reduce((s, c) => s + (c.percentage || 0), 0)}%
- {/* 筛选条件 */} - {renderFilters(datasetId)} - -
- 数量: - - updateDatasetQuantity( - datasetId, - Number(e.target.value) - ) - } - style={{ width: 100 }} - min={0} - max={Math.min( - dataset.fileCount || 0, - totalTargetCount - )} - /> - -
- + +
+ +
); })} diff --git a/frontend/src/pages/RatioTask/Detail/RatioTaskDetail.tsx b/frontend/src/pages/RatioTask/Detail/RatioTaskDetail.tsx index ab27227..ef3100a 100644 --- a/frontend/src/pages/RatioTask/Detail/RatioTaskDetail.tsx +++ b/frontend/src/pages/RatioTask/Detail/RatioTaskDetail.tsx @@ -177,11 +177,6 @@ export default function RatioTaskDetail() { ), }, - { - key: "type", - label: "配比方式", - children: ratioTask.type || "未知", - }, { key: "createdBy", label: "创建者", diff --git a/frontend/src/pages/RatioTask/Home/RatioTask.tsx b/frontend/src/pages/RatioTask/Home/RatioTask.tsx index 1ab0706..6f5081f 100644 --- a/frontend/src/pages/RatioTask/Home/RatioTask.tsx +++ b/frontend/src/pages/RatioTask/Home/RatioTask.tsx @@ -95,12 +95,6 @@ export default function RatioTasksPage() { ); }, }, - { - title: "配比方式", - dataIndex: "ratio_method", - key: "ratio_method", - width: 120, - }, { title: "目标数量", dataIndex: "totals", diff --git a/runtime/datamate-python/app/db/models/ratio_task.py b/runtime/datamate-python/app/db/models/ratio_task.py index 6b84108..5b5e300 100644 --- a/runtime/datamate-python/app/db/models/ratio_task.py +++ b/runtime/datamate-python/app/db/models/ratio_task.py @@ -28,7 +28,6 @@ class RatioInstance(Base): name = Column(String(64), nullable=True, comment="名称") description = Column(Text, nullable=True, comment="描述") target_dataset_id = Column(String(64), nullable=True, comment="模板数据集ID") - ratio_method = Column(String(50), nullable=True, comment="配比方式,按标签(TAG),按数据集(DATASET)") ratio_parameters = Column(JSON, nullable=True, comment="配比参数") merge_method = Column(String(50), nullable=True, comment="合并方式") status = Column(String(20), nullable=True, comment="状态") @@ -39,7 +38,7 @@ class RatioInstance(Base): updated_by = Column(String(255), nullable=True, comment="更新者") def __repr__(self) -> str: - return f"" + return f"" class RatioRelation(Base): diff --git a/runtime/datamate-python/app/module/dataset/schema/dataset_file.py b/runtime/datamate-python/app/module/dataset/schema/dataset_file.py index 6738869..bf9269c 100644 --- a/runtime/datamate-python/app/module/dataset/schema/dataset_file.py +++ b/runtime/datamate-python/app/module/dataset/schema/dataset_file.py @@ -27,15 +27,15 @@ class PagedDatasetFileResponse(BaseModel): size: int = Field(..., description="每页大小") class DatasetFileTag(BaseModel): - id: str = Field(..., description="标签ID") - type: str = Field(..., description="类型") - from_name: str = Field(..., description="标签名称") - value: dict = Field(..., description="标签值") + id: str = Field(None, description="标签ID") + type: str = Field(None, description="类型") + from_name: str = Field(None, description="标签名称") + values: dict = Field(None, description="标签值") def get_tags(self) -> List[str]: tags = [] - # 如果 value 是字典类型,根据 type 获取对应的值 - tag_values = self.value.get(self.type, []) + # 如果 values 是字典类型,根据 type 获取对应的值 + tag_values = self.values.get(self.type, []) # 处理标签值 if isinstance(tag_values, list): @@ -55,7 +55,7 @@ class FileTagUpdate(BaseModel): """单个文件的标签更新请求""" file_id: str = Field(..., alias="fileId", description="文件ID") tags: List[Dict[str, Any]] = Field(..., description="要更新的标签列表(部分更新)") - + class Config: populate_by_name = True @@ -63,7 +63,7 @@ class FileTagUpdate(BaseModel): class BatchUpdateFileTagsRequest(BaseModel): """批量更新文件标签请求""" updates: List[FileTagUpdate] = Field(..., description="文件标签更新列表", min_length=1) - + class Config: populate_by_name = True @@ -74,7 +74,7 @@ class FileTagUpdateResult(BaseModel): success: bool = Field(..., description="是否更新成功") message: Optional[str] = Field(None, description="结果信息") tags_updated_at: Optional[datetime] = Field(None, alias="tagsUpdatedAt", description="标签更新时间") - + class Config: populate_by_name = True @@ -85,6 +85,6 @@ class BatchUpdateFileTagsResponse(BaseModel): total: int = Field(..., description="总更新数量") success_count: int = Field(..., alias="successCount", description="成功数量") failure_count: int = Field(..., alias="failureCount", description="失败数量") - + class Config: populate_by_name = True diff --git a/runtime/datamate-python/app/module/synthesis/interface/ratio_task.py b/runtime/datamate-python/app/module/synthesis/interface/ratio_task.py index a67c5f7..09d3a16 100644 --- a/runtime/datamate-python/app/module/synthesis/interface/ratio_task.py +++ b/runtime/datamate-python/app/module/synthesis/interface/ratio_task.py @@ -170,7 +170,6 @@ async def list_ratio_tasks( description=i.description, status=i.status, totals=i.totals, - ratio_method=i.ratio_method, target_dataset_id=i.target_dataset_id, target_dataset_name=(ds.name if ds else None), created_at=str(i.created_at) if getattr(i, "created_at", None) else None, @@ -330,7 +329,6 @@ async def get_ratio_task( description=instance.description, status=instance.status or "UNKNOWN", totals=instance.totals or 0, - ratio_method=instance.ratio_method or "", config=config, target_dataset=target_dataset_info, created_at=instance.created_at, diff --git a/runtime/datamate-python/app/module/synthesis/schema/ratio_task.py b/runtime/datamate-python/app/module/synthesis/schema/ratio_task.py index a781829..1b01023 100644 --- a/runtime/datamate-python/app/module/synthesis/schema/ratio_task.py +++ b/runtime/datamate-python/app/module/synthesis/schema/ratio_task.py @@ -88,7 +88,6 @@ class RatioTaskItem(BaseModel): description: Optional[str] = None status: Optional[str] = None totals: Optional[int] = None - ratio_method: Optional[str] = None target_dataset_id: Optional[str] = None target_dataset_name: Optional[str] = None created_at: Optional[str] = None @@ -110,7 +109,6 @@ class RatioTaskDetailResponse(BaseModel): description: Optional[str] = Field(None, description="任务描述") status: str = Field(..., description="任务状态") totals: int = Field(..., description="目标总数") - ratio_method: str = Field(..., description="配比方式") config: List[Dict[str, Any]] = Field(..., description="配比配置") target_dataset: Dict[str, Any] = Field(..., description="目标数据集信息") created_at: Optional[datetime] = Field(None, description="创建时间") diff --git a/runtime/datamate-python/app/module/synthesis/service/ratio_task.py b/runtime/datamate-python/app/module/synthesis/service/ratio_task.py index bd55061..ea0a1bb 100644 --- a/runtime/datamate-python/app/module/synthesis/service/ratio_task.py +++ b/runtime/datamate-python/app/module/synthesis/service/ratio_task.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import List, Optional, Dict, Any import random import json @@ -173,7 +174,7 @@ class RatioTaskService: @staticmethod async def handle_selected_file(existing_paths: set[Any], f, session, target_ds: Dataset): src_path = f.file_path - dst_prefix = f"/dataset/{target_ds.id}" + dst_prefix = f"/dataset/{target_ds.id}/" file_name = RatioTaskService.get_new_file_name(dst_prefix, existing_paths, f) new_path = dst_prefix + file_name @@ -181,18 +182,20 @@ class RatioTaskService: await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True) await asyncio.to_thread(shutil.copy2, src_path, new_path) - new_file = DatasetFiles( - dataset_id=target_ds.id, # type: ignore - file_name=file_name, - file_path=new_path, - file_type=f.file_type, - file_size=f.file_size, - check_sum=f.check_sum, - tags=f.tags, - dataset_filemetadata=f.dataset_filemetadata, - status="ACTIVE", - ) - session.add(new_file) + file_data = { + "dataset_id": target_ds.id, # type: ignore + "file_name": file_name, + "file_path": new_path, + "file_type": f.file_type, + "file_size": f.file_size, + "check_sum": f.check_sum, + "tags": f.tags, + "tags_updated_at": datetime.now(), + "dataset_filemetadata": f.dataset_filemetadata, + "status": "ACTIVE", + } + file_record = {k: v for k, v in file_data.items() if v is not None} + session.add(DatasetFiles(**file_record)) existing_paths.add(new_path) @staticmethod diff --git a/scripts/db/data-ratio-init.sql b/scripts/db/data-ratio-init.sql index cf2f670..c84faab 100644 --- a/scripts/db/data-ratio-init.sql +++ b/scripts/db/data-ratio-init.sql @@ -6,7 +6,6 @@ CREATE TABLE IF NOT EXISTS t_st_ratio_instances name varchar(64) COMMENT '名称', description TEXT COMMENT '描述', target_dataset_id varchar(64) COMMENT '模板数据集ID', - ratio_method varchar(50) COMMENT '配比方式,按标签(TAG),按数据集(DATASET)', ratio_parameters JSON COMMENT '配比参数', merge_method varchar(50) COMMENT '合并方式', status varchar(20) COMMENT '状态',