fix:修复配比任务操作问题 (#66)

* fix:配比任务需要能够跳转到目标数据集

* feature:增加配比任务详情接口

* fix:删除不存在的配比详情页面

* fix:使用正式的逻辑来展示标签

* fix:参数默认值去掉多余的-

* fix:修复配比任务相关操作
This commit is contained in:
Vincent
2025-11-07 19:01:45 +08:00
committed by GitHub
parent 28b7c631a4
commit 60e2289019
9 changed files with 120 additions and 172 deletions

View File

@@ -2,8 +2,10 @@ package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.*; import lombok.*;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.time.LocalDateTime; import java.time.LocalDateTime;
@@ -17,6 +19,7 @@ import java.util.List;
@Getter @Getter
@Setter @Setter
@Builder @Builder
@Slf4j
@NoArgsConstructor @NoArgsConstructor
@AllArgsConstructor @AllArgsConstructor
@TableName("t_dm_dataset_files") @TableName("t_dm_dataset_files")
@@ -42,11 +45,12 @@ public class DatasetFile {
* *
* @return 标签列表 * @return 标签列表
*/ */
public List<String> analyzeTag() { public List<FileTag> analyzeTag() {
try { try {
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(tags, List.class); return mapper.readValue(tags, new TypeReference<List<FileTag>>() {});
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage(), e);
return Collections.emptyList(); return Collections.emptyList();
} }
} }

View File

@@ -0,0 +1,48 @@
package com.datamate.datamanagement.domain.model.dataset;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * One annotation entry parsed from a dataset file's {@code tags} JSON array.
 *
 * <p>JSON property names are snake_case (for example {@code from_name}), and unknown
 * properties are ignored during deserialization.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
public class FileTag {
    /** Annotation payload; the labels are stored under the key named by {@link #type}. */
    private Map<String, Object> value;

    /** Annotation type; also the key used to look up the labels inside {@link #value}. */
    private String type;

    /** Identifier of this annotation entry. */
    private String id;

    /** Name prepended to every label when it is non-empty. */
    private String fromName;

    /**
     * Flattens this entry into plain label strings.
     *
     * <p>The labels are read from {@code value.get(type)}, which may be a single string or a
     * list; non-string list elements are skipped. When {@code fromName} is non-empty each label
     * is returned as {@code fromName + " " + label}.
     *
     * @return the labels of this entry; an empty list when {@code value} is absent or holds no
     *     usable label under {@code type}
     */
    public List<String> getTags() {
        List<String> tags = new ArrayList<>();
        // An entry deserialized without a "value" property leaves the map null; treat it as
        // "no labels" instead of failing with a NullPointerException.
        if (value == null) {
            return tags;
        }
        String prefix = StringUtils.isNotEmpty(fromName) ? fromName + " " : "";
        Object tagValues = value.get(type);
        if (tagValues instanceof List) {
            for (Object tag : (List<?>) tagValues) {
                if (tag instanceof String) {
                    tags.add(prefix + tag);
                }
            }
        } else if (tagValues instanceof String) {
            tags.add(prefix + tagValues);
        }
        return tags;
    }
}

View File

@@ -2,6 +2,7 @@ package com.datamate.datamanagement.interfaces.converter;
import com.datamate.common.infrastructure.exception.BusinessException; import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode; import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.datamanagement.domain.model.dataset.FileTag;
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest; import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse; import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse; import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
@@ -71,12 +72,12 @@ public interface DatasetConverter {
return distribution; return distribution;
} }
for (DatasetFile datasetFile : datasetFiles) { for (DatasetFile datasetFile : datasetFiles) {
List<String> tags = datasetFile.analyzeTag(); List<FileTag> tags = datasetFile.analyzeTag();
if (CollectionUtils.isEmpty(tags)) { if (CollectionUtils.isEmpty(tags)) {
continue; return distribution;
} }
for (String tag : tags) { for (FileTag tag : tags) {
distribution.put(tag, distribution.getOrDefault(tag, 0L) + 1); tag.getTags().forEach(tagName -> distribution.put(tagName, distribution.getOrDefault(tagName, 0L) + 1));
} }
} }
return distribution; return distribution;

View File

@@ -51,9 +51,9 @@ import java.util.concurrent.Semaphore;
public class RagEtlService { public class RagEtlService {
private static final Semaphore SEMAPHORE = new Semaphore(10); private static final Semaphore SEMAPHORE = new Semaphore(10);
@Value("${datamate.rag.milvus-host:-milvus-standalone}") @Value("${datamate.rag.milvus-host:milvus-standalone}")
private String milvusHost; private String milvusHost;
@Value("${datamate.rag.milvus-port:-19530}") @Value("${datamate.rag.milvus-port:19530}")
private int milvusPort; private int milvusPort;
private final RagFileRepository ragFileRepository; private final RagFileRepository ragFileRepository;

View File

@@ -76,116 +76,6 @@ export default function CreateRatioTask() {
setCreating(false); setCreating(false);
} }
}; };
// Sum of the quantities of all ratio configs currently in the form (0 when there are none).
const totalConfigured = useMemo(() => {
  const configs = ratioTaskForm?.ratioConfigs;
  // A missing quantity counts as 0.
  const total = configs?.reduce?.((acc, cfg) => acc + (cfg.quantity || 0), 0);
  return total || 0;
}, [ratioTaskForm.ratioConfigs]);
// dataset selection is handled inside SelectDataset via onSelectedDatasetsChange
// Insert or replace the ratio config whose `source` matches the given one.
// The requested quantity is clamped to [0, totalTargetCount - sum of the other configs],
// and the percentage is derived from the clamped quantity so both fields stay consistent.
const updateRatioConfig = (source: string, quantity: number) => {
  setRatioTaskForm((prev) => {
    const existingIndex = prev.ratioConfigs.findIndex(
      (config) => config.source === source
    );
    const totalOtherQuantity = prev.ratioConfigs
      .filter((config) => config.source !== source)
      .reduce((sum, config) => sum + config.quantity, 0);

    // Never exceed what is left of the target, and never go below zero when the
    // other configs already use up (or exceed) the target.
    const cappedQuantity = Math.max(
      0,
      Math.min(quantity, prev.totalTargetCount - totalOtherQuantity)
    );

    const newConfig = {
      id: source,
      name: source,
      type: prev.ratioType,
      quantity: cappedQuantity,
      // Guard against a zero target, which would otherwise yield NaN/Infinity.
      percentage:
        prev.totalTargetCount > 0
          ? Math.round((cappedQuantity / prev.totalTargetCount) * 100)
          : 0,
      source,
    };

    if (existingIndex >= 0) {
      const newConfigs = [...prev.ratioConfigs];
      newConfigs[existingIndex] = newConfig;
      return { ...prev, ratioConfigs: newConfigs };
    }
    return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
  });
};
// Split totalTargetCount evenly across the selected datasets and replace all ratio configs.
// The remainder is handed out one unit at a time to the first datasets in the selection.
const generateAutoRatio = () => {
  const { selectedDatasets, totalTargetCount, ratioType } = ratioTaskForm;
  const selectedCount = selectedDatasets.length;
  if (selectedCount === 0) return;

  const baseQuantity = Math.floor(totalTargetCount / selectedCount);
  const remainder = totalTargetCount % selectedCount;

  const newConfigs = selectedDatasets.map((datasetId, index) => {
    const quantity = baseQuantity + (index < remainder ? 1 : 0);
    return {
      id: datasetId,
      name: datasetId,
      type: ratioType,
      quantity,
      // Guard against a zero target, which would otherwise yield NaN (0 / 0).
      percentage:
        totalTargetCount > 0
          ? Math.round((quantity / totalTargetCount) * 100)
          : 0,
      source: datasetId,
    };
  });

  setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: newConfigs }));
};
// Label mode: set the quantity for one label of one dataset.
// The config is keyed by `${datasetId}_${label}`. The quantity is clamped to
// [0, min(remaining target, number of files carrying the label)]; when the label has no
// entry in `distributions` there is no per-label upper bound.
const updateLabelRatioConfig = (
  datasetId: string,
  label: string,
  quantity: number
) => {
  const sourceKey = `${datasetId}_${label}`;
  setRatioTaskForm((prev) => {
    const existingIndex = prev.ratioConfigs.findIndex(
      (c) => c.source === sourceKey
    );
    const totalOtherQuantity = prev.ratioConfigs
      .filter((c) => c.source !== sourceKey)
      .reduce((sum, c) => sum + c.quantity, 0);

    const dist = distributions[datasetId] || {};
    const labelMax = dist[label] ?? Infinity;
    const cappedQuantity = Math.max(
      0,
      Math.min(quantity, prev.totalTargetCount - totalOtherQuantity, labelMax)
    );

    const newConfig = {
      id: sourceKey,
      name: label,
      type: "label",
      quantity: cappedQuantity,
      // Guard against a zero target, which would otherwise yield NaN (0 / 0).
      percentage:
        prev.totalTargetCount > 0
          ? Math.round((cappedQuantity / prev.totalTargetCount) * 100)
          : 0,
      source: sourceKey,
    };

    if (existingIndex >= 0) {
      const newConfigs = [...prev.ratioConfigs];
      newConfigs[existingIndex] = newConfig;
      return { ...prev, ratioConfigs: newConfigs };
    }
    return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
  });
};
const handleValuesChange = (_, allValues) => { const handleValuesChange = (_, allValues) => {
setRatioTaskForm({ ...ratioTaskForm, ...allValues }); setRatioTaskForm({ ...ratioTaskForm, ...allValues });

View File

@@ -123,7 +123,7 @@ export default function RatioTasksPage() {
<Button <Button
type="text" type="text"
icon={op.icon} icon={op.icon}
onClick={() => op.onClick(task.id)} onClick={() => op.onClick(task)}
/> />
</Tooltip> </Tooltip>
))} ))}

View File

@@ -16,8 +16,7 @@ export function createRatioTaskUsingPost(data: any) {
} }
// 删除配比任务(支持批量) // 删除配比任务(支持批量)
export function deleteRatioTasksUsingDelete(ids: string[]) { export function deleteRatioTasksUsingDelete(id: string) {
const qs = (ids || []).map((id) => `ids=${encodeURIComponent(id)}`).join("&"); const url = `/api/synthesis/ratio-task?ids=${id}`;
const url = qs ? `/api/synthesis/ratio-task?${qs}` : "/api/synthesis/ratio-task";
return del(url); return del(url);
} }

View File

@@ -26,3 +26,26 @@ class PagedDatasetFileResponse(BaseModel):
page: int = Field(..., description="当前页码") page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小") size: int = Field(..., description="每页大小")
# One annotation entry from a dataset file's tag list; get_tags() flattens it to label strings.
class DatasetFileTag(BaseModel):
    id: str = Field(..., description="标签ID")
    type: str = Field(..., description="类型")
    from_name: str = Field(..., description="标签名称")
    value: dict = Field(..., description="标签值")

    def get_tags(self) -> List[str]:
        """Return the labels of this entry as plain strings.

        The labels are read from ``value[type]``, which may be a single string or a
        list; non-string list items are skipped. When ``from_name`` is non-empty every
        label is returned as ``"<from_name> <label>"``.
        """
        # The labels live in ``value`` under the key named by ``type``.
        raw = self.value.get(self.type, [])

        if isinstance(raw, list):
            labels = [str(item) for item in raw if isinstance(item, str)]
        elif isinstance(raw, str):
            labels = [raw]
        else:
            labels = []

        if not self.from_name:
            return labels
        # Prefix each label with from_name when it is set.
        return [f"{self.from_name} {label}" for label in labels]

View File

@@ -1,5 +1,6 @@
from typing import List, Optional, Dict, Any from typing import List, Optional, Dict, Any
import random import random
import json
import os import os
import shutil import shutil
import asyncio import asyncio
@@ -12,6 +13,7 @@ from app.core.logging import get_logger
from app.db.models.ratio_task import RatioInstance, RatioRelation from app.db.models.ratio_task import RatioInstance, RatioRelation
from app.db.models import Dataset, DatasetFiles from app.db.models import Dataset, DatasetFiles
from app.db.session import AsyncSessionLocal from app.db.session import AsyncSessionLocal
from app.module.dataset.schema.dataset_file import DatasetFileTag
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -218,65 +220,46 @@ class RatioTaskService:
""" """
if not conditions: if not conditions:
return set() return set()
raw = conditions.replace("\n", " ") data = json.loads(conditions)
seps = [",", ";", " "] required_tags = set()
tokens = [raw] if data.get("label"):
for sep in seps: required_tags.add(data["label"])
nxt = [] return required_tags
for t in tokens:
nxt.extend(t.split(sep))
tokens = nxt
return {t.strip() for t in tokens if t and t.strip()}
@staticmethod @staticmethod
def _file_contains_tags(f: DatasetFiles, required: set[str]) -> bool: def _file_contains_tags(file: DatasetFiles, required: set[str]) -> bool:
if not required: if not required:
return True return True
tags = f.tags tags = file.tags
if not tags: if not tags:
return False return False
try: try:
# tags could be a list of strings or list of objects with 'name' # tags could be a list of strings or list of objects with 'name'
tag_names = set() tag_names = RatioTaskService.get_all_tags(tags)
if isinstance(tags, list):
for item in tags:
if isinstance(item, str):
tag_names.add(item)
elif isinstance(item, dict):
name = item.get("name") or item.get("label") or item.get("tag")
if isinstance(name, str):
tag_names.add(name)
elif isinstance(tags, dict):
# flat dict of name->... treat keys as tags
tag_names = set(map(str, tags.keys()))
else:
return False
logger.info(f">>>>>{tags}>>>>>{required}, {tag_names}")
return required.issubset(tag_names) return required.issubset(tag_names)
except Exception: except Exception as e:
logger.exception(f"Failed to get tags for {file}", e)
return False return False
@staticmethod @staticmethod
async def get_new_file(f, rel: RatioRelation, target_ds: Dataset) -> DatasetFiles: def get_all_tags(tags) -> set[str]:
new_path = f.file_path """获取所有处理后的标签字符串列表"""
src_prefix = f"/dataset/{rel.source_dataset_id}" all_tags = set()
if isinstance(f.file_path, str) and f.file_path.startswith(src_prefix): if not tags:
dst_prefix = f"/dataset/{target_ds.id}" return all_tags
new_path = f.file_path.replace(src_prefix, dst_prefix, 1)
dst_dir = os.path.dirname(new_path)
# Ensure directory and copy the file in a thread to avoid blocking the event loop
await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True)
await asyncio.to_thread(shutil.copy2, f.file_path, new_path)
new_file = DatasetFiles( file_tags = []
dataset_id=target_ds.id, # type: ignore for tag_data in tags:
file_name=f.file_name, # 处理可能的命名风格转换(下划线转驼峰)
file_path=new_path, processed_data = {}
file_type=f.file_type, for key, value in tag_data.items():
file_size=f.file_size, # 将驼峰转为下划线以匹配 Pydantic 模型字段
check_sum=f.check_sum, processed_data[key] = value
tags=f.tags, # 创建 DatasetFileTag 对象
dataset_filemetadata=f.dataset_filemetadata, file_tag = DatasetFileTag(**processed_data)
status="ACTIVE", file_tags.append(file_tag)
)
return new_file for file_tag in file_tags:
for tag_data in file_tag.get_tags():
all_tags.add(tag_data)
return all_tags