You've already forked DataMate
fix:修复配比任务操作问题 (#66)
* fix:配比任务需要能够跳转到目标数据集
* feature:增加配比任务详情接口
* fix:删除不存在的配比详情页面
* fix:使用正式的逻辑来展示标签
* fix:参数默认值去掉多余的-
* fix:修复配比任务相关操作
This commit is contained in:
@@ -2,8 +2,10 @@ package com.datamate.datamanagement.domain.model.dataset;
|
|||||||
|
|
||||||
import com.baomidou.mybatisplus.annotation.TableId;
|
import com.baomidou.mybatisplus.annotation.TableId;
|
||||||
import com.baomidou.mybatisplus.annotation.TableName;
|
import com.baomidou.mybatisplus.annotation.TableName;
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import lombok.*;
|
import lombok.*;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
@@ -17,6 +19,7 @@ import java.util.List;
|
|||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
@Builder
|
@Builder
|
||||||
|
@Slf4j
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@TableName("t_dm_dataset_files")
|
@TableName("t_dm_dataset_files")
|
||||||
@@ -42,11 +45,12 @@ public class DatasetFile {
|
|||||||
*
|
*
|
||||||
* @return 标签列表
|
* @return 标签列表
|
||||||
*/
|
*/
|
||||||
public List<String> analyzeTag() {
|
public List<FileTag> analyzeTag() {
|
||||||
try {
|
try {
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
return mapper.readValue(tags, List.class);
|
return mapper.readValue(tags, new TypeReference<List<FileTag>>() {});
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,48 @@
|
|||||||
|
package com.datamate.datamanagement.domain.model.dataset;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
|
||||||
|
import com.fasterxml.jackson.databind.annotation.JsonNaming;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||||
|
@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
|
||||||
|
public class FileTag {
|
||||||
|
private Map<String, Object> value;
|
||||||
|
|
||||||
|
private String type;
|
||||||
|
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
private String fromName;
|
||||||
|
|
||||||
|
public List<String> getTags() {
|
||||||
|
List<String> tags = new ArrayList<>();
|
||||||
|
Object tagValues = value.get(type);
|
||||||
|
if (tagValues instanceof List) {
|
||||||
|
for (Object tag : (List<?>) tagValues) {
|
||||||
|
if (tag instanceof String) {
|
||||||
|
tags.add((String) tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (tagValues instanceof String) {
|
||||||
|
tags.add((String) tagValues);
|
||||||
|
}
|
||||||
|
if(StringUtils.isNotEmpty(fromName)) {
|
||||||
|
return tags.stream().map(tag -> fromName + " " + tag).toList();
|
||||||
|
}
|
||||||
|
return tags;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,6 +2,7 @@ package com.datamate.datamanagement.interfaces.converter;
|
|||||||
|
|
||||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||||
|
import com.datamate.datamanagement.domain.model.dataset.FileTag;
|
||||||
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
|
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
|
||||||
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
|
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
|
||||||
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
|
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
|
||||||
@@ -71,12 +72,12 @@ public interface DatasetConverter {
|
|||||||
return distribution;
|
return distribution;
|
||||||
}
|
}
|
||||||
for (DatasetFile datasetFile : datasetFiles) {
|
for (DatasetFile datasetFile : datasetFiles) {
|
||||||
List<String> tags = datasetFile.analyzeTag();
|
List<FileTag> tags = datasetFile.analyzeTag();
|
||||||
if (CollectionUtils.isEmpty(tags)) {
|
if (CollectionUtils.isEmpty(tags)) {
|
||||||
continue;
|
return distribution;
|
||||||
}
|
}
|
||||||
for (String tag : tags) {
|
for (FileTag tag : tags) {
|
||||||
distribution.put(tag, distribution.getOrDefault(tag, 0L) + 1);
|
tag.getTags().forEach(tagName -> distribution.put(tagName, distribution.getOrDefault(tagName, 0L) + 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return distribution;
|
return distribution;
|
||||||
|
|||||||
@@ -51,9 +51,9 @@ import java.util.concurrent.Semaphore;
|
|||||||
public class RagEtlService {
|
public class RagEtlService {
|
||||||
private static final Semaphore SEMAPHORE = new Semaphore(10);
|
private static final Semaphore SEMAPHORE = new Semaphore(10);
|
||||||
|
|
||||||
@Value("${datamate.rag.milvus-host:-milvus-standalone}")
|
@Value("${datamate.rag.milvus-host:milvus-standalone}")
|
||||||
private String milvusHost;
|
private String milvusHost;
|
||||||
@Value("${datamate.rag.milvus-port:-19530}")
|
@Value("${datamate.rag.milvus-port:19530}")
|
||||||
private int milvusPort;
|
private int milvusPort;
|
||||||
|
|
||||||
private final RagFileRepository ragFileRepository;
|
private final RagFileRepository ragFileRepository;
|
||||||
|
|||||||
@@ -76,116 +76,6 @@ export default function CreateRatioTask() {
|
|||||||
setCreating(false);
|
setCreating(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
const totalConfigured = useMemo(
|
|
||||||
() =>
|
|
||||||
ratioTaskForm?.ratioConfigs?.reduce?.(
|
|
||||||
(sum, c) => sum + (c.quantity || 0),
|
|
||||||
0
|
|
||||||
) || 0,
|
|
||||||
[ratioTaskForm.ratioConfigs]
|
|
||||||
);
|
|
||||||
|
|
||||||
// dataset selection is handled inside SelectDataset via onSelectedDatasetsChange
|
|
||||||
|
|
||||||
const updateRatioConfig = (source: string, quantity: number) => {
|
|
||||||
setRatioTaskForm((prev) => {
|
|
||||||
const existingIndex = prev.ratioConfigs.findIndex(
|
|
||||||
(config) => config.source === source
|
|
||||||
);
|
|
||||||
const totalOtherQuantity = prev.ratioConfigs
|
|
||||||
.filter((config) => config.source !== source)
|
|
||||||
.reduce((sum, config) => sum + config.quantity, 0);
|
|
||||||
|
|
||||||
const newConfig = {
|
|
||||||
id: source,
|
|
||||||
name: source,
|
|
||||||
type: prev.ratioType,
|
|
||||||
quantity: Math.min(
|
|
||||||
quantity,
|
|
||||||
prev.totalTargetCount - totalOtherQuantity
|
|
||||||
),
|
|
||||||
percentage: Math.round((quantity / prev.totalTargetCount) * 100),
|
|
||||||
source,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (existingIndex >= 0) {
|
|
||||||
const newConfigs = [...prev.ratioConfigs];
|
|
||||||
newConfigs[existingIndex] = newConfig;
|
|
||||||
return { ...prev, ratioConfigs: newConfigs };
|
|
||||||
} else {
|
|
||||||
return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
|
|
||||||
}
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
const generateAutoRatio = () => {
|
|
||||||
const selectedCount = ratioTaskForm.selectedDatasets.length;
|
|
||||||
if (selectedCount === 0) return;
|
|
||||||
|
|
||||||
const baseQuantity = Math.floor(
|
|
||||||
ratioTaskForm.totalTargetCount / selectedCount
|
|
||||||
);
|
|
||||||
const remainder = ratioTaskForm.totalTargetCount % selectedCount;
|
|
||||||
|
|
||||||
const newConfigs = ratioTaskForm.selectedDatasets.map(
|
|
||||||
(datasetId, index) => {
|
|
||||||
const quantity = baseQuantity + (index < remainder ? 1 : 0);
|
|
||||||
return {
|
|
||||||
id: datasetId,
|
|
||||||
name: datasetId,
|
|
||||||
type: ratioTaskForm.ratioType,
|
|
||||||
quantity,
|
|
||||||
percentage: Math.round(
|
|
||||||
(quantity / ratioTaskForm.totalTargetCount) * 100
|
|
||||||
),
|
|
||||||
source: datasetId,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
setRatioTaskForm((prev) => ({ ...prev, ratioConfigs: newConfigs }));
|
|
||||||
};
|
|
||||||
|
|
||||||
// 标签模式下,更新某数据集的某个标签的数量
|
|
||||||
const updateLabelRatioConfig = (
|
|
||||||
datasetId: string,
|
|
||||||
label: string,
|
|
||||||
quantity: number
|
|
||||||
) => {
|
|
||||||
const sourceKey = `${datasetId}_${label}`;
|
|
||||||
setRatioTaskForm((prev) => {
|
|
||||||
const existingIndex = prev.ratioConfigs.findIndex(
|
|
||||||
(c) => c.source === sourceKey
|
|
||||||
);
|
|
||||||
const totalOtherQuantity = prev.ratioConfigs
|
|
||||||
.filter((c) => c.source !== sourceKey)
|
|
||||||
.reduce((sum, c) => sum + c.quantity, 0);
|
|
||||||
|
|
||||||
const dist = distributions[datasetId] || {};
|
|
||||||
const labelMax = dist[label] ?? Infinity;
|
|
||||||
const cappedQuantity = Math.max(
|
|
||||||
0,
|
|
||||||
Math.min(quantity, prev.totalTargetCount - totalOtherQuantity, labelMax)
|
|
||||||
);
|
|
||||||
|
|
||||||
const newConfig = {
|
|
||||||
id: sourceKey,
|
|
||||||
name: label,
|
|
||||||
type: "label",
|
|
||||||
quantity: cappedQuantity,
|
|
||||||
percentage: Math.round((cappedQuantity / prev.totalTargetCount) * 100),
|
|
||||||
source: sourceKey,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (existingIndex >= 0) {
|
|
||||||
const newConfigs = [...prev.ratioConfigs];
|
|
||||||
newConfigs[existingIndex] = newConfig;
|
|
||||||
return { ...prev, ratioConfigs: newConfigs };
|
|
||||||
} else {
|
|
||||||
return { ...prev, ratioConfigs: [...prev.ratioConfigs, newConfig] };
|
|
||||||
}
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
const handleValuesChange = (_, allValues) => {
|
const handleValuesChange = (_, allValues) => {
|
||||||
setRatioTaskForm({ ...ratioTaskForm, ...allValues });
|
setRatioTaskForm({ ...ratioTaskForm, ...allValues });
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ export default function RatioTasksPage() {
|
|||||||
<Button
|
<Button
|
||||||
type="text"
|
type="text"
|
||||||
icon={op.icon}
|
icon={op.icon}
|
||||||
onClick={() => op.onClick(task.id)}
|
onClick={() => op.onClick(task)}
|
||||||
/>
|
/>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
))}
|
))}
|
||||||
|
|||||||
@@ -16,8 +16,7 @@ export function createRatioTaskUsingPost(data: any) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 删除配比任务(支持批量)
|
// 删除配比任务(支持批量)
|
||||||
export function deleteRatioTasksUsingDelete(ids: string | string[]) {
  // Accept either a single id or a batch so both call styles keep working.
  const idList = Array.isArray(ids) ? ids : ids ? [ids] : [];
  // Encode every id: raw interpolation would corrupt the query string for ids
  // containing characters such as "&", "#" or spaces.
  const qs = idList.map((id) => `ids=${encodeURIComponent(id)}`).join("&");
  const url = qs ? `/api/synthesis/ratio-task?${qs}` : "/api/synthesis/ratio-task";
  return del(url);
}
|
||||||
|
|||||||
@@ -26,3 +26,26 @@ class PagedDatasetFileResponse(BaseModel):
|
|||||||
page: int = Field(..., description="当前页码")
|
page: int = Field(..., description="当前页码")
|
||||||
size: int = Field(..., description="每页大小")
|
size: int = Field(..., description="每页大小")
|
||||||
|
|
||||||
|
class DatasetFileTag(BaseModel):
    """A single tag entry of a dataset file.

    The label values are stored in ``value`` under the key equal to ``type``.
    """

    id: str = Field(..., description="标签ID")
    type: str = Field(..., description="类型")
    from_name: str = Field(..., description="标签名称")
    value: dict = Field(..., description="标签值")

    def get_tags(self) -> List[str]:
        """Return the entry's labels, each prefixed with ``from_name`` when it is non-empty."""
        # Look the labels up by the entry's own type; a missing key means no labels.
        raw = self.value.get(self.type, [])

        if isinstance(raw, str):
            labels = [raw]
        elif isinstance(raw, list):
            # Only string elements count as labels; anything else is skipped.
            labels = [item for item in raw if isinstance(item, str)]
        else:
            labels = []

        if not self.from_name:
            return labels
        return [f"{self.from_name} {label}" for label in labels]
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from typing import List, Optional, Dict, Any
|
from typing import List, Optional, Dict, Any
|
||||||
import random
|
import random
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import asyncio
|
import asyncio
|
||||||
@@ -12,6 +13,7 @@ from app.core.logging import get_logger
|
|||||||
from app.db.models.ratio_task import RatioInstance, RatioRelation
|
from app.db.models.ratio_task import RatioInstance, RatioRelation
|
||||||
from app.db.models import Dataset, DatasetFiles
|
from app.db.models import Dataset, DatasetFiles
|
||||||
from app.db.session import AsyncSessionLocal
|
from app.db.session import AsyncSessionLocal
|
||||||
|
from app.module.dataset.schema.dataset_file import DatasetFileTag
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -218,65 +220,46 @@ class RatioTaskService:
|
|||||||
"""
|
"""
|
||||||
if not conditions:
|
if not conditions:
|
||||||
return set()
|
return set()
|
||||||
raw = conditions.replace("\n", " ")
|
data = json.loads(conditions)
|
||||||
seps = [",", ";", " "]
|
required_tags = set()
|
||||||
tokens = [raw]
|
if data.get("label"):
|
||||||
for sep in seps:
|
required_tags.add(data["label"])
|
||||||
nxt = []
|
return required_tags
|
||||||
for t in tokens:
|
|
||||||
nxt.extend(t.split(sep))
|
|
||||||
tokens = nxt
|
|
||||||
return {t.strip() for t in tokens if t and t.strip()}
|
|
||||||
|
|
||||||
@staticmethod
def _file_contains_tags(file: DatasetFiles, required: set[str]) -> bool:
    """Return True when ``file`` carries every tag listed in ``required``.

    An empty ``required`` set matches any file. A file without tags, or whose
    tags cannot be interpreted, does not match.
    """
    if not required:
        return True
    tags = file.tags
    if not tags:
        return False
    try:
        # get_all_tags expands the stored tag entries into plain label strings.
        tag_names = RatioTaskService.get_all_tags(tags)
        return required.issubset(tag_names)
    except Exception:
        # logger.exception records the active traceback itself; passing the
        # exception as an extra positional argument would be treated as a
        # format argument for a message that has no placeholder.
        logger.exception(f"Failed to get tags for {file}")
        return False
|
||||||
|
|
||||||
@staticmethod
def get_all_tags(tags) -> set[str]:
    """Collect the label strings of every tag entry in ``tags``.

    Each entry is validated as a ``DatasetFileTag`` and contributes the
    strings returned by its ``get_tags()``. Returns an empty set when
    ``tags`` is empty or None.
    """
    all_tags = set()
    if not tags:
        return all_tags

    for tag_data in tags:
        # Entries are handed to the model unchanged (no key renaming is
        # performed), so their keys must already match the model's field names.
        file_tag = DatasetFileTag(**tag_data)
        all_tags.update(file_tag.get_tags())
    return all_tags
|
||||||
|
|||||||
Reference in New Issue
Block a user