fix:修复配比任务操作问题 (#66)

* fix:配比任务需要能够跳转到目标数据集

* feature:增加配比任务详情接口

* fix:删除不存在的配比详情页面

* fix:使用正式的逻辑来展示标签

* fix:参数默认值去掉多余的-

* fix:修复配比任务相关操作
This commit is contained in:
Vincent
2025-11-07 19:01:45 +08:00
committed by GitHub
parent 28b7c631a4
commit 60e2289019
9 changed files with 120 additions and 172 deletions

View File

@@ -25,4 +25,27 @@ class PagedDatasetFileResponse(BaseModel):
totalPages: int = Field(..., description="总页数")
page: int = Field(..., description="当前页码")
size: int = Field(..., description="每页大小")
class DatasetFileTag(BaseModel):
    """A single tag record attached to a dataset file.

    The ``value`` mapping is looked up with ``type`` as the key; whatever is
    stored under that key (one string or a list of strings) is the raw label
    data that ``get_tags`` turns into display strings.
    """

    id: str = Field(..., description="标签ID")
    type: str = Field(..., description="类型")
    from_name: str = Field(..., description="标签名称")
    value: dict = Field(..., description="标签值")

    def get_tags(self) -> List[str]:
        """Return the label strings of this record, prefixed by ``from_name``.

        Returns:
            The labels found under ``value[type]``. A lone string becomes a
            one-element list, a list contributes only its string items, and
            any other payload (including a missing key) gives an empty list.
            When ``from_name`` is non-empty each label is rendered as
            ``"<from_name> <label>"``.
        """
        raw = self.value.get(self.type, [])

        if isinstance(raw, list):
            # Non-string items are dropped rather than coerced.
            labels = [str(item) for item in raw if isinstance(item, str)]
        elif isinstance(raw, str):
            labels = [raw]
        else:
            labels = []

        if not self.from_name:
            return labels
        return [f"{self.from_name} {label}" for label in labels]

View File

@@ -1,5 +1,6 @@
from typing import List, Optional, Dict, Any
import random
import json
import os
import shutil
import asyncio
@@ -12,6 +13,7 @@ from app.core.logging import get_logger
from app.db.models.ratio_task import RatioInstance, RatioRelation
from app.db.models import Dataset, DatasetFiles
from app.db.session import AsyncSessionLocal
from app.module.dataset.schema.dataset_file import DatasetFileTag
logger = get_logger(__name__)
@@ -218,65 +220,46 @@ class RatioTaskService:
"""
if not conditions:
return set()
raw = conditions.replace("\n", " ")
seps = [",", ";", " "]
tokens = [raw]
for sep in seps:
nxt = []
for t in tokens:
nxt.extend(t.split(sep))
tokens = nxt
return {t.strip() for t in tokens if t and t.strip()}
data = json.loads(conditions)
required_tags = set()
if data.get("label"):
required_tags.add(data["label"])
return required_tags
@staticmethod
def _file_contains_tags(file: DatasetFiles, required: set[str]) -> bool:
    """Tell whether ``file`` carries every tag named in ``required``.

    Args:
        file: Dataset file record whose ``tags`` attribute is inspected.
        required: Tag strings that must all be present. An empty set
            matches every file.

    Returns:
        True when ``required`` is empty, or when it is a subset of the tag
        strings produced by ``RatioTaskService.get_all_tags(file.tags)``.
        False when the file has no tags or when its tags cannot be parsed.
    """
    if not required:
        return True

    tags = file.tags
    if not tags:
        return False

    try:
        tag_names = RatioTaskService.get_all_tags(tags)
        return required.issubset(tag_names)
    except Exception:
        # Best-effort filter: a file with unparseable tags is treated as a
        # non-match instead of failing the whole selection.
        # The exception object must NOT be passed as an extra positional
        # argument: the message has no placeholder for it, which breaks
        # %-style formatting in stdlib-compatible loggers. `.exception`
        # already records the active traceback.
        logger.exception(f"Failed to get tags for {file}")
        return False
@staticmethod
def get_all_tags(tags) -> set[str]:
    """Collect the tag strings of every tag record into a single set.

    Args:
        tags: Iterable of mapping-like tag records. Each record is expanded
            as keyword arguments into ``DatasetFileTag``; keys are passed
            through unchanged (no naming-style conversion is performed).
            A falsy value (``None``, empty list) yields an empty set.

    Returns:
        The union of ``DatasetFileTag.get_tags()`` over all records.

    Raises:
        Any error raised while building a ``DatasetFileTag`` from a record
        (for example a record that is not a mapping or fails validation)
        propagates to the caller.
    """
    all_tags: set[str] = set()
    if not tags:
        return all_tags

    for tag_data in tags:
        all_tags.update(DatasetFileTag(**tag_data).get_tags())
    return all_tags