feature: multiple ratio configurations can be set for a dataset. (#103)

feature: multiple ratio configurations can be set for a dataset.
This commit is contained in:
hefanli
2025-11-24 15:28:17 +08:00
committed by GitHub
parent 497a5688e9
commit c1352ab91f
11 changed files with 258 additions and 229 deletions

View File

@@ -170,7 +170,6 @@ async def list_ratio_tasks(
description=i.description,
status=i.status,
totals=i.totals,
ratio_method=i.ratio_method,
target_dataset_id=i.target_dataset_id,
target_dataset_name=(ds.name if ds else None),
created_at=str(i.created_at) if getattr(i, "created_at", None) else None,
@@ -330,7 +329,6 @@ async def get_ratio_task(
description=instance.description,
status=instance.status or "UNKNOWN",
totals=instance.totals or 0,
ratio_method=instance.ratio_method or "",
config=config,
target_dataset=target_dataset_info,
created_at=instance.created_at,

View File

@@ -88,7 +88,6 @@ class RatioTaskItem(BaseModel):
description: Optional[str] = None
status: Optional[str] = None
totals: Optional[int] = None
ratio_method: Optional[str] = None
target_dataset_id: Optional[str] = None
target_dataset_name: Optional[str] = None
created_at: Optional[str] = None
@@ -110,7 +109,6 @@ class RatioTaskDetailResponse(BaseModel):
description: Optional[str] = Field(None, description="任务描述")
status: str = Field(..., description="任务状态")
totals: int = Field(..., description="目标总数")
ratio_method: str = Field(..., description="配比方式")
config: List[Dict[str, Any]] = Field(..., description="配比配置")
target_dataset: Dict[str, Any] = Field(..., description="目标数据集信息")
created_at: Optional[datetime] = Field(None, description="创建时间")

View File

@@ -1,3 +1,4 @@
from datetime import datetime
from typing import List, Optional, Dict, Any
import random
import json
@@ -173,7 +174,7 @@ class RatioTaskService:
@staticmethod
async def handle_selected_file(existing_paths: set[Any], f, session, target_ds: Dataset):
src_path = f.file_path
dst_prefix = f"/dataset/{target_ds.id}"
dst_prefix = f"/dataset/{target_ds.id}/"
file_name = RatioTaskService.get_new_file_name(dst_prefix, existing_paths, f)
new_path = dst_prefix + file_name
@@ -181,18 +182,20 @@ class RatioTaskService:
await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True)
await asyncio.to_thread(shutil.copy2, src_path, new_path)
new_file = DatasetFiles(
dataset_id=target_ds.id, # type: ignore
file_name=file_name,
file_path=new_path,
file_type=f.file_type,
file_size=f.file_size,
check_sum=f.check_sum,
tags=f.tags,
dataset_filemetadata=f.dataset_filemetadata,
status="ACTIVE",
)
session.add(new_file)
file_data = {
"dataset_id": target_ds.id, # type: ignore
"file_name": file_name,
"file_path": new_path,
"file_type": f.file_type,
"file_size": f.file_size,
"check_sum": f.check_sum,
"tags": f.tags,
"tags_updated_at": datetime.now(),
"dataset_filemetadata": f.dataset_filemetadata,
"status": "ACTIVE",
}
file_record = {k: v for k, v in file_data.items() if v is not None}
session.add(DatasetFiles(**file_record))
existing_paths.add(new_path)
@staticmethod