feature: 数据配比增加通过更新时间来配置 (#95)

* feature: 数据配比增加通过更新时间来配置

* fix: 修复配比时间参数传递的问题
This commit is contained in:
hefanli
2025-11-20 18:50:51 +08:00
committed by GitHub
parent 955ffff6cd
commit cddfe9b149
10 changed files with 458 additions and 595 deletions

View File

@@ -12,7 +12,7 @@ from app.core.logging import get_logger
from app.db.models import Dataset
from app.db.session import get_db
from app.module.dataset import DatasetManagementService
from app.module.shared.schema import StandardResponse
from app.module.shared.schema import StandardResponse, TaskStatus
from app.module.synthesis.schema.ratio_task import (
CreateRatioTaskResponse,
CreateRatioTaskRequest,
@@ -49,52 +49,18 @@ async def create_ratio_task(
await valid_exists(db, req)
# 创建目标数据集:名称使用“<任务名称>-配比生成-时间戳”
target_dataset_name = f"{req.name}-配比生成-{datetime.now().strftime('%Y%m%d%H%M%S')}"
target_dataset = await create_target_dataset(db, req, source_types)
target_type = get_target_dataset_type(source_types)
instance = await create_ratio_instance(db, req, target_dataset)
target_dataset = Dataset(
id=str(uuid.uuid4()),
name=target_dataset_name,
description=req.description or "",
dataset_type=target_type,
status="DRAFT",
)
target_dataset.path = f"/dataset/{target_dataset.id}"
db.add(target_dataset)
await db.flush() # 获取 target_dataset.id
service = RatioTaskService(db)
instance = await service.create_task(
name=req.name,
description=req.description,
totals=int(req.totals),
ratio_method=req.ratio_method,
config=[
{
"dataset_id": item.dataset_id,
"counts": int(item.counts),
"filter_conditions": item.filter_conditions,
}
for item in req.config
],
target_dataset_id=target_dataset.id,
)
# 异步执行配比任务(支持 DATASET / TAG)
asyncio.create_task(RatioTaskService.execute_dataset_ratio_task(instance.id))
return StandardResponse(
code=200,
message="success",
data=CreateRatioTaskResponse(
response_data = CreateRatioTaskResponse(
id=instance.id,
name=instance.name,
description=instance.description,
totals=instance.totals or 0,
ratio_method=instance.ratio_method or req.ratio_method,
status=instance.status or "PENDING",
status=instance.status or TaskStatus.PENDING.name,
config=req.config,
targetDataset=TargetDatasetInfo(
id=str(target_dataset.id),
@@ -103,6 +69,10 @@ async def create_ratio_task(
status=str(target_dataset.status),
)
)
return StandardResponse(
code=200,
message="success",
data=response_data
)
except HTTPException:
await db.rollback()
@@ -113,6 +83,46 @@ async def create_ratio_task(
raise HTTPException(status_code=500, detail="Internal server error")
async def create_ratio_instance(db, req: CreateRatioTaskRequest, target_dataset: Dataset) -> RatioInstance:
    """Create the ratio task record described by ``req`` via ``RatioTaskService``.

    Args:
        db: Database session handed to ``RatioTaskService``.
        req: Incoming request; ``name``, ``description``, ``totals`` and the
            per-source ``config`` items are read from it.
        target_dataset: Dataset whose ``id`` is stored on the task as
            ``target_dataset_id``.

    Returns:
        The value returned by ``RatioTaskService.create_task``.
    """
    task_service = RatioTaskService(db)
    logger.info(f"create_ratio_instance: {req}")

    # Coerce numeric fields up front so the service receives plain ints.
    total_count = int(req.totals)
    source_configs = [
        {
            "dataset_id": entry.dataset_id,
            "counts": int(entry.counts),
            "filter_conditions": entry.filter_conditions,
        }
        for entry in req.config
    ]

    return await task_service.create_task(
        name=req.name,
        description=req.description,
        totals=total_count,
        config=source_configs,
        target_dataset_id=target_dataset.id,
    )
async def create_target_dataset(db, req: CreateRatioTaskRequest, source_types: set[str]) -> Dataset:
    """Build the target dataset for a ratio task, add it to ``db`` and flush.

    The dataset is named ``"<task name>-<timestamp>"`` (timestamp formatted as
    ``%Y%m%d%H%M%S`` from ``datetime.now()``), starts in status ``"DRAFT"``,
    and its path is ``/dataset/<id>`` using a freshly generated UUID4.
    No commit is issued here.

    Args:
        db: Database session; only ``add`` and ``flush`` are called on it.
        req: Incoming request supplying ``name`` and ``description`` (an empty
            string is stored when the description is falsy).
        source_types: Types of the source datasets, passed to
            ``get_target_dataset_type`` to pick the target dataset type.

    Returns:
        The newly created ``Dataset`` instance.
    """
    # The id is generated once so that ``id`` and ``path`` always agree.
    target_dataset_id = uuid.uuid4()

    new_dataset = Dataset(
        id=str(target_dataset_id),
        name=f"{req.name}-{datetime.now().strftime('%Y%m%d%H%M%S')}",
        description=req.description or "",
        dataset_type=get_target_dataset_type(source_types),
        status="DRAFT",
        path=f"/dataset/{target_dataset_id}",
    )

    db.add(new_dataset)
    # Original note: flush in order to obtain target_dataset.id.
    # NOTE(review): the id is already assigned above; presumably the flush just
    # pushes the pending row within the current transaction - confirm.
    await db.flush()
    return new_dataset
@router.get("", response_model=StandardResponse[PagedRatioTaskResponse], status_code=200)
async def list_ratio_tasks(
page: int = 1,