feat(data-management): add parent-child dataset hierarchy support

- Add a parentDatasetId field to the OpenAPI spec for hierarchy filtering
- Implement create, update, and delete logic for dataset parent-child relationships
- Rename dataset paths and update file path prefixes when a dataset is moved
- Validate the child-dataset count to keep parent datasets from being deleted by mistake (see the sketch after this list)
- Update the frontend to support selecting a parent dataset and showing hierarchy navigation
- Improve path handling in the Python backend's auto-annotation task
- Alter the database schema to add a foreign key constraint that keeps the hierarchy consistent
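
The child-count guard above boils down to a single aggregate query against the new parent_dataset_id column. Below is a minimal sketch of that check, assuming a SQLAlchemy connection and the t_dm_datasets table touched by this commit; the function name and the surrounding deletion flow are illustrative, not code from this repository:

from sqlalchemy import text

def assert_no_child_datasets(conn, dataset_id: str) -> None:
    # Hypothetical guard: refuse to delete a dataset that still has children.
    count_sql = text(
        "SELECT COUNT(*) FROM t_dm_datasets WHERE parent_dataset_id = :pid"
    )
    child_count = int(conn.execute(count_sql, {"pid": dataset_id}).scalar() or 0)
    if child_count > 0:
        raise ValueError(
            f"Dataset {dataset_id} still has {child_count} child dataset(s); "
            "move or delete them before deleting the parent."
        )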
commit 79371ba078 (parent bde00c3c38)
Date: 2026-01-20 13:34:50 +08:00
26 changed files with 394 additions and 133 deletions


@@ -14,6 +14,7 @@ class Dataset(Base):
     __tablename__ = "t_dm_datasets"
     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+    parent_dataset_id = Column(String(36), nullable=True, comment="Parent dataset ID (UUID)")
     name = Column(String(255), nullable=False, comment="Dataset name")
     description = Column(Text, nullable=True, comment="Dataset description")
     dataset_type = Column(String(50), nullable=False, comment="Dataset type: IMAGE/TEXT/QA/MULTIMODAL/OTHER")
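
With parent_dataset_id on the model, the hierarchy filtering exposed through the new parentDatasetId API field reduces to an equality filter on this column. A minimal sketch, assuming a standard SQLAlchemy Session over this Dataset model; the helper itself is illustrative and not part of the diff:

from typing import Optional

from sqlalchemy.orm import Session

def list_child_datasets(session: Session, parent_id: Optional[str]) -> list:
    # parent_id=None lists top-level datasets; otherwise list direct children.
    query = session.query(Dataset)
    if parent_id is None:
        query = query.filter(Dataset.parent_dataset_id.is_(None))
    else:
        query = query.filter(Dataset.parent_dataset_id == parent_id)
    return query.order_by(Dataset.name).all()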


@@ -195,8 +195,8 @@ def _update_task_status(
         conn.execute(sql, params)


 def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
     """Load all completed files under the given dataset."""
     sql = text(
         """
@@ -208,9 +208,45 @@ def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
"""
)
with SQLManager.create_connect() as conn:
rows = conn.execute(sql, {"dataset_id": dataset_id}).fetchall()
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
with SQLManager.create_connect() as conn:
rows = conn.execute(sql, {"dataset_id": dataset_id}).fetchall()
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
+def _load_dataset_meta(dataset_id: str) -> Optional[Dict[str, Any]]:
+    """Load the dataset's basic info (including parent ID and path)."""
+    sql = text(
+        """
+        SELECT id, name, parent_dataset_id, path
+        FROM t_dm_datasets
+        WHERE id = :dataset_id
+        """
+    )
+    with SQLManager.create_connect() as conn:
+        row = conn.execute(sql, {"dataset_id": dataset_id}).fetchone()
+    if not row:
+        return None
+    return dict(row._mapping)  # type: ignore[attr-defined]
+
+
+def _resolve_output_parent(source_dataset_id: str) -> Tuple[Optional[str], str]:
+    """Resolve the parent and base path for the output dataset (the output is attached under the source's parent)."""
+    base_path = DEFAULT_OUTPUT_ROOT.rstrip("/") or "/dataset"
+    source_meta = _load_dataset_meta(source_dataset_id)
+    if not source_meta:
+        return None, base_path
+    parent_dataset_id = source_meta.get("parent_dataset_id")
+    if not parent_dataset_id:
+        return None, base_path
+    parent_meta = _load_dataset_meta(str(parent_dataset_id))
+    parent_path = parent_meta.get("path") if parent_meta else None
+    if not parent_path:
+        return None, base_path
+    return str(parent_dataset_id), str(parent_path)
+
+
 def _load_files_by_ids(file_ids: List[str]) -> List[Tuple[str, str, str]]:
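
Seen from a caller, _resolve_output_parent either attaches the auto-annotation output next to its source dataset (same parent, the parent's path as the base) or falls back to the default root. A small illustration of that contract, assuming DEFAULT_OUTPUT_ROOT resolves to "/dataset"; the caller function and the example paths are hypothetical:

import os

def plan_output_location(source_dataset_id: str, new_dataset_id: str):
    # Sibling of the source when the source has a parent, otherwise under the default root.
    parent_dataset_id, base_path = _resolve_output_parent(source_dataset_id)
    return parent_dataset_id, os.path.join(base_path, new_dataset_id)

# A source nested under a parent whose path is "/data/projects/animals" yields
# ("<parent uuid>", "/data/projects/animals/<new uuid>"); a top-level source
# yields (None, "/dataset/<new uuid>").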
@@ -245,34 +281,35 @@ def _ensure_output_dir(output_dir: str) -> str:
     return output_dir


 def _create_output_dataset(
     source_dataset_id: str,
     source_dataset_name: str,
     output_dataset_name: str,
 ) -> Tuple[str, str]:
     """Create a new dataset for the auto-annotation results and return (dataset_id, path)."""
     new_dataset_id = str(uuid.uuid4())
-    dataset_base_path = DEFAULT_OUTPUT_ROOT.rstrip("/") or "/dataset"
+    parent_dataset_id, dataset_base_path = _resolve_output_parent(source_dataset_id)
     output_dir = os.path.join(dataset_base_path, new_dataset_id)
     description = (
         f"Auto annotations for dataset {source_dataset_name or source_dataset_id}"[:255]
     )
     sql = text(
         """
-        INSERT INTO t_dm_datasets (id, name, description, dataset_type, path, status)
-        VALUES (:id, :name, :description, :dataset_type, :path, :status)
+        INSERT INTO t_dm_datasets (id, parent_dataset_id, name, description, dataset_type, path, status)
+        VALUES (:id, :parent_dataset_id, :name, :description, :dataset_type, :path, :status)
         """
     )
     params = {
         "id": new_dataset_id,
+        "parent_dataset_id": parent_dataset_id,
         "name": output_dataset_name,
         "description": description,
         "dataset_type": "IMAGE",
         "path": output_dir,
         "status": "ACTIVE",
     }
     with SQLManager.create_connect() as conn: