You've already forked DataMate
feat(data-management): 添加数据集父子层级结构功能
- 在OpenAPI规范中新增parentDatasetId字段用于层级过滤 - 实现数据集父子关系的创建、更新和删除逻辑 - 添加数据集移动时的路径重命名和文件路径前缀更新 - 增加子数据集数量验证防止误删父数据集 - 更新前端界面支持选择父数据集和导航显示 - 优化Python后端自动标注任务的路径处理逻辑 - 修改数据库表结构添加外键约束确保数据一致性
This commit is contained in:
@@ -14,6 +14,7 @@ class Dataset(Base):
|
||||
__tablename__ = "t_dm_datasets"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
parent_dataset_id = Column(String(36), nullable=True, comment="父数据集ID(UUID)")
|
||||
name = Column(String(255), nullable=False, comment="数据集名称")
|
||||
description = Column(Text, nullable=True, comment="数据集描述")
|
||||
dataset_type = Column(String(50), nullable=False, comment="数据集类型:IMAGE/TEXT/QA/MULTIMODAL/OTHER")
|
||||
|
||||
@@ -195,8 +195,8 @@ def _update_task_status(
|
||||
conn.execute(sql, params)
|
||||
|
||||
|
||||
def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
|
||||
"""加载指定数据集下的所有已完成文件。"""
|
||||
def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
|
||||
"""加载指定数据集下的所有已完成文件。"""
|
||||
|
||||
sql = text(
|
||||
"""
|
||||
@@ -208,9 +208,45 @@ def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
|
||||
"""
|
||||
)
|
||||
|
||||
with SQLManager.create_connect() as conn:
|
||||
rows = conn.execute(sql, {"dataset_id": dataset_id}).fetchall()
|
||||
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
|
||||
with SQLManager.create_connect() as conn:
|
||||
rows = conn.execute(sql, {"dataset_id": dataset_id}).fetchall()
|
||||
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
|
||||
|
||||
|
||||
def _load_dataset_meta(dataset_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the basic metadata row for *dataset_id* (incl. parent id and path).

    Returns a column-name -> value dict, or ``None`` when no such dataset
    exists in ``t_dm_datasets``.
    """
    query = text(
        """
        SELECT id, name, parent_dataset_id, path
        FROM t_dm_datasets
        WHERE id = :dataset_id
        """
    )
    with SQLManager.create_connect() as conn:
        record = conn.execute(query, {"dataset_id": dataset_id}).fetchone()
    # Missing dataset -> None; otherwise expose the row as a plain dict.
    return dict(record._mapping) if record else None  # type: ignore[attr-defined]
|
||||
|
||||
|
||||
def _resolve_output_parent(source_dataset_id: str) -> Tuple[Optional[str], str]:
    """Decide which parent dataset the auto-annotation output should hang under.

    Returns ``(parent_dataset_id, base_path)``. The output dataset is attached
    to the SOURCE dataset's parent; whenever the source dataset, its parent,
    or the parent's path cannot be resolved, falls back to
    ``(None, default_output_root)``.
    """
    fallback: Tuple[Optional[str], str] = (
        None,
        DEFAULT_OUTPUT_ROOT.rstrip("/") or "/dataset",
    )

    source_meta = _load_dataset_meta(source_dataset_id)
    parent_id = source_meta.get("parent_dataset_id") if source_meta else None
    if not parent_id:
        # Source missing or top-level: output goes under the default root.
        return fallback

    parent_meta = _load_dataset_meta(str(parent_id))
    parent_path = parent_meta.get("path") if parent_meta else None
    if not parent_path:
        # Parent row missing or has no usable path: fall back to the root.
        return fallback
    return str(parent_id), str(parent_path)
|
||||
|
||||
|
||||
def _load_files_by_ids(file_ids: List[str]) -> List[Tuple[str, str, str]]:
|
||||
@@ -245,34 +281,35 @@ def _ensure_output_dir(output_dir: str) -> str:
|
||||
return output_dir
|
||||
|
||||
|
||||
def _create_output_dataset(
|
||||
source_dataset_id: str,
|
||||
source_dataset_name: str,
|
||||
output_dataset_name: str,
|
||||
) -> Tuple[str, str]:
|
||||
"""为自动标注结果创建一个新的数据集并返回 (dataset_id, path)。"""
|
||||
|
||||
new_dataset_id = str(uuid.uuid4())
|
||||
dataset_base_path = DEFAULT_OUTPUT_ROOT.rstrip("/") or "/dataset"
|
||||
output_dir = os.path.join(dataset_base_path, new_dataset_id)
|
||||
def _create_output_dataset(
|
||||
source_dataset_id: str,
|
||||
source_dataset_name: str,
|
||||
output_dataset_name: str,
|
||||
) -> Tuple[str, str]:
|
||||
"""为自动标注结果创建一个新的数据集并返回 (dataset_id, path)。"""
|
||||
|
||||
new_dataset_id = str(uuid.uuid4())
|
||||
parent_dataset_id, dataset_base_path = _resolve_output_parent(source_dataset_id)
|
||||
output_dir = os.path.join(dataset_base_path, new_dataset_id)
|
||||
|
||||
description = (
|
||||
f"Auto annotations for dataset {source_dataset_name or source_dataset_id}"[:255]
|
||||
)
|
||||
|
||||
sql = text(
|
||||
"""
|
||||
INSERT INTO t_dm_datasets (id, name, description, dataset_type, path, status)
|
||||
VALUES (:id, :name, :description, :dataset_type, :path, :status)
|
||||
"""
|
||||
)
|
||||
params = {
|
||||
"id": new_dataset_id,
|
||||
"name": output_dataset_name,
|
||||
"description": description,
|
||||
"dataset_type": "IMAGE",
|
||||
"path": output_dir,
|
||||
"status": "ACTIVE",
|
||||
sql = text(
|
||||
"""
|
||||
INSERT INTO t_dm_datasets (id, parent_dataset_id, name, description, dataset_type, path, status)
|
||||
VALUES (:id, :parent_dataset_id, :name, :description, :dataset_type, :path, :status)
|
||||
"""
|
||||
)
|
||||
params = {
|
||||
"id": new_dataset_id,
|
||||
"parent_dataset_id": parent_dataset_id,
|
||||
"name": output_dataset_name,
|
||||
"description": description,
|
||||
"dataset_type": "IMAGE",
|
||||
"path": output_dir,
|
||||
"status": "ACTIVE",
|
||||
}
|
||||
|
||||
with SQLManager.create_connect() as conn:
|
||||
|
||||
Reference in New Issue
Block a user