Files
DataMate/backend/openapi/specs/data-management.yaml
Jerry Yan 79371ba078 feat(data-management): 添加数据集父子层级结构功能
- 在OpenAPI规范中新增parentDatasetId字段用于层级过滤
- 实现数据集父子关系的创建、更新和删除逻辑
- 添加数据集移动时的路径重命名和文件路径前缀更新
- 增加子数据集数量验证防止误删父数据集
- 更新前端界面支持选择父数据集和导航显示
- 优化Python后端自动标注任务的路径处理逻辑
- 修改数据库表结构添加外键约束确保数据一致性
2026-01-20 13:34:50 +08:00

905 lines
25 KiB
YAML

# OpenAPI 3.0.3 description of the Data Management Service.
openapi: 3.0.3
info:
  title: Data Management Service API
  # Literal block scalar: the line breaks of the text below are preserved.
  description: |
    数据管理服务API,提供数据集的创建、管理和文件操作功能。
    主要功能:
    - 数据集的创建和管理
    - 多种数据集类型支持(图像、文本、音频、视频、多模态等)
    - 数据集文件管理
    - 数据集标签和元数据管理
    - 数据集统计信息
  version: 1.0.0
servers:
  # NOTE(review): this base URL already ends in /data-management and the
  # path keys visible in this document also start with /data-management,
  # so resolved URLs repeat the segment - confirm against the real routes.
  - url: http://localhost:8092/api/v1/data-management
    description: Development server
# Tag groups that the operations reference through their `tags` lists.
tags:
  - name: Dataset
    description: 数据集管理
  - name: DatasetFile
    description: 数据集文件管理
  - name: DatasetType
    description: 数据集类型管理
  - name: Tag
    description: 标签管理
paths:
# Dataset collection: paginated, filterable listing (GET) and creation (POST).
/data-management/datasets:
  get:
    tags:
      - Dataset
    operationId: getDatasets
    summary: 获取数据集列表
    description: 分页查询数据集列表,支持按类型、标签等条件筛选
    parameters:
      - name: page
        in: query
        schema:
          type: integer
          default: 0
        # Fixed: the text used to say pages start at 1, which contradicted
        # `default: 0` and the 0-based `page` parameter of the dataset file
        # listing. NOTE(review): confirm the backend is 0-based.
        description: 页码,从0开始
      - name: size
        in: query
        schema:
          type: integer
          default: 20
        description: 每页大小
      - name: type
        in: query
        schema:
          type: string
        description: 数据集类型过滤
      - name: tags
        in: query
        schema:
          type: string
        description: 标签过滤,多个标签用逗号分隔
      - name: keyword
        in: query
        schema:
          type: string
        description: 关键词搜索(名称、描述)
      - name: status
        in: query
        schema:
          type: string
          enum:
            - DRAFT
            - ACTIVE
            - PROCESSING
            - ARCHIVED
            - PUBLISHED
            - DEPRECATED
        description: 数据集状态过滤
      # Hierarchy filter; per its description an empty string selects
      # root datasets.
      - name: parentDatasetId
        in: query
        schema:
          type: string
        description: 父数据集ID过滤(传空字符串表示根数据集)
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PagedDatasetResponse'
      '400':
        description: 请求参数错误
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
  post:
    tags:
      - Dataset
    operationId: createDataset
    summary: 创建数据集
    description: 创建新的数据集
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateDatasetRequest'
    responses:
      '201':
        description: 创建成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetResponse'
      '400':
        description: 请求参数错误
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
# Single dataset addressed by ID: read (GET), update (PUT), delete (DELETE).
/data-management/datasets/{datasetId}:
  get:
    tags:
      - Dataset
    operationId: getDatasetById
    summary: 获取数据集详情
    description: 根据ID获取数据集详细信息
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetResponse'
      '404':
        description: 数据集不存在
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
  put:
    tags:
      - Dataset
    operationId: updateDataset
    summary: 更新数据集
    description: 更新数据集信息
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/UpdateDatasetRequest'
    responses:
      '200':
        description: 更新成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetResponse'
      '404':
        description: 数据集不存在
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
  # NOTE(review): only 204 and 404 are declared; no response covers a
  # dataset that still has child datasets - confirm what the service returns.
  delete:
    tags:
      - Dataset
    operationId: deleteDataset
    summary: 删除数据集
    description: 删除指定的数据集
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    responses:
      '204':
        description: 删除成功
      '404':
        description: 数据集不存在
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
# Paginated listing of the files in one dataset, filterable by type/status.
/data-management/datasets/{datasetId}/files:
  get:
    tags:
      - DatasetFile
    operationId: getDatasetFiles
    summary: 获取数据集文件列表
    description: 分页获取数据集中的文件列表
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
      - name: page
        in: query
        schema:
          type: integer
          default: 0
        description: 页码,从0开始
      - name: size
        in: query
        schema:
          type: integer
          default: 20
        description: 每页大小
      - name: fileType
        in: query
        schema:
          type: string
        description: 文件类型过滤
      - name: status
        in: query
        schema:
          type: string
          enum:
            - UPLOADED
            - PROCESSING
            - COMPLETED
            - ERROR
        description: 文件状态过滤
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PagedDatasetFileResponse'
# Creates a sub-directory inside a dataset; the 200 response has no body.
/data-management/datasets/{datasetId}/files/directories:
  post:
    tags:
      - DatasetFile
    operationId: createDirectory
    summary: 在数据集下创建子目录
    description: 在指定数据集下的某个前缀路径中创建一个新的子目录
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateDirectoryRequest'
    responses:
      '200':
        description: 创建成功
# Single file inside a dataset: read its details (GET) or remove it (DELETE).
/data-management/datasets/{datasetId}/files/{fileId}:
  get:
    tags:
      - DatasetFile
    operationId: getDatasetFileById
    summary: 获取文件详情
    description: 获取数据集中指定文件的详细信息
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
      - name: fileId
        in: path
        required: true
        schema:
          type: string
        description: 文件ID
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetFileResponse'
  delete:
    tags:
      - DatasetFile
    operationId: deleteDatasetFile
    summary: 删除文件
    description: 从数据集中删除指定文件
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
      - name: fileId
        in: path
        required: true
        schema:
          type: string
        description: 文件ID
    responses:
      '204':
        description: 删除成功
# Downloads one file of a dataset as a binary stream.
/data-management/datasets/{datasetId}/files/{fileId}/download:
  get:
    tags:
      - DatasetFile
    operationId: downloadDatasetFile
    summary: 下载文件
    description: 下载数据集中的指定文件
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
      - name: fileId
        in: path
        required: true
        schema:
          type: string
        description: 文件ID
    responses:
      '200':
        description: 文件内容
        content:
          application/octet-stream:
            schema:
              type: string
              format: binary
# Downloads every file of a dataset in a single binary response.
/data-management/datasets/{datasetId}/files/download:
  get:
    tags:
      - DatasetFile
    operationId: downloadDatasetFileAsZip
    # Fixed: the summary was identical to the single-file download's
    # summary, making the two operations indistinguishable in generated
    # docs; it now reflects the "AsZip" operation and its description.
    summary: 打包下载数据集全部文件
    description: 下载数据集中全部文件
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    responses:
      '200':
        description: 文件内容
        content:
          # Media type left unchanged so existing generated clients keep
          # treating the payload as an opaque binary stream.
          application/octet-stream:
            schema:
              type: string
              format: binary
# Registers existing source paths as dataset files and returns the
# created file records.
/data-management/datasets/{datasetId}/files/upload/add:
  post:
    tags:
      - DatasetFile
    operationId: addFilesToDataset
    summary: 添加文件到数据集(仅创建数据库记录)
    description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AddFilesRequest'
    responses:
      '200':
        description: 添加成功,返回创建的文件记录列表
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/DatasetFileResponse'
# First step of chunked upload: returns the request ID (a plain JSON
# string) that the chunk-upload call expects.
/data-management/datasets/{datasetId}/files/upload/pre-upload:
  post:
    tags:
      - DatasetFile
    operationId: preUpload
    summary: 切片上传预上传
    description: 预上传接口,返回后续分片上传所需的请求ID
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/UploadFilesPreRequest'
    responses:
      '200':
        description: 预上传成功,返回请求ID
        content:
          application/json:
            schema:
              type: string
# Uploads one chunk as multipart/form-data, keyed by the pre-upload
# request ID; the 200 response has no body.
/data-management/datasets/{datasetId}/files/upload/chunk:
  post:
    tags:
      - DatasetFile
    operationId: chunkUpload
    summary: 切片上传
    description: 使用预上传返回的请求ID进行分片上传
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    requestBody:
      required: true
      content:
        multipart/form-data:
          schema:
            $ref: '#/components/schemas/UploadFileRequest'
    responses:
      '200':
        description: 上传成功
# Lists the supported dataset types (no parameters, no pagination).
/data-management/dataset-types:
  get:
    tags:
      - DatasetType
    operationId: getDatasetTypes
    summary: 获取数据集类型列表
    description: 获取所有支持的数据集类型
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/DatasetTypeResponse'
# Tag collection: keyword-filterable listing (GET) and creation (POST).
/data-management/tags:
  get:
    tags:
      - Tag
    operationId: getTags
    summary: 获取标签列表
    description: 获取所有可用的标签
    parameters:
      - name: keyword
        in: query
        schema:
          type: string
        description: 标签名称关键词搜索
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              type: array
              items:
                $ref: '#/components/schemas/TagResponse'
  post:
    tags:
      - Tag
    operationId: createTag
    summary: 创建标签
    description: 创建新的标签
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateTagRequest'
    responses:
      '201':
        description: 创建成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/TagResponse'
# Aggregate statistics for one dataset.
/data-management/datasets/{datasetId}/statistics:
  get:
    tags:
      - Dataset
    operationId: getDatasetStatistics
    summary: 获取数据集统计信息
    description: 获取数据集的统计信息(文件数量、大小、完成度等)
    parameters:
      - name: datasetId
        in: path
        required: true
        schema:
          type: string
        description: 数据集ID
    responses:
      '200':
        description: 成功
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetStatisticsResponse'
components:
schemas:
# One page of datasets plus paging metadata.
PagedDatasetResponse:
  type: object
  properties:
    # Items of the current page.
    content:
      type: array
      items:
        $ref: '#/components/schemas/DatasetResponse'
    page:
      type: integer
      description: 当前页码
    size:
      type: integer
      description: 每页大小
    totalElements:
      type: integer
      description: 总元素数
    totalPages:
      type: integer
      description: 总页数
    first:
      type: boolean
      description: 是否为第一页
    last:
      type: boolean
      description: 是否为最后一页
# Dataset representation returned by the dataset endpoints.
DatasetResponse:
  type: object
  properties:
    id:
      type: string
      description: 数据集ID
    # Link to the parent dataset in the dataset hierarchy.
    parentDatasetId:
      type: string
      description: 父数据集ID
    name:
      type: string
      description: 数据集名称
    description:
      type: string
      description: 数据集描述
    type:
      $ref: '#/components/schemas/DatasetTypeResponse'
    status:
      type: string
      # Fixed: this enum was [ACTIVE, INACTIVE, PROCESSING], so a response
      # could not carry the states accepted by the dataset list `status`
      # filter and by UpdateDatasetRequest.status. It now lists the same
      # six values. NOTE(review): INACTIVE was dropped because no request
      # enum can produce it - confirm against the backend status enum.
      enum:
        - DRAFT
        - ACTIVE
        - PROCESSING
        - ARCHIVED
        - PUBLISHED
        - DEPRECATED
      description: 数据集状态
    tags:
      type: array
      items:
        $ref: '#/components/schemas/TagResponse'
      description: 标签列表
    dataSource:
      type: string
      description: 数据源
    targetLocation:
      type: string
      description: 目标位置
    fileCount:
      type: integer
      description: 文件数量
    totalSize:
      type: integer
      format: int64
      description: 总大小(字节)
    completionRate:
      type: number
      format: float
      description: 完成率(0-100)
    createdAt:
      type: string
      format: date-time
      description: 创建时间
    updatedAt:
      type: string
      format: date-time
      description: 更新时间
    createdBy:
      type: string
      description: 创建者
# Payload for creating a dataset; `name` and `type` are mandatory.
CreateDatasetRequest:
  type: object
  required:
    - name
    - type
  properties:
    name:
      type: string
      description: 数据集名称
      minLength: 1
      maxLength: 100
    description:
      type: string
      description: 数据集描述
      maxLength: 500
    type:
      type: string
      description: 数据集类型
    # Optional parent; places the new dataset in the dataset hierarchy.
    parentDatasetId:
      type: string
      description: 父数据集ID
    # Tags are passed as plain strings here, not as TagResponse objects.
    tags:
      type: array
      items:
        type: string
      description: 标签列表
    dataSource:
      type: string
      description: 数据源
    targetLocation:
      type: string
      description: 目标位置
# Payload for updating a dataset; no property is declared as required.
UpdateDatasetRequest:
  type: object
  properties:
    name:
      type: string
      description: 数据集名称
      maxLength: 100
    description:
      type: string
      description: 数据集描述
      maxLength: 500
    parentDatasetId:
      type: string
      description: 父数据集ID
    tags:
      type: array
      items:
        type: string
      description: 标签列表
    status:
      type: string
      enum:
        - DRAFT
        - ACTIVE
        - PROCESSING
        - ARCHIVED
        - PUBLISHED
        - DEPRECATED
      description: 数据集状态
# Body of the pre-upload call that precedes chunked upload.
UploadFilesPreRequest:
  type: object
  description: 切片上传预上传请求
  properties:
    hasArchive:
      type: boolean
      description: 是否为压缩包上传
      default: false
    # The only mandatory field; must be at least 1.
    totalFileNum:
      type: integer
      format: int32
      minimum: 1
      description: 总文件数量
    totalSize:
      type: integer
      format: int64
      description: 总文件大小(字节)
    prefix:
      type: string
      description: 目标子目录前缀,例如 "images/",为空表示数据集根目录
  required:
    - totalFileNum
# Body for creating a sub-directory; only `directoryName` is mandatory.
CreateDirectoryRequest:
  type: object
  description: 创建数据集子目录请求
  properties:
    parentPrefix:
      type: string
      description: 父级前缀路径,例如 "images/",为空表示数据集根目录
    directoryName:
      type: string
      description: 新建目录名称
  required:
    - directoryName
# Multipart form fields of a single chunk upload.
UploadFileRequest:
  type: object
  description: 分片上传请求
  properties:
    reqId:
      type: string
      description: 预上传返回的请求ID
    fileNo:
      type: integer
      format: int32
      description: 文件编号(批量中的第几个)
    fileName:
      type: string
      description: 文件名称
    totalChunkNum:
      type: integer
      format: int32
      description: 文件总分片数量
    chunkNo:
      type: integer
      format: int32
      description: 当前分片编号(从1开始)
    # Raw bytes of the chunk.
    file:
      type: string
      format: binary
      description: 分片二进制内容
    # Not listed under `required`, so the checksum is optional.
    checkSumHex:
      type: string
      description: 分片校验和(十六进制)
  required:
    - reqId
    - fileNo
    - fileName
    - totalChunkNum
    - chunkNo
    - file
# Descriptor of one dataset type.
DatasetTypeResponse:
  type: object
  properties:
    code:
      type: string
      description: 类型编码
    name:
      type: string
      description: 类型名称
    description:
      type: string
      description: 类型描述
    supportedFormats:
      type: array
      items:
        type: string
      description: 支持的文件格式
    icon:
      type: string
      description: 图标
# One page of dataset files plus paging metadata.
PagedDatasetFileResponse:
  type: object
  properties:
    # Items of the current page.
    content:
      type: array
      items:
        $ref: '#/components/schemas/DatasetFileResponse'
    page:
      type: integer
      description: 当前页码
    size:
      type: integer
      description: 每页大小
    totalElements:
      type: integer
      description: 总元素数
    totalPages:
      type: integer
      description: 总页数
    first:
      type: boolean
      description: 是否为第一页
    last:
      type: boolean
      description: 是否为最后一页
# Representation of a single file that belongs to a dataset.
DatasetFileResponse:
  type: object
  properties:
    id:
      type: string
      description: 文件ID
    fileName:
      type: string
      description: 文件名
    originalName:
      type: string
      description: 原始文件名
    fileType:
      type: string
      description: 文件类型
    fileSize:
      type: integer
      format: int64
      description: 文件大小(字节)
    # Same value set as the `status` filter of the file-list operation.
    status:
      type: string
      enum:
        - UPLOADED
        - PROCESSING
        - COMPLETED
        - ERROR
      description: 文件状态
    description:
      type: string
      description: 文件描述
    filePath:
      type: string
      description: 文件路径
    uploadTime:
      type: string
      format: date-time
      description: 上传时间
    uploadedBy:
      type: string
      description: 上传者
# Representation of a tag, including how often it is used.
TagResponse:
  type: object
  properties:
    id:
      type: string
      description: 标签ID
    name:
      type: string
      description: 标签名称
    color:
      type: string
      description: 标签颜色
    description:
      type: string
      description: 标签描述
    usageCount:
      type: integer
      description: 使用次数
# Payload for creating a tag; only `name` is mandatory.
CreateTagRequest:
  type: object
  required:
    - name
  properties:
    name:
      type: string
      description: 标签名称
      minLength: 1
      maxLength: 50
    color:
      type: string
      description: 标签颜色
      # Pattern accepts "#" followed by exactly six hexadecimal digits.
      pattern: '^#[0-9A-Fa-f]{6}$'
    description:
      type: string
      description: 标签描述
      maxLength: 200
# Aggregate figures for one dataset.
DatasetStatisticsResponse:
  type: object
  properties:
    totalFiles:
      type: integer
      description: 总文件数
    completedFiles:
      type: integer
      description: 已完成文件数
    totalSize:
      type: integer
      format: int64
      description: 总大小(字节)
    completionRate:
      type: number
      format: float
      description: 完成率(0-100)
    # Free-form map: arbitrary string keys mapped to integer values.
    fileTypeDistribution:
      type: object
      additionalProperties:
        type: integer
      description: 文件类型分布
    # Free-form map: arbitrary string keys mapped to integer values.
    statusDistribution:
      type: object
      additionalProperties:
        type: integer
      description: 状态分布
# Error body referenced by the 400 and 404 responses in this document.
ErrorResponse:
  type: object
  properties:
    error:
      type: string
      description: 错误代码
    message:
      type: string
      description: 错误消息
    timestamp:
      type: string
      format: date-time
      description: 错误时间
    path:
      type: string
      description: 请求路径
# Body for adding existing source paths to a dataset; `sourcePaths` is
# mandatory.
AddFilesRequest:
  type: object
  description: 将源文件路径添加到数据集的请求
  properties:
    sourcePaths:
      type: array
      items:
        type: string
      description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径
    softAdd:
      type: boolean
      description: 如果为 true,则仅在数据库中创建记录(默认 false)
      default: false
  required:
    - sourcePaths