Files
DataMate/backend/openapi/specs/data-management.yaml
2025-10-21 23:00:48 +08:00

720 lines
18 KiB
YAML

# OpenAPI contract for the data-management service.
openapi: 3.0.3

# API metadata. `title`, `description` and `version` are children of `info`;
# the description is a literal block scalar so its line breaks are preserved.
info:
  title: Data Management Service API
  description: |
    数据管理服务API,提供数据集的创建、管理和文件操作功能。
    主要功能:
    - 数据集的创建和管理
    - 多种数据集类型支持(图像、文本、音频、视频、多模态等)
    - 数据集文件管理
    - 数据集标签和元数据管理
    - 数据集统计信息
  version: 1.0.0
# Base URLs that the path keys of this document are appended to.
# NOTE(review): every key under `paths` already begins with `/data-management`,
# and OpenAPI appends the path key to this URL, so requests resolve to
# `/api/v1/data-management/data-management/...`. Confirm which of the two
# prefixes the deployed service actually expects before changing either.
servers:
  - url: http://localhost:8092/api/v1/data-management
    description: Development server
# Tag catalogue used to group operations; each entry is a name/description
# pair and is referenced by name from the `tags` list of every operation.
tags:
  - name: Dataset
    description: 数据集管理
  - name: DatasetFile
    description: 数据集文件管理
  - name: DatasetType
    description: 数据集类型管理
  - name: Tag
    description: 标签管理
# Operations exposed by the service, keyed by path template and HTTP method.
# Path keys are resolved relative to `servers[].url`.
# NOTE(review): the keys below carry a `/data-management` prefix of their own;
# verify against the server URL that the prefix is not applied twice.
paths:
  # ---- Dataset collection ----
  /data-management/datasets:
    get:
      tags: [Dataset]
      operationId: getDatasets
      summary: 获取数据集列表
      description: 分页查询数据集列表,支持按类型、标签等条件筛选
      parameters:
        - name: page
          in: query
          schema:
            type: integer
            default: 0
            # Pages are zero-based, so negative values are never valid.
            minimum: 0
          description: 页码,从0开始
        - name: size
          in: query
          schema:
            type: integer
            default: 20
            # A page must hold at least one element.
            minimum: 1
          description: 每页大小
        - name: type
          in: query
          schema:
            type: string
          description: 数据集类型过滤
        - name: tags
          in: query
          schema:
            type: string
          description: 标签过滤,多个标签用逗号分隔
        - name: keyword
          in: query
          schema:
            type: string
          description: 关键词搜索(名称、描述)
        - name: status
          in: query
          schema:
            type: string
            enum: [ACTIVE, INACTIVE, PROCESSING]
          description: 数据集状态过滤
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PagedDatasetResponse'
        '400':
          description: 请求参数错误
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
    post:
      tags: [Dataset]
      operationId: createDataset
      summary: 创建数据集
      description: 创建新的数据集
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateDatasetRequest'
      responses:
        '201':
          description: 创建成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetResponse'
        '400':
          description: 请求参数错误
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Single dataset ----
  /data-management/datasets/{datasetId}:
    get:
      tags: [Dataset]
      operationId: getDatasetById
      summary: 获取数据集详情
      description: 根据ID获取数据集详细信息
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetResponse'
        '404':
          description: 数据集不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
    put:
      tags: [Dataset]
      operationId: updateDataset
      summary: 更新数据集
      description: 更新数据集信息
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateDatasetRequest'
      responses:
        '200':
          description: 更新成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetResponse'
        # The request body carries validated fields, so a validation failure
        # is documented the same way as on createDataset.
        '400':
          description: 请求参数错误
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: 数据集不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
    delete:
      tags: [Dataset]
      operationId: deleteDataset
      summary: 删除数据集
      description: 删除指定的数据集
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
      responses:
        '204':
          description: 删除成功
        '404':
          description: 数据集不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Files of a dataset ----
  /data-management/datasets/{datasetId}/files:
    get:
      tags: [DatasetFile]
      operationId: getDatasetFiles
      summary: 获取数据集文件列表
      description: 分页获取数据集中的文件列表
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
        - name: page
          in: query
          schema:
            type: integer
            default: 0
            # Pages are zero-based, so negative values are never valid.
            minimum: 0
          description: 页码,从0开始
        - name: size
          in: query
          schema:
            type: integer
            default: 20
            # A page must hold at least one element.
            minimum: 1
          description: 每页大小
        - name: fileType
          in: query
          schema:
            type: string
          description: 文件类型过滤
        - name: status
          in: query
          schema:
            type: string
            enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
          description: 文件状态过滤
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PagedDatasetFileResponse'
        # Same not-found contract as the other {datasetId} operations.
        '404':
          description: 数据集不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
    post:
      tags: [DatasetFile]
      operationId: uploadDatasetFile
      summary: 上传文件到数据集
      description: 向指定数据集上传文件
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # An upload without the file part is meaningless, so the part
              # is mandatory; the free-text description stays optional.
              required:
                - file
              properties:
                file:
                  type: string
                  format: binary
                  description: 要上传的文件
                description:
                  type: string
                  description: 文件描述
      responses:
        '201':
          description: 上传成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetFileResponse'
        '400':
          description: 请求参数错误
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: 数据集不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Single file of a dataset ----
  /data-management/datasets/{datasetId}/files/{fileId}:
    get:
      tags: [DatasetFile]
      operationId: getDatasetFileById
      summary: 获取文件详情
      description: 获取数据集中指定文件的详细信息
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
        - name: fileId
          in: path
          required: true
          schema:
            type: string
          description: 文件ID
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetFileResponse'
        '404':
          description: 数据集或文件不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
    delete:
      tags: [DatasetFile]
      operationId: deleteDatasetFile
      summary: 删除文件
      description: 从数据集中删除指定文件
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
        - name: fileId
          in: path
          required: true
          schema:
            type: string
          description: 文件ID
      responses:
        '204':
          description: 删除成功
        '404':
          description: 数据集或文件不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Raw file download ----
  /data-management/datasets/{datasetId}/files/{fileId}/download:
    get:
      tags: [DatasetFile]
      operationId: downloadDatasetFile
      summary: 下载文件
      description: 下载数据集中的指定文件
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
        - name: fileId
          in: path
          required: true
          schema:
            type: string
          description: 文件ID
      responses:
        '200':
          description: 文件内容
          content:
            # Binary payload: the body is the file itself, not a JSON envelope.
            application/octet-stream:
              schema:
                type: string
                format: binary
        '404':
          description: 数据集或文件不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Dataset type catalogue ----
  /data-management/dataset-types:
    get:
      tags: [DatasetType]
      operationId: getDatasetTypes
      summary: 获取数据集类型列表
      description: 获取所有支持的数据集类型
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/DatasetTypeResponse'

  # ---- Tags ----
  /data-management/tags:
    get:
      tags: [Tag]
      operationId: getTags
      summary: 获取标签列表
      description: 获取所有可用的标签
      parameters:
        - name: keyword
          in: query
          schema:
            type: string
          description: 标签名称关键词搜索
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/TagResponse'
    post:
      tags: [Tag]
      operationId: createTag
      summary: 创建标签
      description: 创建新的标签
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateTagRequest'
      responses:
        '201':
          description: 创建成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TagResponse'
        # CreateTagRequest has length and pattern constraints, so a
        # validation failure is documented like on createDataset.
        '400':
          description: 请求参数错误
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Dataset statistics ----
  /data-management/datasets/{datasetId}/statistics:
    get:
      tags: [Dataset]
      operationId: getDatasetStatistics
      summary: 获取数据集统计信息
      description: 获取数据集的统计信息(文件数量、大小、完成度等)
      parameters:
        - name: datasetId
          in: path
          required: true
          schema:
            type: string
          description: 数据集ID
      responses:
        '200':
          description: 成功
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetStatisticsResponse'
        '404':
          description: 数据集不存在
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
# Reusable schema definitions referenced from `paths` via
# `#/components/schemas/<Name>`.
components:
  schemas:
    # Page envelope around DatasetResponse items.
    PagedDatasetResponse:
      type: object
      properties:
        content:
          type: array
          items:
            $ref: '#/components/schemas/DatasetResponse'
        page:
          type: integer
          description: 当前页码
        size:
          type: integer
          description: 每页大小
        totalElements:
          type: integer
          description: 总元素数
        totalPages:
          type: integer
          description: 总页数
        first:
          type: boolean
          description: 是否为第一页
        last:
          type: boolean
          description: 是否为最后一页

    # Full representation of a dataset as returned by the API.
    DatasetResponse:
      type: object
      properties:
        id:
          type: string
          description: 数据集ID
        name:
          type: string
          description: 数据集名称
        description:
          type: string
          description: 数据集描述
        type:
          $ref: '#/components/schemas/DatasetTypeResponse'
        status:
          type: string
          enum: [ACTIVE, INACTIVE, PROCESSING]
          description: 数据集状态
        tags:
          type: array
          items:
            $ref: '#/components/schemas/TagResponse'
          description: 标签列表
        dataSource:
          type: string
          description: 数据源
        targetLocation:
          type: string
          description: 目标位置
        fileCount:
          type: integer
          description: 文件数量
        totalSize:
          type: integer
          format: int64
          description: 总大小(字节)
        completionRate:
          type: number
          format: float
          description: 完成率(0-100)
        createdAt:
          type: string
          format: date-time
          description: 创建时间
        updatedAt:
          type: string
          format: date-time
          description: 更新时间
        createdBy:
          type: string
          description: 创建者

    # Payload for createDataset; `name` and `type` are mandatory.
    CreateDatasetRequest:
      type: object
      required:
        - name
        - type
      properties:
        name:
          type: string
          description: 数据集名称
          minLength: 1
          maxLength: 100
        description:
          type: string
          description: 数据集描述
          maxLength: 500
        type:
          type: string
          description: 数据集类型
        tags:
          type: array
          items:
            type: string
          description: 标签列表
        dataSource:
          type: string
          description: 数据源
        targetLocation:
          type: string
          description: 目标位置

    # Payload for updateDataset; every field is optional.
    UpdateDatasetRequest:
      type: object
      properties:
        name:
          type: string
          description: 数据集名称
          # Same lower bound as CreateDatasetRequest.name, so an update
          # cannot blank out a name that creation would have rejected.
          minLength: 1
          maxLength: 100
        description:
          type: string
          description: 数据集描述
          maxLength: 500
        tags:
          type: array
          items:
            type: string
          description: 标签列表
        status:
          type: string
          # PROCESSING is listed on DatasetResponse.status but not here.
          enum: [ACTIVE, INACTIVE]
          description: 数据集状态

    # Descriptor of a supported dataset type.
    DatasetTypeResponse:
      type: object
      properties:
        code:
          type: string
          description: 类型编码
        name:
          type: string
          description: 类型名称
        description:
          type: string
          description: 类型描述
        supportedFormats:
          type: array
          items:
            type: string
          description: 支持的文件格式
        icon:
          type: string
          description: 图标

    # Page envelope around DatasetFileResponse items.
    PagedDatasetFileResponse:
      type: object
      properties:
        content:
          type: array
          items:
            $ref: '#/components/schemas/DatasetFileResponse'
        page:
          type: integer
          description: 当前页码
        size:
          type: integer
          description: 每页大小
        totalElements:
          type: integer
          description: 总元素数
        totalPages:
          type: integer
          description: 总页数
        first:
          type: boolean
          description: 是否为第一页
        last:
          type: boolean
          description: 是否为最后一页

    # Metadata of one file stored in a dataset.
    DatasetFileResponse:
      type: object
      properties:
        id:
          type: string
          description: 文件ID
        fileName:
          type: string
          description: 文件名
        originalName:
          type: string
          description: 原始文件名
        fileType:
          type: string
          description: 文件类型
        fileSize:
          type: integer
          format: int64
          description: 文件大小(字节)
        status:
          type: string
          enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
          description: 文件状态
        description:
          type: string
          description: 文件描述
        filePath:
          type: string
          description: 文件路径
        uploadTime:
          type: string
          format: date-time
          description: 上传时间
        uploadedBy:
          type: string
          description: 上传者

    # Tag as returned by the API.
    TagResponse:
      type: object
      properties:
        id:
          type: string
          description: 标签ID
        name:
          type: string
          description: 标签名称
        color:
          type: string
          description: 标签颜色
        description:
          type: string
          description: 标签描述
        usageCount:
          type: integer
          description: 使用次数

    # Payload for createTag; only `name` is mandatory.
    CreateTagRequest:
      type: object
      required:
        - name
      properties:
        name:
          type: string
          description: 标签名称
          minLength: 1
          maxLength: 50
        color:
          type: string
          description: 标签颜色
          # Six-digit hex colour with a leading '#', e.g. #1A2B3C.
          pattern: '^#[0-9A-Fa-f]{6}$'
        description:
          type: string
          description: 标签描述
          maxLength: 200

    # Aggregated figures for one dataset.
    DatasetStatisticsResponse:
      type: object
      properties:
        totalFiles:
          type: integer
          description: 总文件数
        completedFiles:
          type: integer
          description: 已完成文件数
        totalSize:
          type: integer
          format: int64
          description: 总大小(字节)
        completionRate:
          type: number
          format: float
          description: 完成率(0-100)
        # Free-form maps from a string key to an integer count.
        fileTypeDistribution:
          type: object
          additionalProperties:
            type: integer
          description: 文件类型分布
        statusDistribution:
          type: object
          additionalProperties:
            type: integer
          description: 状态分布

    # Error body shared by all 4xx responses in this document.
    ErrorResponse:
      type: object
      properties:
        error:
          type: string
          description: 错误代码
        message:
          type: string
          description: 错误消息
        timestamp:
          type: string
          format: date-time
          description: 错误时间
        path:
          type: string
          description: 请求路径