feature: 增加定时清除超出保留期限数据集的功能;增加数据归集任务绑定数据集的接口 (#24)

* refactor: 修改调整数据归集实现,删除无用代码,优化代码结构

* feature: 每天凌晨00:00扫描所有数据集,检查数据集是否超过了预设的保留天数,超出保留天数的数据集调用删除接口进行删除

* fix: 修改删除数据集文件的逻辑,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录

* fix: 增加参数校验和接口定义,删除不使用的接口

* fix: 数据集统计数据默认为0

* feature: 数据集状态增加流转,创建时为草稿状态,上传文件或者归集文件后修改为活动状态

* refactor: 修改分页查询归集任务的代码

* fix: 归集任务更新后重新执行;归集任务执行增加事务控制

* feature: 创建归集任务时能够同步创建数据集,更新归集任务时能更新到指定数据集
This commit is contained in:
hefanli
2025-10-25 15:59:36 +08:00
committed by GitHub
parent 871ba5758d
commit 46dfb389f1
21 changed files with 375 additions and 212 deletions

View File

@@ -39,7 +39,7 @@ paths:
schema:
type: integer
default: 0
description: 页码,从0开始
description: 页码,从1开始
- name: size
in: query
schema:
@@ -65,7 +65,7 @@ paths:
in: query
schema:
type: string
enum: [ACTIVE, INACTIVE, PROCESSING]
enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED]
description: 数据集状态过滤
responses:
'200':
@@ -231,40 +231,6 @@ paths:
schema:
$ref: '#/components/schemas/PagedDatasetFileResponse'
# Upload one file into the dataset addressed by the path, sent as
# multipart/form-data; a successful upload answers 201 with a
# DatasetFileResponse body.
post:
  operationId: uploadDatasetFile
  tags:
    - DatasetFile
  summary: 上传文件到数据集
  description: 向指定数据集上传文件
  parameters:
    # Target dataset, taken from the URL path.
    - name: datasetId
      in: path
      required: true
      description: 数据集ID
      schema:
        type: string
  requestBody:
    required: true
    content:
      multipart/form-data:
        schema:
          type: object
          properties:
            # Binary content of the uploaded file.
            file:
              type: string
              format: binary
              description: 要上传的文件
            # Form field literally named "description" (not the OpenAPI
            # keyword): free text describing the file.
            description:
              type: string
              description: 文件描述
  responses:
    '201':
      description: 上传成功
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/DatasetFileResponse'
/data-management/datasets/{datasetId}/files/{fileId}:
get:
tags: [DatasetFile]
@@ -342,6 +308,78 @@ paths:
type: string
format: binary
# Download all files of one dataset in a single binary response.
# NOTE(review): the operationId suggests the payload is a ZIP archive, while
# the declared media type is the generic application/octet-stream - confirm.
/data-management/datasets/{datasetId}/files/download:
  get:
    operationId: downloadDatasetFileAsZip
    tags:
      - DatasetFile
    summary: 下载文件
    description: 下载数据集中全部文件
    parameters:
      # Dataset whose files are downloaded, taken from the URL path.
      - name: datasetId
        in: path
        required: true
        description: 数据集ID
        schema:
          type: string
    responses:
      '200':
        description: 文件内容
        content:
          application/octet-stream:
            schema:
              type: string
              format: binary
# First step of the chunked upload flow: the client posts an
# UploadFilesPreRequest and receives the request ID that the subsequent chunk
# uploads must carry.
/data-management/datasets/{datasetId}/files/upload/pre-upload:
  post:
    operationId: preUpload
    tags:
      - DatasetFile
    summary: 切片上传预上传
    description: 预上传接口,返回后续分片上传所需的请求ID
    parameters:
      # Dataset that will receive the files, taken from the URL path.
      - name: datasetId
        in: path
        required: true
        description: 数据集ID
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/UploadFilesPreRequest'
    responses:
      '200':
        description: 预上传成功,返回请求ID
        content:
          application/json:
            # The response body is a bare string holding the request ID.
            schema:
              type: string
# Second step of the chunked upload flow: sends one chunk as
# multipart/form-data, identified by the request ID returned from pre-upload.
/data-management/datasets/{datasetId}/files/upload/chunk:
  post:
    operationId: chunkUpload
    tags:
      - DatasetFile
    summary: 切片上传
    description: 使用预上传返回的请求ID进行分片上传
    parameters:
      # Dataset that receives the chunk, taken from the URL path.
      - name: datasetId
        in: path
        required: true
        description: 数据集ID
        schema:
          type: string
    requestBody:
      required: true
      content:
        multipart/form-data:
          schema:
            $ref: '#/components/schemas/UploadFileRequest'
    responses:
      # No response body is declared for a successful chunk upload.
      '200':
        description: 上传成功
/data-management/dataset-types:
get:
operationId: getDatasetTypes
@@ -548,9 +586,59 @@ components:
description: 标签列表
status:
type: string
enum: [ACTIVE, INACTIVE]
enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED]
description: 数据集状态
# JSON body of the preUpload operation: describes the batch of files the
# client is about to send in chunks. Only totalFileNum is mandatory.
UploadFilesPreRequest:
  type: object
  description: 切片上传预上传请求
  required:
    - totalFileNum
  properties:
    # Whether the upload is an archive (compressed package); off by default.
    hasArchive:
      type: boolean
      description: 是否为压缩包上传
      default: false
    # Number of files in the batch; at least one file is required.
    totalFileNum:
      type: integer
      format: int32
      minimum: 1
      description: 总文件数量
    # Combined size of all files in bytes. A byte count cannot be negative,
    # so it is bounded below just like totalFileNum.
    totalSize:
      type: integer
      format: int64
      minimum: 0
      description: 总文件大小(字节)
# multipart/form-data body of the chunkUpload operation: one chunk of one file
# belonging to a batch opened by pre-upload. Every field except checkSumHex is
# mandatory.
UploadFileRequest:
  type: object
  description: 分片上传请求
  required:
    - reqId
    - fileNo
    - fileName
    - totalChunkNum
    - chunkNo
    - file
  properties:
    # Request ID handed out by the pre-upload call.
    reqId:
      type: string
      description: 预上传返回的请求ID
    # Position of this file within the batch.
    # NOTE(review): the numbering base (0 or 1) is not stated, so no lower
    # bound is declared here - confirm against the server implementation.
    fileNo:
      type: integer
      format: int32
      description: 文件编号(批量中的第几个)
    fileName:
      type: string
      description: 文件名称
    # Total number of chunks the file is split into. Every request carries a
    # chunk whose number is at least 1, so the total cannot be below 1.
    totalChunkNum:
      type: integer
      format: int32
      minimum: 1
      description: 文件总分片数量
    # Number of the chunk in this request; the bound enforces the documented
    # 1-based numbering.
    chunkNo:
      type: integer
      format: int32
      minimum: 1
      description: 当前分片编号(从1开始)
    # Raw bytes of the chunk.
    file:
      type: string
      format: binary
      description: 分片二进制内容
    # Optional checksum of the chunk, hex encoded.
    checkSumHex:
      type: string
      description: 分片校验和(十六进制)
DatasetTypeResponse:
type: object
properties: