feat(auto-annotation): integrate YOLO auto-labeling and enhance data management (#223)

* feat(auto-annotation): initial setup

* chore: remove package-lock.json

* chore: 清理本地测试脚本与 Maven 设置

* chore: change package-lock.json
This commit is contained in:
Kecheng Sha
2026-01-05 14:22:44 +08:00
committed by GitHub
parent ccfb84c034
commit 3f1ad6a872
44 changed files with 8503 additions and 5238 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,54 +1,63 @@
package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.List;
/**
 * Dataset file entity (aligned with database table t_dm_dataset_files).
 */
@Getter
@Setter
@Builder
@Slf4j
@NoArgsConstructor
@AllArgsConstructor
@TableName("t_dm_dataset_files")
public class DatasetFile {
    /** Shared Jackson mapper: thread-safe and expensive to construct, so build it once. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    @TableId
    private String id; // UUID
    private String datasetId; // UUID
    private String fileName;
    private String filePath;
    private String fileType; // JPG/PNG/DCM/TXT
    private Long fileSize; // bytes
    private String checkSum;
    /** Raw JSON array of tags as stored in the DB; parse via {@link #analyzeTag()}. */
    private String tags;
    private String metadata;
    private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
    private LocalDateTime uploadTime;
    private LocalDateTime lastAccessTime;
    private LocalDateTime createdAt;
    private LocalDateTime updatedAt;

    /**
     * Parse the raw {@code tags} JSON column into tag objects.
     *
     * @return parsed tag list; empty when {@code tags} is null/blank or malformed
     */
    public List<FileTag> analyzeTag() {
        // Rows without tags previously relied on Jackson throwing on null input;
        // guard explicitly instead of using the exception for control flow.
        if (tags == null || tags.isBlank()) {
            return Collections.emptyList();
        }
        try {
            return OBJECT_MAPPER.readValue(tags, new TypeReference<List<FileTag>>() {});
        } catch (Exception e) {
            // Log the failure rather than silently returning an empty list.
            log.warn("Failed to parse tags for dataset file {}: {}", id, e.getMessage());
            return Collections.emptyList();
        }
    }
}
package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.List;
/**
 * Dataset file entity (aligned with database table t_dm_dataset_files).
 */
@Getter
@Setter
@Builder
@Slf4j
@NoArgsConstructor
@AllArgsConstructor
@TableName("t_dm_dataset_files")
public class DatasetFile {
    /** Shared Jackson mapper: thread-safe and expensive to construct, so build it once. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    @TableId
    private String id; // UUID
    private String datasetId; // UUID
    private String fileName;
    private String filePath;
    private String fileType; // JPG/PNG/DCM/TXT
    private Long fileSize; // bytes
    private String checkSum;
    /** Raw JSON array of tags as stored in the DB; parse via {@link #analyzeTag()}. */
    private String tags;
    private String metadata;
    private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
    private LocalDateTime uploadTime;
    private LocalDateTime lastAccessTime;
    private LocalDateTime createdAt;
    private LocalDateTime updatedAt;
    /** Marks whether this entry is a directory (not persisted). */
    @TableField(exist = false)
    private Boolean directory;
    /** Number of files contained in the directory (not persisted). */
    @TableField(exist = false)
    private Long fileCount;

    /**
     * Parse the raw {@code tags} JSON column into tag objects.
     *
     * @return parsed tag list; empty when {@code tags} is null/blank or malformed
     */
    public List<FileTag> analyzeTag() {
        // Rows without tags previously relied on Jackson throwing on null input;
        // guard explicitly instead of using the exception for control flow.
        if (tags == null || tags.isBlank()) {
            return Collections.emptyList();
        }
        try {
            return OBJECT_MAPPER.readValue(tags, new TypeReference<List<FileTag>>() {});
        } catch (Exception e) {
            // Log the failure rather than silently returning an empty list.
            log.warn("Failed to parse tags for dataset file {}: {}", id, e.getMessage());
            return Collections.emptyList();
        }
    }
}

View File

@@ -1,21 +1,24 @@
package com.datamate.datamanagement.domain.model.dataset;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * Pre-upload check information for dataset files.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class DatasetFileUploadCheckInfo {
    /** Dataset id. */
    private String datasetId;
    /** Whether the upload is an archive (compressed package) upload. */
    private boolean hasArchive;
}
package com.datamate.datamanagement.domain.model.dataset;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * Pre-upload check information for dataset files.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class DatasetFileUploadCheckInfo {
    /** Dataset id. */
    private String datasetId;
    /** Whether the upload is an archive (compressed package) upload. */
    private boolean hasArchive;
    /** Target sub-directory prefix, e.g. "images/"; empty means the dataset root. */
    private String prefix;
}

View File

@@ -1,43 +1,47 @@
package com.datamate.datamanagement.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Error codes for the data-management module.
 *
 * @author dallas
 * @since 2025-10-20
 */
@Getter
@AllArgsConstructor
public enum DataManagementErrorCode implements ErrorCode {
    /**
     * Dataset not found.
     */
    DATASET_NOT_FOUND("data_management.0001", "数据集不存在"),
    /**
     * Dataset already exists.
     */
    DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"),
    /**
     * Dataset is in an invalid status.
     */
    DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"),
    /**
     * Dataset tag not found.
     */
    DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"),
    /**
     * Dataset tag already exists.
     */
    DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"),
    /**
     * Dataset file already exists. (The original javadoc was a copy-paste of the
     * "tag already exists" comment; the message string below is the authority.)
     */
    DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在");

    /** Machine-readable error code. */
    private final String code;
    /** Human-readable error message (zh-CN). */
    private final String message;
}
package com.datamate.datamanagement.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Error codes for the data-management module.
 *
 * @author dallas
 * @since 2025-10-20
 */
@Getter
@AllArgsConstructor
public enum DataManagementErrorCode implements ErrorCode {
    /**
     * Dataset not found.
     */
    DATASET_NOT_FOUND("data_management.0001", "数据集不存在"),
    /**
     * Dataset already exists.
     */
    DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"),
    /**
     * Dataset is in an invalid status.
     */
    DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"),
    /**
     * Dataset tag not found.
     */
    DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"),
    /**
     * Dataset tag already exists.
     */
    DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"),
    /**
     * Dataset file already exists.
     */
    DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"),
    /**
     * Directory not found.
     */
    DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在");

    /** Machine-readable error code. */
    private final String code;
    /** Human-readable error message (zh-CN). */
    private final String message;
}

View File

@@ -0,0 +1,20 @@
package com.datamate.datamanagement.interfaces.dto;
import jakarta.validation.constraints.NotBlank;
import lombok.Getter;
import lombok.Setter;
/**
 * Request to create a sub-directory under a dataset.
 */
@Getter
@Setter
public class CreateDirectoryRequest {
    /** Parent prefix path, e.g. "images/"; empty means the dataset root. */
    private String parentPrefix;
    /**
     * Name of the directory to create.
     * NOTE(review): only @NotBlank is enforced here — presumably the service layer
     * rejects path-traversal segments such as ".." and embedded "/"; confirm that
     * server-side validation exists before trusting this value in file paths.
     */
    @NotBlank
    private String directoryName;
}

View File

@@ -1,36 +1,40 @@
package com.datamate.datamanagement.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
/**
 * Response DTO for a dataset file.
 */
@Getter
@Setter
public class DatasetFileResponse {
    /** File id. */
    private String id;
    /** File name. */
    private String fileName;
    /** Original file name. */
    private String originalName;
    /** File type. */
    private String fileType;
    /** File size in bytes. */
    private Long fileSize;
    /** File status. */
    private String status;
    /** File description. */
    private String description;
    /** File path. */
    private String filePath;
    /** Upload time. */
    private LocalDateTime uploadTime;
    /** Last access time. (Original comment said "last update time", which mismatched the field.) */
    private LocalDateTime lastAccessTime;
    /** Uploader. */
    private String uploadedBy;
}
package com.datamate.datamanagement.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
/**
 * Response DTO for a dataset file.
 */
@Getter
@Setter
public class DatasetFileResponse {
    /** File id. */
    private String id;
    /** File name. */
    private String fileName;
    /** Original file name. */
    private String originalName;
    /** File type. */
    private String fileType;
    /** File size in bytes. */
    private Long fileSize;
    /** File status. */
    private String status;
    /** File description. */
    private String description;
    /** File path. */
    private String filePath;
    /** Upload time. */
    private LocalDateTime uploadTime;
    /** Last access time. (Original comment said "last update time", which mismatched the field.) */
    private LocalDateTime lastAccessTime;
    /** Uploader. */
    private String uploadedBy;
    /** Whether this entry is a directory. */
    private Boolean directory;
    /** Number of files inside the directory. */
    private Long fileCount;
}

View File

@@ -1,22 +1,25 @@
package com.datamate.datamanagement.interfaces.dto;
import jakarta.validation.constraints.Min;
import lombok.Getter;
import lombok.Setter;
/**
 * Pre-upload request for chunked file uploads.
 */
@Getter
@Setter
public class UploadFilesPreRequest {
    /** Whether the upload is an archive (compressed package) upload. */
    private boolean hasArchive;
    /** Total number of files in this upload batch. */
    @Min(1)
    private int totalFileNum;
    /** Total size of all files in bytes. */
    private long totalSize;
}
package com.datamate.datamanagement.interfaces.dto;
import jakarta.validation.constraints.Min;
import lombok.Getter;
import lombok.Setter;
/**
 * Pre-upload request for chunked file uploads.
 */
@Getter
@Setter
public class UploadFilesPreRequest {
    /** Whether the upload is an archive (compressed package) upload. */
    private boolean hasArchive;
    /** Total number of files in this upload batch. */
    @Min(1)
    private int totalFileNum;
    /** Total size of all files in bytes. */
    private long totalSize;
    /** Target sub-directory prefix, e.g. "images/"; empty means the dataset root. */
    private String prefix;
}

View File

@@ -1,165 +1,197 @@
package com.datamate.datamanagement.interfaces.rest;
import com.datamate.common.infrastructure.common.IgnoreResponseWrap;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery;
import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
import jakarta.servlet.http.HttpServletResponse;
import jakarta.validation.Valid;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST controller for dataset files (UUID mode).
 */
@Slf4j
@RestController
@RequestMapping("/data-management/datasets/{datasetId}/files")
public class DatasetFileController {
    private final DatasetFileApplicationService datasetFileApplicationService;

    @Autowired
    public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) {
        this.datasetFileApplicationService = datasetFileApplicationService;
    }

    /**
     * List dataset files, optionally as a directory-aware view filtered by {@code prefix}.
     */
    @GetMapping
    public Response<PagedResponse<DatasetFile>> getDatasetFiles(
            @PathVariable("datasetId") String datasetId,
            @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
            @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
            @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
            @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
        PagingQuery pagingQuery = new PagingQuery(page, size);
        PagedResponse<DatasetFile> filesPage;
        if (isWithDirectory) {
            filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
        } else {
            filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery);
        }
        return Response.ok(filesPage);
    }

    /** Fetch a single dataset file by id; 404 when the service reports it missing. */
    @GetMapping("/{fileId}")
    public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(
            @PathVariable("datasetId") String datasetId,
            @PathVariable("fileId") String fileId) {
        try {
            DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
            return ResponseEntity.ok(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile)));
        } catch (IllegalArgumentException e) {
            return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
        }
    }

    /** Delete a single dataset file; 404 when the service reports it missing. */
    @DeleteMapping("/{fileId}")
    public ResponseEntity<Response<Void>> deleteDatasetFile(
            @PathVariable("datasetId") String datasetId,
            @PathVariable("fileId") String fileId) {
        try {
            datasetFileApplicationService.deleteDatasetFile(datasetId, fileId);
            return ResponseEntity.ok().build();
        } catch (IllegalArgumentException e) {
            return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
        }
    }

    /**
     * Download a single file as an attachment.
     * Fix: the file name is percent-encoded per RFC 5987 — embedding raw non-ASCII
     * (e.g. Chinese) names in Content-Disposition produces a malformed header.
     */
    @IgnoreResponseWrap
    @GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8")
    public ResponseEntity<Resource> downloadDatasetFileById(@PathVariable("datasetId") String datasetId,
                                                            @PathVariable("fileId") String fileId) {
        try {
            DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
            Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
            // URLEncoder uses '+' for spaces; header encoding requires "%20".
            String encodedName = java.net.URLEncoder
                    .encode(datasetFile.getFileName(), java.nio.charset.StandardCharsets.UTF_8)
                    .replace("+", "%20");
            return ResponseEntity.ok()
                    .contentType(MediaType.APPLICATION_OCTET_STREAM)
                    .header(HttpHeaders.CONTENT_DISPOSITION,
                            "attachment; filename=\"" + encodedName + "\"; filename*=UTF-8''" + encodedName)
                    .body(resource);
        } catch (IllegalArgumentException e) {
            return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
        } catch (Exception e) {
            // Log instead of silently swallowing: the 500 was previously undiagnosable.
            log.error("Failed to download dataset file {}/{}", datasetId, fileId, e);
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
        }
    }

    /** Download the whole dataset as a ZIP stream written directly to the response. */
    @IgnoreResponseWrap
    @GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)
    public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) {
        datasetFileApplicationService.downloadDatasetFileAsZip(datasetId, response);
    }

    /**
     * Start a batch upload.
     *
     * @param request batch upload pre-request
     * @return batch upload request id
     */
    @PostMapping("/upload/pre-upload")
    public ResponseEntity<Response<String>> preUpload(@PathVariable("datasetId") String datasetId,
                                                      @RequestBody @Valid UploadFilesPreRequest request) {
        return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId)));
    }

    /**
     * Upload one chunk of a file.
     *
     * @param uploadFileRequest chunk upload request
     */
    @PostMapping("/upload/chunk")
    public ResponseEntity<Void> chunkUpload(@PathVariable("datasetId") String datasetId,
                                            @Valid UploadFileRequest uploadFileRequest) {
        log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}",
                uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(),
                uploadFileRequest.getChunkNo());
        datasetFileApplicationService.chunkUpload(datasetId, uploadFileRequest);
        return ResponseEntity.ok().build();
    }

    /**
     * Copy files from the given paths into the dataset directory.
     *
     * @param datasetId dataset id
     * @param req source file path list
     * @return dataset file response DTOs
     */
    @PostMapping("/upload/copy")
    public List<DatasetFileResponse> copyFilesToDatasetDir(@PathVariable("datasetId") String datasetId,
                                                           @RequestBody @Valid CopyFilesRequest req) {
        List<DatasetFile> datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req);
        return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
    }

    /**
     * Register files in the dataset (database records only, no file-system operations).
     *
     * @param datasetId dataset id
     * @param req add-files request (source paths plus the softAdd flag)
     * @return dataset file response DTOs
     */
    @PostMapping("/upload/add")
    public List<DatasetFileResponse> addFilesToDataset(@PathVariable("datasetId") String datasetId,
                                                       @RequestBody @Valid AddFilesRequest req) {
        List<DatasetFile> datasetFiles = datasetFileApplicationService.addFilesToDataset(datasetId, req);
        return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
    }
}
package com.datamate.datamanagement.interfaces.rest;
import com.datamate.common.infrastructure.common.IgnoreResponseWrap;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery;
import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
import jakarta.servlet.http.HttpServletResponse;
import jakarta.validation.Valid;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST controller for dataset files (UUID mode).
 */
@Slf4j
@RestController
@RequestMapping("/data-management/datasets/{datasetId}/files")
public class DatasetFileController {
    private final DatasetFileApplicationService datasetFileApplicationService;

    @Autowired
    public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) {
        this.datasetFileApplicationService = datasetFileApplicationService;
    }

    /**
     * List dataset files, optionally as a directory-aware view filtered by {@code prefix}.
     */
    @GetMapping
    public Response<PagedResponse<DatasetFile>> getDatasetFiles(
            @PathVariable("datasetId") String datasetId,
            @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
            @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
            @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
            @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
        PagingQuery pagingQuery = new PagingQuery(page, size);
        PagedResponse<DatasetFile> filesPage;
        if (isWithDirectory) {
            filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
        } else {
            filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery);
        }
        return Response.ok(filesPage);
    }

    /** Fetch a single dataset file by id; 404 when the service reports it missing. */
    @GetMapping("/{fileId}")
    public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(
            @PathVariable("datasetId") String datasetId,
            @PathVariable("fileId") String fileId) {
        try {
            DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
            return ResponseEntity.ok(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile)));
        } catch (IllegalArgumentException e) {
            return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
        }
    }

    /** Delete a single dataset file; 404 when the service reports it missing. */
    @DeleteMapping("/{fileId}")
    public ResponseEntity<Response<Void>> deleteDatasetFile(
            @PathVariable("datasetId") String datasetId,
            @PathVariable("fileId") String fileId) {
        try {
            datasetFileApplicationService.deleteDatasetFile(datasetId, fileId);
            return ResponseEntity.ok().build();
        } catch (IllegalArgumentException e) {
            return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
        }
    }

    /**
     * Download a single file as an attachment.
     * Fix: the file name is percent-encoded per RFC 5987 — embedding raw non-ASCII
     * (e.g. Chinese) names in Content-Disposition produces a malformed header.
     */
    @IgnoreResponseWrap
    @GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8")
    public ResponseEntity<Resource> downloadDatasetFileById(@PathVariable("datasetId") String datasetId,
                                                            @PathVariable("fileId") String fileId) {
        try {
            DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
            Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
            // URLEncoder uses '+' for spaces; header encoding requires "%20".
            String encodedName = java.net.URLEncoder
                    .encode(datasetFile.getFileName(), java.nio.charset.StandardCharsets.UTF_8)
                    .replace("+", "%20");
            return ResponseEntity.ok()
                    .contentType(MediaType.APPLICATION_OCTET_STREAM)
                    .header(HttpHeaders.CONTENT_DISPOSITION,
                            "attachment; filename=\"" + encodedName + "\"; filename*=UTF-8''" + encodedName)
                    .body(resource);
        } catch (IllegalArgumentException e) {
            return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
        } catch (Exception e) {
            // Log instead of silently swallowing: the 500 was previously undiagnosable.
            log.error("Failed to download dataset file {}/{}", datasetId, fileId, e);
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
        }
    }

    /** Download the whole dataset as a ZIP stream written directly to the response. */
    @IgnoreResponseWrap
    @GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)
    public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) {
        datasetFileApplicationService.downloadDatasetFileAsZip(datasetId, response);
    }

    /**
     * Start a batch upload.
     *
     * @param request batch upload pre-request
     * @return batch upload request id
     */
    @PostMapping("/upload/pre-upload")
    public ResponseEntity<Response<String>> preUpload(@PathVariable("datasetId") String datasetId,
                                                      @RequestBody @Valid UploadFilesPreRequest request) {
        return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId)));
    }

    /**
     * Upload one chunk of a file.
     *
     * @param uploadFileRequest chunk upload request
     */
    @PostMapping("/upload/chunk")
    public ResponseEntity<Void> chunkUpload(@PathVariable("datasetId") String datasetId,
                                            @Valid UploadFileRequest uploadFileRequest) {
        log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}",
                uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(),
                uploadFileRequest.getChunkNo());
        datasetFileApplicationService.chunkUpload(datasetId, uploadFileRequest);
        return ResponseEntity.ok().build();
    }

    /**
     * Copy files from the given paths into the dataset directory.
     *
     * @param datasetId dataset id
     * @param req source file path list
     * @return dataset file response DTOs
     */
    @PostMapping("/upload/copy")
    public List<DatasetFileResponse> copyFilesToDatasetDir(@PathVariable("datasetId") String datasetId,
                                                           @RequestBody @Valid CopyFilesRequest req) {
        List<DatasetFile> datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req);
        return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
    }

    /**
     * Register files in the dataset (database records only, no file-system operations).
     *
     * @param datasetId dataset id
     * @param req add-files request (source paths plus the softAdd flag)
     * @return dataset file response DTOs
     */
    @PostMapping("/upload/add")
    public List<DatasetFileResponse> addFilesToDataset(@PathVariable("datasetId") String datasetId,
                                                       @RequestBody @Valid AddFilesRequest req) {
        List<DatasetFile> datasetFiles = datasetFileApplicationService.addFilesToDataset(datasetId, req);
        return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
    }

    /**
     * Create a sub-directory under the dataset.
     */
    @PostMapping("/directories")
    public ResponseEntity<Void> createDirectory(@PathVariable("datasetId") String datasetId,
                                                @RequestBody @Valid CreateDirectoryRequest req) {
        datasetFileApplicationService.createDirectory(datasetId, req);
        return ResponseEntity.ok().build();
    }

    /**
     * Download a directory (compressed as ZIP).
     */
    @IgnoreResponseWrap
    @GetMapping(value = "/directories/download", produces = "application/zip")
    public void downloadDirectory(@PathVariable("datasetId") String datasetId,
                                  @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
                                  HttpServletResponse response) {
        datasetFileApplicationService.downloadDirectory(datasetId, prefix, response);
    }

    /**
     * Delete a directory and all of its contents.
     */
    @DeleteMapping("/directories")
    public ResponseEntity<Void> deleteDirectory(@PathVariable("datasetId") String datasetId,
                                                @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
        datasetFileApplicationService.deleteDirectory(datasetId, prefix);
        return ResponseEntity.ok().build();
    }
}

View File

@@ -1,17 +1,18 @@
package com.datamate.common.infrastructure.exception;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Common error codes shared across modules.
 *
 * @since 2025/12/5
 */
@Getter
@AllArgsConstructor
public enum CommonErrorCode implements ErrorCode{
    /** Pre-upload request does not exist. */
    PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在");

    /** Machine-readable error code. */
    private final String code;
    /** Human-readable error message (zh-CN). */
    private final String message;
}
package com.datamate.common.infrastructure.exception;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Common error codes shared across modules.
 *
 * @since 2025/12/5
 */
@Getter
@AllArgsConstructor
public enum CommonErrorCode implements ErrorCode{
    /** Invalid request parameter. */
    PARAM_ERROR("common.0001", "参数错误"),
    /** Pre-upload request does not exist. */
    PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在");

    /** Machine-readable error code. */
    private final String code;
    /** Human-readable error message (zh-CN). */
    private final String message;
}

View File

@@ -199,15 +199,11 @@ function CardView<T extends BaseCardDataType>(props: CardViewProps<T>) {
? ""
: "bg-gradient-to-br from-sky-300 to-blue-500 text-white"
}`}
style={{
...(item?.iconColor
style={
item?.iconColor
? { backgroundColor: item.iconColor }
: {}),
backgroundImage:
"linear-gradient(180deg, rgba(255,255,255,0.35), rgba(255,255,255,0.05))",
boxShadow:
"inset 0 0 0 1px rgba(255,255,255,0.25)",
}}
: {}
}
>
<div className="w-[2.1rem] h-[2.1rem] text-gray-50">{item?.icon}</div>
</div>

View File

@@ -1,331 +1,406 @@
import React, { useCallback, useEffect } from "react";
import { Button, Input, Table } from "antd";
import { RightOutlined } from "@ant-design/icons";
import { mapDataset } from "@/pages/DataManagement/dataset.const";
import {
Dataset,
DatasetFile,
DatasetType,
} from "@/pages/DataManagement/dataset.model";
import {
queryDatasetFilesUsingGet,
queryDatasetsUsingGet,
} from "@/pages/DataManagement/dataset.api";
import { formatBytes } from "@/utils/unit";
import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
/** Props for the dataset-file transfer picker. */
interface DatasetFileTransferProps
  extends React.HTMLAttributes<HTMLDivElement> {
  /** Whether the hosting dialog is open; closing resets all internal state. */
  open: boolean;
  /** Controlled selection: map of selected files keyed by file id. */
  selectedFilesMap: { [key: string]: DatasetFile };
  /** Called with the new selection map whenever the selection changes. */
  onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
  /** Optional callback fired when the highlighted dataset changes (or is cleared). */
  onDatasetSelect?: (dataset: Dataset | null) => void;
}
// Columns for the file tables: owning dataset / file name / human-readable size.
// The right-hand table drops the first column via fileCols.slice(1).
const fileCols = [
  {
    title: "所属数据集",
    dataIndex: "datasetName",
    key: "datasetName",
    ellipsis: true,
  },
  {
    title: "文件名",
    dataIndex: "fileName",
    key: "fileName",
    ellipsis: true,
  },
  {
    title: "大小",
    dataIndex: "fileSize",
    key: "fileSize",
    ellipsis: true,
    render: formatBytes,
  },
];
// Customize Table Transfer: pick a dataset on the left, then pick its files on the right.
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
  open,
  selectedFilesMap,
  onSelectedFilesChange,
  onDatasetSelect,
  ...props
}) => {
  // Left panel state: dataset list, search keyword, pagination.
  const [datasets, setDatasets] = React.useState<Dataset[]>([]);
  const [datasetSearch, setDatasetSearch] = React.useState<string>("");
  const [datasetPagination, setDatasetPagination] = React.useState<{
    current: number;
    pageSize: number;
    total: number;
  }>({ current: 1, pageSize: 10, total: 0 });
  // Right panel state: files of the highlighted dataset, search keyword, pagination.
  const [files, setFiles] = React.useState<DatasetFile[]>([]);
  const [filesSearch, setFilesSearch] = React.useState<string>("");
  const [filesPagination, setFilesPagination] = React.useState<{
    current: number;
    pageSize: number;
    total: number;
  }>({ current: 1, pageSize: 10, total: 0 });
  // Toggles the preview table of currently selected files.
  const [showFiles, setShowFiles] = React.useState<boolean>(false);
  const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
    null
  );
  // Row-click history of datasets (toggled membership); independent of selectedDataset.
  const [datasetSelections, setDatasetSelections] = React.useState<Dataset[]>(
    []
  );

  // Fetch one page of TEXT datasets matching the current search keyword.
  const fetchDatasets = async () => {
    const { data } = await queryDatasetsUsingGet({
      // Ant Design Table pagination.current is 1-based; ensure backend also receives 1-based value
      page: datasetPagination.current,
      size: datasetPagination.pageSize,
      keyword: datasetSearch,
      type: DatasetType.TEXT,
    });
    setDatasets(data.content.map(mapDataset) || []);
    setDatasetPagination((prev) => ({
      ...prev,
      total: data.totalElements,
    }));
  };

  // Debounce dataset queries (300 ms) so fast typing does not spam the backend.
  useDebouncedEffect(
    () => {
      fetchDatasets();
    },
    [datasetSearch, datasetPagination.pageSize, datasetPagination.current],
    300
  );

  // Fetch a page of files for the highlighted dataset; explicit options
  // override the stored pagination/search state for that one call.
  const fetchFiles = useCallback(
    async (
      options?: Partial<{ page: number; pageSize: number; keyword: string }>
    ) => {
      if (!selectedDataset) return;
      const page = options?.page ?? filesPagination.current;
      const pageSize = options?.pageSize ?? filesPagination.pageSize;
      const keyword = options?.keyword ?? filesSearch;
      const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
        page,
        size: pageSize,
        keyword,
      });
      setFiles(
        (data.content || []).map((item: DatasetFile) => ({
          ...item,
          key: item.id,
          datasetName: selectedDataset.name,
        }))
      );
      setFilesPagination((prev) => ({
        ...prev,
        current: page,
        pageSize,
        total: data.totalElements,
      }));
    },
    [selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
  );

  useEffect(() => {
    // When the dataset changes, reset file pagination and load the first page,
    // avoiding an extra request loop.
    if (selectedDataset) {
      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
      fetchFiles({ page: 1, pageSize: 10 }).catch(() => {});
    } else {
      setFiles([]);
      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
    }
    // Only trigger when selectedDataset changes
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [selectedDataset]);

  // Notify the parent whenever the highlighted dataset changes.
  useEffect(() => {
    onDatasetSelect?.(selectedDataset);
  }, [selectedDataset, onDatasetSelect]);

  // Toggle a single file in/out of the controlled selection map.
  const toggleSelectFile = (record: DatasetFile) => {
    if (!selectedFilesMap[record.id]) {
      onSelectedFilesChange({
        ...selectedFilesMap,
        [record.id]: record,
      });
    } else {
      const newSelectedFiles = { ...selectedFilesMap };
      delete newSelectedFiles[record.id];
      onSelectedFilesChange(newSelectedFiles);
    }
  };

  useEffect(() => {
    if (!open) {
      // Reset all local state when the dialog closes.
      setDatasets([]);
      setDatasetSearch("");
      setDatasetPagination({ current: 1, pageSize: 10, total: 0 });
      setFiles([]);
      setFilesSearch("");
      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
      setShowFiles(false);
      setSelectedDataset(null);
      setDatasetSelections([]);
      onDatasetSelect?.(null);
    }
  }, [open, onDatasetSelect]);

  // Columns for the dataset table (left panel).
  const datasetCols = [
    {
      title: "数据集名称",
      dataIndex: "name",
      key: "name",
      ellipsis: true,
    },
    {
      title: "文件数",
      dataIndex: "fileCount",
      key: "fileCount",
      ellipsis: true,
    },
    {
      title: "大小",
      dataIndex: "totalSize",
      key: "totalSize",
      ellipsis: true,
      render: formatBytes,
    },
  ];

  return (
    <div {...props}>
      <div className="grid grid-cols-25 gap-4 w-full">
        {/* Left panel: dataset search + table */}
        <div className="border-card flex flex-col col-span-12">
          <div className="border-bottom p-2 font-bold"></div>
          <div className="p-2">
            <Input
              placeholder="搜索数据集名称..."
              value={datasetSearch}
              allowClear
              onChange={(e) => setDatasetSearch(e.target.value)}
            />
          </div>
          <Table
            scroll={{ y: 400 }}
            rowKey="id"
            size="small"
            rowClassName={(record) =>
              selectedDataset?.id === record.id ? "bg-blue-100" : ""
            }
            onRow={(record: Dataset) => ({
              onClick: () => {
                setSelectedDataset(record);
                if (!datasetSelections.find((d) => d.id === record.id)) {
                  setDatasetSelections([...datasetSelections, record]);
                } else {
                  setDatasetSelections(
                    datasetSelections.filter((d) => d.id !== record.id)
                  );
                }
              },
            })}
            dataSource={datasets}
            columns={datasetCols}
            pagination={{
              ...datasetPagination,
              onChange: (page, pageSize) =>
                setDatasetPagination({
                  current: page,
                  pageSize: pageSize || datasetPagination.pageSize,
                  total: datasetPagination.total,
                }),
            }}
          />
        </div>
        <RightOutlined />
        {/* Right panel: file search + table for the highlighted dataset */}
        <div className="border-card flex flex-col col-span-12">
          <div className="border-bottom p-2 font-bold"></div>
          <div className="p-2">
            <Input
              placeholder="搜索文件名称..."
              value={filesSearch}
              onChange={(e) => setFilesSearch(e.target.value)}
            />
          </div>
          <Table
            scroll={{ y: 400 }}
            rowKey="id"
            size="small"
            dataSource={files}
            columns={fileCols.slice(1, fileCols.length)}
            pagination={{
              ...filesPagination,
              onChange: (page, pageSize) => {
                const nextPageSize = pageSize || filesPagination.pageSize;
                setFilesPagination((prev) => ({
                  ...prev,
                  current: page,
                  pageSize: nextPageSize,
                }));
                fetchFiles({ page, pageSize: nextPageSize }).catch(() => {});
              },
            }}
            onRow={(record: DatasetFile) => ({
              onClick: () => toggleSelectFile(record),
            })}
            rowSelection={{
              type: "checkbox",
              selectedRowKeys: Object.keys(selectedFilesMap),
              // Single row toggle
              onSelect: (record: DatasetFile) => {
                toggleSelectFile(record);
              },
              // Select-all checkbox
              onSelectAll: (selected, selectedRows: DatasetFile[]) => {
                if (selected) {
                  // Select all -> merge every file of the current page into selectedFilesMap
                  const newMap: Record<string, DatasetFile> = { ...selectedFilesMap };
                  selectedRows.forEach((f) => {
                    newMap[f.id] = f;
                  });
                  onSelectedFilesChange(newMap);
                } else {
                  // Deselect all -> remove entries from the map
                  const newMap = { ...selectedFilesMap };
                  Object.keys(newMap).forEach((id) => {
                    if (files.some((f) => String(f.id) === id)) {
                      // Only remove files belonging to the current page
                      delete newMap[id];
                    }
                  });
                  onSelectedFilesChange(newMap);
                }
              },
              getCheckboxProps: (record: DatasetFile) => ({
                name: record.fileName,
              }),
            }}
          />
        </div>
      </div>
      <Button className="mt-4" onClick={() => setShowFiles(!showFiles)}>
        {showFiles ? "取消预览" : "预览"}
      </Button>
      {/* Preview table of everything currently selected */}
      <div hidden={!showFiles}>
        <Table
          scroll={{ y: 400 }}
          rowKey="id"
          size="small"
          dataSource={Object.values(selectedFilesMap)}
          columns={fileCols}
        />
      </div>
    </div>
  );
};
export default DatasetFileTransfer;
import React, { useCallback, useEffect } from "react";
import { Button, Input, Table, message } from "antd";
import { RightOutlined } from "@ant-design/icons";
import { mapDataset } from "@/pages/DataManagement/dataset.const";
import {
Dataset,
DatasetFile,
DatasetType,
} from "@/pages/DataManagement/dataset.model";
import {
queryDatasetFilesUsingGet,
queryDatasetsUsingGet,
} from "@/pages/DataManagement/dataset.api";
import { formatBytes } from "@/utils/unit";
import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
/**
 * Props for the two-pane dataset/file picker.
 */
interface DatasetFileTransferProps
  extends React.HTMLAttributes<HTMLDivElement> {
  // Whether the hosting dialog is open; closing resets all internal state.
  open: boolean;
  // Currently selected files keyed by stringified file id.
  selectedFilesMap: { [key: string]: DatasetFile };
  // Called with the full replacement map whenever the selection changes.
  onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
  // Optional: fired when the highlighted dataset changes (null on reset).
  onDatasetSelect?: (dataset: Dataset | null) => void;
  // Restricts the dataset list to one dataset type; defaults to TEXT.
  datasetTypeFilter?: DatasetType;
}
// Column set shared by the file tables: owning dataset, file name,
// human-readable size (first column is sliced off for the per-dataset view).
const fileCols = [
  { title: "所属数据集", dataIndex: "datasetName", key: "datasetName", ellipsis: true },
  { title: "文件名", dataIndex: "fileName", key: "fileName", ellipsis: true },
  { title: "大小", dataIndex: "fileSize", key: "fileSize", ellipsis: true, render: formatBytes },
];
// Customize Table Transfer
//
// Two-pane dataset/file picker: the left table lists datasets (searchable,
// paginated); the right table lists the files of the highlighted dataset.
// Selected files accumulate across pages and datasets in `selectedFilesMap`,
// which is owned by the parent via `onSelectedFilesChange`.
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
  open,
  selectedFilesMap,
  onSelectedFilesChange,
  onDatasetSelect,
  datasetTypeFilter = DatasetType.TEXT,
  ...props
}) => {
  const [datasets, setDatasets] = React.useState<Dataset[]>([]);
  const [datasetSearch, setDatasetSearch] = React.useState<string>("");
  const [datasetPagination, setDatasetPagination] = React.useState<{
    current: number;
    pageSize: number;
    total: number;
  }>({ current: 1, pageSize: 10, total: 0 });
  const [files, setFiles] = React.useState<DatasetFile[]>([]);
  const [filesSearch, setFilesSearch] = React.useState<string>("");
  const [filesPagination, setFilesPagination] = React.useState<{
    current: number;
    pageSize: number;
    total: number;
  }>({ current: 1, pageSize: 10, total: 0 });
  const [showFiles, setShowFiles] = React.useState<boolean>(false);
  const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
    null
  );
  const [datasetSelections, setDatasetSelections] = React.useState<Dataset[]>(
    []
  );
  const [selectingAll, setSelectingAll] = React.useState<boolean>(false);

  // Load one page of datasets for the current search/type filter.
  const fetchDatasets = async () => {
    const { data } = await queryDatasetsUsingGet({
      // Ant Design Table pagination.current is 1-based.
      // NOTE(review): the dataset-files endpoint below is 0-based — confirm
      // this endpoint really expects a 1-based page.
      page: datasetPagination.current,
      size: datasetPagination.pageSize,
      keyword: datasetSearch,
      type: datasetTypeFilter,
    });
    setDatasets(data.content.map(mapDataset) || []);
    setDatasetPagination((prev) => ({
      ...prev,
      total: data.totalElements,
    }));
  };
  useDebouncedEffect(
    () => {
      fetchDatasets();
    },
    [datasetSearch, datasetPagination.pageSize, datasetPagination.current],
    300
  );

  /**
   * Load one page of files for the highlighted dataset.
   *
   * `options.page` is the backend's 0-based page index; when omitted it is
   * derived from the 1-based UI pager. The stored pagination state is
   * converted back to Ant Design's 1-based `current`.
   * (Fix: the previous code wrote the 0-based value straight into `current`,
   * de-syncing the pager after every page change.)
   */
  const fetchFiles = useCallback(
    async (
      options?: Partial<{ page: number; pageSize: number; keyword: string }>
    ) => {
      if (!selectedDataset) return;
      const page = options?.page ?? filesPagination.current - 1;
      const pageSize = options?.pageSize ?? filesPagination.pageSize;
      const keyword = options?.keyword ?? filesSearch;
      const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
        page,
        size: pageSize,
        keyword,
      });
      setFiles(
        (data.content || []).map((item: DatasetFile) => ({
          ...item,
          id: item.id,
          key: String(item.id), // rowKey is a string; must match selectedRowKeys
          datasetName: selectedDataset.name,
        }))
      );
      setFilesPagination((prev) => ({
        ...prev,
        current: page + 1, // convert back to 1-based for the Table pager
        pageSize,
        total: data.totalElements,
      }));
    },
    [selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
  );

  useEffect(() => {
    // When the highlighted dataset changes, reset paging and load page one;
    // clearing the selection avoids an extra request loop.
    if (selectedDataset) {
      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
      // Backend pages are 0-based; 0 is the first page.
      fetchFiles({ page: 0, pageSize: 10 }).catch(() => {});
    } else {
      setFiles([]);
      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
    }
    // Only react to dataset changes.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [selectedDataset]);

  // Fix: the file-search input previously updated state but never triggered a
  // refetch, leaving the search box inert. Debounce the keyword and reload
  // from the first page.
  useDebouncedEffect(
    () => {
      if (!selectedDataset) return;
      fetchFiles({ page: 0, keyword: filesSearch }).catch(() => {});
    },
    [filesSearch],
    300
  );

  useEffect(() => {
    onDatasetSelect?.(selectedDataset);
  }, [selectedDataset, onDatasetSelect]);

  // Select every file of the highlighted dataset by paging through the
  // backend in batches and merging the results into the selection map.
  const handleSelectAllInDataset = useCallback(async () => {
    if (!selectedDataset) {
      message.warning("请先选择一个数据集");
      return;
    }
    try {
      setSelectingAll(true);
      const pageSize = 1000; // batch size, stays under per-page backend limits
      let page = 0; // backend pages are 0-based
      let total = 0;
      const allFiles: DatasetFile[] = [];
      while (true) {
        const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
          page,
          size: pageSize,
        });
        const content: DatasetFile[] = (data.content || []).map(
          (item: DatasetFile) => ({
            ...item,
            key: String(item.id), // keep key type consistent with fetchFiles
            datasetName: selectedDataset.name,
          }),
        );
        if (!content.length) {
          break;
        }
        allFiles.push(...content);
        // Prefer the backend's totalElements; fall back to the running count.
        total = typeof data.totalElements === "number" ? data.totalElements : allFiles.length;
        // A short page means we reached the end; otherwise fetch the next one.
        if (content.length < pageSize) {
          break;
        }
        page += 1;
      }
      const newMap: { [key: string]: DatasetFile } = { ...selectedFilesMap };
      allFiles.forEach((file) => {
        if (file && file.id != null) {
          newMap[String(file.id)] = file;
        }
      });
      onSelectedFilesChange(newMap);
      const count = total || allFiles.length;
      if (count > 0) {
        message.success(`已选中当前数据集的全部 ${count} 个文件`);
      } else {
        message.info("当前数据集下没有可选文件");
      }
    } catch (error) {
      console.error("Failed to select all files in dataset", error);
      message.error("全选整个数据集失败,请稍后重试");
    } finally {
      setSelectingAll(false);
    }
  }, [selectedDataset, selectedFilesMap, onSelectedFilesChange]);

  // Toggle a single file in/out of the selection map.
  const toggleSelectFile = (record: DatasetFile) => {
    if (!selectedFilesMap[record.id]) {
      onSelectedFilesChange({
        ...selectedFilesMap,
        [record.id]: record,
      });
    } else {
      const newSelectedFiles = { ...selectedFilesMap };
      delete newSelectedFiles[record.id];
      onSelectedFilesChange(newSelectedFiles);
    }
  };

  useEffect(() => {
    if (!open) {
      // Reset all internal state when the hosting dialog closes.
      setDatasets([]);
      setDatasetSearch("");
      setDatasetPagination({ current: 1, pageSize: 10, total: 0 });
      setFiles([]);
      setFilesSearch("");
      setFilesPagination({ current: 1, pageSize: 10, total: 0 });
      setShowFiles(false);
      setSelectedDataset(null);
      setDatasetSelections([]);
      onDatasetSelect?.(null);
    }
  }, [open, onDatasetSelect]);

  const datasetCols = [
    {
      title: "数据集名称",
      dataIndex: "name",
      key: "name",
      ellipsis: true,
    },
    {
      title: "文件数",
      dataIndex: "fileCount",
      key: "fileCount",
      ellipsis: true,
    },
    {
      title: "大小",
      dataIndex: "totalSize",
      key: "totalSize",
      ellipsis: true,
      render: formatBytes,
    },
  ];
  return (
    <div {...props}>
      <div className="grid grid-cols-25 gap-4 w-full">
        <div className="border-card flex flex-col col-span-12">
          <div className="border-bottom p-2 font-bold"></div>
          <div className="p-2">
            <Input
              placeholder="搜索数据集名称..."
              value={datasetSearch}
              allowClear
              onChange={(e) => setDatasetSearch(e.target.value)}
            />
          </div>
          <Table
            scroll={{ y: 400 }}
            rowKey="id"
            size="small"
            rowClassName={(record) =>
              selectedDataset?.id === record.id ? "bg-blue-100" : ""
            }
            onRow={(record: Dataset) => ({
              onClick: () => {
                setSelectedDataset(record);
                if (!datasetSelections.find((d) => d.id === record.id)) {
                  setDatasetSelections([...datasetSelections, record]);
                } else {
                  setDatasetSelections(
                    datasetSelections.filter((d) => d.id !== record.id)
                  );
                }
              },
            })}
            dataSource={datasets}
            columns={datasetCols}
            pagination={{
              ...datasetPagination,
              onChange: (page, pageSize) =>
                setDatasetPagination({
                  current: page,
                  pageSize: pageSize || datasetPagination.pageSize,
                  total: datasetPagination.total,
                }),
            }}
          />
        </div>
        <RightOutlined />
        <div className="border-card flex flex-col col-span-12">
          <div className="border-bottom p-2 font-bold flex justify-between items-center">
            <span></span>
            {/* NOTE(review): this link button renders with no label or icon —
                its "select whole dataset" text appears to have been lost. */}
            <Button
              type="link"
              size="small"
              onClick={handleSelectAllInDataset}
              disabled={!selectedDataset}
              loading={selectingAll}
            >
            </Button>
          </div>
          <div className="p-2">
            <Input
              placeholder="搜索文件名称..."
              value={filesSearch}
              onChange={(e) => setFilesSearch(e.target.value)}
            />
          </div>
          <Table
            scroll={{ y: 400 }}
            rowKey={(record) => String(record.id)}
            size="small"
            dataSource={files}
            columns={fileCols.slice(1, fileCols.length)}
            pagination={{
              ...filesPagination,
              onChange: (page, pageSize) => {
                const nextPageSize = pageSize || filesPagination.pageSize;
                setFilesPagination((prev) => ({
                  ...prev,
                  current: page,
                  pageSize: nextPageSize,
                }));
                // UI pages are 1-based, backend pages are 0-based.
                fetchFiles({ page: page - 1, pageSize: nextPageSize }).catch(() => {});
              },
            }}
            onRow={(record: DatasetFile) => ({
              onClick: () => toggleSelectFile(record),
            })}
            rowSelection={{
              type: "checkbox",
              selectedRowKeys: Object.keys(selectedFilesMap),
              preserveSelectedRowKeys: true,
              // Row checkbox: toggle a single file.
              onSelect: (record: DatasetFile) => {
                toggleSelectFile(record);
              },
              // Header checkbox: selects the ENTIRE dataset, not just this page.
              onSelectAll: (selected, selectedRows: DatasetFile[]) => {
                if (selected) {
                  handleSelectAllInDataset();
                } else {
                  // Unchecking clears the whole selection.
                  onSelectedFilesChange({});
                }
              },
              getCheckboxProps: (record: DatasetFile) => ({
                name: record.fileName,
              }),
            }}
          />
        </div>
      </div>
      <Button className="mt-4" onClick={() => setShowFiles(!showFiles)}>
        {showFiles ? "取消预览" : "预览"}
      </Button>
      <div hidden={!showFiles}>
        <Table
          scroll={{ y: 400 }}
          rowKey="id"
          size="small"
          dataSource={Object.values(selectedFilesMap)}
          columns={fileCols}
        />
      </div>
    </div>
  );
};
export default DatasetFileTransfer;

View File

@@ -1,187 +1,198 @@
import { TaskItem } from "@/pages/DataManagement/dataset.model";
import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util";
import { App } from "antd";
import { useRef, useState } from "react";
/**
 * Chunked ("slice") upload hook with a task list.
 *
 * @param preUpload    registers the upload on the server, returns a request id
 * @param uploadChunk  uploads one multipart chunk for a request id
 * @param cancelUpload optional server-side cancellation by request id
 * @param showTaskCenter whether to show the task-center popover while uploading
 * @returns { taskList, createTask, removeTask, handleUpload }
 */
export function useFileSliceUpload(
  {
    preUpload,
    uploadChunk,
    cancelUpload,
  }: {
    preUpload: (id: string, params: any) => Promise<{ data: number }>;
    uploadChunk: (id: string, formData: FormData, config: any) => Promise<any>;
    cancelUpload: ((reqId: number) => Promise<any>) | null;
  },
  showTaskCenter = true // whether to show the task center while uploading
) {
  const { message } = App.useApp();
  const [taskList, setTaskList] = useState<TaskItem[]>([]);
  const taskListRef = useRef<TaskItem[]>([]); // keeps task order stable across async updates
  // Create a task entry for a dataset upload and prepend it to the list.
  const createTask = (detail: any = {}) => {
    const { dataset } = detail;
    const title = `上传数据集: ${dataset.name} `;
    const controller = new AbortController();
    const task: TaskItem = {
      key: dataset.id,
      title,
      percent: 0,
      reqId: -1,
      controller,
      size: 0,
      updateEvent: detail.updateEvent,
      hasArchive: detail.hasArchive,
    };
    taskListRef.current = [task, ...taskListRef.current];
    setTaskList(taskListRef.current);
    return task;
  };
  // Replace the task with the same key in the ref and state.
  const updateTaskList = (task: TaskItem) => {
    taskListRef.current = taskListRef.current.map((item) =>
      item.key === task.key ? task : item
    );
    setTaskList(taskListRef.current);
  };
  // Drop a task; if it was cancelled run its cancel handler, then notify listeners.
  const removeTask = (task: TaskItem) => {
    const { key } = task;
    taskListRef.current = taskListRef.current.filter(
      (item) => item.key !== key
    );
    setTaskList(taskListRef.current);
    if (task.isCancel && task.cancelFn) {
      task.cancelFn();
    }
    if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
    if (showTaskCenter) {
      window.dispatchEvent(
        new CustomEvent("show:task-popover", { detail: { show: false } })
      );
    }
  };
  // Build the multipart payload for one chunk (file/chunk numbers are 1-based).
  async function buildFormData({ file, reqId, i, j }) {
    const formData = new FormData();
    const { slices, name, size } = file;
    const checkSum = await calculateSHA256(slices[j]);
    formData.append("file", slices[j]);
    formData.append("reqId", reqId.toString());
    formData.append("fileNo", (i + 1).toString());
    formData.append("chunkNo", (j + 1).toString());
    formData.append("fileName", name);
    formData.append("fileSize", size.toString());
    formData.append("totalChunkNum", slices.length.toString());
    formData.append("checkSumHex", checkSum);
    return formData;
  }
  // Upload a single chunk, streaming progress into the task list.
  async function uploadSlice(task: TaskItem, fileInfo) {
    if (!task) {
      return;
    }
    const { reqId, key } = task;
    const { loaded, i, j, files, totalSize } = fileInfo;
    const formData = await buildFormData({
      file: files[i],
      i,
      j,
      reqId,
    });
    let newTask = { ...task };
    await uploadChunk(key, formData, {
      onUploadProgress: (e) => {
        const loadedSize = loaded + e.loaded;
        // Fix: keep the percentage numeric — `Number(x).toFixed(2)` returns a
        // STRING, which was previously compared against 100 and stored into
        // the numeric `percent` field.
        const curPercent = Number(((loadedSize / totalSize) * 100).toFixed(2));
        newTask = {
          ...newTask,
          ...taskListRef.current.find((item) => item.key === key),
          size: loadedSize,
          percent: curPercent >= 100 ? 99.99 : curPercent, // hold at 99.99 until done
        };
        updateTaskList(newTask);
      },
    });
  }
  // Register the upload, then push every chunk of every file sequentially.
  async function uploadFile({ task, files, totalSize }) {
    const { data: reqId } = await preUpload(task.key, {
      totalFileNum: files.length,
      totalSize,
      datasetId: task.key,
      hasArchive: task.hasArchive,
    });
    const newTask: TaskItem = {
      ...task,
      reqId,
      isCancel: false,
      cancelFn: () => {
        task.controller.abort();
        cancelUpload?.(reqId);
        if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
      },
    };
    updateTaskList(newTask);
    if (showTaskCenter) {
      window.dispatchEvent(
        new CustomEvent("show:task-popover", { detail: { show: true } })
      );
    }
    // Notify listeners that upload state changed.
    if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
    let loaded = 0;
    for (let i = 0; i < files.length; i++) {
      const { slices } = files[i];
      for (let j = 0; j < slices.length; j++) {
        await uploadSlice(newTask, {
          loaded,
          i,
          j,
          files,
          totalSize,
        });
        loaded += slices[j].size;
      }
    }
    removeTask(newTask);
  }
  // Entry point: verify the picked files still exist on disk, then upload.
  const handleUpload = async ({ task, files }) => {
    const isErrorFile = await checkIsFilesExist(files);
    if (isErrorFile) {
      message.error("文件被修改或删除,请重新选择文件上传");
      removeTask({
        ...task,
        isCancel: false,
        ...taskListRef.current.find((item) => item.key === task.key),
      });
      return;
    }
    try {
      const totalSize = files.reduce((acc, file) => acc + file.size, 0);
      await uploadFile({ task, files, totalSize });
    } catch (err) {
      console.error(err);
      message.error("文件上传失败,请稍后重试");
      removeTask({
        ...task,
        isCancel: true,
        ...taskListRef.current.find((item) => item.key === task.key),
      });
    }
  };
  return {
    taskList,
    createTask,
    removeTask,
    handleUpload,
  };
}
import { TaskItem } from "@/pages/DataManagement/dataset.model";
import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util";
import { App } from "antd";
import { useRef, useState } from "react";
/**
 * Chunked ("slice") upload hook with a task list and directory-prefix support.
 *
 * @param preUpload    registers the upload on the server, returns a request id
 * @param uploadChunk  uploads one multipart chunk for a request id
 * @param cancelUpload optional server-side cancellation by request id
 * @param showTaskCenter whether to show the task-center popover while uploading
 * @returns { taskList, createTask, removeTask, handleUpload }
 */
export function useFileSliceUpload(
  {
    preUpload,
    uploadChunk,
    cancelUpload,
  }: {
    preUpload: (id: string, params: any) => Promise<{ data: number }>;
    uploadChunk: (id: string, formData: FormData, config: any) => Promise<any>;
    cancelUpload: ((reqId: number) => Promise<any>) | null;
  },
  showTaskCenter = true // whether to show the task center while uploading
) {
  const { message } = App.useApp();
  const [taskList, setTaskList] = useState<TaskItem[]>([]);
  const taskListRef = useRef<TaskItem[]>([]); // keeps task order stable across async updates
  // Create a task entry for a dataset upload and prepend it to the list.
  const createTask = (detail: any = {}) => {
    const { dataset } = detail;
    const title = `上传数据集: ${dataset.name} `;
    const controller = new AbortController();
    const task: TaskItem = {
      key: dataset.id,
      title,
      percent: 0,
      reqId: -1,
      controller,
      size: 0,
      updateEvent: detail.updateEvent,
      hasArchive: detail.hasArchive,
      prefix: detail.prefix,
    };
    taskListRef.current = [task, ...taskListRef.current];
    setTaskList(taskListRef.current);
    return task;
  };
  // Replace the task with the same key in the ref and state.
  const updateTaskList = (task: TaskItem) => {
    taskListRef.current = taskListRef.current.map((item) =>
      item.key === task.key ? task : item
    );
    setTaskList(taskListRef.current);
  };
  // Drop a task; if it was cancelled run its cancel handler, then notify listeners.
  const removeTask = (task: TaskItem) => {
    const { key } = task;
    taskListRef.current = taskListRef.current.filter(
      (item) => item.key !== key
    );
    setTaskList(taskListRef.current);
    if (task.isCancel && task.cancelFn) {
      task.cancelFn();
    }
    if (task.updateEvent) {
      // Carry the directory prefix so listeners can refresh while staying in
      // the current folder.
      window.dispatchEvent(
        new CustomEvent(task.updateEvent, {
          detail: { prefix: task.prefix },
        })
      );
    }
    if (showTaskCenter) {
      window.dispatchEvent(
        new CustomEvent("show:task-popover", { detail: { show: false } })
      );
    }
  };
  // Build the multipart payload for one chunk (file/chunk numbers are 1-based).
  async function buildFormData({ file, reqId, i, j }) {
    const formData = new FormData();
    const { slices, name, size } = file;
    const checkSum = await calculateSHA256(slices[j]);
    formData.append("file", slices[j]);
    formData.append("reqId", reqId.toString());
    formData.append("fileNo", (i + 1).toString());
    formData.append("chunkNo", (j + 1).toString());
    formData.append("fileName", name);
    formData.append("fileSize", size.toString());
    formData.append("totalChunkNum", slices.length.toString());
    formData.append("checkSumHex", checkSum);
    return formData;
  }
  // Upload a single chunk, streaming progress into the task list.
  async function uploadSlice(task: TaskItem, fileInfo) {
    if (!task) {
      return;
    }
    const { reqId, key } = task;
    const { loaded, i, j, files, totalSize } = fileInfo;
    const formData = await buildFormData({
      file: files[i],
      i,
      j,
      reqId,
    });
    let newTask = { ...task };
    await uploadChunk(key, formData, {
      onUploadProgress: (e) => {
        const loadedSize = loaded + e.loaded;
        // Fix: keep the percentage numeric — `Number(x).toFixed(2)` returns a
        // STRING, which was previously compared against 100 and stored into
        // the numeric `percent` field.
        const curPercent = Number(((loadedSize / totalSize) * 100).toFixed(2));
        newTask = {
          ...newTask,
          ...taskListRef.current.find((item) => item.key === key),
          size: loadedSize,
          percent: curPercent >= 100 ? 99.99 : curPercent, // hold at 99.99 until done
        };
        updateTaskList(newTask);
      },
    });
  }
  // Register the upload, then push every chunk of every file sequentially.
  // (Fix: leftover debug console.log statements removed.)
  async function uploadFile({ task, files, totalSize }) {
    const { data: reqId } = await preUpload(task.key, {
      totalFileNum: files.length,
      totalSize,
      datasetId: task.key,
      hasArchive: task.hasArchive,
      prefix: task.prefix,
    });
    const newTask: TaskItem = {
      ...task,
      reqId,
      isCancel: false,
      cancelFn: () => {
        task.controller.abort();
        cancelUpload?.(reqId);
        if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
      },
    };
    updateTaskList(newTask);
    if (showTaskCenter) {
      window.dispatchEvent(
        new CustomEvent("show:task-popover", { detail: { show: true } })
      );
    }
    // Notify listeners that upload state changed.
    if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
    let loaded = 0;
    for (let i = 0; i < files.length; i++) {
      const { slices } = files[i];
      for (let j = 0; j < slices.length; j++) {
        await uploadSlice(newTask, {
          loaded,
          i,
          j,
          files,
          totalSize,
        });
        loaded += slices[j].size;
      }
    }
    removeTask(newTask);
  }
  // Entry point: verify the picked files still exist on disk, then upload.
  const handleUpload = async ({ task, files }) => {
    const isErrorFile = await checkIsFilesExist(files);
    if (isErrorFile) {
      message.error("文件被修改或删除,请重新选择文件上传");
      removeTask({
        ...task,
        isCancel: false,
        ...taskListRef.current.find((item) => item.key === task.key),
      });
      return;
    }
    try {
      const totalSize = files.reduce((acc, file) => acc + file.size, 0);
      await uploadFile({ task, files, totalSize });
    } catch (err) {
      console.error(err);
      message.error("文件上传失败,请稍后重试");
      removeTask({
        ...task,
        isCancel: true,
        ...taskListRef.current.find((item) => item.key === task.key),
      });
    }
  };
  return {
    taskList,
    createTask,
    removeTask,
    handleUpload,
  };
}

View File

@@ -0,0 +1,302 @@
import { useState, useEffect } from "react";
import { Card, Button, Table, message, Modal, Tag, Progress, Space, Tooltip } from "antd";
import {
PlusOutlined,
DeleteOutlined,
DownloadOutlined,
ReloadOutlined,
EyeOutlined,
} from "@ant-design/icons";
import type { ColumnType } from "antd/es/table";
import type { AutoAnnotationTask, AutoAnnotationStatus } from "../annotation.model";
import {
queryAutoAnnotationTasksUsingGet,
deleteAutoAnnotationTaskByIdUsingDelete,
downloadAutoAnnotationResultUsingGet,
} from "../annotation.api";
import CreateAutoAnnotationDialog from "./components/CreateAutoAnnotationDialog";
// Maps a task status to an Ant Design Tag color preset.
const STATUS_COLORS: Record<AutoAnnotationStatus, string> = {
  pending: "default",
  running: "processing",
  completed: "success",
  failed: "error",
  cancelled: "default",
};
// Chinese display labels for each task status.
const STATUS_LABELS: Record<AutoAnnotationStatus, string> = {
  pending: "等待中",
  running: "处理中",
  completed: "已完成",
  failed: "失败",
  cancelled: "已取消",
};
// Display labels for YOLOv8 model-size codes (n/s/m/l/x).
const MODEL_SIZE_LABELS: Record<string, string> = {
  n: "YOLOv8n (最快)",
  s: "YOLOv8s",
  m: "YOLOv8m",
  l: "YOLOv8l (推荐)",
  x: "YOLOv8x (最精确)",
};
/**
 * Auto-annotation task list page.
 *
 * Lists YOLO auto-labeling tasks, polls progress every 3 seconds, and offers
 * create / delete / download / view-result actions per task.
 */
export default function AutoAnnotation() {
  const [loading, setLoading] = useState(false);
  const [tasks, setTasks] = useState<AutoAnnotationTask[]>([]);
  const [showCreateDialog, setShowCreateDialog] = useState(false);
  const [selectedRowKeys, setSelectedRowKeys] = useState<string[]>([]);
  useEffect(() => {
    fetchTasks();
    // Poll silently so running tasks' progress refreshes without a spinner.
    const interval = setInterval(() => {
      fetchTasks(true);
    }, 3000);
    return () => clearInterval(interval);
  }, []);
  // Fetch the task list; `silent` suppresses the spinner and error toast.
  const fetchTasks = async (silent = false) => {
    if (!silent) setLoading(true);
    try {
      const response = await queryAutoAnnotationTasksUsingGet();
      setTasks(response.data || response || []);
    } catch (error) {
      console.error("Failed to fetch auto annotation tasks:", error);
      if (!silent) message.error("获取任务列表失败");
    } finally {
      if (!silent) setLoading(false);
    }
  };
  // Confirm, then delete a task (generated annotations are kept server-side).
  const handleDelete = (task: AutoAnnotationTask) => {
    Modal.confirm({
      title: `确认删除自动标注任务「${task.name}」吗?`,
      content: "删除任务后,已生成的标注结果不会被删除。",
      okText: "删除",
      okType: "danger",
      cancelText: "取消",
      onOk: async () => {
        try {
          await deleteAutoAnnotationTaskByIdUsingDelete(task.id);
          message.success("任务删除成功");
          fetchTasks();
          setSelectedRowKeys((keys) => keys.filter((k) => k !== task.id));
        } catch (error) {
          console.error(error);
          message.error("删除失败,请稍后重试");
        }
      },
    });
  };
  // Start a result download, showing a blocking loading toast meanwhile.
  const handleDownload = async (task: AutoAnnotationTask) => {
    try {
      message.loading("正在准备下载...", 0);
      await downloadAutoAnnotationResultUsingGet(task.id);
      message.destroy();
      message.success("下载已开始");
    } catch (error) {
      console.error(error);
      message.destroy();
      message.error("下载失败");
    }
  };
  // Show the output path and counters of a completed task in an info modal.
  const handleViewResult = (task: AutoAnnotationTask) => {
    if (task.outputPath) {
      Modal.info({
        title: "标注结果路径",
        content: (
          <div>
            <p>{task.outputPath}</p>
            <p>{task.detectedObjects}</p>
            <p>
              {task.processedImages} / {task.totalImages}
            </p>
          </div>
        ),
      });
    }
  };
  const columns: ColumnType<AutoAnnotationTask>[] = [
    { title: "任务名称", dataIndex: "name", key: "name", width: 200 },
    {
      title: "数据集",
      dataIndex: "datasetName",
      key: "datasetName",
      width: 220,
      render: (_: any, record: AutoAnnotationTask) => {
        // Prefer the multi-dataset list; fall back to the single dataset name.
        const list =
          record.sourceDatasets && record.sourceDatasets.length > 0
            ? record.sourceDatasets
            : record.datasetName
            ? [record.datasetName]
            : [];
        if (list.length === 0) return "-";
        const text = list.join(",");
        return (
          <Tooltip title={text}>
            <span>{text}</span>
          </Tooltip>
        );
      },
    },
    {
      title: "模型",
      dataIndex: ["config", "modelSize"],
      key: "modelSize",
      width: 120,
      render: (size: string) => MODEL_SIZE_LABELS[size] || size,
    },
    {
      title: "置信度",
      dataIndex: ["config", "confThreshold"],
      key: "confThreshold",
      width: 100,
      // Defensive: config may be missing on malformed/legacy rows.
      render: (threshold: number) => `${((threshold ?? 0) * 100).toFixed(0)}%`,
    },
    {
      title: "目标类别",
      dataIndex: ["config", "targetClasses"],
      key: "targetClasses",
      width: 120,
      render: (classes: number[]) => {
        // Defensive: targetClasses may be absent; an empty list means "all classes".
        const list = classes ?? [];
        return (
          <Tooltip title={list.length > 0 ? list.join(", ") : "全部类别"}>
            <span>
              {list.length > 0 ? `${list.length} 个类别` : "全部类别"}
            </span>
          </Tooltip>
        );
      },
    },
    {
      title: "状态",
      dataIndex: "status",
      key: "status",
      width: 100,
      render: (status: AutoAnnotationStatus) => (
        <Tag color={STATUS_COLORS[status]}>{STATUS_LABELS[status]}</Tag>
      ),
    },
    {
      title: "进度",
      dataIndex: "progress",
      key: "progress",
      width: 150,
      render: (progress: number, record: AutoAnnotationTask) => (
        <div>
          <Progress percent={progress} size="small" />
          <div style={{ fontSize: "12px", color: "#999" }}>
            {record.processedImages} / {record.totalImages}
          </div>
        </div>
      ),
    },
    {
      title: "检测对象数",
      dataIndex: "detectedObjects",
      key: "detectedObjects",
      width: 100,
      // Defensive: the count can be undefined before the first progress update;
      // `undefined.toLocaleString()` would crash the whole table render.
      render: (count: number) => (count ?? 0).toLocaleString(),
    },
    {
      title: "创建时间",
      dataIndex: "createdAt",
      key: "createdAt",
      width: 150,
      render: (time: string) => new Date(time).toLocaleString(),
    },
    {
      title: "操作",
      key: "actions",
      width: 180,
      fixed: "right",
      render: (_: any, record: AutoAnnotationTask) => (
        <Space size="small">
          {record.status === "completed" && (
            <>
              <Tooltip title="查看结果">
                <Button
                  type="link"
                  size="small"
                  icon={<EyeOutlined />}
                  onClick={() => handleViewResult(record)}
                />
              </Tooltip>
              <Tooltip title="下载结果">
                <Button
                  type="link"
                  size="small"
                  icon={<DownloadOutlined />}
                  onClick={() => handleDownload(record)}
                />
              </Tooltip>
            </>
          )}
          <Tooltip title="删除">
            <Button
              type="link"
              size="small"
              danger
              icon={<DeleteOutlined />}
              onClick={() => handleDelete(record)}
            />
          </Tooltip>
        </Space>
      ),
    },
  ];
  return (
    <div>
      <Card
        title="自动标注任务"
        extra={
          <Space>
            <Button
              type="primary"
              icon={<PlusOutlined />}
              onClick={() => setShowCreateDialog(true)}
            >
            </Button>
            <Button
              icon={<ReloadOutlined />}
              loading={loading}
              onClick={() => fetchTasks()}
            >
            </Button>
          </Space>
        }
      >
        <Table
          rowKey="id"
          loading={loading}
          columns={columns}
          dataSource={tasks}
          rowSelection={{
            selectedRowKeys,
            onChange: (keys) => setSelectedRowKeys(keys as string[]),
          }}
          pagination={{ pageSize: 10 }}
          scroll={{ x: 1000 }}
        />
      </Card>
      <CreateAutoAnnotationDialog
        visible={showCreateDialog}
        onCancel={() => setShowCreateDialog(false)}
        onSuccess={() => {
          setShowCreateDialog(false);
          fetchTasks();
        }}
      />
    </div>
  );
}

View File

@@ -0,0 +1,286 @@
import { useState, useEffect } from "react";
import { Modal, Form, Input, Select, Slider, message, Checkbox } from "antd";
import { createAutoAnnotationTaskUsingPost } from "../../annotation.api";
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
import { mapDataset } from "@/pages/DataManagement/dataset.const";
import { DatasetType, type DatasetFile, type Dataset } from "@/pages/DataManagement/dataset.model";
import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
const { Option } = Select;
/**
 * Props for the auto-annotation task creation modal.
 */
interface CreateAutoAnnotationDialogProps {
  // Whether the modal is shown; opening resets the form to defaults.
  visible: boolean;
  // Called when the user dismisses the modal without creating a task.
  onCancel: () => void;
  // Called after a task was created successfully.
  onSuccess: () => void;
}
// The 80 COCO detection classes: YOLO class id, English name, Chinese label.
const COCO_CLASSES = [
  { id: 0, name: "person", label: "人" },
  { id: 1, name: "bicycle", label: "自行车" },
  { id: 2, name: "car", label: "汽车" },
  { id: 3, name: "motorcycle", label: "摩托车" },
  { id: 4, name: "airplane", label: "飞机" },
  { id: 5, name: "bus", label: "公交车" },
  { id: 6, name: "train", label: "火车" },
  { id: 7, name: "truck", label: "卡车" },
  { id: 8, name: "boat", label: "船" },
  { id: 9, name: "traffic light", label: "红绿灯" },
  { id: 10, name: "fire hydrant", label: "消防栓" },
  { id: 11, name: "stop sign", label: "停止标志" },
  { id: 12, name: "parking meter", label: "停车计时器" },
  { id: 13, name: "bench", label: "长椅" },
  { id: 14, name: "bird", label: "鸟" },
  { id: 15, name: "cat", label: "猫" },
  { id: 16, name: "dog", label: "狗" },
  { id: 17, name: "horse", label: "马" },
  { id: 18, name: "sheep", label: "羊" },
  { id: 19, name: "cow", label: "牛" },
  { id: 20, name: "elephant", label: "大象" },
  { id: 21, name: "bear", label: "熊" },
  { id: 22, name: "zebra", label: "斑马" },
  { id: 23, name: "giraffe", label: "长颈鹿" },
  { id: 24, name: "backpack", label: "背包" },
  { id: 25, name: "umbrella", label: "雨伞" },
  { id: 26, name: "handbag", label: "手提包" },
  { id: 27, name: "tie", label: "领带" },
  { id: 28, name: "suitcase", label: "行李箱" },
  { id: 29, name: "frisbee", label: "飞盘" },
  { id: 30, name: "skis", label: "滑雪板" },
  { id: 31, name: "snowboard", label: "滑雪板" },
  { id: 32, name: "sports ball", label: "球类" },
  { id: 33, name: "kite", label: "风筝" },
  { id: 34, name: "baseball bat", label: "棒球棒" },
  { id: 35, name: "baseball glove", label: "棒球手套" },
  { id: 36, name: "skateboard", label: "滑板" },
  { id: 37, name: "surfboard", label: "冲浪板" },
  { id: 38, name: "tennis racket", label: "网球拍" },
  { id: 39, name: "bottle", label: "瓶子" },
  { id: 40, name: "wine glass", label: "酒杯" },
  { id: 41, name: "cup", label: "杯子" },
  { id: 42, name: "fork", label: "叉子" },
  { id: 43, name: "knife", label: "刀" },
  { id: 44, name: "spoon", label: "勺子" },
  { id: 45, name: "bowl", label: "碗" },
  { id: 46, name: "banana", label: "香蕉" },
  { id: 47, name: "apple", label: "苹果" },
  { id: 48, name: "sandwich", label: "三明治" },
  { id: 49, name: "orange", label: "橙子" },
  { id: 50, name: "broccoli", label: "西兰花" },
  { id: 51, name: "carrot", label: "胡萝卜" },
  { id: 52, name: "hot dog", label: "热狗" },
  { id: 53, name: "pizza", label: "披萨" },
  { id: 54, name: "donut", label: "甜甜圈" },
  { id: 55, name: "cake", label: "蛋糕" },
  { id: 56, name: "chair", label: "椅子" },
  { id: 57, name: "couch", label: "沙发" },
  { id: 58, name: "potted plant", label: "盆栽" },
  { id: 59, name: "bed", label: "床" },
  { id: 60, name: "dining table", label: "餐桌" },
  { id: 61, name: "toilet", label: "马桶" },
  { id: 62, name: "tv", label: "电视" },
  { id: 63, name: "laptop", label: "笔记本电脑" },
  { id: 64, name: "mouse", label: "鼠标" },
  { id: 65, name: "remote", label: "遥控器" },
  { id: 66, name: "keyboard", label: "键盘" },
  { id: 67, name: "cell phone", label: "手机" },
  { id: 68, name: "microwave", label: "微波炉" },
  { id: 69, name: "oven", label: "烤箱" },
  { id: 70, name: "toaster", label: "烤面包机" },
  { id: 71, name: "sink", label: "水槽" },
  { id: 72, name: "refrigerator", label: "冰箱" },
  { id: 73, name: "book", label: "书" },
  { id: 74, name: "clock", label: "钟表" },
  { id: 75, name: "vase", label: "花瓶" },
  { id: 76, name: "scissors", label: "剪刀" },
  { id: 77, name: "teddy bear", label: "玩具熊" },
  { id: 78, name: "hair drier", label: "吹风机" },
  { id: 79, name: "toothbrush", label: "牙刷" },
];
// Image extensions eligible for auto-annotation (lower-case, dot-prefixed).
const IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];

// True when the file name carries one of the known image extensions.
// (Extracted: this predicate was previously duplicated inline in two places.)
const isImageFile = (file: DatasetFile): boolean => {
  const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
  return IMAGE_EXTENSIONS.includes(ext);
};

/**
 * Modal for creating a YOLO auto-annotation task: pick a dataset and image
 * files, choose model size / confidence threshold / target classes, submit.
 */
export default function CreateAutoAnnotationDialog({
  visible,
  onCancel,
  onSuccess,
}: CreateAutoAnnotationDialogProps) {
  const [form] = Form.useForm();
  const [loading, setLoading] = useState(false);
  // NOTE(review): `datasets` is fetched but never rendered —
  // DatasetFileTransfer loads its own dataset list; consider removing this
  // state and the fetch below.
  const [datasets, setDatasets] = useState<any[]>([]);
  const [selectAllClasses, setSelectAllClasses] = useState(true);
  const [selectedFilesMap, setSelectedFilesMap] = useState<Record<string, DatasetFile>>({});
  const [selectedDataset, setSelectedDataset] = useState<Dataset | null>(null);
  const [imageFileCount, setImageFileCount] = useState(0);
  useEffect(() => {
    // Re-initialize the form with defaults every time the modal opens.
    if (visible) {
      fetchDatasets();
      form.resetFields();
      form.setFieldsValue({
        modelSize: "l",
        confThreshold: 0.7,
        targetClasses: [],
      });
    }
  }, [visible, form]);
  // Load image-type datasets (currently unused in the UI; see NOTE above).
  const fetchDatasets = async () => {
    try {
      const { data } = await queryDatasetsUsingGet({
        page: 0,
        // NOTE(review): other callers of this API pass `size` — confirm the
        // endpoint actually accepts `pageSize`.
        pageSize: 1000,
      });
      const imageDatasets = (data.content || [])
        .map(mapDataset)
        .filter((ds: any) => ds.datasetType === DatasetType.IMAGE);
      setDatasets(imageDatasets);
    } catch (error) {
      console.error("Failed to fetch datasets:", error);
      message.error("获取数据集列表失败");
    }
  };
  useEffect(() => {
    // Keep the image-file counter in sync with the current selection.
    const count = Object.values(selectedFilesMap).filter(isImageFile).length;
    setImageFileCount(count);
  }, [selectedFilesMap]);
  // Validate the form, collect the selected image file ids, create the task.
  const handleSubmit = async () => {
    try {
      const values = await form.validateFields();
      if (imageFileCount === 0) {
        message.error("请至少选择一个图像文件");
        return;
      }
      setLoading(true);
      const imageFileIds = Object.values(selectedFilesMap)
        .filter(isImageFile)
        .map((file) => file.id);
      const payload = {
        name: values.name,
        datasetId: values.datasetId,
        fileIds: imageFileIds,
        config: {
          modelSize: values.modelSize,
          confThreshold: values.confThreshold,
          // An empty list means "detect all classes".
          targetClasses: selectAllClasses ? [] : values.targetClasses || [],
          outputDatasetName: values.outputDatasetName || undefined,
        },
      };
      await createAutoAnnotationTaskUsingPost(payload);
      message.success("自动标注任务创建成功");
      onSuccess();
    } catch (error: any) {
      // Form validation errors are already surfaced inline by antd.
      if (error.errorFields) return;
      console.error("Failed to create auto annotation task:", error);
      message.error(error.message || "创建任务失败");
    } finally {
      setLoading(false);
    }
  };
  // Toggle between "all classes" and an explicit class selection.
  const handleClassSelectionChange = (checked: boolean) => {
    setSelectAllClasses(checked);
    if (checked) {
      form.setFieldsValue({ targetClasses: [] });
    }
  };
  return (
    <Modal
      title="创建自动标注任务"
      open={visible}
      onCancel={onCancel}
      onOk={handleSubmit}
      confirmLoading={loading}
      width={600}
      destroyOnClose
    >
      <Form form={form} layout="vertical" preserve={false}>
        <Form.Item
          name="name"
          label="任务名称"
          rules={[
            { required: true, message: "请输入任务名称" },
            { max: 100, message: "任务名称不能超过100个字符" },
          ]}
        >
          <Input placeholder="请输入任务名称" />
        </Form.Item>
        <Form.Item label="选择数据集和图像文件" required>
          <DatasetFileTransfer
            open
            selectedFilesMap={selectedFilesMap}
            onSelectedFilesChange={setSelectedFilesMap}
            onDatasetSelect={(dataset) => {
              setSelectedDataset(dataset);
              form.setFieldsValue({ datasetId: dataset?.id ?? "" });
            }}
            datasetTypeFilter={DatasetType.IMAGE}
          />
          {selectedDataset && (
            <div className="mt-2 p-2 bg-blue-50 rounded border border-blue-200 text-xs">
              <span className="font-medium">{selectedDataset.name}</span> -
              <span className="font-medium text-blue-600"> {imageFileCount} </span>
            </div>
          )}
        </Form.Item>
        <Form.Item hidden name="datasetId" rules={[{ required: true, message: "请选择数据集" }]}>
          <Input type="hidden" />
        </Form.Item>
        <Form.Item name="modelSize" label="模型规模" rules={[{ required: true, message: "请选择模型规模" }]}>
          <Select>
            <Option value="n">YOLOv8n ()</Option>
            <Option value="s">YOLOv8s</Option>
            <Option value="m">YOLOv8m</Option>
            <Option value="l">YOLOv8l ()</Option>
            <Option value="x">YOLOv8x ()</Option>
          </Select>
        </Form.Item>
        <Form.Item
          name="confThreshold"
          label="置信度阈值"
          rules={[{ required: true, message: "请选择置信度阈值" }]}
        >
          <Slider min={0.1} max={0.9} step={0.05} tooltip={{ formatter: (v) => `${(v || 0) * 100}%` }} />
        </Form.Item>
        <Form.Item label="目标类别">
          <Checkbox checked={selectAllClasses} onChange={(e) => handleClassSelectionChange(e.target.checked)}>
          </Checkbox>
          {!selectAllClasses && (
            <Form.Item name="targetClasses" noStyle>
              <Select mode="multiple" placeholder="选择目标类别" style={{ marginTop: 8 }}>
                {COCO_CLASSES.map((cls) => (
                  <Option key={cls.id} value={cls.id}>
                    {cls.label} ({cls.name})
                  </Option>
                ))}
              </Select>
            </Form.Item>
          )}
        </Form.Item>
        <Form.Item name="outputDatasetName" label="输出数据集名称 (可选)">
          <Input placeholder="留空则将结果写入原数据集的标签中" />
        </Form.Item>
      </Form>
    </Modal>
  );
}

View File

@@ -0,0 +1 @@
// Barrel file: re-exports the AutoAnnotation page component as this directory's default export.
export { default } from "./AutoAnnotation";

View File

@@ -1,192 +1,489 @@
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
import { mapDataset } from "@/pages/DataManagement/dataset.const";
import { Button, Form, Input, Modal, Select, message } from "antd";
import TextArea from "antd/es/input/TextArea";
import { useEffect, useState } from "react";
import { createAnnotationTaskUsingPost, queryAnnotationTemplatesUsingGet } from "../../annotation.api";
import { Dataset } from "@/pages/DataManagement/dataset.model";
import type { AnnotationTemplate } from "../../annotation.model";
/**
 * Modal dialog for creating a manual annotation task.
 *
 * Props:
 * - open:      controls modal visibility; datasets/templates are re-fetched each time it opens
 * - onClose:   called when the modal is dismissed or after a successful create
 * - onRefresh: called after a successful create so the parent can reload its task list
 */
export default function CreateAnnotationTask({
  open,
  onClose,
  onRefresh,
}: {
  open: boolean;
  onClose: () => void;
  onRefresh: () => void;
}) {
  const [form] = Form.useForm();
  // Options for the dataset and template selects.
  const [datasets, setDatasets] = useState<Dataset[]>([]);
  const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
  const [submitting, setSubmitting] = useState(false);
  // Once the user types into the name field we stop auto-filling it from the dataset name.
  const [nameManuallyEdited, setNameManuallyEdited] = useState(false);
  // Load datasets and templates whenever the modal is opened.
  useEffect(() => {
    if (!open) return;
    const fetchData = async () => {
      try {
        // Fetch datasets
        const { data: datasetData } = await queryDatasetsUsingGet({
          page: 0,
          pageSize: 1000, // Use camelCase for HTTP params
        });
        setDatasets(datasetData.content.map(mapDataset) || []);
        // Fetch templates
        const templateResponse = await queryAnnotationTemplatesUsingGet({
          page: 1,
          size: 100, // Backend max is 100 (template API uses 'size' not 'pageSize')
        });
        // The API returns: {code, message, data: {content, total, page, ...}}
        if (templateResponse.code === 200 && templateResponse.data) {
          const fetchedTemplates = templateResponse.data.content || [];
          console.log("Fetched templates:", fetchedTemplates);
          setTemplates(fetchedTemplates);
        } else {
          console.error("Failed to fetch templates:", templateResponse);
          setTemplates([]);
        }
      } catch (error) {
        console.error("Error fetching data:", error);
        setTemplates([]);
      }
    };
    fetchData();
  }, [open]);
  // Reset form and manual-edit flag when modal opens
  useEffect(() => {
    if (open) {
      form.resetFields();
      setNameManuallyEdited(false);
    }
  }, [open, form]);
  // Validate, create the task, then close the modal and refresh the parent list.
  const handleSubmit = async () => {
    try {
      const values = await form.validateFields();
      setSubmitting(true);
      // Send templateId instead of labelingConfig
      const requestData = {
        name: values.name,
        description: values.description,
        datasetId: values.datasetId,
        templateId: values.templateId,
      };
      await createAnnotationTaskUsingPost(requestData);
      message?.success?.("创建标注任务成功");
      onClose();
      onRefresh();
    } catch (err: any) {
      console.error("Create annotation task failed", err);
      const msg = err?.message || err?.data?.message || "创建失败,请稍后重试";
      (message as any)?.error?.(msg);
    } finally {
      setSubmitting(false);
    }
  };
  return (
    <Modal
      open={open}
      onCancel={onClose}
      title="创建标注任务"
      footer={
        <>
          <Button onClick={onClose} disabled={submitting}>
          </Button>
          <Button type="primary" onClick={handleSubmit} loading={submitting}>
          </Button>
        </>
      }
      width={800}
    >
      <Form form={form} layout="vertical">
        {/* Dataset and project-name fields rendered side by side (dataset on the left) */}
        <div className="grid grid-cols-2 gap-4">
          <Form.Item
            label="数据集"
            name="datasetId"
            rules={[{ required: true, message: "请选择数据集" }]}
          >
            <Select
              placeholder="请选择数据集"
              options={datasets.map((dataset) => {
                return {
                  label: (
                    <div className="flex items-center justify-between gap-3 py-2">
                      <div className="flex items-center font-sm text-gray-900">
                        <span className="mr-2">{(dataset as any).icon}</span>
                        <span>{dataset.name}</span>
                      </div>
                      <div className="text-xs text-gray-500">{dataset.size}</div>
                    </div>
                  ),
                  value: dataset.id,
                };
              })}
              onChange={(value) => {
                // If the user has not manually edited the name, default it to the dataset name.
                if (!nameManuallyEdited) {
                  const ds = datasets.find((d) => d.id === value);
                  if (ds) {
                    form.setFieldsValue({ name: ds.name });
                  }
                }
              }}
            />
          </Form.Item>
          <Form.Item
            label="标注工程名称"
            name="name"
            rules={[{ required: true, message: "请输入任务名称" }]}
          >
            <Input
              placeholder="输入标注工程名称"
              onChange={() => setNameManuallyEdited(true)}
            />
          </Form.Item>
        </div>
        {/* Description is optional */}
        <Form.Item label="描述" name="description">
          <TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={3} />
        </Form.Item>
        {/* Annotation template selection */}
        <Form.Item
          label="标注模板"
          name="templateId"
          rules={[{ required: true, message: "请选择标注模板" }]}
        >
          <Select
            placeholder={templates.length === 0 ? "暂无可用模板,请先创建模板" : "请选择标注模板"}
            showSearch
            optionFilterProp="label"
            notFoundContent={templates.length === 0 ? "暂无模板,请前往「标注模板」页面创建" : "未找到匹配的模板"}
            options={templates.map((template) => ({
              label: template.name,
              value: template.id,
              // Add description as subtitle
              title: template.description,
            }))}
            optionRender={(option) => (
              <div>
                <div style={{ fontWeight: 500 }}>{option.label}</div>
                {option.data.title && (
                  <div style={{ fontSize: 12, color: '#999', marginTop: 2 }}>
                    {option.data.title}
                  </div>
                )}
              </div>
            )}
          />
        </Form.Item>
      </Form>
    </Modal>
  );
}
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
import { mapDataset } from "@/pages/DataManagement/dataset.const";
import { Button, Form, Input, Modal, Select, message, Tabs, Slider, Checkbox } from "antd";
import TextArea from "antd/es/input/TextArea";
import { useEffect, useState } from "react";
import {
createAnnotationTaskUsingPost,
queryAnnotationTemplatesUsingGet,
createAutoAnnotationTaskUsingPost,
} from "../../annotation.api";
import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
import { DatasetType, type Dataset, type DatasetFile } from "@/pages/DataManagement/dataset.model";
import type { AnnotationTemplate } from "../../annotation.model";
const { Option } = Select;
/**
 * The 80 COCO detection classes supported by the YOLOv8 auto-annotation backend.
 * `id` is the COCO class index sent to the server, `name` the English class
 * name, and `label` the Chinese display label shown in the class picker.
 */
const COCO_CLASSES = [
  { id: 0, name: "person", label: "人" },
  { id: 1, name: "bicycle", label: "自行车" },
  { id: 2, name: "car", label: "汽车" },
  { id: 3, name: "motorcycle", label: "摩托车" },
  { id: 4, name: "airplane", label: "飞机" },
  { id: 5, name: "bus", label: "公交车" },
  { id: 6, name: "train", label: "火车" },
  { id: 7, name: "truck", label: "卡车" },
  { id: 8, name: "boat", label: "船" },
  { id: 9, name: "traffic light", label: "红绿灯" },
  { id: 10, name: "fire hydrant", label: "消防栓" },
  { id: 11, name: "stop sign", label: "停止标志" },
  { id: 12, name: "parking meter", label: "停车计时器" },
  { id: 13, name: "bench", label: "长椅" },
  { id: 14, name: "bird", label: "鸟" },
  { id: 15, name: "cat", label: "猫" },
  { id: 16, name: "dog", label: "狗" },
  { id: 17, name: "horse", label: "马" },
  { id: 18, name: "sheep", label: "羊" },
  { id: 19, name: "cow", label: "牛" },
  { id: 20, name: "elephant", label: "大象" },
  { id: 21, name: "bear", label: "熊" },
  { id: 22, name: "zebra", label: "斑马" },
  { id: 23, name: "giraffe", label: "长颈鹿" },
  { id: 24, name: "backpack", label: "背包" },
  { id: 25, name: "umbrella", label: "雨伞" },
  { id: 26, name: "handbag", label: "手提包" },
  { id: 27, name: "tie", label: "领带" },
  { id: 28, name: "suitcase", label: "行李箱" },
  { id: 29, name: "frisbee", label: "飞盘" },
  { id: 30, name: "skis", label: "滑雪板" },
  { id: 31, name: "snowboard", label: "滑雪板" },
  { id: 32, name: "sports ball", label: "球类" },
  { id: 33, name: "kite", label: "风筝" },
  { id: 34, name: "baseball bat", label: "棒球棒" },
  { id: 35, name: "baseball glove", label: "棒球手套" },
  { id: 36, name: "skateboard", label: "滑板" },
  { id: 37, name: "surfboard", label: "冲浪板" },
  { id: 38, name: "tennis racket", label: "网球拍" },
  { id: 39, name: "bottle", label: "瓶子" },
  { id: 40, name: "wine glass", label: "酒杯" },
  { id: 41, name: "cup", label: "杯子" },
  { id: 42, name: "fork", label: "叉子" },
  { id: 43, name: "knife", label: "刀" },
  { id: 44, name: "spoon", label: "勺子" },
  { id: 45, name: "bowl", label: "碗" },
  { id: 46, name: "banana", label: "香蕉" },
  { id: 47, name: "apple", label: "苹果" },
  { id: 48, name: "sandwich", label: "三明治" },
  { id: 49, name: "orange", label: "橙子" },
  { id: 50, name: "broccoli", label: "西兰花" },
  { id: 51, name: "carrot", label: "胡萝卜" },
  { id: 52, name: "hot dog", label: "热狗" },
  { id: 53, name: "pizza", label: "披萨" },
  { id: 54, name: "donut", label: "甜甜圈" },
  { id: 55, name: "cake", label: "蛋糕" },
  { id: 56, name: "chair", label: "椅子" },
  { id: 57, name: "couch", label: "沙发" },
  { id: 58, name: "potted plant", label: "盆栽" },
  { id: 59, name: "bed", label: "床" },
  { id: 60, name: "dining table", label: "餐桌" },
  { id: 61, name: "toilet", label: "马桶" },
  { id: 62, name: "tv", label: "电视" },
  { id: 63, name: "laptop", label: "笔记本电脑" },
  { id: 64, name: "mouse", label: "鼠标" },
  { id: 65, name: "remote", label: "遥控器" },
  { id: 66, name: "keyboard", label: "键盘" },
  { id: 67, name: "cell phone", label: "手机" },
  { id: 68, name: "microwave", label: "微波炉" },
  { id: 69, name: "oven", label: "烤箱" },
  { id: 70, name: "toaster", label: "烤面包机" },
  { id: 71, name: "sink", label: "水槽" },
  { id: 72, name: "refrigerator", label: "冰箱" },
  { id: 73, name: "book", label: "书" },
  { id: 74, name: "clock", label: "钟表" },
  { id: 75, name: "vase", label: "花瓶" },
  { id: 76, name: "scissors", label: "剪刀" },
  { id: 77, name: "teddy bear", label: "玩具熊" },
  { id: 78, name: "hair drier", label: "吹风机" },
  { id: 79, name: "toothbrush", label: "牙刷" },
];
/**
 * Modal dialog for creating an annotation task, with two tabs:
 * - "manual": creates a Label Studio project (dataset + template based)
 * - "auto":   creates a YOLOv8 auto-annotation task over selected image files
 *
 * Props:
 * - open:      controls modal visibility; all state is reset each time it opens
 * - onClose:   called on dismiss or after a successful create
 * - onRefresh: called after a successful create; for auto tasks it is invoked
 *              with the string "auto" (cast through `any`) so the parent can
 *              refresh the auto-annotation list specifically
 */
export default function CreateAnnotationTask({
  open,
  onClose,
  onRefresh,
}: {
  open: boolean;
  onClose: () => void;
  onRefresh: () => void;
}) {
  // Separate form instances for the two tabs so validation stays independent.
  const [manualForm] = Form.useForm();
  const [autoForm] = Form.useForm();
  const [datasets, setDatasets] = useState<Dataset[]>([]);
  const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
  const [submitting, setSubmitting] = useState(false);
  // Once the user types into the manual name field we stop auto-filling it.
  const [nameManuallyEdited, setNameManuallyEdited] = useState(false);
  const [activeMode, setActiveMode] = useState<"manual" | "auto">("manual");
  // When true, the auto task targets all COCO classes (empty targetClasses list).
  const [selectAllClasses, setSelectAllClasses] = useState(true);
  // File-id -> file map maintained by DatasetFileTransfer for the auto tab.
  const [selectedFilesMap, setSelectedFilesMap] = useState<Record<string, DatasetFile>>({});
  const [selectedDataset, setSelectedDataset] = useState<Dataset | null>(null);
  const [imageFileCount, setImageFileCount] = useState(0);
  // Load datasets and templates whenever the modal is opened.
  useEffect(() => {
    if (!open) return;
    const fetchData = async () => {
      try {
        // Fetch datasets
        const { data: datasetData } = await queryDatasetsUsingGet({
          page: 0,
          pageSize: 1000, // Use camelCase for HTTP params
        });
        setDatasets(datasetData.content.map(mapDataset) || []);
        // Fetch templates
        const templateResponse = await queryAnnotationTemplatesUsingGet({
          page: 1,
          size: 100, // Backend max is 100 (template API uses 'size' not 'pageSize')
        });
        // The API returns: {code, message, data: {content, total, page, ...}}
        if (templateResponse.code === 200 && templateResponse.data) {
          const fetchedTemplates = templateResponse.data.content || [];
          console.log("Fetched templates:", fetchedTemplates);
          setTemplates(fetchedTemplates);
        } else {
          console.error("Failed to fetch templates:", templateResponse);
          setTemplates([]);
        }
      } catch (error) {
        console.error("Error fetching data:", error);
        setTemplates([]);
      }
    };
    fetchData();
  }, [open]);
  // Reset form and manual-edit flag when modal opens
  useEffect(() => {
    if (open) {
      manualForm.resetFields();
      autoForm.resetFields();
      setNameManuallyEdited(false);
      setActiveMode("manual");
      setSelectAllClasses(true);
      setSelectedFilesMap({});
      setSelectedDataset(null);
      setImageFileCount(0);
    }
  }, [open, manualForm, autoForm]);
  // Recount image files (by extension) whenever the selection changes.
  useEffect(() => {
    const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
    const count = Object.values(selectedFilesMap).filter((file) => {
      const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
      return imageExtensions.includes(ext);
    }).length;
    setImageFileCount(count);
  }, [selectedFilesMap]);
  // Submit handler for the "manual" tab: creates a Label Studio project.
  const handleManualSubmit = async () => {
    try {
      const values = await manualForm.validateFields();
      setSubmitting(true);
      // Send templateId instead of labelingConfig
      const requestData = {
        name: values.name,
        description: values.description,
        datasetId: values.datasetId,
        templateId: values.templateId,
      };
      await createAnnotationTaskUsingPost(requestData);
      message?.success?.("创建标注任务成功");
      onClose();
      onRefresh();
    } catch (err: any) {
      console.error("Create annotation task failed", err);
      const msg = err?.message || err?.data?.message || "创建失败,请稍后重试";
      (message as any)?.error?.(msg);
    } finally {
      setSubmitting(false);
    }
  };
  // Submit handler for the "auto" tab: creates a YOLO auto-annotation task
  // over the selected image files only.
  const handleAutoSubmit = async () => {
    try {
      const values = await autoForm.validateFields();
      if (imageFileCount === 0) {
        message.error("请至少选择一个图像文件");
        return;
      }
      setSubmitting(true);
      // Same extension whitelist as the counting effect above.
      const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
      const imageFileIds = Object.values(selectedFilesMap)
        .filter((file) => {
          const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
          return imageExtensions.includes(ext);
        })
        .map((file) => file.id);
      const payload = {
        name: values.name,
        datasetId: values.datasetId,
        fileIds: imageFileIds,
        config: {
          modelSize: values.modelSize,
          confThreshold: values.confThreshold,
          // Empty array means "all classes" on the backend.
          targetClasses: selectAllClasses ? [] : values.targetClasses || [],
          outputDatasetName: values.outputDatasetName || undefined,
        },
      };
      await createAutoAnnotationTaskUsingPost(payload);
      message.success("自动标注任务创建成功");
      // Ask the parent to refresh the auto-annotation task list.
      (onRefresh as any)?.("auto");
      onClose();
    } catch (error: any) {
      // Form validation errors are already surfaced inline by antd.
      if (error.errorFields) return;
      console.error("Failed to create auto annotation task:", error);
      message.error(error.message || "创建自动标注任务失败");
    } finally {
      setSubmitting(false);
    }
  };
  // Toggling "all classes" clears any previously picked target classes.
  const handleClassSelectionChange = (checked: boolean) => {
    setSelectAllClasses(checked);
    if (checked) {
      autoForm.setFieldsValue({ targetClasses: [] });
    }
  };
  return (
    <Modal
      open={open}
      onCancel={onClose}
      title="创建标注任务"
      footer={
        <>
          <Button onClick={onClose} disabled={submitting}>
          </Button>
          <Button
            type="primary"
            onClick={activeMode === "manual" ? handleManualSubmit : handleAutoSubmit}
            loading={submitting}
          >
          </Button>
        </>
      }
      width={800}
    >
      <Tabs
        activeKey={activeMode}
        onChange={(key) => setActiveMode(key as "manual" | "auto")}
        items={[
          {
            key: "manual",
            label: "手动标注",
            children: (
              <Form form={manualForm} layout="vertical">
                {/* Dataset and project-name fields rendered side by side (dataset on the left) */}
                <div className="grid grid-cols-2 gap-4">
                  <Form.Item
                    label="数据集"
                    name="datasetId"
                    rules={[{ required: true, message: "请选择数据集" }]}
                  >
                    <Select
                      placeholder="请选择数据集"
                      options={datasets.map((dataset) => {
                        return {
                          label: (
                            <div className="flex items-center justify-between gap-3 py-2">
                              <div className="flex items-center font-sm text-gray-900">
                                <span className="mr-2">{(dataset as any).icon}</span>
                                <span>{dataset.name}</span>
                              </div>
                              <div className="text-xs text-gray-500">{dataset.size}</div>
                            </div>
                          ),
                          value: dataset.id,
                        };
                      })}
                      onChange={(value) => {
                        // If the user has not manually edited the name, default it to
                        // the dataset name (padded to satisfy the 3-char minimum below).
                        if (!nameManuallyEdited) {
                          const ds = datasets.find((d) => d.id === value);
                          if (ds) {
                            let defaultName = ds.name || "";
                            if (defaultName.length < 3) {
                              defaultName = `${defaultName}-标注`;
                            }
                            manualForm.setFieldsValue({ name: defaultName });
                          }
                        }
                      }}
                    />
                  </Form.Item>
                  <Form.Item
                    label="标注工程名称"
                    name="name"
                    rules={[
                      {
                        // Custom validator: required + minimum 3 chars after trimming
                        // (Label Studio rejects shorter project names).
                        validator: (_rule, value) => {
                          const trimmed = (value || "").trim();
                          if (!trimmed) {
                            return Promise.reject(new Error("请输入任务名称"));
                          }
                          if (trimmed.length < 3) {
                            return Promise.reject(
                              new Error("任务名称至少需要 3 个字符(不含首尾空格,Label Studio 限制)"),
                            );
                          }
                          return Promise.resolve();
                        },
                      },
                    ]}
                  >
                    <Input
                      placeholder="输入标注工程名称"
                      onChange={() => setNameManuallyEdited(true)}
                    />
                  </Form.Item>
                </div>
                {/* Description is optional */}
                <Form.Item label="描述" name="description">
                  <TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={3} />
                </Form.Item>
                {/* Annotation template selection */}
                <Form.Item
                  label="标注模板"
                  name="templateId"
                  rules={[{ required: true, message: "请选择标注模板" }]}
                >
                  <Select
                    placeholder={templates.length === 0 ? "暂无可用模板,请先创建模板" : "请选择标注模板"}
                    showSearch
                    optionFilterProp="label"
                    notFoundContent={templates.length === 0 ? "暂无模板,请前往「标注模板」页面创建" : "未找到匹配的模板"}
                    options={templates.map((template) => ({
                      label: template.name,
                      value: template.id,
                      // Add description as subtitle
                      title: template.description,
                    }))}
                    optionRender={(option) => (
                      <div>
                        <div style={{ fontWeight: 500 }}>{option.label}</div>
                        {option.data.title && (
                          <div style={{ fontSize: 12, color: '#999', marginTop: 2 }}>
                            {option.data.title}
                          </div>
                        )}
                      </div>
                    )}
                  />
                </Form.Item>
              </Form>
            ),
          },
          {
            key: "auto",
            label: "自动标注",
            children: (
              <Form form={autoForm} layout="vertical" preserve={false}>
                <Form.Item
                  name="name"
                  label="任务名称"
                  rules={[
                    { required: true, message: "请输入任务名称" },
                    { max: 100, message: "任务名称不能超过100个字符" },
                  ]}
                >
                  <Input placeholder="请输入任务名称" />
                </Form.Item>
                <Form.Item label="选择数据集和图像文件" required>
                  <DatasetFileTransfer
                    open
                    selectedFilesMap={selectedFilesMap}
                    onSelectedFilesChange={setSelectedFilesMap}
                    onDatasetSelect={(dataset) => {
                      setSelectedDataset(dataset as Dataset | null);
                      // Mirror the selection into the hidden datasetId field so
                      // the required-rule below can validate it.
                      autoForm.setFieldsValue({ datasetId: dataset?.id ?? "" });
                    }}
                    datasetTypeFilter={DatasetType.IMAGE}
                  />
                  {selectedDataset && (
                    <div className="mt-2 p-2 bg-blue-50 rounded border border-blue-200 text-xs">
                      <span className="font-medium">{selectedDataset.name}</span> -
                      <span className="font-medium text-blue-600"> {imageFileCount} </span>
                    </div>
                  )}
                </Form.Item>
                <Form.Item
                  hidden
                  name="datasetId"
                  rules={[{ required: true, message: "请选择数据集" }]}
                >
                  <Input type="hidden" />
                </Form.Item>
                <Form.Item
                  name="modelSize"
                  label="模型规模"
                  rules={[{ required: true, message: "请选择模型规模" }]}
                  initialValue="l"
                >
                  <Select>
                    <Option value="n">YOLOv8n ()</Option>
                    <Option value="s">YOLOv8s</Option>
                    <Option value="m">YOLOv8m</Option>
                    <Option value="l">YOLOv8l ()</Option>
                    <Option value="x">YOLOv8x ()</Option>
                  </Select>
                </Form.Item>
                <Form.Item
                  name="confThreshold"
                  label="置信度阈值"
                  rules={[{ required: true, message: "请选择置信度阈值" }]}
                  initialValue={0.7}
                >
                  <Slider
                    min={0.1}
                    max={0.9}
                    step={0.05}
                    tooltip={{ formatter: (v) => `${(v || 0) * 100}%` }}
                  />
                </Form.Item>
                <Form.Item label="目标类别">
                  <Checkbox
                    checked={selectAllClasses}
                    onChange={(e) => handleClassSelectionChange(e.target.checked)}
                  >
                  </Checkbox>
                  {!selectAllClasses && (
                    <Form.Item name="targetClasses" noStyle>
                      <Select mode="multiple" placeholder="选择目标类别" style={{ marginTop: 8 }}>
                        {COCO_CLASSES.map((cls) => (
                          <Option key={cls.id} value={cls.id}>
                            {cls.label} ({cls.name})
                          </Option>
                        ))}
                      </Select>
                    </Form.Item>
                  )}
                </Form.Item>
                <Form.Item name="outputDatasetName" label="输出数据集名称 (可选)">
                  <Input placeholder="留空则将结果写入原数据集的标签中" />
                </Form.Item>
              </Form>
            ),
          },
        ]}
      />
    </Modal>
  );
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +1,67 @@
import { get, post, put, del } from "@/utils/request";
// Annotation project / task management endpoints
/** List annotation projects (paged). */
export function queryAnnotationTasksUsingGet(params?: any) {
  return get("/api/annotation/project", params);
}
/** Create a manual annotation project. */
export function createAnnotationTaskUsingPost(data: any) {
  return post("/api/annotation/project", data);
}
/** Trigger a sync of task annotations back from the labeling backend. */
export function syncAnnotationTaskUsingPost(data: any) {
  return post(`/api/annotation/task/sync`, data);
}
/** Delete an annotation project by its mapping id. */
export function deleteAnnotationTaskByIdUsingDelete(mappingId: string) {
  // Backend expects mapping UUID as path parameter
  return del(`/api/annotation/project/${mappingId}`);
}
/**
 * Fetch the Label Studio login/redirect URL for the given project mapping.
 *
 * Bug fix: the path was previously built with a double-quoted string, so the
 * literal text "${mappingId}" was sent to the server instead of the actual id.
 * A template literal (backticks) is required for `${...}` interpolation,
 * matching every other parameterized endpoint in this module.
 *
 * @param mappingId project-mapping UUID used as the path parameter
 * @returns the promise returned by the shared `get` request helper
 */
export function loginAnnotationUsingGet(mappingId: string) {
  return get(`/api/annotation/project/${mappingId}/login`);
}
// Tag configuration
/** Fetch the global annotation tag configuration. */
export function getTagConfigUsingGet() {
  return get("/api/annotation/tags/config");
}
// Annotation template management
/** List annotation templates (uses 'page'/'size' pagination params). */
export function queryAnnotationTemplatesUsingGet(params?: any) {
  return get("/api/annotation/template", params);
}
/** Create a new annotation template. */
export function createAnnotationTemplateUsingPost(data: any) {
  return post("/api/annotation/template", data);
}
/** Update an existing template by id. */
export function updateAnnotationTemplateByIdUsingPut(
  templateId: string | number,
  data: any
) {
  return put(`/api/annotation/template/${templateId}`, data);
}
/** Delete a template by id. */
export function deleteAnnotationTemplateByIdUsingDelete(
  templateId: string | number
) {
  return del(`/api/annotation/template/${templateId}`);
}
import { get, post, put, del, download } from "@/utils/request";
// Annotation project / task management endpoints
/** List annotation projects (paged). */
export function queryAnnotationTasksUsingGet(params?: any) {
  return get("/api/annotation/project", params);
}
/** Create a manual annotation project. */
export function createAnnotationTaskUsingPost(data: any) {
  return post("/api/annotation/project", data);
}
/** Trigger a sync of task annotations back from the labeling backend. */
export function syncAnnotationTaskUsingPost(data: any) {
  return post(`/api/annotation/task/sync`, data);
}
/** Delete an annotation project by its mapping id. */
export function deleteAnnotationTaskByIdUsingDelete(mappingId: string) {
  // Backend expects mapping UUID as path parameter
  return del(`/api/annotation/project/${mappingId}`);
}
// Tag configuration
/** Fetch the global annotation tag configuration. */
export function getTagConfigUsingGet() {
  return get("/api/annotation/tags/config");
}
// Annotation template management
/** List annotation templates (uses 'page'/'size' pagination params). */
export function queryAnnotationTemplatesUsingGet(params?: any) {
  return get("/api/annotation/template", params);
}
/** Create a new annotation template. */
export function createAnnotationTemplateUsingPost(data: any) {
  return post("/api/annotation/template", data);
}
/** Update an existing template by id. */
export function updateAnnotationTemplateByIdUsingPut(
  templateId: string | number,
  data: any
) {
  return put(`/api/annotation/template/${templateId}`, data);
}
/** Delete a template by id. */
export function deleteAnnotationTemplateByIdUsingDelete(
  templateId: string | number
) {
  return del(`/api/annotation/template/${templateId}`);
}
// Auto-annotation (YOLO) task management
/** List auto-annotation tasks (paged). */
export function queryAutoAnnotationTasksUsingGet(params?: any) {
  return get("/api/annotation/auto", params);
}
/** Create a YOLO auto-annotation task. */
export function createAutoAnnotationTaskUsingPost(data: any) {
  return post("/api/annotation/auto", data);
}
/** Delete an auto-annotation task by id. */
export function deleteAutoAnnotationTaskByIdUsingDelete(taskId: string) {
  return del(`/api/annotation/auto/${taskId}`);
}
/** Poll the processing status of an auto-annotation task. */
export function getAutoAnnotationTaskStatusUsingGet(taskId: string) {
  return get(`/api/annotation/auto/${taskId}/status`);
}
/** Download the generated annotation result archive for a finished task. */
export function downloadAutoAnnotationResultUsingGet(taskId: string) {
  return download(`/api/annotation/auto/${taskId}/download`);
}

View File

@@ -1,245 +1,260 @@
import { useEffect, useMemo, useState } from "react";
import { Breadcrumb, App, Tabs } from "antd";
import {
ReloadOutlined,
DownloadOutlined,
UploadOutlined,
EditOutlined,
DeleteOutlined,
} from "@ant-design/icons";
import DetailHeader from "@/components/DetailHeader";
import { mapDataset, datasetTypeMap } from "../dataset.const";
import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { Link, useNavigate, useParams } from "react-router";
import { useFilesOperation } from "./useFilesOperation";
import {
createDatasetTagUsingPost,
deleteDatasetByIdUsingDelete,
downloadDatasetUsingGet,
queryDatasetByIdUsingGet,
queryDatasetTagsUsingGet,
updateDatasetByIdUsingPut,
} from "../dataset.api";
import DataQuality from "./components/DataQuality";
import DataLineageFlow from "./components/DataLineageFlow";
import Overview from "./components/Overview";
import { Activity, Clock, File, FileType } from "lucide-react";
import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "./components/ImportConfiguration";
/** Tabs rendered on the dataset detail page (keys drive conditional rendering below). */
const tabList = [
  {
    key: "overview",
    label: "概览",
  },
  {
    key: "lineage",
    label: "数据血缘",
  },
  {
    key: "quality",
    label: "数据质量",
  },
];
/**
 * Dataset detail page: breadcrumb + header (statistics, actions, tags) and
 * Overview / Lineage / Quality tabs, plus the import and edit dialogs.
 * Listens for the global "update:dataset" window event to refresh silently.
 */
export default function DatasetDetail() {
  const { id } = useParams(); // dataset id from the dynamic route segment
  const navigate = useNavigate();
  const [activeTab, setActiveTab] = useState("overview");
  const { message } = App.useApp();
  const [showEditDialog, setShowEditDialog] = useState(false);
  const [dataset, setDataset] = useState<Dataset>({} as Dataset);
  // File listing/paging/upload helpers scoped to this dataset.
  const filesOperation = useFilesOperation(dataset);
  const [showUploadDialog, setShowUploadDialog] = useState(false);
  const navigateItems = useMemo(
    () => [
      {
        title: <Link to="/data/management"></Link>,
      },
      {
        title: dataset.name || "数据集详情",
      },
    ],
    [dataset]
  );
  // Load the dataset entity (files are fetched separately via filesOperation).
  const fetchDataset = async () => {
    const { data } = await queryDatasetByIdUsingGet(id as unknown as number);
    setDataset(mapDataset(data));
  };
  useEffect(() => {
    fetchDataset();
    filesOperation.fetchFiles();
  }, []);
  // Refresh dataset + files; the success toast can be suppressed for silent refreshes.
  const handleRefresh = async (showMessage = true) => {
    fetchDataset();
    filesOperation.fetchFiles();
    if (showMessage) message.success({ content: "数据刷新成功" });
  };
  const handleDownload = async () => {
    await downloadDatasetUsingGet(dataset.id);
    message.success("文件下载成功");
  };
  // Delete, then navigate back to the dataset list.
  const handleDeleteDataset = async () => {
    await deleteDatasetByIdUsingDelete(dataset.id);
    navigate("/data/management");
    message.success("数据集删除成功");
  };
  // Silent refresh whenever another component dispatches "update:dataset".
  useEffect(() => {
    const refreshData = () => {
      handleRefresh(false);
    };
    window.addEventListener("update:dataset", refreshData);
    return () => {
      window.removeEventListener("update:dataset", refreshData);
    };
  }, []);
  // Summary statistics shown in the detail header.
  const statistics = [
    {
      icon: <File className="text-blue-400 w-4 h-4" />,
      key: "file",
      value: dataset?.fileCount || 0,
    },
    {
      icon: <Activity className="text-blue-400 w-4 h-4" />,
      key: "size",
      value: dataset?.size || "0 B",
    },
    {
      icon: <FileType className="text-blue-400 w-4 h-4" />,
      key: "type",
      value:
        datasetTypeMap[dataset?.datasetType as keyof typeof datasetTypeMap]
          ?.label ||
        dataset?.type ||
        "未知",
    },
    {
      icon: <Clock className="text-blue-400 w-4 h-4" />,
      key: "time",
      value: dataset?.updatedAt,
    },
  ];
  // Header action buttons for the dataset.
  const operations = [
    {
      key: "edit",
      label: "编辑",
      icon: <EditOutlined />,
      onClick: () => {
        setShowEditDialog(true);
      },
    },
    {
      key: "upload",
      label: "导入数据",
      icon: <UploadOutlined />,
      onClick: () => setShowUploadDialog(true),
    },
    {
      key: "export",
      label: "导出",
      icon: <DownloadOutlined />,
      // isDropdown: true,
      // items: [
      //   { key: "alpaca", label: "Alpaca 格式", icon: <FileTextOutlined /> },
      //   { key: "jsonl", label: "JSONL 格式", icon: <DatabaseOutlined /> },
      //   { key: "csv", label: "CSV 格式", icon: <FileTextOutlined /> },
      //   { key: "coco", label: "COCO 格式", icon: <FileImageOutlined /> },
      // ],
      onClick: () => handleDownload(),
    },
    {
      key: "refresh",
      label: "刷新",
      icon: <ReloadOutlined />,
      onClick: handleRefresh,
    },
    {
      key: "delete",
      label: "删除",
      danger: true,
      confirm: {
        title: "确认删除该数据集?",
        description: "删除后该数据集将无法恢复,请谨慎操作。",
        okText: "删除",
        cancelText: "取消",
        okType: "danger",
      },
      icon: <DeleteOutlined />,
      onClick: handleDeleteDataset,
    },
  ];
  return (
    <div className="h-full flex flex-col gap-4">
      <Breadcrumb items={navigateItems} />
      {/* Header */}
      <DetailHeader
        data={dataset}
        statistics={statistics}
        operations={operations}
        tagConfig={{
          showAdd: true,
          tags: dataset.tags || [],
          onFetchTags: async () => {
            const res = await queryDatasetTagsUsingGet({
              page: 0,
              pageSize: 1000,
            });
            return res.data || [];
          },
          onCreateAndTag: async (tagName) => {
            const res = await createDatasetTagUsingPost({ name: tagName });
            if (res.data) {
              await updateDatasetByIdUsingPut(dataset.id, {
                tags: [...dataset.tags.map((tag) => tag.name), res.data.name],
              });
              handleRefresh();
            }
          },
          onAddTag: async (tag) => {
            const res = await updateDatasetByIdUsingPut(dataset.id, {
              tags: [...dataset.tags.map((tag) => tag.name), tag],
            });
            if (res.data) {
              handleRefresh();
            }
          },
        }}
      />
      <div className="flex-overflow-auto p-6 pt-2 bg-white rounded-md shadow">
        <Tabs activeKey={activeTab} items={tabList} onChange={setActiveTab} />
        <div className="h-full overflow-auto">
          {activeTab === "overview" && (
            <Overview dataset={dataset} filesOperation={filesOperation} fetchDataset={fetchDataset}/>
          )}
          {activeTab === "lineage" && <DataLineageFlow dataset={dataset} />}
          {activeTab === "quality" && <DataQuality />}
        </div>
      </div>
      <ImportConfiguration
        data={dataset}
        open={showUploadDialog}
        onClose={() => setShowUploadDialog(false)}
        updateEvent="update:dataset"
      />
      <EditDataset
        data={dataset}
        open={showEditDialog}
        onClose={() => setShowEditDialog(false)}
        onRefresh={handleRefresh}
      />
    </div>
  );
}
import { useEffect, useMemo, useState } from "react";
import { Breadcrumb, App, Tabs } from "antd";
import {
ReloadOutlined,
DownloadOutlined,
UploadOutlined,
EditOutlined,
DeleteOutlined,
} from "@ant-design/icons";
import DetailHeader from "@/components/DetailHeader";
import { mapDataset, datasetTypeMap } from "../dataset.const";
import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { Link, useNavigate, useParams } from "react-router";
import { useFilesOperation } from "./useFilesOperation";
import {
createDatasetTagUsingPost,
deleteDatasetByIdUsingDelete,
downloadDatasetUsingGet,
queryDatasetByIdUsingGet,
queryDatasetTagsUsingGet,
updateDatasetByIdUsingPut,
} from "../dataset.api";
import DataQuality from "./components/DataQuality";
import DataLineageFlow from "./components/DataLineageFlow";
import Overview from "./components/Overview";
import { Activity, Clock, File, FileType } from "lucide-react";
import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "./components/ImportConfiguration";
/** Tabs rendered on the dataset detail page (keys drive conditional rendering below). */
const tabList = [
  {
    key: "overview",
    label: "概览",
  },
  {
    key: "lineage",
    label: "数据血缘",
  },
  {
    key: "quality",
    label: "数据质量",
  },
];
/**
 * Dataset detail page: breadcrumb + header (statistics, actions, tags) and
 * Overview / Lineage / Quality tabs, plus the import and edit dialogs.
 *
 * This version is directory-prefix aware: file listing is paged within the
 * current prefix (folder), and the "update:dataset" window event may carry a
 * `detail.prefix` telling us which directory to refresh.
 */
export default function DatasetDetail() {
  const { id } = useParams(); // dataset id from the dynamic route segment
  const navigate = useNavigate();
  const [activeTab, setActiveTab] = useState("overview");
  const { message } = App.useApp();
  const [showEditDialog, setShowEditDialog] = useState(false);
  const [dataset, setDataset] = useState<Dataset>({} as Dataset);
  // File listing/paging/upload helpers scoped to this dataset.
  const filesOperation = useFilesOperation(dataset);
  const [showUploadDialog, setShowUploadDialog] = useState(false);
  const navigateItems = useMemo(
    () => [
      {
        title: <Link to="/data/management"></Link>,
      },
      {
        title: dataset.name || "数据集详情",
      },
    ],
    [dataset]
  );
  // Load the dataset entity (files are fetched separately via filesOperation).
  const fetchDataset = async () => {
    const { data } = await queryDatasetByIdUsingGet(id as unknown as number);
    setDataset(mapDataset(data));
  };
  useEffect(() => {
    fetchDataset();
    filesOperation.fetchFiles('', 1, 10); // start at the root directory, first page
  }, []);
  // Refresh dataset + files; optionally target a specific directory prefix
  // (used by the "update:dataset" event), otherwise stay in the current one.
  const handleRefresh = async (showMessage = true, prefixOverride?: string) => {
    fetchDataset();
    // Refresh the current directory, keeping the current page.
    const targetPrefix =
      prefixOverride !== undefined
        ? prefixOverride
        : filesOperation.pagination.prefix;
    filesOperation.fetchFiles(
      targetPrefix,
      filesOperation.pagination.current,
      filesOperation.pagination.pageSize
    );
    if (showMessage) message.success({ content: "数据刷新成功" });
  };
  const handleDownload = async () => {
    await downloadDatasetUsingGet(dataset.id);
    message.success("文件下载成功");
  };
  // Delete, then navigate back to the dataset list.
  const handleDeleteDataset = async () => {
    await deleteDatasetByIdUsingDelete(dataset.id);
    navigate("/data/management");
    message.success("数据集删除成功");
  };
  // Silent refresh on "update:dataset"; the CustomEvent may carry the prefix
  // of the directory that changed so we refresh that folder specifically.
  useEffect(() => {
    const refreshData = (e: Event) => {
      const custom = e as CustomEvent<{ prefix?: string }>;
      const prefixOverride = custom.detail?.prefix;
      handleRefresh(false, prefixOverride);
    };
    window.addEventListener("update:dataset", refreshData as EventListener);
    return () => {
      window.removeEventListener(
        "update:dataset",
        refreshData as EventListener
      );
    };
  }, []);
  // Summary statistics shown in the detail header.
  const statistics = [
    {
      icon: <File className="text-blue-400 w-4 h-4" />,
      key: "file",
      value: dataset?.fileCount || 0,
    },
    {
      icon: <Activity className="text-blue-400 w-4 h-4" />,
      key: "size",
      value: dataset?.size || "0 B",
    },
    {
      icon: <FileType className="text-blue-400 w-4 h-4" />,
      key: "type",
      value:
        datasetTypeMap[dataset?.datasetType as keyof typeof datasetTypeMap]
          ?.label ||
        dataset?.type ||
        "未知",
    },
    {
      icon: <Clock className="text-blue-400 w-4 h-4" />,
      key: "time",
      value: dataset?.updatedAt,
    },
  ];
  // Header action buttons for the dataset.
  const operations = [
    {
      key: "edit",
      label: "编辑",
      icon: <EditOutlined />,
      onClick: () => {
        setShowEditDialog(true);
      },
    },
    {
      key: "upload",
      label: "导入数据",
      icon: <UploadOutlined />,
      onClick: () => setShowUploadDialog(true),
    },
    {
      key: "export",
      label: "导出",
      icon: <DownloadOutlined />,
      // isDropdown: true,
      // items: [
      //   { key: "alpaca", label: "Alpaca 格式", icon: <FileTextOutlined /> },
      //   { key: "jsonl", label: "JSONL 格式", icon: <DatabaseOutlined /> },
      //   { key: "csv", label: "CSV 格式", icon: <FileTextOutlined /> },
      //   { key: "coco", label: "COCO 格式", icon: <FileImageOutlined /> },
      // ],
      onClick: () => handleDownload(),
    },
    {
      key: "refresh",
      label: "刷新",
      icon: <ReloadOutlined />,
      onClick: handleRefresh,
    },
    {
      key: "delete",
      label: "删除",
      danger: true,
      confirm: {
        title: "确认删除该数据集?",
        description: "删除后该数据集将无法恢复,请谨慎操作。",
        okText: "删除",
        cancelText: "取消",
        okType: "danger",
      },
      icon: <DeleteOutlined />,
      onClick: handleDeleteDataset,
    },
  ];
  return (
    <div className="h-full flex flex-col gap-4">
      <Breadcrumb items={navigateItems} />
      {/* Header */}
      <DetailHeader
        data={dataset}
        statistics={statistics}
        operations={operations}
        tagConfig={{
          showAdd: true,
          tags: dataset.tags || [],
          onFetchTags: async () => {
            const res = await queryDatasetTagsUsingGet({
              page: 0,
              pageSize: 1000,
            });
            return res.data || [];
          },
          onCreateAndTag: async (tagName) => {
            const res = await createDatasetTagUsingPost({ name: tagName });
            if (res.data) {
              await updateDatasetByIdUsingPut(dataset.id, {
                tags: [...dataset.tags.map((tag) => tag.name), res.data.name],
              });
              handleRefresh();
            }
          },
          onAddTag: async (tag) => {
            const res = await updateDatasetByIdUsingPut(dataset.id, {
              tags: [...dataset.tags.map((tag) => tag.name), tag],
            });
            if (res.data) {
              handleRefresh();
            }
          },
        }}
      />
      <div className="flex-overflow-auto p-6 pt-2 bg-white rounded-md shadow">
        <Tabs activeKey={activeTab} items={tabList} onChange={setActiveTab} />
        <div className="h-full overflow-auto">
          {activeTab === "overview" && (
            <Overview dataset={dataset} filesOperation={filesOperation} fetchDataset={fetchDataset}/>
          )}
          {activeTab === "lineage" && <DataLineageFlow dataset={dataset} />}
          {activeTab === "quality" && <DataQuality />}
        </div>
      </div>
      <ImportConfiguration
        data={dataset}
        open={showUploadDialog}
        onClose={() => setShowUploadDialog(false)}
        prefix={filesOperation.pagination.prefix}
        updateEvent="update:dataset"
      />
      <EditDataset
        data={dataset}
        open={showEditDialog}
        onClose={() => setShowEditDialog(false)}
        onRefresh={handleRefresh}
      />
    </div>
  );
}

View File

@@ -1,277 +1,294 @@
import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch } from "antd";
import { InboxOutlined } from "@ant-design/icons";
import { dataSourceOptions } from "../../dataset.const";
import { Dataset, DataSource } from "../../dataset.model";
import { useEffect, useMemo, useState } from "react";
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
import { updateDatasetByIdUsingPut } from "../../dataset.api";
import { sliceFile } from "@/utils/file.util";
import Dragger from "antd/es/upload/Dragger";
/**
 * Modal dialog for importing data into a dataset from one of several
 * sources (local upload, collection task, OBS bucket).
 *
 * Local uploads are not sent from here directly: the selected files are
 * sliced (see sliceFile) and dispatched through a global "upload:dataset"
 * CustomEvent for a background uploader to consume.
 *
 * @param data        the target dataset; import is a no-op when null
 * @param open        whether the modal is visible
 * @param onClose     invoked when the modal should close
 * @param updateEvent event name forwarded to the uploader so listeners know
 *                    which refresh event to fire after the upload finishes
 */
export default function ImportConfiguration({
  data,
  open,
  onClose,
  updateEvent = "update:dataset",
}: {
  data: Dataset | null;
  open: boolean;
  onClose: () => void;
  updateEvent?: string;
}) {
  const [form] = Form.useForm();
  const [collectionOptions, setCollectionOptions] = useState([]);
  // Current values of the import form; `source` drives which fields render.
  const [importConfig, setImportConfig] = useState<any>({
    source: DataSource.UPLOAD,
  });
  const [fileList, setFileList] = useState<UploadFile[]>([]);
  // Pre-computed slice descriptors for every selected file, recomputed
  // whenever the selection changes.
  const fileSliceList = useMemo(() => {
    const sliceList = fileList.map((file) => {
      const slices = sliceFile(file);
      return { originFile: file, slices, name: file.name, size: file.size };
    });
    return sliceList;
  }, [fileList]);
  // Local-upload helpers.
  const resetFiles = () => {
    setFileList([]);
  };
  // Hand the sliced files off to the background uploader via a global event.
  // (A previously-built FormData was never used and has been removed.)
  const handleUpload = async (dataset: Dataset) => {
    window.dispatchEvent(
      new CustomEvent("upload:dataset", {
        detail: {
          dataset,
          files: fileSliceList,
          updateEvent,
          hasArchive: importConfig.hasArchive,
        },
      })
    );
    resetFiles();
  };
  // Collect files ourselves; returning false stops antd's auto-upload.
  const handleBeforeUpload = (_, files: UploadFile[]) => {
    setFileList([...fileList, ...files]);
    return false;
  };
  const handleRemoveFile = (file: UploadFile) => {
    setFileList((prev) => prev.filter((f) => f.uid !== file.uid));
  };
  // Load collection tasks for the "collection" source option.
  const fetchCollectionTasks = async () => {
    if (importConfig.source !== DataSource.COLLECTION) return;
    try {
      const res = await queryTasksUsingGet({ page: 0, size: 100 });
      const options = res.data.content.map((task: any) => ({
        label: task.name,
        value: task.id,
      }));
      setCollectionOptions(options);
    } catch (error) {
      console.error("Error fetching collection tasks:", error);
    }
  };
  // Clear form state while keeping the currently selected source.
  const resetState = () => {
    form.resetFields();
    setFileList([]);
    form.setFieldsValue({ files: null });
    setImportConfig({ source: importConfig.source ? importConfig.source : DataSource.UPLOAD });
  };
  const handleImportData = async () => {
    if (!data) return;
    if (importConfig.source === DataSource.UPLOAD) {
      await handleUpload(data);
    } else if (importConfig.source === DataSource.COLLECTION) {
      // For collection imports the dataset record itself is updated.
      await updateDatasetByIdUsingPut(data.id, {
        ...importConfig,
      });
    }
    onClose();
  };
  // NOTE(review): resetState also runs whenever the source changes while the
  // modal is open, wiping any values already entered — confirm this is intended.
  useEffect(() => {
    if (open) {
      resetState();
      fetchCollectionTasks();
    }
  }, [open, importConfig.source]);
  // NOTE(review): several Button/paragraph elements below render without
  // visible text — the label strings appear to have been lost; confirm
  // against the original design.
  return (
    <Modal
      title="导入数据"
      open={open}
      width={600}
      onCancel={() => {
        onClose();
        resetState();
      }}
      maskClosable={false}
      footer={
        <>
          <Button onClick={onClose}></Button>
          <Button
            type="primary"
            disabled={!fileList?.length && !importConfig.dataSource}
            onClick={handleImportData}
          >
          </Button>
        </>
      }
    >
      <Form
        form={form}
        layout="vertical"
        initialValues={importConfig || {}}
        onValuesChange={(_, allValues) => setImportConfig(allValues)}
      >
        <Form.Item
          label="数据源"
          name="source"
          rules={[{ required: true, message: "请选择数据源" }]}
        >
          <Radio.Group
            buttonStyle="solid"
            options={dataSourceOptions}
            optionType="button"
          />
        </Form.Item>
        {importConfig?.source === DataSource.COLLECTION && (
          <Form.Item name="dataSource" label="归集任务" required>
            <Select placeholder="请选择归集任务" options={collectionOptions} />
          </Form.Item>
        )}
        {/* OBS import configuration */}
        {importConfig?.source === DataSource.OBS && (
          <div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg">
            <Form.Item
              name="endpoint"
              rules={[{ required: true }]}
              label="Endpoint"
            >
              <Input
                className="h-8 text-xs"
                placeholder="obs.cn-north-4.myhuaweicloud.com"
              />
            </Form.Item>
            <Form.Item
              name="bucket"
              rules={[{ required: true }]}
              label="Bucket"
            >
              <Input className="h-8 text-xs" placeholder="my-bucket" />
            </Form.Item>
            <Form.Item
              name="accessKey"
              rules={[{ required: true }]}
              label="Access Key"
            >
              <Input className="h-8 text-xs" placeholder="Access Key" />
            </Form.Item>
            <Form.Item
              name="secretKey"
              rules={[{ required: true }]}
              label="Secret Key"
            >
              <Input
                type="password"
                className="h-8 text-xs"
                placeholder="Secret Key"
              />
            </Form.Item>
          </div>
        )}
        {/* Local upload component */}
        {importConfig?.source === DataSource.UPLOAD && (
          <>
            <Form.Item
              label="自动解压上传的压缩包"
              name="hasArchive"
              valuePropName="checked"
              initialValue={true}
            >
              <Switch />
            </Form.Item>
            <Form.Item
              label="上传文件"
              name="files"
              rules={[
                {
                  required: true,
                  message: "请上传文件",
                },
              ]}
            >
              <Dragger
                className="w-full"
                onRemove={handleRemoveFile}
                beforeUpload={handleBeforeUpload}
                multiple
              >
                <p className="ant-upload-drag-icon">
                  <InboxOutlined />
                </p>
                <p className="ant-upload-text"></p>
                <p className="ant-upload-hint"></p>
              </Dragger>
            </Form.Item>
          </>
        )}
        {/* Target configuration (database targets only) */}
        {importConfig?.target && importConfig?.target !== DataSource.UPLOAD && (
          <div className="space-y-3 p-4 bg-blue-50 rounded-lg">
            {importConfig?.target === DataSource.DATABASE && (
              <div className="grid grid-cols-2 gap-3">
                <Form.Item
                  name="databaseType"
                  rules={[{ required: true }]}
                  label="数据库类型"
                >
                  <Select
                    className="w-full"
                    options={[
                      { label: "MySQL", value: "mysql" },
                      { label: "PostgreSQL", value: "postgresql" },
                      { label: "MongoDB", value: "mongodb" },
                    ]}
                  ></Select>
                </Form.Item>
                <Form.Item
                  name="tableName"
                  rules={[{ required: true }]}
                  label="表名"
                >
                  <Input className="h-8 text-xs" placeholder="dataset_table" />
                </Form.Item>
                <Form.Item
                  name="connectionString"
                  rules={[{ required: true }]}
                  label="连接字符串"
                >
                  <Input
                    className="h-8 text-xs col-span-2"
                    placeholder="数据库连接字符串"
                  />
                </Form.Item>
              </div>
            )}
          </div>
        )}
      </Form>
    </Modal>
  );
}
import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch } from "antd";
import { InboxOutlined } from "@ant-design/icons";
import { dataSourceOptions } from "../../dataset.const";
import { Dataset, DataSource } from "../../dataset.model";
import { useEffect, useMemo, useState } from "react";
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
import { updateDatasetByIdUsingPut } from "../../dataset.api";
import { sliceFile } from "@/utils/file.util";
import Dragger from "antd/es/upload/Dragger";
/**
 * Modal dialog for importing data into a dataset from one of several
 * sources (local upload, collection task, OBS bucket).
 *
 * Local uploads are not sent from here directly: the selected files are
 * sliced (see sliceFile) and dispatched through a global "upload:dataset"
 * CustomEvent for a background uploader to consume. The directory prefix
 * uploads should land in is captured when the modal opens.
 *
 * @param data        the target dataset; import is a no-op when null
 * @param open        whether the modal is visible
 * @param onClose     invoked when the modal should close
 * @param updateEvent event name forwarded to the uploader so listeners know
 *                    which refresh event to fire after the upload finishes
 * @param prefix      directory prefix the upload targets; snapshotted on open
 */
export default function ImportConfiguration({
  data,
  open,
  onClose,
  updateEvent = "update:dataset",
  prefix,
}: {
  data: Dataset | null;
  open: boolean;
  onClose: () => void;
  updateEvent?: string;
  prefix?: string;
}) {
  const [form] = Form.useForm();
  const [collectionOptions, setCollectionOptions] = useState([]);
  // Current values of the import form; `source` drives which fields render.
  const [importConfig, setImportConfig] = useState<any>({
    source: DataSource.UPLOAD,
  });
  // Snapshot of the target directory prefix, taken each time the modal opens.
  const [currentPrefix, setCurrentPrefix] = useState<string>("");
  const [fileList, setFileList] = useState<UploadFile[]>([]);
  // Pre-computed slice descriptors for every selected file, recomputed
  // whenever the selection changes.
  const fileSliceList = useMemo(() => {
    const sliceList = fileList.map((file) => {
      const slices = sliceFile(file);
      return { originFile: file, slices, name: file.name, size: file.size };
    });
    return sliceList;
  }, [fileList]);
  // Local-upload helpers.
  const resetFiles = () => {
    setFileList([]);
  };
  // Hand the sliced files off to the background uploader via a global event.
  // (Leftover debug console.log statements and an unused FormData removed.)
  const handleUpload = async (dataset: Dataset) => {
    window.dispatchEvent(
      new CustomEvent("upload:dataset", {
        detail: {
          dataset,
          files: fileSliceList,
          updateEvent,
          hasArchive: importConfig.hasArchive,
          prefix: currentPrefix,
        },
      })
    );
    resetFiles();
  };
  // Collect files ourselves; returning false stops antd's auto-upload.
  const handleBeforeUpload = (_, files: UploadFile[]) => {
    setFileList([...fileList, ...files]);
    return false;
  };
  const handleRemoveFile = (file: UploadFile) => {
    setFileList((prev) => prev.filter((f) => f.uid !== file.uid));
  };
  // Load collection tasks for the "collection" source option.
  const fetchCollectionTasks = async () => {
    if (importConfig.source !== DataSource.COLLECTION) return;
    try {
      const res = await queryTasksUsingGet({ page: 0, size: 100 });
      const options = res.data.content.map((task: any) => ({
        label: task.name,
        value: task.id,
      }));
      setCollectionOptions(options);
    } catch (error) {
      console.error("Error fetching collection tasks:", error);
    }
  };
  // Clear form state while keeping the currently selected source;
  // currentPrefix is intentionally preserved across resets.
  const resetState = () => {
    form.resetFields();
    setFileList([]);
    form.setFieldsValue({ files: null });
    setImportConfig({ source: importConfig.source ? importConfig.source : DataSource.UPLOAD });
  };
  const handleImportData = async () => {
    if (!data) return;
    if (importConfig.source === DataSource.UPLOAD) {
      await handleUpload(data);
    } else if (importConfig.source === DataSource.COLLECTION) {
      // For collection imports the dataset record itself is updated.
      await updateDatasetByIdUsingPut(data.id, {
        ...importConfig,
      });
    }
    onClose();
  };
  // Capture the directory prefix and reset the form each time the modal opens.
  useEffect(() => {
    if (open) {
      setCurrentPrefix(prefix || "");
      resetState();
      fetchCollectionTasks();
    }
  }, [open]);
  // Separate effect for fetching collection tasks when the source changes.
  useEffect(() => {
    if (open && importConfig.source === DataSource.COLLECTION) {
      fetchCollectionTasks();
    }
  }, [importConfig.source]);
  // NOTE(review): several Button/paragraph elements below render without
  // visible text — the label strings appear to have been lost; confirm
  // against the original design.
  return (
    <Modal
      title="导入数据"
      open={open}
      width={600}
      onCancel={() => {
        onClose();
        resetState();
      }}
      maskClosable={false}
      footer={
        <>
          <Button onClick={onClose}></Button>
          <Button
            type="primary"
            disabled={!fileList?.length && !importConfig.dataSource}
            onClick={handleImportData}
          >
          </Button>
        </>
      }
    >
      <Form
        form={form}
        layout="vertical"
        initialValues={importConfig || {}}
        onValuesChange={(_, allValues) => setImportConfig(allValues)}
      >
        <Form.Item
          label="数据源"
          name="source"
          rules={[{ required: true, message: "请选择数据源" }]}
        >
          <Radio.Group
            buttonStyle="solid"
            options={dataSourceOptions}
            optionType="button"
          />
        </Form.Item>
        {importConfig?.source === DataSource.COLLECTION && (
          <Form.Item name="dataSource" label="归集任务" required>
            <Select placeholder="请选择归集任务" options={collectionOptions} />
          </Form.Item>
        )}
        {/* OBS import configuration */}
        {importConfig?.source === DataSource.OBS && (
          <div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg">
            <Form.Item
              name="endpoint"
              rules={[{ required: true }]}
              label="Endpoint"
            >
              <Input
                className="h-8 text-xs"
                placeholder="obs.cn-north-4.myhuaweicloud.com"
              />
            </Form.Item>
            <Form.Item
              name="bucket"
              rules={[{ required: true }]}
              label="Bucket"
            >
              <Input className="h-8 text-xs" placeholder="my-bucket" />
            </Form.Item>
            <Form.Item
              name="accessKey"
              rules={[{ required: true }]}
              label="Access Key"
            >
              <Input className="h-8 text-xs" placeholder="Access Key" />
            </Form.Item>
            <Form.Item
              name="secretKey"
              rules={[{ required: true }]}
              label="Secret Key"
            >
              <Input
                type="password"
                className="h-8 text-xs"
                placeholder="Secret Key"
              />
            </Form.Item>
          </div>
        )}
        {/* Local upload component */}
        {importConfig?.source === DataSource.UPLOAD && (
          <>
            <Form.Item
              label="自动解压上传的压缩包"
              name="hasArchive"
              valuePropName="checked"
              initialValue={true}
            >
              <Switch />
            </Form.Item>
            <Form.Item
              label="上传文件"
              name="files"
              rules={[
                {
                  required: true,
                  message: "请上传文件",
                },
              ]}
            >
              <Dragger
                className="w-full"
                onRemove={handleRemoveFile}
                beforeUpload={handleBeforeUpload}
                multiple
              >
                <p className="ant-upload-drag-icon">
                  <InboxOutlined />
                </p>
                <p className="ant-upload-text"></p>
                <p className="ant-upload-hint"></p>
              </Dragger>
            </Form.Item>
          </>
        )}
        {/* Target configuration (database targets only) */}
        {importConfig?.target && importConfig?.target !== DataSource.UPLOAD && (
          <div className="space-y-3 p-4 bg-blue-50 rounded-lg">
            {importConfig?.target === DataSource.DATABASE && (
              <div className="grid grid-cols-2 gap-3">
                <Form.Item
                  name="databaseType"
                  rules={[{ required: true }]}
                  label="数据库类型"
                >
                  <Select
                    className="w-full"
                    options={[
                      { label: "MySQL", value: "mysql" },
                      { label: "PostgreSQL", value: "postgresql" },
                      { label: "MongoDB", value: "mongodb" },
                    ]}
                  ></Select>
                </Form.Item>
                <Form.Item
                  name="tableName"
                  rules={[{ required: true }]}
                  label="表名"
                >
                  <Input className="h-8 text-xs" placeholder="dataset_table" />
                </Form.Item>
                <Form.Item
                  name="connectionString"
                  rules={[{ required: true }]}
                  label="连接字符串"
                >
                  <Input
                    className="h-8 text-xs col-span-2"
                    placeholder="数据库连接字符串"
                  />
                </Form.Item>
              </div>
            )}
          </div>
        )}
      </Form>
    </Modal>
  );
}

View File

@@ -1,316 +1,399 @@
import { Button, Descriptions, DescriptionsProps, Modal, Table } from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react";
import { datasetTypeMap } from "../../dataset.const";
/**
 * "Overview" tab of the dataset detail page: renders the dataset's basic
 * metadata and a paginated, directory-aware file table with download /
 * delete actions plus a text-preview modal.
 *
 * @param dataset        the dataset whose metadata and files are shown
 * @param filesOperation state + handlers returned by useFilesOperation
 * @param fetchDataset   callback to re-fetch dataset metadata after mutations
 */
export default function Overview({ dataset, filesOperation, fetchDataset }) {
  const {
    fileList,
    pagination,
    selectedFiles,
    setSelectedFiles,
    previewVisible,
    previewFileName,
    previewContent,
    setPreviewVisible,
    handleDeleteFile,
    handleDownloadFile,
    handleBatchDeleteFiles,
    handleBatchExport,
  } = filesOperation;
  // Multi-select config for the file table (currently disabled on the Table below).
  const rowSelection = {
    onChange: (selectedRowKeys: React.Key[], selectedRows: any[]) => {
      setSelectedFiles(selectedRowKeys as number[]);
      console.log(
        `selectedRowKeys: ${selectedRowKeys}`,
        "selectedRows: ",
        selectedRows
      );
    },
  };
  // Basic dataset information shown in the Descriptions panel.
  const items: DescriptionsProps["items"] = [
    {
      key: "id",
      label: "ID",
      children: dataset.id,
    },
    {
      key: "name",
      label: "名称",
      children: dataset.name,
    },
    {
      key: "fileCount",
      label: "文件数",
      children: dataset.fileCount || 0,
    },
    {
      key: "size",
      label: "数据大小",
      children: dataset.size || "0 B",
    },
    {
      key: "datasetType",
      label: "类型",
      children: datasetTypeMap[dataset?.datasetType]?.label || "未知",
    },
    {
      key: "status",
      label: "状态",
      children: dataset?.status?.label || "未知",
    },
    {
      key: "createdBy",
      label: "创建者",
      children: dataset.createdBy || "未知",
    },
    {
      key: "targetLocation",
      label: "存储路径",
      children: dataset.targetLocation || "未知",
    },
    {
      key: "pvcName",
      label: "存储名称",
      children: dataset.pvcName || "未知",
    },
    {
      key: "createdAt",
      label: "创建时间",
      children: dataset.createdAt,
    },
    {
      key: "updatedAt",
      label: "更新时间",
      children: dataset.updatedAt,
    },
    {
      key: "description",
      label: "描述",
      children: dataset.description || "无",
    },
  ];
  // File table column definitions. Directory rows are identified by a
  // synthetic id beginning with "directory-".
  const columns = [
    {
      title: "文件名",
      dataIndex: "fileName",
      key: "fileName",
      fixed: "left",
      render: (text: string, record: any) => {
        const isDirectory = record.id.startsWith('directory-');
        const iconSize = 16;
        const content = (
          <div className="flex items-center">
            {isDirectory ? (
              <Folder className="mr-2 text-blue-500" size={iconSize} />
            ) : (
              <File className="mr-2 text-black" size={iconSize} />
            )}
            <span className="truncate text-black">{text}</span>
          </div>
        );
        if (isDirectory) {
          return (
            <Button
              type="link"
              onClick={(e) => {
                const currentPath = filesOperation.pagination.prefix || '';
                // Directory prefixes must end with a slash so the backend
                // lists the folder's contents rather than matching a file.
                const newPath = `${currentPath}${record.fileName}/`;
                filesOperation.fetchFiles(newPath);
              }}
            >
              {content}
            </Button>
          );
        }
        // NOTE(review): clicking a regular file is a no-op here even though a
        // preview modal exists below — confirm whether preview should be wired up.
        return (
          <Button
            type="link"
            onClick={(e) => {}}
          >
            {content}
          </Button>
        );
      },
    },
    {
      title: "大小",
      dataIndex: "fileSize",
      key: "fileSize",
      width: 150,
      render: (text: number, record: any) => {
        const isDirectory = record.id.startsWith('directory-');
        if (isDirectory) {
          return "-";
        }
        return formatBytes(text)
      },
    },
    {
      title: "上传时间",
      dataIndex: "uploadTime",
      key: "uploadTime",
      width: 200,
      render: (text) => formatDateTime(text),
    },
    {
      title: "操作",
      key: "action",
      width: 180,
      fixed: "right",
      render: (_, record) => {
        const isDirectory = record.id.startsWith('directory-');
        if (isDirectory) {
          return <div className="flex"/>;
        }
        return (
          <div className="flex">
            <Button
              size="small"
              type="link"
              onClick={() => handleDownloadFile(record)}
            >
            </Button>
            <Button
              size="small"
              type="link"
              onClick={async () => {
                // Refresh dataset metadata (file count / size) after deletion.
                await handleDeleteFile(record);
                fetchDataset()
              }
            }
            >
            </Button>
          </div>
        )},
    },
  ];
  return (
    <>
      <div className=" flex flex-col gap-4">
        {/* Basic information */}
        <Descriptions
          title="基本信息"
          layout="vertical"
          size="small"
          items={items}
          column={5}
        />
        {/* File list */}
        <h2 className="text-base font-semibold mt-8"></h2>
        {selectedFiles.length > 0 && (
          <div className="flex items-center gap-2 p-3 bg-blue-50 rounded-lg border border-blue-200">
            <span className="text-sm text-blue-700 font-medium">
              {selectedFiles.length}
            </span>
            <Button
              onClick={handleBatchExport}
              className="ml-auto bg-transparent"
            >
              <Download className="w-4 h-4 mr-2" />
            </Button>
            <Button
              onClick={handleBatchDeleteFiles}
              className="text-red-600 hover:text-red-700 hover:bg-red-50 bg-transparent"
            >
              <Trash2 className="w-4 h-4 mr-2" />
            </Button>
          </div>
        )}
        <div className="overflow-x-auto">
          <div className="mb-2">
            {(filesOperation.pagination.prefix || '') !== '' && (
              <Button
                type="link"
                onClick={() => {
                  // Navigate to the parent directory.
                  const currentPath = filesOperation.pagination.prefix || '';
                  const pathParts = currentPath.split('/').filter(Boolean);
                  pathParts.pop(); // drop the deepest directory
                  const parentPath = pathParts.length > 0 ? `${pathParts.join('/')}/` : '';
                  filesOperation.fetchFiles(parentPath);
                }}
                className="p-0"
              >
                <span className="flex items-center text-blue-500">
                  <svg
                    className="w-4 h-4 mr-1"
                    fill="none"
                    stroke="currentColor"
                    viewBox="0 0 24 24"
                    xmlns="http://www.w3.org/2000/svg"
                  >
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M10 19l-7-7m0 0l7-7m-7 7h18"
                    />
                  </svg>
                </span>
              </Button>
            )}
            {filesOperation.pagination.prefix && (
              <span className="ml-2 text-gray-600">: {filesOperation.pagination.prefix}</span>
            )}
          </div>
          <Table
            size="middle"
            rowKey="id"
            columns={columns}
            dataSource={fileList}
            // rowSelection={rowSelection}
            scroll={{ x: "max-content", y: 600 }}
            pagination={{
              ...pagination,
              showTotal: (total) => `${total}`,
              onChange: (page, pageSize) => {
                filesOperation.setPagination(prev => ({
                  ...prev,
                  current: page,
                  pageSize: pageSize
                }));
                filesOperation.fetchFiles(pagination.prefix, page, pageSize);
              }
            }}
          />
        </div>
      </div>
      {/* File preview modal */}
      <Modal
        title={`文件预览:${previewFileName}`}
        open={previewVisible}
        onCancel={() => setPreviewVisible(false)}
        footer={null}
        width={700}
      >
        <pre
          style={{
            whiteSpace: "pre-wrap",
            wordBreak: "break-all",
            fontSize: 14,
            color: "#222",
          }}
        >
          {previewContent}
        </pre>
      </Modal>
    </>
  );
}
import { App, Button, Descriptions, DescriptionsProps, Modal, Table, Input } from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react";
import { datasetTypeMap } from "../../dataset.const";
/**
 * "Overview" tab of the dataset detail page: renders the dataset's basic
 * metadata and a paginated, directory-aware file table. Supports folder
 * navigation, folder create/download/delete (with confirmation), per-file
 * download/delete, and a text-preview modal.
 *
 * @param dataset        the dataset whose metadata and files are shown
 * @param filesOperation state + handlers returned by useFilesOperation
 * @param fetchDataset   callback to re-fetch dataset metadata after mutations
 */
export default function Overview({ dataset, filesOperation, fetchDataset }) {
  // App-scoped modal/message instances (antd v5 context-aware API).
  const { modal, message } = App.useApp();
  const {
    fileList,
    pagination,
    selectedFiles,
    setSelectedFiles,
    previewVisible,
    previewFileName,
    previewContent,
    setPreviewVisible,
    handleDeleteFile,
    handleDownloadFile,
    handleBatchDeleteFiles,
    handleBatchExport,
    handleCreateDirectory,
    handleDownloadDirectory,
    handleDeleteDirectory,
  } = filesOperation;
  // Multi-select config for the file table (currently disabled on the Table below).
  const rowSelection = {
    onChange: (selectedRowKeys: React.Key[], selectedRows: any[]) => {
      setSelectedFiles(selectedRowKeys as number[]);
      console.log(
        `selectedRowKeys: ${selectedRowKeys}`,
        "selectedRows: ",
        selectedRows
      );
    },
  };
  // Basic dataset information shown in the Descriptions panel.
  const items: DescriptionsProps["items"] = [
    {
      key: "id",
      label: "ID",
      children: dataset.id,
    },
    {
      key: "name",
      label: "名称",
      children: dataset.name,
    },
    {
      key: "fileCount",
      label: "文件数",
      children: dataset.fileCount || 0,
    },
    {
      key: "size",
      label: "数据大小",
      children: dataset.size || "0 B",
    },
    {
      key: "datasetType",
      label: "类型",
      children: datasetTypeMap[dataset?.datasetType]?.label || "未知",
    },
    {
      key: "status",
      label: "状态",
      children: dataset?.status?.label || "未知",
    },
    {
      key: "createdBy",
      label: "创建者",
      children: dataset.createdBy || "未知",
    },
    {
      key: "targetLocation",
      label: "存储路径",
      children: dataset.targetLocation || "未知",
    },
    {
      key: "pvcName",
      label: "存储名称",
      children: dataset.pvcName || "未知",
    },
    {
      key: "createdAt",
      label: "创建时间",
      children: dataset.createdAt,
    },
    {
      key: "updatedAt",
      label: "更新时间",
      children: dataset.updatedAt,
    },
    {
      key: "description",
      label: "描述",
      children: dataset.description || "无",
    },
  ];
  // File table column definitions. Directory rows are identified by a
  // synthetic id beginning with "directory-".
  const columns = [
    {
      title: "文件名",
      dataIndex: "fileName",
      key: "fileName",
      fixed: "left",
      render: (text: string, record: any) => {
        const isDirectory = record.id.startsWith('directory-');
        const iconSize = 16;
        const content = (
          <div className="flex items-center">
            {isDirectory ? (
              <Folder className="mr-2 text-blue-500" size={iconSize} />
            ) : (
              <File className="mr-2 text-black" size={iconSize} />
            )}
            <span className="truncate text-black">{text}</span>
          </div>
        );
        if (isDirectory) {
          return (
            <Button
              type="link"
              onClick={(e) => {
                const currentPath = filesOperation.pagination.prefix || '';
                // Directory prefixes must end with a trailing slash.
                const newPath = `${currentPath}${record.fileName}/`;
                filesOperation.fetchFiles(newPath, 1, filesOperation.pagination.pageSize);
              }}
            >
              {content}
            </Button>
          );
        }
        // NOTE(review): clicking a regular file is a no-op here even though a
        // preview modal exists below — confirm whether preview should be wired up.
        return (
          <Button
            type="link"
            onClick={(e) => {}}
          >
            {content}
          </Button>
        );
      },
    },
    {
      title: "大小",
      dataIndex: "fileSize",
      key: "fileSize",
      width: 150,
      render: (text: number, record: any) => {
        const isDirectory = record.id.startsWith('directory-');
        if (isDirectory) {
          // Directories carry an aggregated size (may be absent → 0).
          return formatBytes(record.fileSize || 0);
        }
        return formatBytes(text)
      },
    },
    {
      // Number of files contained in a directory; "-" for regular files.
      title: "包含文件数",
      dataIndex: "fileCount",
      key: "fileCount",
      width: 120,
      render: (text: number, record: any) => {
        const isDirectory = record.id.startsWith('directory-');
        if (!isDirectory) {
          return "-";
        }
        return record.fileCount ?? 0;
      },
    },
    {
      title: "上传时间",
      dataIndex: "uploadTime",
      key: "uploadTime",
      width: 200,
      render: (text) => formatDateTime(text),
    },
    {
      title: "操作",
      key: "action",
      width: 180,
      fixed: "right",
      render: (_, record) => {
        const isDirectory = record.id.startsWith('directory-');
        if (isDirectory) {
          const currentPath = filesOperation.pagination.prefix || '';
          const fullPath = `${currentPath}${record.fileName}/`;
          return (
            <div className="flex">
              <Button
                size="small"
                type="link"
                onClick={() => handleDownloadDirectory(fullPath, record.fileName)}
              >
              </Button>
              <Button
                size="small"
                type="link"
                onClick={() => {
                  // Deleting a folder is destructive and recursive, so confirm first.
                  modal.confirm({
                    title: '确认删除文件夹?',
                    content: `删除文件夹 "${record.fileName}" 将同时删除其中的所有文件和子文件夹,此操作不可恢复。`,
                    okText: '删除',
                    okType: 'danger',
                    cancelText: '取消',
                    onOk: async () => {
                      await handleDeleteDirectory(fullPath, record.fileName);
                      fetchDataset();
                    },
                  });
                }}
              >
              </Button>
            </div>
          );
        }
        return (
          <div className="flex">
            <Button
              size="small"
              type="link"
              onClick={() => handleDownloadFile(record)}
            >
            </Button>
            <Button
              size="small"
              type="link"
              onClick={async () => {
                // Refresh dataset metadata (file count / size) after deletion.
                await handleDeleteFile(record);
                fetchDataset()
              }
            }
            >
            </Button>
          </div>
        )},
    },
  ];
  // NOTE(review): several Button/heading elements below render without
  // visible text — the label strings appear to have been lost; confirm
  // against the original design.
  return (
    <>
      <div className=" flex flex-col gap-4">
        {/* Basic information */}
        <Descriptions
          title="基本信息"
          layout="vertical"
          size="small"
          items={items}
          column={5}
        />
        {/* File list header with "new folder" action */}
        <div className="flex items-center justify-between mt-8 mb-2">
          <h2 className="text-base font-semibold"></h2>
          <Button
            type="primary"
            size="small"
            onClick={() => {
              // Folder name captured via closure from the Input inside confirm.
              let dirName = "";
              modal.confirm({
                title: "新建文件夹",
                content: (
                  <Input
                    autoFocus
                    placeholder="请输入文件夹名称"
                    onChange={(e) => {
                      dirName = e.target.value?.trim();
                    }}
                  />
                ),
                okText: "确定",
                cancelText: "取消",
                onOk: async () => {
                  // Rejecting keeps the confirm dialog open until a name is given.
                  if (!dirName) {
                    message.warning("请输入文件夹名称");
                    return Promise.reject();
                  }
                  await handleCreateDirectory(dirName);
                },
              });
            }}
          >
          </Button>
        </div>
        {selectedFiles.length > 0 && (
          <div className="flex items-center gap-2 p-3 bg-blue-50 rounded-lg border border-blue-200">
            <span className="text-sm text-blue-700 font-medium">
              {selectedFiles.length}
            </span>
            <Button
              onClick={handleBatchExport}
              className="ml-auto bg-transparent"
            >
              <Download className="w-4 h-4 mr-2" />
            </Button>
            <Button
              onClick={handleBatchDeleteFiles}
              className="text-red-600 hover:text-red-700 hover:bg-red-50 bg-transparent"
            >
              <Trash2 className="w-4 h-4 mr-2" />
            </Button>
          </div>
        )}
        <div className="overflow-x-auto">
          <div className="mb-2">
            {(filesOperation.pagination.prefix || '') !== '' && (
              <Button
                type="link"
                onClick={() => {
                  // Navigate to the parent directory.
                  const currentPath = filesOperation.pagination.prefix || '';
                  // Strip the trailing slash, then split on slashes.
                  const trimmedPath = currentPath.replace(/\/$/, '');
                  const pathParts = trimmedPath.split('/');
                  // Drop the deepest directory name.
                  pathParts.pop();
                  // Re-join; keep a trailing slash when anything remains.
                  const parentPath = pathParts.length > 0 ? `${pathParts.join('/')}/` : '';
                  filesOperation.fetchFiles(parentPath, 1, filesOperation.pagination.pageSize);
                }}
                className="p-0"
              >
                <span className="flex items-center text-blue-500">
                  <svg
                    className="w-4 h-4 mr-1"
                    fill="none"
                    stroke="currentColor"
                    viewBox="0 0 24 24"
                    xmlns="http://www.w3.org/2000/svg"
                  >
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M10 19l-7-7m0 0l7-7m-7 7h18"
                    />
                  </svg>
                </span>
              </Button>
            )}
            {filesOperation.pagination.prefix && (
              <span className="ml-2 text-gray-600">: {filesOperation.pagination.prefix}</span>
            )}
          </div>
          <Table
            size="middle"
            rowKey="id"
            columns={columns}
            dataSource={fileList}
            // rowSelection={rowSelection}
            scroll={{ x: "max-content", y: 600 }}
            pagination={{
              ...pagination,
              showTotal: (total) => `${total}`,
              onChange: (page, pageSize) => {
                // fetchFiles itself syncs current/pageSize back into pagination.
                filesOperation.fetchFiles(filesOperation.pagination.prefix, page, pageSize);
              }
            }}
          />
        </div>
      </div>
      {/* File preview modal */}
      <Modal
        title={`文件预览:${previewFileName}`}
        open={previewVisible}
        onCancel={() => setPreviewVisible(false)}
        footer={null}
        width={700}
      >
        <pre
          style={{
            whiteSpace: "pre-wrap",
            wordBreak: "break-all",
            fontSize: 14,
            color: "#222",
          }}
        >
          {previewContent}
        </pre>
      </Modal>
    </>
  );
}

View File

@@ -1,149 +1,186 @@
import type {
Dataset,
DatasetFile,
} from "@/pages/DataManagement/dataset.model";
import { App } from "antd";
import { useState } from "react";
import {
deleteDatasetFileUsingDelete,
downloadFileByIdUsingGet,
exportDatasetUsingPost,
queryDatasetFilesUsingGet,
} from "../dataset.api";
import { useParams } from "react-router";
/**
 * React hook bundling dataset-file list state and operations (paged fetch
 * with a directory prefix, preview, download, delete, batch delete/export)
 * for the dataset detail page.
 *
 * @param dataset the dataset whose files are managed
 */
export function useFilesOperation(dataset: Dataset) {
  const { message } = App.useApp();
  const { id } = useParams(); // dataset id from the dynamic route segment
  // File list state.
  const [fileList, setFileList] = useState<DatasetFile[]>([]);
  const [selectedFiles, setSelectedFiles] = useState<number[]>([]);
  const [pagination, setPagination] = useState<{
    current: number;
    pageSize: number;
    total: number;
    prefix?: string;
  }>({ current: 1, pageSize: 10, total: 0, prefix: '' });
  // File-preview state.
  const [previewVisible, setPreviewVisible] = useState(false);
  const [previewContent, setPreviewContent] = useState("");
  const [previewFileName, setPreviewFileName] = useState("");
  /**
   * Fetch one page of files under a directory prefix.
   *
   * When `prefix` is omitted the current prefix is kept, so a bare
   * `fetchFiles()` refreshes the directory the user is looking at.
   * (The previous `prefix: string = ''` default made the fallback branch
   * unreachable and silently reset every refresh to the root directory.)
   */
  const fetchFiles = async (prefix?: string, current?: number, pageSize?: number) => {
    const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
    const params: any = {
      page: current !== undefined ? current : pagination.current,
      size: pageSize !== undefined ? pageSize : pagination.pageSize,
      isWithDirectory: true,
      prefix: targetPrefix,
    };
    const { data } = await queryDatasetFilesUsingGet(id!, params);
    setFileList(data.content || []);
    // Keep pagination in sync with what was actually requested.
    setPagination(prev => ({
      ...prev,
      current: params.page,
      pageSize: params.size,
      prefix: targetPrefix,
      total: data.totalElements || 0,
    }));
  };
  // Delete every selected file, then refresh. All deletes are awaited so the
  // refreshed list cannot still contain files whose deletion is in flight
  // (the previous fire-and-forget forEach refreshed too early).
  const handleBatchDeleteFiles = async () => {
    if (selectedFiles.length === 0) {
      message.warning({ content: "请先选择要删除的文件" });
      return;
    }
    await Promise.all(
      selectedFiles.map((fileId) =>
        fetch(`/api/datasets/${dataset.id}/files/${fileId}`, {
          method: "DELETE",
        })
      )
    );
    fetchFiles(); // refresh the file list
    setSelectedFiles([]); // clear the selection
    message.success({
      content: `已删除 ${selectedFiles.length} 个文件`,
    });
  };
  // Download a single file via the export API.
  const handleDownloadFile = async (file: DatasetFile) => {
    await downloadFileByIdUsingGet(dataset.id, file.id, file.fileName);
    message.success({
      content: `已导出 1 个文件`,
    });
    setSelectedFiles([]); // clear the selection
  };
  // Returns a click handler that loads the file's text content into the
  // preview modal.
  const handleShowFile = (file: any) => async () => {
    try {
      const res = await fetch(`/api/datasets/${dataset.id}/file/${file.id}`);
      const data = await res.text();
      setPreviewFileName(file.fileName);
      setPreviewContent(data);
      setPreviewVisible(true);
    } catch (err) {
      message.error({ content: "文件预览失败" });
    }
  };
  // Delete a single file and refresh the current directory listing.
  const handleDeleteFile = async (file) => {
    try {
      await deleteDatasetFileUsingDelete(dataset.id, file.id);
      fetchFiles(); // refresh the file list
      message.success({ content: `文件 ${file.fileName} 已删除` });
    } catch (error) {
      message.error({ content: `文件 ${file.fileName} 删除失败` });
    }
  };
  // Export every selected file as a batch.
  const handleBatchExport = () => {
    if (selectedFiles.length === 0) {
      message.warning({ content: "请先选择要导出的文件" });
      return;
    }
    exportDatasetUsingPost(dataset.id, { fileIds: selectedFiles })
      .then(() => {
        message.success({
          content: `已导出 ${selectedFiles.length} 个文件`,
        });
        setSelectedFiles([]); // clear the selection
      })
      .catch(() => {
        message.error({
          content: "导出失败,请稍后再试",
        });
      });
  };
  return {
    fileList,
    selectedFiles,
    setSelectedFiles,
    pagination,
    setPagination,
    previewVisible,
    setPreviewVisible,
    previewContent,
    previewFileName,
    setPreviewContent,
    setPreviewFileName,
    fetchFiles,
    setFileList,
    handleBatchDeleteFiles,
    handleDownloadFile,
    handleShowFile,
    handleDeleteFile,
    handleBatchExport,
  };
}
import type {
Dataset,
DatasetFile,
} from "@/pages/DataManagement/dataset.model";
import { App } from "antd";
import { useState } from "react";
import {
deleteDatasetFileUsingDelete,
downloadFileByIdUsingGet,
exportDatasetUsingPost,
queryDatasetFilesUsingGet,
createDatasetDirectoryUsingPost,
downloadDirectoryUsingGet,
deleteDirectoryUsingDelete,
} from "../dataset.api";
import { useParams } from "react-router";
export function useFilesOperation(dataset: Dataset) {
const { message } = App.useApp();
const { id } = useParams(); // 获取动态路由参数
// 文件相关状态
const [fileList, setFileList] = useState<DatasetFile[]>([]);
const [selectedFiles, setSelectedFiles] = useState<number[]>([]);
const [pagination, setPagination] = useState<{
current: number;
pageSize: number;
total: number;
prefix?: string;
}>({ current: 1, pageSize: 10, total: 0, prefix: '' });
// 文件预览相关状态
const [previewVisible, setPreviewVisible] = useState(false);
const [previewContent, setPreviewContent] = useState("");
const [previewFileName, setPreviewFileName] = useState("");
const fetchFiles = async (prefix?: string, current?, pageSize?) => {
// 如果明确传了 prefix(包括空字符串),使用传入的值;否则使用当前 pagination.prefix
const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
const params: any = {
page: current !== undefined ? current : pagination.current,
size: pageSize !== undefined ? pageSize : pagination.pageSize,
isWithDirectory: true,
prefix: targetPrefix,
};
const { data } = await queryDatasetFilesUsingGet(id!, params);
setFileList(data.content || []);
// Update pagination with current prefix
setPagination(prev => ({
...prev,
current: params.page,
pageSize: params.size,
prefix: targetPrefix,
total: data.totalElements || 0,
}));
};
const handleBatchDeleteFiles = () => {
if (selectedFiles.length === 0) {
message.warning({ content: "请先选择要删除的文件" });
return;
}
// 执行批量删除逻辑
selectedFiles.forEach(async (fileId) => {
await fetch(`/api/datasets/${dataset.id}/files/${fileId}`, {
method: "DELETE",
});
});
fetchFiles(); // 刷新文件列表
setSelectedFiles([]); // 清空选中状态
message.success({
content: `已删除 ${selectedFiles.length} 个文件`,
});
};
const handleDownloadFile = async (file: DatasetFile) => {
// 实际导出逻辑
await downloadFileByIdUsingGet(dataset.id, file.id, file.fileName);
// 假设导出成功
message.success({
content: `已导出 1 个文件`,
});
setSelectedFiles([]); // 清空选中状态
};
const handleShowFile = (file: any) => async () => {
// 请求文件内容并弹窗预览
try {
const res = await fetch(`/api/datasets/${dataset.id}/file/${file.id}`);
const data = await res.text();
setPreviewFileName(file.fileName);
setPreviewContent(data);
setPreviewVisible(true);
} catch (err) {
message.error({ content: "文件预览失败" });
}
};
/**
 * Delete a single file from the dataset, then refresh the listing.
 * On failure, surfaces an error toast and stops.
 */
const handleDeleteFile = async (file) => {
  try {
    await deleteDatasetFileUsingDelete(dataset.id, file.id);
  } catch (error) {
    message.error({ content: `文件 ${file.fileName} 删除失败` });
    return;
  }
  fetchFiles(); // refresh the file list
  message.success({ content: `文件 ${file.fileName} 已删除` });
};
/**
 * Export every selected file in a single request, then clear the
 * selection. Warns when nothing is selected.
 */
const handleBatchExport = () => {
  const count = selectedFiles.length;
  if (count === 0) {
    message.warning({ content: "请先选择要导出的文件" });
    return;
  }
  const onExported = () => {
    message.success({
      content: `已导出 ${count} 个文件`,
    });
    setSelectedFiles([]); // clear the selection
  };
  const onFailed = () => {
    message.error({
      content: "导出失败,请稍后再试",
    });
  };
  exportDatasetUsingPost(dataset.id, { fileIds: selectedFiles })
    .then(onExported)
    .catch(onFailed);
};
// Public API of the hook: list state, preview state, and all file /
// directory handlers.
return {
  fileList,
  selectedFiles,
  setSelectedFiles,
  pagination,
  setPagination,
  previewVisible,
  setPreviewVisible,
  previewContent,
  previewFileName,
  setPreviewContent,
  setPreviewFileName,
  fetchFiles,
  setFileList,
  handleBatchDeleteFiles,
  handleDownloadFile,
  handleShowFile,
  handleDeleteFile,
  handleBatchExport,
  // Create a sub-directory under the directory currently being browsed.
  handleCreateDirectory: async (directoryName: string) => {
    const currentPrefix = pagination.prefix || "";
    try {
      await createDatasetDirectoryUsingPost(dataset.id, {
        parentPrefix: currentPrefix,
        directoryName,
      });
      // On success, reload the current directory starting from page 1.
      await fetchFiles(currentPrefix, 1, pagination.pageSize);
      message.success({ content: `文件夹 ${directoryName} 创建成功` });
    } catch (error) {
      message.error({ content: `文件夹 ${directoryName} 创建失败` });
      throw error; // re-thrown so callers (e.g. a modal) can react to failure
    }
  },
  // Download a directory packaged as a zip archive.
  handleDownloadDirectory: async (directoryPath: string, directoryName: string) => {
    try {
      await downloadDirectoryUsingGet(dataset.id, directoryPath);
      message.success({ content: `文件夹 ${directoryName} 下载成功` });
    } catch (error) {
      message.error({ content: `文件夹 ${directoryName} 下载失败` });
    }
  },
  // Recursively delete a directory, then reload the current listing.
  handleDeleteDirectory: async (directoryPath: string, directoryName: string) => {
    try {
      await deleteDirectoryUsingDelete(dataset.id, directoryPath);
      // On success, reload the current directory starting from page 1.
      const currentPrefix = pagination.prefix || "";
      await fetchFiles(currentPrefix, 1, pagination.pageSize);
      message.success({ content: `文件夹 ${directoryName} 已删除` });
    } catch (error) {
      message.error({ content: `文件夹 ${directoryName} 删除失败` });
    }
  },
};
}

View File

@@ -1,398 +1,399 @@
import { Card, Button, Statistic, Table, Tooltip, Tag, App } from "antd";
import {
DownloadOutlined,
EditOutlined,
DeleteOutlined,
PlusOutlined,
UploadOutlined,
} from "@ant-design/icons";
import TagManager from "@/components/business/TagManagement";
import { Link, useNavigate } from "react-router";
import { useEffect, useMemo, useState } from "react";
import { SearchControls } from "@/components/SearchControls";
import CardView from "@/components/CardView";
import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { datasetStatusMap, datasetTypeMap, mapDataset } from "../dataset.const";
import useFetchData from "@/hooks/useFetchData";
import {
downloadDatasetUsingGet,
getDatasetStatisticsUsingGet,
queryDatasetsUsingGet,
deleteDatasetByIdUsingDelete,
createDatasetTagUsingPost,
queryDatasetTagsUsingGet,
deleteDatasetTagUsingDelete,
updateDatasetTagUsingPut,
} from "../dataset.api";
import { formatBytes } from "@/utils/unit";
import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "../Detail/components/ImportConfiguration";
export default function DatasetManagementPage() {
const navigate = useNavigate();
const { message } = App.useApp();
const [viewMode, setViewMode] = useState<"card" | "list">("card");
const [editDatasetOpen, setEditDatasetOpen] = useState(false);
const [currentDataset, setCurrentDataset] = useState<Dataset | null>(null);
const [showUploadDialog, setShowUploadDialog] = useState(false);
const [statisticsData, setStatisticsData] = useState<any>({
count: {},
size: {},
});
async function fetchStatistics() {
const { data } = await getDatasetStatisticsUsingGet();
const statistics = {
size: [
{
title: "数据集总数",
value: data?.totalDatasets || 0,
},
{
title: "文件总数",
value: data?.totalFiles || 0,
},
{
title: "总大小",
value: formatBytes(data?.totalSize) || '0 B',
},
],
count: [
{
title: "文本",
value: data?.count?.text || 0,
},
{
title: "图像",
value: data?.count?.image || 0,
},
{
title: "音频",
value: data?.count?.audio || 0,
},
{
title: "视频",
value: data?.count?.video || 0,
},
],
};
setStatisticsData(statistics);
}
const [tags, setTags] = useState<string[]>([]);
useEffect(() => {
const fetchTags = async () => {
const { data } = await queryDatasetTagsUsingGet();
setTags(data.map((tag) => tag.name));
};
fetchTags();
}, []);
const filterOptions = useMemo(
() => [
{
key: "type",
label: "类型",
options: [...Object.values(datasetTypeMap)],
},
{
key: "status",
label: "状态",
options: [...Object.values(datasetStatusMap)],
},
{
key: "tags",
label: "标签",
mode: "multiple",
options: tags.map((tag) => ({ label: tag, value: tag })),
},
],
[tags]
);
const {
loading,
tableData,
searchParams,
pagination,
fetchData,
setSearchParams,
handleFiltersChange,
handleKeywordChange,
} = useFetchData<Dataset>(
queryDatasetsUsingGet,
mapDataset,
30000, // 30秒轮询间隔
true, // 自动刷新
[fetchStatistics], // 额外的轮询函数
0
);
const handleDownloadDataset = async (dataset: Dataset) => {
await downloadDatasetUsingGet(dataset.id, dataset.name);
message.success("数据集下载成功");
};
const handleDeleteDataset = async (id: number) => {
if (!id) return;
await deleteDatasetByIdUsingDelete(id);
fetchData({ pageOffset: 0 });
message.success("数据删除成功");
};
const handleImportData = (dataset: Dataset) => {
setCurrentDataset(dataset);
setShowUploadDialog(true);
};
const handleRefresh = async (showMessage = true) => {
await fetchData({ pageOffset: 0 });
if (showMessage) {
message.success("数据已刷新");
}
};
const operations = [
{
key: "edit",
label: "编辑",
icon: <EditOutlined />,
onClick: (item: Dataset) => {
setCurrentDataset(item);
setEditDatasetOpen(true);
},
},
{
key: "import",
label: "导入",
icon: <UploadOutlined />,
onClick: (item: Dataset) => {
handleImportData(item);
},
},
{
key: "download",
label: "下载",
icon: <DownloadOutlined />,
onClick: (item: Dataset) => {
if (!item.id) return;
handleDownloadDataset(item);
},
},
{
key: "delete",
label: "删除",
danger: true,
confirm: {
title: "确认删除该数据集?",
description: "删除后该数据集将无法恢复,请谨慎操作。",
okText: "删除",
cancelText: "取消",
okType: "danger",
},
icon: <DeleteOutlined />,
onClick: (item: Dataset) => handleDeleteDataset(item.id),
},
];
const columns = [
{
title: "名称",
dataIndex: "name",
key: "name",
fixed: "left",
render: (name, record) => (
<Button
type="link"
onClick={() => navigate(`/data/management/detail/${record.id}`)}
>
{name}
</Button>
),
},
{
title: "类型",
dataIndex: "type",
key: "type",
width: 100,
},
{
title: "状态",
dataIndex: "status",
key: "status",
render: (status: any) => {
return (
<Tag icon={status?.icon} color={status?.color}>
{status?.label}
</Tag>
);
},
width: 120,
},
{
title: "大小",
dataIndex: "size",
key: "size",
width: 120,
},
{
title: "文件数",
dataIndex: "fileCount",
key: "fileCount",
width: 100,
},
// {
// title: "创建者",
// dataIndex: "createdBy",
// key: "createdBy",
// width: 120,
// },
{
title: "存储路径",
dataIndex: "targetLocation",
key: "targetLocation",
width: 200,
ellipsis: true,
},
{
title: "创建时间",
dataIndex: "createdAt",
key: "createdAt",
width: 180,
},
{
title: "更新时间",
dataIndex: "updatedAt",
key: "updatedAt",
width: 180,
},
{
title: "操作",
key: "actions",
width: 200,
fixed: "right",
render: (_: any, record: Dataset) => (
<div className="flex items-center gap-2">
{operations.map((op) => (
<Tooltip key={op.key} title={op.label}>
<Button
type="text"
icon={op.icon}
onClick={() => op.onClick(record)}
/>
</Tooltip>
))}
</div>
),
},
];
const renderCardView = () => (
<CardView
loading={loading}
data={tableData}
pageSize={9}
operations={operations}
pagination={pagination}
onView={(dataset) => {
navigate("/data/management/detail/" + dataset.id);
}}
/>
);
const renderListView = () => (
<Card>
<Table
columns={columns}
dataSource={tableData}
pagination={pagination}
rowKey="id"
scroll={{ x: "max-content", y: "calc(100vh - 30rem)" }}
/>
</Card>
);
useEffect(() => {
const refresh = () => {
handleRefresh(true);
};
window.addEventListener("update:datasets", refresh);
return () => {
window.removeEventListener("update:datasets", refresh);
};
}, []);
return (
<div className="gap-4 h-full flex flex-col">
{/* Header */}
<div className="flex items-center justify-between">
<h1 className="text-xl font-bold"></h1>
<div className="flex gap-2 items-center">
{/* tasks */}
<TagManager
onCreate={createDatasetTagUsingPost}
onDelete={(ids: string) => deleteDatasetTagUsingDelete({ ids })}
onUpdate={updateDatasetTagUsingPut}
onFetch={queryDatasetTagsUsingGet}
/>
<Link to="/data/management/create">
<Button
type="primary"
icon={<PlusOutlined className="w-4 h-4 mr-2" />}
>
</Button>
</Link>
</div>
</div>
{/* Statistics */}
<div className="grid grid-cols-1 gap-4">
<Card>
<div className="grid grid-cols-3">
{statisticsData.size?.map?.((item) => (
<Statistic
title={item.title}
key={item.title}
value={`${item.value}`}
/>
))}
</div>
</Card>
</div>
<SearchControls
searchTerm={searchParams.keyword}
onSearchChange={handleKeywordChange}
searchPlaceholder="搜索数据集名称、描述"
filters={filterOptions}
onFiltersChange={handleFiltersChange}
onClearFilters={() => setSearchParams({ ...searchParams, filter: {} })}
viewMode={viewMode}
onViewModeChange={setViewMode}
showViewToggle
onReload={handleRefresh}
/>
{viewMode === "card" ? renderCardView() : renderListView()}
<EditDataset
open={editDatasetOpen}
data={currentDataset}
onClose={() => {
setCurrentDataset(null);
setEditDatasetOpen(false);
}}
onRefresh={handleRefresh}
/>
<ImportConfiguration
data={currentDataset}
open={showUploadDialog}
onClose={() => {
setCurrentDataset(null);
setShowUploadDialog(false);
}}
updateEvent="update:datasets"
/>
</div>
);
}
import { Card, Button, Statistic, Table, Tooltip, Tag, App } from "antd";
import {
DownloadOutlined,
EditOutlined,
DeleteOutlined,
PlusOutlined,
UploadOutlined,
} from "@ant-design/icons";
import TagManager from "@/components/business/TagManagement";
import { Link, useNavigate } from "react-router";
import { useEffect, useMemo, useState } from "react";
import { SearchControls } from "@/components/SearchControls";
import CardView from "@/components/CardView";
import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { datasetStatusMap, datasetTypeMap, mapDataset } from "../dataset.const";
import useFetchData from "@/hooks/useFetchData";
import {
downloadDatasetUsingGet,
getDatasetStatisticsUsingGet,
queryDatasetsUsingGet,
deleteDatasetByIdUsingDelete,
createDatasetTagUsingPost,
queryDatasetTagsUsingGet,
deleteDatasetTagUsingDelete,
updateDatasetTagUsingPut,
} from "../dataset.api";
import { formatBytes } from "@/utils/unit";
import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "../Detail/components/ImportConfiguration";
export default function DatasetManagementPage() {
const navigate = useNavigate();
const { message } = App.useApp();
const [viewMode, setViewMode] = useState<"card" | "list">("card");
const [editDatasetOpen, setEditDatasetOpen] = useState(false);
const [currentDataset, setCurrentDataset] = useState<Dataset | null>(null);
const [showUploadDialog, setShowUploadDialog] = useState(false);
const [statisticsData, setStatisticsData] = useState<any>({
count: {},
size: {},
});
/**
 * Load dashboard statistics and map them into the layout shape used by the
 * statistics cards: `size` feeds the top summary row (dataset/file counts
 * and total byte size), `count` breaks totals down by modality.
 */
async function fetchStatistics() {
  const { data } = await getDatasetStatisticsUsingGet();
  const statistics = {
    size: [
      {
        title: "数据集总数",
        value: data?.totalDatasets || 0,
      },
      {
        title: "文件总数",
        value: data?.totalFiles || 0,
      },
      {
        title: "总大小",
        value: formatBytes(data?.totalSize) || '0 B',
      },
    ],
    count: [
      {
        title: "文本",
        value: data?.count?.text || 0,
      },
      {
        title: "图像",
        value: data?.count?.image || 0,
      },
      {
        title: "音频",
        value: data?.count?.audio || 0,
      },
      {
        title: "视频",
        value: data?.count?.video || 0,
      },
    ],
  };
  setStatisticsData(statistics);
}
const [tags, setTags] = useState<string[]>([]);
useEffect(() => {
const fetchTags = async () => {
const { data } = await queryDatasetTagsUsingGet();
setTags(data.map((tag) => tag.name));
};
fetchTags();
}, []);
const filterOptions = useMemo(
() => [
{
key: "type",
label: "类型",
options: [...Object.values(datasetTypeMap)],
},
{
key: "status",
label: "状态",
options: [...Object.values(datasetStatusMap)],
},
{
key: "tags",
label: "标签",
mode: "multiple",
options: tags.map((tag) => ({ label: tag, value: tag })),
},
],
[tags]
);
const {
loading,
tableData,
searchParams,
pagination,
fetchData,
setSearchParams,
handleFiltersChange,
handleKeywordChange,
} = useFetchData<Dataset>(
queryDatasetsUsingGet,
mapDataset,
30000, // 30秒轮询间隔
true, // 自动刷新
[fetchStatistics], // 额外的轮询函数
0
);
/**
 * Download the whole dataset as an archive.
 *
 * NOTE(review): `dataset.name` is passed as a second argument, but
 * `downloadDatasetUsingGet` in dataset.api declares only `id` — confirm
 * the intended download filename is actually honored.
 */
const handleDownloadDataset = async (dataset: Dataset) => {
  await downloadDatasetUsingGet(dataset.id, dataset.name);
  message.success("数据集下载成功");
};
const handleDeleteDataset = async (id: number) => {
if (!id) return;
await deleteDatasetByIdUsingDelete(id);
fetchData({ pageOffset: 0 });
message.success("数据删除成功");
};
const handleImportData = (dataset: Dataset) => {
setCurrentDataset(dataset);
setShowUploadDialog(true);
};
const handleRefresh = async (showMessage = true) => {
await fetchData({ pageOffset: 0 });
if (showMessage) {
message.success("数据已刷新");
}
};
const operations = [
{
key: "edit",
label: "编辑",
icon: <EditOutlined />,
onClick: (item: Dataset) => {
setCurrentDataset(item);
setEditDatasetOpen(true);
},
},
{
key: "import",
label: "导入",
icon: <UploadOutlined />,
onClick: (item: Dataset) => {
handleImportData(item);
},
},
{
key: "download",
label: "下载",
icon: <DownloadOutlined />,
onClick: (item: Dataset) => {
if (!item.id) return;
handleDownloadDataset(item);
},
},
{
key: "delete",
label: "删除",
danger: true,
confirm: {
title: "确认删除该数据集?",
description: "删除后该数据集将无法恢复,请谨慎操作。",
okText: "删除",
cancelText: "取消",
okType: "danger",
},
icon: <DeleteOutlined />,
onClick: (item: Dataset) => handleDeleteDataset(item.id),
},
];
const columns = [
{
title: "名称",
dataIndex: "name",
key: "name",
fixed: "left",
render: (name, record) => (
<Button
type="link"
onClick={() => navigate(`/data/management/detail/${record.id}`)}
>
{name}
</Button>
),
},
{
title: "类型",
dataIndex: "type",
key: "type",
width: 100,
},
{
title: "状态",
dataIndex: "status",
key: "status",
render: (status: any) => {
return (
<Tag icon={status?.icon} color={status?.color}>
{status?.label}
</Tag>
);
},
width: 120,
},
{
title: "大小",
dataIndex: "size",
key: "size",
width: 120,
},
{
title: "文件数",
dataIndex: "fileCount",
key: "fileCount",
width: 100,
},
// {
// title: "创建者",
// dataIndex: "createdBy",
// key: "createdBy",
// width: 120,
// },
{
title: "存储路径",
dataIndex: "targetLocation",
key: "targetLocation",
width: 200,
ellipsis: true,
},
{
title: "创建时间",
dataIndex: "createdAt",
key: "createdAt",
width: 180,
},
{
title: "更新时间",
dataIndex: "updatedAt",
key: "updatedAt",
width: 180,
},
{
title: "操作",
key: "actions",
width: 200,
fixed: "right",
render: (_: any, record: Dataset) => (
<div className="flex items-center gap-2">
{operations.map((op) => (
<Tooltip key={op.key} title={op.label}>
<Button
type="text"
icon={op.icon}
onClick={() => op.onClick(record)}
/>
</Tooltip>
))}
</div>
),
},
];
const renderCardView = () => (
<CardView
loading={loading}
data={tableData}
pageSize={9}
operations={operations}
pagination={pagination}
onView={(dataset) => {
navigate("/data/management/detail/" + dataset.id);
}}
/>
);
const renderListView = () => (
<Card>
<Table
columns={columns}
dataSource={tableData}
pagination={pagination}
rowKey="id"
scroll={{ x: "max-content", y: "calc(100vh - 30rem)" }}
/>
</Card>
);
useEffect(() => {
const refresh = () => {
handleRefresh(true);
};
window.addEventListener("update:datasets", refresh);
return () => {
window.removeEventListener("update:datasets", refresh);
};
}, []);
return (
<div className="gap-4 h-full flex flex-col">
{/* Header */}
<div className="flex items-center justify-between">
<h1 className="text-xl font-bold"></h1>
<div className="flex gap-2 items-center">
{/* tasks */}
<TagManager
onCreate={createDatasetTagUsingPost}
onDelete={(ids: string) => deleteDatasetTagUsingDelete({ ids })}
onUpdate={updateDatasetTagUsingPut}
onFetch={queryDatasetTagsUsingGet}
/>
<Link to="/data/management/create">
<Button
type="primary"
icon={<PlusOutlined className="w-4 h-4 mr-2" />}
>
</Button>
</Link>
</div>
</div>
{/* Statistics */}
<div className="grid grid-cols-1 gap-4">
<Card>
<div className="grid grid-cols-3">
{statisticsData.size?.map?.((item) => (
<Statistic
title={item.title}
key={item.title}
value={`${item.value}`}
/>
))}
</div>
</Card>
</div>
<SearchControls
searchTerm={searchParams.keyword}
onSearchChange={handleKeywordChange}
searchPlaceholder="搜索数据集名称、描述"
filters={filterOptions}
onFiltersChange={handleFiltersChange}
onClearFilters={() => setSearchParams({ ...searchParams, filter: {} })}
viewMode={viewMode}
onViewModeChange={setViewMode}
showViewToggle
onReload={handleRefresh}
/>
{viewMode === "card" ? renderCardView() : renderListView()}
<EditDataset
open={editDatasetOpen}
data={currentDataset}
onClose={() => {
setCurrentDataset(null);
setEditDatasetOpen(false);
}}
onRefresh={handleRefresh}
/>
<ImportConfiguration
data={currentDataset}
open={showUploadDialog}
onClose={() => {
setCurrentDataset(null);
setShowUploadDialog(false);
}}
prefix=""
updateEvent="update:datasets"
/>
</div>
);
}

View File

@@ -1,191 +1,220 @@
import { get, post, put, del, download } from "@/utils/request";
// 数据集统计接口
export function getDatasetStatisticsUsingGet() {
return get("/api/data-management/datasets/statistics");
}
export function queryDatasetStatisticsByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/statistics`);
}
// 查询数据集列表
export function queryDatasetsUsingGet(params?: any) {
return get("/api/data-management/datasets", params);
}
// 创建数据集
export function createDatasetUsingPost(data: any) {
return post("/api/data-management/datasets", data);
}
// 根据ID获取数据集详情
export function queryDatasetByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}`);
}
// 更新数据集
export function updateDatasetByIdUsingPut(id: string | number, data: any) {
return put(`/api/data-management/datasets/${id}`, data);
}
// 删除数据集
export function deleteDatasetByIdUsingDelete(id: string | number) {
return del(`/api/data-management/datasets/${id}`);
}
// 下载数据集
// Accepts an optional fileName: call sites (e.g. handleDownloadDataset)
// already pass `dataset.name` as a second argument, which the previous
// one-parameter signature silently dropped. Mirrors downloadFileByIdUsingGet.
export function downloadDatasetUsingGet(id: string | number, fileName?: string) {
  return download(`/api/data-management/datasets/${id}/files/download`, null, fileName);
}
// 验证数据集
export function validateDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/validate`, data);
}
// 获取数据集文件列表
export function queryDatasetFilesUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/files`, params);
}
// 上传数据集文件
export function uploadDatasetFileUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/files`, data);
}
export function downloadFileByIdUsingGet(
id: string | number,
fileId: string | number,
fileName: string
) {
return download(
`/api/data-management/datasets/${id}/files/${fileId}/download`,
null,
fileName
);
}
// 删除数据集文件
export function deleteDatasetFileUsingDelete(
datasetId: string | number,
fileId: string | number
) {
return del(`/api/data-management/datasets/${datasetId}/files/${fileId}`);
}
// 文件预览
export function previewDatasetUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/preview`, params);
}
// 获取数据集标签
export function queryDatasetTagsUsingGet(params?: any) {
return get("/api/data-management/tags", params);
}
// 创建数据集标签
export function createDatasetTagUsingPost(data: any) {
return post("/api/data-management/tags", data);
}
// 更新数据集标签
export function updateDatasetTagUsingPut(data: any) {
return put(`/api/data-management/tags`, data);
}
// 删除数据集标签
export function deleteDatasetTagUsingDelete(data: any) {
return del(`/api/data-management/tags`, data);
}
// 数据集质量检查
export function checkDatasetQualityUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/quality-check`, data);
}
// 获取数据集质量报告
export function getDatasetQualityReportUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/quality-report`);
}
// 数据集分析
export function analyzeDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/analyze`, data);
}
// 获取数据集分析结果
export function getDatasetAnalysisUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/analysis`);
}
// 导出数据集
export function exportDatasetUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/export`, data);
}
// 复制数据集
export function copyDatasetUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/copy`, data);
}
// 获取数据集版本列表
export function queryDatasetVersionsUsingGet(
id: string | number,
params?: any
) {
return get(`/api/data-management/datasets/${id}/versions`, params);
}
// 创建数据集版本
export function createDatasetVersionUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/versions`, data);
}
// 切换数据集版本
export function switchDatasetVersionUsingPut(
id: string | number,
versionId: string | number
) {
return put(
`/api/data-management/datasets/${id}/versions/${versionId}/switch`
);
}
// 删除数据集版本
export function deleteDatasetVersionUsingDelete(
id: string | number,
versionId: string | number
) {
return del(`/api/data-management/datasets/${id}/versions/${versionId}`);
}
/**
* 文件上传相关接口
*/
export function preUploadUsingPost(id: string | number, data: any) {
return post(
`/api/data-management/datasets/${id}/files/upload/pre-upload`,
data
);
}
export function cancelUploadUsingPut(id) {
return put(
`/api/data-management/datasets/upload/cancel-upload/${id}`,
{},
{ showLoading: false }
);
}
export function uploadFileChunkUsingPost(id: string | number, params, config) {
return post(
`/api/data-management/datasets/${id}/files/upload/chunk`,
params,
{
showLoading: false,
...config,
}
);
}
import { get, post, put, del, download } from "@/utils/request";
// 数据集统计接口
export function getDatasetStatisticsUsingGet() {
return get("/api/data-management/datasets/statistics");
}
export function queryDatasetStatisticsByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/statistics`);
}
// 查询数据集列表
export function queryDatasetsUsingGet(params?: any) {
return get("/api/data-management/datasets", params);
}
// 创建数据集
export function createDatasetUsingPost(data: any) {
return post("/api/data-management/datasets", data);
}
// 根据ID获取数据集详情
export function queryDatasetByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}`);
}
// 更新数据集
export function updateDatasetByIdUsingPut(id: string | number, data: any) {
return put(`/api/data-management/datasets/${id}`, data);
}
// 删除数据集
export function deleteDatasetByIdUsingDelete(id: string | number) {
return del(`/api/data-management/datasets/${id}`);
}
// 下载数据集
// Accepts an optional fileName: call sites (e.g. handleDownloadDataset)
// already pass `dataset.name` as a second argument, which the previous
// one-parameter signature silently dropped. Mirrors downloadFileByIdUsingGet.
export function downloadDatasetUsingGet(id: string | number, fileName?: string) {
  return download(`/api/data-management/datasets/${id}/files/download`, null, fileName);
}
// 验证数据集
export function validateDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/validate`, data);
}
// 获取数据集文件列表
export function queryDatasetFilesUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/files`, params);
}
// 上传数据集文件
export function uploadDatasetFileUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/files`, data);
}
// 新建数据集文件夹
export function createDatasetDirectoryUsingPost(
id: string | number,
data: { parentPrefix?: string; directoryName: string }
) {
return post(`/api/data-management/datasets/${id}/files/directories`, data);
}
// 下载文件夹(打包为zip)
// Downloads a directory as a zip archive named after its last path segment.
export function downloadDirectoryUsingGet(
  id: string | number,
  directoryPath: string
) {
  // Derive a readable archive name from the last non-empty path segment.
  const segments = directoryPath.split('/').filter(Boolean);
  const dirName = segments.length > 0 ? segments[segments.length - 1] : 'folder';
  const url = `/api/data-management/datasets/${id}/files/directories/download?prefix=${encodeURIComponent(directoryPath)}`;
  return download(url, null, `${dirName}.zip`);
}
// 删除文件夹(递归删除)
export function deleteDirectoryUsingDelete(
id: string | number,
directoryPath: string
) {
return del(`/api/data-management/datasets/${id}/files/directories?prefix=${encodeURIComponent(directoryPath)}`);
}
export function downloadFileByIdUsingGet(
id: string | number,
fileId: string | number,
fileName: string
) {
return download(
`/api/data-management/datasets/${id}/files/${fileId}/download`,
null,
fileName
);
}
// 删除数据集文件
export function deleteDatasetFileUsingDelete(
datasetId: string | number,
fileId: string | number
) {
return del(`/api/data-management/datasets/${datasetId}/files/${fileId}`);
}
// 文件预览
export function previewDatasetUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/preview`, params);
}
// 获取数据集标签
export function queryDatasetTagsUsingGet(params?: any) {
return get("/api/data-management/tags", params);
}
// 创建数据集标签
export function createDatasetTagUsingPost(data: any) {
return post("/api/data-management/tags", data);
}
// 更新数据集标签
export function updateDatasetTagUsingPut(data: any) {
return put(`/api/data-management/tags`, data);
}
// 删除数据集标签
export function deleteDatasetTagUsingDelete(data: any) {
return del(`/api/data-management/tags`, data);
}
// 数据集质量检查
export function checkDatasetQualityUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/quality-check`, data);
}
// 获取数据集质量报告
export function getDatasetQualityReportUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/quality-report`);
}
// 数据集分析
export function analyzeDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/analyze`, data);
}
// 获取数据集分析结果
export function getDatasetAnalysisUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/analysis`);
}
// 导出数据集
export function exportDatasetUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/export`, data);
}
// 复制数据集
export function copyDatasetUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/copy`, data);
}
// 获取数据集版本列表
export function queryDatasetVersionsUsingGet(
id: string | number,
params?: any
) {
return get(`/api/data-management/datasets/${id}/versions`, params);
}
// 创建数据集版本
export function createDatasetVersionUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/versions`, data);
}
// 切换数据集版本
export function switchDatasetVersionUsingPut(
id: string | number,
versionId: string | number
) {
return put(
`/api/data-management/datasets/${id}/versions/${versionId}/switch`
);
}
// 删除数据集版本
export function deleteDatasetVersionUsingDelete(
id: string | number,
versionId: string | number
) {
return del(`/api/data-management/datasets/${id}/versions/${versionId}`);
}
/**
* 文件上传相关接口
*/
export function preUploadUsingPost(id: string | number, data: any) {
return post(
`/api/data-management/datasets/${id}/files/upload/pre-upload`,
data
);
}
export function cancelUploadUsingPut(id) {
return put(
`/api/data-management/datasets/upload/cancel-upload/${id}`,
{},
{ showLoading: false }
);
}
export function uploadFileChunkUsingPost(id: string | number, params, config) {
return post(
`/api/data-management/datasets/${id}/files/upload/chunk`,
params,
{
showLoading: false,
...config,
}
);
}

View File

@@ -1,67 +1,69 @@
import {
cancelUploadUsingPut,
preUploadUsingPost,
uploadFileChunkUsingPost,
} from "@/pages/DataManagement/dataset.api";
import { Button, Empty, Progress } from "antd";
import { DeleteOutlined } from "@ant-design/icons";
import { useEffect } from "react";
import { useFileSliceUpload } from "@/hooks/useSliceUpload";
export default function TaskUpload() {
const { createTask, taskList, removeTask, handleUpload } = useFileSliceUpload(
{
preUpload: preUploadUsingPost,
uploadChunk: uploadFileChunkUsingPost,
cancelUpload: cancelUploadUsingPut,
}
);
useEffect(() => {
const uploadHandler = (e: any) => {
const { files } = e.detail;
const task = createTask(e.detail);
handleUpload({ task, files });
};
window.addEventListener("upload:dataset", uploadHandler);
return () => {
window.removeEventListener("upload:dataset", uploadHandler);
};
}, []);
return (
<div
className="w-90 max-w-90 max-h-96 overflow-y-auto p-2"
id="header-task-popover"
>
{taskList.length > 0 &&
taskList.map((task) => (
<div key={task.key} className="border-b border-gray-200 pb-2">
<div className="flex items-center justify-between">
<div>{task.title}</div>
<Button
type="text"
danger
disabled={!task?.cancelFn}
onClick={() =>
removeTask({
...task,
isCancel: true,
})
}
icon={<DeleteOutlined />}
></Button>
</div>
<Progress size="small" percent={task.percent} />
</div>
))}
{taskList.length === 0 && (
<Empty
image={Empty.PRESENTED_IMAGE_SIMPLE}
description="暂无上传任务"
/>
)}
</div>
);
}
import {
cancelUploadUsingPut,
preUploadUsingPost,
uploadFileChunkUsingPost,
} from "@/pages/DataManagement/dataset.api";
import { Button, Empty, Progress } from "antd";
import { DeleteOutlined } from "@ant-design/icons";
import { useEffect } from "react";
import { useFileSliceUpload } from "@/hooks/useSliceUpload";
/**
 * Global upload-task panel.
 *
 * Listens for "upload:dataset" window events, turns each event payload
 * into a slice-upload task, and renders per-task progress with a cancel
 * button. Leftover `console.log` debug statements have been removed.
 */
export default function TaskUpload() {
  const { createTask, taskList, removeTask, handleUpload } = useFileSliceUpload(
    {
      preUpload: preUploadUsingPost,
      uploadChunk: uploadFileChunkUsingPost,
      cancelUpload: cancelUploadUsingPut,
    }
  );
  useEffect(() => {
    const uploadHandler = (e: any) => {
      // e.detail carries the files plus task metadata (presumably including
      // the target directory prefix — see the event dispatcher).
      const { files } = e.detail;
      const task = createTask(e.detail);
      handleUpload({ task, files });
    };
    window.addEventListener("upload:dataset", uploadHandler);
    return () => {
      window.removeEventListener("upload:dataset", uploadHandler);
    };
  }, []);
  return (
    <div
      className="w-90 max-w-90 max-h-96 overflow-y-auto p-2"
      id="header-task-popover"
    >
      {taskList.length > 0 &&
        taskList.map((task) => (
          <div key={task.key} className="border-b border-gray-200 pb-2">
            <div className="flex items-center justify-between">
              <div>{task.title}</div>
              <Button
                type="text"
                danger
                disabled={!task?.cancelFn}
                onClick={() =>
                  removeTask({
                    ...task,
                    isCancel: true,
                  })
                }
                icon={<DeleteOutlined />}
              ></Button>
            </div>
            <Progress size="small" percent={task.percent} />
          </div>
        ))}
      {taskList.length === 0 && (
        <Empty
          image={Empty.PRESENTED_IMAGE_SIMPLE}
          description="暂无上传任务"
        />
      )}
    </div>
  );
}

View File

@@ -1,60 +1,95 @@
"""
Tables of Annotation Management Module
"""
import uuid
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date, ForeignKey
from sqlalchemy.sql import func
from app.db.session import Base
class AnnotationTemplate(Base):
    """Annotation configuration template model.

    A reusable labeling configuration (label definitions, layout style,
    category) from which labeling projects can be created. Rows are
    soft-deleted via ``deleted_at`` rather than removed.
    """
    __tablename__ = "t_dm_annotation_templates"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(100), nullable=False, comment="模板名称")
    description = Column(String(500), nullable=True, comment="模板描述")
    data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries")
    labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc")
    configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)")
    style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
    category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system")
    built_in = Column(Boolean, default=False, comment="是否系统内置模板")
    version = Column(String(20), default='1.0', comment="模板版本")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")

    def __repr__(self):
        return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>"

    @property
    def is_deleted(self) -> bool:
        """Whether this row has been soft-deleted (``deleted_at`` is set)."""
        return self.deleted_at is not None
class LabelingProject(Base):
"""标注项目模型"""
__tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
name = Column(String(100), nullable=False, comment="项目名称")
labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
template_id = Column(String(36), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
configuration = Column(JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)")
progress = Column(JSON, nullable=True, comment="项目进度信息")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self):
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
"""Tables of Annotation Management Module"""
import uuid
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey
from sqlalchemy.sql import func
from app.db.session import Base
class AnnotationTemplate(Base):
    """Annotation configuration template (table ``t_dm_annotation_templates``).

    Stores a reusable labeling configuration (label definitions, layout style)
    keyed by data type and labeling type. Rows are soft-deleted via
    ``deleted_at`` rather than removed.
    """
    __tablename__ = "t_dm_annotation_templates"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(100), nullable=False, comment="模板名称")
    description = Column(String(500), nullable=True, comment="模板描述")
    data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries")
    labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc")
    configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)")
    style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
    category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system")
    built_in = Column(Boolean, default=False, comment="是否系统内置模板")
    version = Column(String(20), default='1.0', comment="模板版本")
    # Timestamps are assigned by the database (server_default / onupdate).
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
    def __repr__(self):
        return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>"
    @property
    def is_deleted(self) -> bool:
        """True when the row has been soft-deleted (``deleted_at`` is set)."""
        return self.deleted_at is not None
class LabelingProject(Base):
    """Labeling project (table ``t_dm_labeling_projects``).

    Links a dataset to an external Label Studio project and, optionally, to
    the :class:`AnnotationTemplate` it was created from. Soft-deleted via
    ``deleted_at``.
    """
    __tablename__ = "t_dm_labeling_projects"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    dataset_id = Column(String(36), nullable=False, comment="数据集ID")
    name = Column(String(100), nullable=False, comment="项目名称")
    labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
    # Template link survives template deletion (ondelete SET NULL).
    template_id = Column(String(36), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
    configuration = Column(JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)")
    progress = Column(JSON, nullable=True, comment="项目进度信息")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
    def __repr__(self):
        return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
    @property
    def is_deleted(self) -> bool:
        """True when the row has been soft-deleted (``deleted_at`` is set)."""
        return self.deleted_at is not None
class AutoAnnotationTask(Base):
    """Auto-annotation task (table ``t_dm_auto_annotation_tasks``).

    One row per YOLO auto-labeling job. The backend only creates/queries rows;
    a runtime worker polls ``status = 'pending'`` rows, runs inference, and
    writes progress/statistics back into the same columns.
    """
    __tablename__ = "t_dm_auto_annotation_tasks"
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
    name = Column(String(255), nullable=False, comment="任务名称")
    dataset_id = Column(String(36), nullable=False, comment="数据集ID")
    # Denormalized copy of the dataset name to avoid a join on list views.
    dataset_name = Column(String(255), nullable=True, comment="数据集名称(冗余字段,方便查询)")
    config = Column(JSON, nullable=False, comment="任务配置(模型规模、置信度等)")
    # NULL/empty means "process every image in the dataset".
    file_ids = Column(JSON, nullable=True, comment="要处理的文件ID列表,为空则处理数据集所有图像")
    status = Column(String(50), nullable=False, default="pending", comment="任务状态: pending/running/completed/failed")
    progress = Column(Integer, default=0, comment="任务进度 0-100")
    total_images = Column(Integer, default=0, comment="总图片数")
    processed_images = Column(Integer, default=0, comment="已处理图片数")
    detected_objects = Column(Integer, default=0, comment="检测到的对象总数")
    output_path = Column(String(500), nullable=True, comment="输出路径")
    error_message = Column(Text, nullable=True, comment="错误信息")
    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
    def __repr__(self) -> str:  # pragma: no cover - trivial repr
        return f"<AutoAnnotationTask(id={self.id}, name={self.name}, status={self.status})>"
    @property
    def is_deleted(self) -> bool:
        """True when the row has been soft-deleted (``deleted_at`` is set)."""
        return self.deleted_at is not None

View File

@@ -1,16 +1,18 @@
from fastapi import APIRouter
from .config import router as about_router
from .project import router as project_router
from .task import router as task_router
from .template import router as template_router
router = APIRouter(
prefix="/annotation",
tags = ["annotation"]
)
router.include_router(about_router)
router.include_router(project_router)
router.include_router(task_router)
router.include_router(template_router)
from fastapi import APIRouter
from .config import router as about_router
from .project import router as project_router
from .task import router as task_router
from .template import router as template_router
from .auto import router as auto_router

# Parent router for the annotation module; every sub-router below is mounted
# under the /annotation prefix and shares the "annotation" OpenAPI tag.
router = APIRouter(prefix="/annotation", tags=["annotation"])

# Registration order matches the historical order: config, project, task,
# template, then the auto-annotation endpoints.
for _sub_router in (about_router, project_router, task_router, template_router, auto_router):
    router.include_router(_sub_router)

View File

@@ -0,0 +1,196 @@
"""FastAPI routes for Auto Annotation tasks.
These routes back the frontend AutoAnnotation module:
- GET /api/annotation/auto
- POST /api/annotation/auto
- DELETE /api/annotation/auto/{task_id}
- GET /api/annotation/auto/{task_id}/status (simple wrapper)
"""
from __future__ import annotations
from typing import List
from fastapi import APIRouter, Depends, HTTPException, Path
from fastapi.responses import StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from ..schema.auto import (
CreateAutoAnnotationTaskRequest,
AutoAnnotationTaskResponse,
)
from ..service.auto import AutoAnnotationTaskService
router = APIRouter(
prefix="/auto",
tags=["annotation/auto"],
)
logger = get_logger(__name__)
service = AutoAnnotationTaskService()
@router.get("", response_model=StandardResponse[List[AutoAnnotationTaskResponse]])
async def list_auto_annotation_tasks(
    db: AsyncSession = Depends(get_db),
):
    """List every non-deleted auto-annotation task.

    The frontend currently polls this endpoint without pagination, so the
    full (non-soft-deleted) task list is returned; ordering is handled by
    the service layer.
    """
    tasks = await service.list_tasks(db)
    return StandardResponse(
        code=200,
        message="success",
        data=tasks,
    )
@router.post("", response_model=StandardResponse[AutoAnnotationTaskResponse])
async def create_auto_annotation_task(
    request: CreateAutoAnnotationTaskRequest,
    db: AsyncSession = Depends(get_db),
):
    """Create an auto-annotation task.

    Only a ``pending`` task record is created here; the actual YOLO run is
    performed later by a scheduler/worker that polls the task table.
    """
    logger.info(
        "Creating auto annotation task: name=%s, dataset_id=%s, config=%s, file_ids=%s",
        request.name,
        request.dataset_id,
        request.config.model_dump(by_alias=True),
        request.file_ids,
    )
    # Best-effort lookup of the dataset name and file count for the task's
    # redundant columns; a failure here must not block task creation.
    dataset_name = None
    total_images = 0
    try:
        dm_client = DatasetManagementService(db)
        # get_dataset returns a DatasetResponse exposing name and fileCount.
        dataset = await dm_client.get_dataset(request.dataset_id)
        if dataset is not None:
            dataset_name = dataset.name
            # With explicit file_ids the total is the selection size;
            # otherwise fall back to the dataset's full file count.
            if request.file_ids:
                total_images = len(request.file_ids)
            else:
                total_images = getattr(dataset, "fileCount", 0) or 0
    except Exception as e:  # pragma: no cover - defensive, non-fatal
        logger.warning("Failed to fetch dataset name for auto task: %s", e)
    task = await service.create_task(
        db,
        request,
        dataset_name=dataset_name,
        total_images=total_images,
    )
    return StandardResponse(
        code=200,
        message="success",
        data=task,
    )
@router.get("/{task_id}/status", response_model=StandardResponse[AutoAnnotationTaskResponse])
async def get_auto_annotation_task_status(
    task_id: str = Path(..., description="任务ID"),
    db: AsyncSession = Depends(get_db),
):
    """Fetch the status of a single auto-annotation task by id.

    The frontend mainly polls the list endpoint; this per-id lookup is a
    supplementary interface. Raises 404 when the task does not exist or
    has been soft-deleted.
    """
    task = await service.get_task(db, task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")
    return StandardResponse(
        code=200,
        message="success",
        data=task,
    )
@router.delete("/{task_id}", response_model=StandardResponse[bool])
async def delete_auto_annotation_task(
    task_id: str = Path(..., description="任务ID"),
    db: AsyncSession = Depends(get_db),
):
    """Soft-delete an auto-annotation task (only stamps ``deleted_at``).

    Raises 404 when the task does not exist or is already soft-deleted.
    """
    ok = await service.soft_delete_task(db, task_id)
    if not ok:
        raise HTTPException(status_code=404, detail="Task not found")
    return StandardResponse(
        code=200,
        message="success",
        data=True,
    )
@router.get("/{task_id}/download")
async def download_auto_annotation_result(
    task_id: str = Path(..., description="任务ID"),
    db: AsyncSession = Depends(get_db),
):
    """Stream the annotation output of a task as a ZIP archive.

    Builds a temporary ZIP from the task's output directory and streams it
    back to the client; the temporary file is removed after streaming (or on
    failure), fixing the previous temp-file leak.

    Raises 404 when the task or its output directory is missing, 400 when
    the task has no recorded output path, and 500 for an empty archive.
    """
    import os
    import tempfile
    import zipfile

    # Reuse the service layer to fetch the task metadata.
    task = await service.get_task(db, task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")
    if not task.output_path:
        raise HTTPException(status_code=400, detail="Task has no output path")
    output_dir = task.output_path
    if not os.path.isdir(output_dir):
        raise HTTPException(status_code=404, detail="Output directory not found")

    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".zip")
    os.close(tmp_fd)
    try:
        with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zf:
            for root, _, files in os.walk(output_dir):
                for entry in files:
                    file_path = os.path.join(root, entry)
                    # Store paths relative to output_dir so the archive
                    # contains no absolute paths.
                    zf.write(file_path, os.path.relpath(file_path, output_dir))
        file_size = os.path.getsize(tmp_path)
        if file_size == 0:
            raise HTTPException(status_code=500, detail="Generated ZIP is empty")
    except Exception:
        # Remove the temp file if anything fails before streaming starts.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    def iterfile():
        try:
            with open(tmp_path, "rb") as f:
                while True:
                    chunk = f.read(8192)
                    if not chunk:
                        break
                    yield chunk
        finally:
            # Clean up the temp ZIP once the response has been streamed.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    filename = f"{task.name}_annotations.zip"
    headers = {
        # BUG FIX: the original emitted a literal placeholder instead of
        # interpolating the computed filename.
        "Content-Disposition": f'attachment; filename="{filename}"',
        "Content-Length": str(file_size),
    }
    return StreamingResponse(iterfile(), media_type="application/zip", headers=headers)

View File

@@ -0,0 +1,73 @@
"""Schemas for Auto Annotation tasks"""
from __future__ import annotations
from typing import List, Optional, Dict, Any
from datetime import datetime
from pydantic import BaseModel, Field, ConfigDict
class AutoAnnotationConfig(BaseModel):
    """Auto-annotation task configuration (mirrors the frontend payload).

    Field aliases are camelCase to match the JSON sent by the frontend;
    ``populate_by_name`` also allows snake_case construction server-side.
    """
    model_size: str = Field(alias="modelSize", description="模型规模: n/s/m/l/x")
    conf_threshold: float = Field(alias="confThreshold", description="置信度阈值 0-1")
    target_classes: List[int] = Field(
        default_factory=list,
        alias="targetClasses",
        description="目标类别ID列表,空表示全部类别",
    )
    output_dataset_name: Optional[str] = Field(
        default=None,
        alias="outputDatasetName",
        description="自动标注结果要写入的新数据集名称(可选)",
    )
    model_config = ConfigDict(populate_by_name=True)
class CreateAutoAnnotationTaskRequest(BaseModel):
    """Request body for task creation, aligned with the frontend's
    CreateAutoAnnotationDialog payload (camelCase aliases)."""
    name: str = Field(..., min_length=1, max_length=255, description="任务名称")
    dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
    config: AutoAnnotationConfig = Field(..., description="任务配置")
    # None/empty means "process every image in the dataset".
    file_ids: Optional[List[str]] = Field(None, alias="fileIds", description="要处理的文件ID列表,为空则处理数据集中所有图像")
    model_config = ConfigDict(populate_by_name=True)
class AutoAnnotationTaskResponse(BaseModel):
    """Auto-annotation task response model (shared by list and detail views).

    ``from_attributes`` lets it be validated directly from the ORM entity;
    camelCase aliases match the frontend's expected JSON keys.
    """
    id: str = Field(..., description="任务ID")
    name: str = Field(..., description="任务名称")
    dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
    dataset_name: Optional[str] = Field(None, alias="datasetName", description="数据集名称")
    # Filled in by the service layer (not a DB column on the task entity).
    source_datasets: Optional[List[str]] = Field(
        default=None,
        alias="sourceDatasets",
        description="本任务实际处理涉及到的所有数据集名称列表",
    )
    config: Dict[str, Any] = Field(..., description="任务配置")
    status: str = Field(..., description="任务状态")
    progress: int = Field(..., description="任务进度 0-100")
    total_images: int = Field(..., alias="totalImages", description="总图片数")
    processed_images: int = Field(..., alias="processedImages", description="已处理图片数")
    detected_objects: int = Field(..., alias="detectedObjects", description="检测到的对象总数")
    output_path: Optional[str] = Field(None, alias="outputPath", description="输出路径")
    error_message: Optional[str] = Field(None, alias="errorMessage", description="错误信息")
    created_at: datetime = Field(..., alias="createdAt", description="创建时间")
    updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间")
    completed_at: Optional[datetime] = Field(None, alias="completedAt", description="完成时间")
    model_config = ConfigDict(populate_by_name=True, from_attributes=True)
class AutoAnnotationTaskListResponse(BaseModel):
    """Paged task-list envelope; the frontend currently consumes a plain
    array, so this structure is reserved for future pagination."""
    content: List[AutoAnnotationTaskResponse] = Field(..., description="任务列表")
    total: int = Field(..., description="总数")
    model_config = ConfigDict(populate_by_name=True)

View File

@@ -0,0 +1,154 @@
"""Service layer for Auto Annotation tasks"""
from __future__ import annotations
from typing import List, Optional
from datetime import datetime
from uuid import uuid4
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models.annotation_management import AutoAnnotationTask
from app.db.models.dataset_management import Dataset, DatasetFiles
from ..schema.auto import (
CreateAutoAnnotationTaskRequest,
AutoAnnotationTaskResponse,
)
class AutoAnnotationTaskService:
    """Metadata service for auto-annotation tasks.

    Only manages rows in ``t_dm_auto_annotation_tasks``; the actual YOLO
    inference is performed by the runtime worker, which polls that table.
    """

    async def create_task(
        self,
        db: AsyncSession,
        request: CreateAutoAnnotationTaskRequest,
        dataset_name: Optional[str] = None,
        total_images: int = 0,
    ) -> AutoAnnotationTaskResponse:
        """Insert a new task in ``pending`` state and return its response model.

        Only the task record is written here; no inference is triggered. A
        scheduler/worker later picks the row up and updates its progress.
        """
        now = datetime.now()
        task = AutoAnnotationTask(
            id=str(uuid4()),
            name=request.name,
            dataset_id=request.dataset_id,
            dataset_name=dataset_name,
            config=request.config.model_dump(by_alias=True),
            file_ids=request.file_ids,  # user-selected file ids (may be None)
            status="pending",
            progress=0,
            total_images=total_images,
            processed_images=0,
            detected_objects=0,
            created_at=now,
            updated_at=now,
        )
        db.add(task)
        await db.commit()
        await db.refresh(task)
        return await self._build_response(db, task)

    async def list_tasks(self, db: AsyncSession) -> List[AutoAnnotationTaskResponse]:
        """Return all non-soft-deleted tasks, newest first."""
        result = await db.execute(
            select(AutoAnnotationTask)
            .where(AutoAnnotationTask.deleted_at.is_(None))
            .order_by(AutoAnnotationTask.created_at.desc())
        )
        tasks: List[AutoAnnotationTask] = list(result.scalars().all())
        return [await self._build_response(db, task) for task in tasks]

    async def get_task(self, db: AsyncSession, task_id: str) -> Optional[AutoAnnotationTaskResponse]:
        """Return one non-deleted task by id, or ``None`` when absent."""
        result = await db.execute(
            select(AutoAnnotationTask).where(
                AutoAnnotationTask.id == task_id,
                AutoAnnotationTask.deleted_at.is_(None),
            )
        )
        task = result.scalar_one_or_none()
        if not task:
            return None
        return await self._build_response(db, task)

    async def _build_response(
        self,
        db: AsyncSession,
        task: AutoAnnotationTask,
    ) -> AutoAnnotationTaskResponse:
        """Convert an ORM task into its response model, attaching sourceDatasets.

        Deduplicates the fallback logic previously copied into create/list/get:
        on any failure the response degrades to the task's redundant
        ``dataset_name`` (or ``dataset_id`` when the name is missing).
        """
        resp = AutoAnnotationTaskResponse.model_validate(task)
        try:
            resp.source_datasets = await self._compute_source_datasets(db, task)
        except Exception:
            fallback_name = getattr(task, "dataset_name", None)
            fallback_id = getattr(task, "dataset_id", "")
            resp.source_datasets = [fallback_name] if fallback_name else [fallback_id]
        return resp

    async def _compute_source_datasets(
        self,
        db: AsyncSession,
        task: AutoAnnotationTask,
    ) -> List[str]:
        """Infer every dataset name this task actually touches.

        - With ``file_ids``: reverse-look-up dataset ids via
          ``t_dm_dataset_files`` and join ``t_dm_datasets`` for distinct names.
        - Without (or when the lookup yields nothing): fall back to the
          redundant ``dataset_name``/``dataset_id`` columns on the task.
        """
        file_ids = task.file_ids or []
        if file_ids:
            stmt = (
                select(Dataset.name)
                .join(DatasetFiles, Dataset.id == DatasetFiles.dataset_id)
                .where(DatasetFiles.id.in_(file_ids))
                .distinct()
            )
            result = await db.execute(stmt)
            names = [row[0] for row in result.fetchall() if row[0]]
            if names:
                return names
        # Fallback: report the single originating dataset (name preferred).
        if task.dataset_name:
            return [task.dataset_name]
        if task.dataset_id:
            return [task.dataset_id]
        return []

    async def soft_delete_task(self, db: AsyncSession, task_id: str) -> bool:
        """Soft-delete a task by stamping ``deleted_at``; False when not found."""
        result = await db.execute(
            select(AutoAnnotationTask).where(
                AutoAnnotationTask.id == task_id,
                AutoAnnotationTask.deleted_at.is_(None),
            )
        )
        task = result.scalar_one_or_none()
        if not task:
            return False
        task.deleted_at = datetime.now()
        await db.commit()
        return True

View File

@@ -1,8 +1,8 @@
#!/bin/bash
set -e
if [-d $LOCAL_FILES_DOCUMENT_ROOT ] && $LOCAL_FILES_SERVING_ENABLED; then
echo "Using local document root: $LOCAL_FILES_DOCUMENT_ROOT"
if [ -d "${LOCAL_FILES_DOCUMENT_ROOT}" ] && [ "${LOCAL_FILES_SERVING_ENABLED}" = "true" ]; then
echo "Using local document root: ${LOCAL_FILES_DOCUMENT_ROOT}"
fi
# 启动应用

17
runtime/ops/__init__.py Normal file
View File

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
"""Datamate built-in operators package.
This package contains built-in operators for filtering, slicing, annotation, etc.
It is mounted into the runtime container under ``datamate.ops`` so that
``from datamate.ops.annotation...`` imports work correctly.
"""
# Subpackages exposed for wildcard imports and operator discovery.
__all__ = [
    "annotation",
    "filter",
    "formatter",
    "llms",
    "mapper",
    "slicer",
    "user",
]

View File

@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
"""Annotation-related operators (e.g. YOLO detection)."""
__all__ = [
"image_object_detection_bounding_box",
]

View File

@@ -0,0 +1,9 @@
"""Image object detection (YOLOv8) operator package.
This package exposes the ImageObjectDetectionBoundingBox annotator so that
the auto-annotation worker can import it via different module paths.
"""
from .process import ImageObjectDetectionBoundingBox
__all__ = ["ImageObjectDetectionBoundingBox"]

View File

@@ -0,0 +1,3 @@
name: image_object_detection_bounding_box
version: 0.1.0
description: "YOLOv8-based object detection operator for auto annotation"

View File

@@ -0,0 +1,214 @@
#!/usr/bin/python
# -- encoding: utf-8 --
"""
Description: 图像目标检测算子
Create: 2025/12/17
"""
import os
import json
import time
from typing import Dict, Any
import cv2
import numpy as np
from loguru import logger
try:
from ultralytics import YOLO
except ImportError:
logger.warning("ultralytics not installed. Please install it using: pip install ultralytics")
YOLO = None
from datamate.core.base_op import Mapper
# COCO 80-class mapping: class id -> human-readable label (standard COCO order).
COCO_CLASS_MAP = {
    0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
    5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
    10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
    14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow",
    20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 24: "backpack",
    25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 29: "frisbee",
    30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite",
    34: "baseball bat", 35: "baseball glove", 36: "skateboard",
    37: "surfboard", 38: "tennis racket", 39: "bottle",
    40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon",
    45: "bowl", 46: "banana", 47: "apple", 48: "sandwich", 49: "orange",
    50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza",
    54: "donut", 55: "cake", 56: "chair", 57: "couch",
    58: "potted plant", 59: "bed", 60: "dining table", 61: "toilet",
    62: "tv", 63: "laptop", 64: "mouse", 65: "remote",
    66: "keyboard", 67: "cell phone", 68: "microwave", 69: "oven",
    70: "toaster", 71: "sink", 72: "refrigerator", 73: "book",
    74: "clock", 75: "vase", 76: "scissors", 77: "teddy bear",
    78: "hair drier", 79: "toothbrush"
}
class ImageObjectDetectionBoundingBox(Mapper):
    """YOLOv8-based object-detection operator.

    For each sample it runs detection on the image at ``sample[self.image_key]``,
    draws labelled bounding boxes onto the image, and writes both the
    annotated image and a per-image JSON annotation file under an output
    directory. The sample dict is returned enriched with the result paths.
    """
    # Model-size key -> ultralytics checkpoint filename.
    MODEL_MAP = {
        "n": "yolov8n.pt",
        "s": "yolov8s.pt",
        "m": "yolov8m.pt",
        "l": "yolov8l.pt",
        "x": "yolov8x.pt",
    }
    def __init__(self, *args, **kwargs):
        super(ImageObjectDetectionBoundingBox, self).__init__(*args, **kwargs)
        # Operator parameters; camelCase keys match the task config JSON.
        self._model_size = kwargs.get("modelSize", "l")
        self._conf_threshold = kwargs.get("confThreshold", 0.7)
        self._target_classes = kwargs.get("targetClasses", [])
        self._output_dir = kwargs.get("outputDir", None)  # optional output directory
        # An empty list means "detect all classes"; None disables filtering.
        if not self._target_classes:
            self._target_classes = None
        else:
            # Normalize class ids to a list of ints.
            self._target_classes = [int(cls_id) for cls_id in self._target_classes]
        # Resolve the checkpoint path next to this source file.
        model_filename = self.MODEL_MAP.get(self._model_size, "yolov8l.pt")
        current_dir = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_dir, model_filename)
        # Load the model; fall back to ultralytics' automatic download when
        # the local checkpoint is absent.
        if YOLO is None:
            raise ImportError("ultralytics is not installed. Please install it.")
        if not os.path.exists(model_path):
            logger.warning(f"Model file {model_path} not found. Downloading from ultralytics...")
            self.model = YOLO(model_filename)  # triggers automatic download
        else:
            self.model = YOLO(model_path)
        logger.info(f"Loaded YOLOv8 model: {model_filename}, "
                    f"conf_threshold: {self._conf_threshold}, "
                    f"target_classes: {self._target_classes}")
    @staticmethod
    def _get_color_by_class_id(class_id: int):
        """Derive a stable BGR color (for OpenCV drawing) from a class id.

        NOTE(review): this seeds NumPy's *global* RNG on every call. The
        result is deterministic per class id, but it perturbs global random
        state for any other code using np.random.
        """
        np.random.seed(class_id)
        color = np.random.randint(0, 255, size=3).tolist()
        return tuple(color)
    def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
        """Run detection on one sample and return it enriched with results.

        Missing or unreadable images are logged and the sample is returned
        unchanged. ``self.image_key`` is assumed to be provided by the Mapper
        base class -- TODO confirm against datamate.core.base_op.
        """
        start = time.time()
        # Resolve the image path from the sample.
        image_path = sample.get(self.image_key)
        if not image_path or not os.path.exists(image_path):
            logger.warning(f"Image file not found: {image_path}")
            return sample
        # Decode the image with OpenCV.
        img = cv2.imread(image_path)
        if img is None:
            logger.warning(f"Failed to read image: {image_path}")
            return sample
        # Run YOLO inference at the configured confidence threshold.
        results = self.model(img, conf=self._conf_threshold)
        r = results[0]
        # Build the annotation record skeleton.
        h, w = img.shape[:2]
        annotations = {
            "image": os.path.basename(image_path),
            "width": w,
            "height": h,
            "model_size": self._model_size,
            "conf_threshold": self._conf_threshold,
            "selected_class_ids": self._target_classes,
            "detections": []
        }
        # Collect detections and draw them on the image.
        if r.boxes is not None:
            for box in r.boxes:
                cls_id = int(box.cls[0])
                # Skip classes outside the requested target set.
                if self._target_classes is not None and cls_id not in self._target_classes:
                    continue
                conf = float(box.conf[0])
                x1, y1, x2, y2 = map(float, box.xyxy[0])
                label = COCO_CLASS_MAP.get(cls_id, f"class_{cls_id}")
                # Record the detection in both xyxy and xywh form.
                annotations["detections"].append({
                    "label": label,
                    "class_id": cls_id,
                    "confidence": round(conf, 4),
                    "bbox_xyxy": [x1, y1, x2, y2],
                    "bbox_xywh": [x1, y1, x2 - x1, y2 - y1]
                })
                # Draw the box and label onto the image in-place.
                color = self._get_color_by_class_id(cls_id)
                cv2.rectangle(
                    img,
                    (int(x1), int(y1)),
                    (int(x2), int(y2)),
                    color,
                    2
                )
                cv2.putText(
                    img,
                    f"{label} {conf:.2f}",
                    (int(x1), max(int(y1) - 5, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color,
                    1
                )
        # Choose the output directory: explicit outputDir when it exists,
        # otherwise the image's own directory.
        if self._output_dir and os.path.exists(self._output_dir):
            output_dir = self._output_dir
        else:
            output_dir = os.path.dirname(image_path)
        # Create images/ and annotations/ subdirectories to organize outputs.
        images_dir = os.path.join(output_dir, "images")
        annotations_dir = os.path.join(output_dir, "annotations")
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(annotations_dir, exist_ok=True)
        # Keep the original file name (no suffix) so image and JSON correspond 1:1.
        base_name = os.path.basename(image_path)
        name_without_ext = os.path.splitext(base_name)[0]
        # Save the annotated image under images/ with the original name.
        output_filename = base_name
        output_path = os.path.join(images_dir, output_filename)
        cv2.imwrite(output_path, img)
        # Save the annotation JSON under annotations/ with a matching name.
        json_filename = f"{name_without_ext}.json"
        json_path = os.path.join(annotations_dir, json_filename)
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(annotations, f, indent=2, ensure_ascii=False)
        # Enrich the sample with the detection results and output locations.
        sample["detection_count"] = len(annotations["detections"])
        sample["output_image"] = output_path
        sample["annotations_file"] = json_path
        sample["annotations"] = annotations
        logger.info(f"Image: {os.path.basename(image_path)}, "
                    f"Detections: {len(annotations['detections'])}, "
                    f"Time: {(time.time() - start):.4f}s")
        return sample

View File

@@ -0,0 +1,166 @@
import os
import json
from pathlib import Path
from ultralytics import YOLO
import cv2
import numpy as np
def get_color_by_class_id(class_id: int):
    """Deterministically derive a BGR color tuple for a given class id.

    Seeding NumPy's global RNG with the class id makes the color stable
    across calls and processes.
    """
    np.random.seed(class_id)
    b, g, r = np.random.randint(0, 255, size=3).tolist()
    return (b, g, r)
def mask_to_polygons(mask: np.ndarray):
    """Convert a binary mask into COCO-style polygon lists.

    Each external contour with at least 3 points becomes one flattened
    [x0, y0, x1, y1, ...] polygon; degenerate contours are dropped.
    """
    contours, _ = cv2.findContours(
        mask,
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )
    return [c.flatten().tolist() for c in contours if c.shape[0] >= 3]
# ---------------------------------------------------------------------------
# Batch YOLOv8 segmentation over a directory of images.
# Writes an overlay image and a COCO-style JSON per input image.
# ---------------------------------------------------------------------------
IMAGE_DIR = "C:/Users/meta/Desktop/Datamate/yolo/Photos"
OUT_IMG_DIR = "outputs_seg/images"
OUT_JSON_DIR = "outputs_seg/annotations"
# Model-size key -> ultralytics segmentation checkpoint.
MODEL_MAP = {
    "n": "yolov8n-seg.pt",
    "s": "yolov8s-seg.pt",
    "m": "yolov8m-seg.pt",
    "l": "yolov8l-seg.pt",
    "x": "yolov8x-seg.pt",
}
MODEL_KEY = "x"
MODEL_PATH = MODEL_MAP[MODEL_KEY]
CONF_THRES = 0.7
DRAW_BBOX = True  # also draw labelled bounding boxes on top of mask overlays
COCO_CLASS_MAP = {
    0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
    5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
    10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
    14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow",
    20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 24: "backpack",
    25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 29: "frisbee",
    30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite",
    34: "baseball bat", 35: "baseball glove", 36: "skateboard",
    37: "surfboard", 38: "tennis racket", 39: "bottle",
    40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon",
    45: "bowl", 46: "banana", 47: "apple", 48: "sandwich", 49: "orange",
    50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza",
    54: "donut", 55: "cake", 56: "chair", 57: "couch",
    58: "potted plant", 59: "bed", 60: "dining table", 61: "toilet",
    62: "tv", 63: "laptop", 64: "mouse", 65: "remote",
    66: "keyboard", 67: "cell phone", 68: "microwave", 69: "oven",
    70: "toaster", 71: "sink", 72: "refrigerator", 73: "book",
    74: "clock", 75: "vase", 76: "scissors", 77: "teddy bear",
    78: "hair drier", 79: "toothbrush"
}
TARGET_CLASS_IDS = [0, 2, 5]  # person, car, bus
os.makedirs(OUT_IMG_DIR, exist_ok=True)
os.makedirs(OUT_JSON_DIR, exist_ok=True)
# Validate the requested class ids up front so a typo fails fast.
if TARGET_CLASS_IDS is not None:
    for cid in TARGET_CLASS_IDS:
        if cid not in COCO_CLASS_MAP:
            raise ValueError(f"Invalid class id: {cid}")
model = YOLO(MODEL_PATH)
image_paths = list(Path(IMAGE_DIR).glob("*.*"))
for img_path in image_paths:
    img = cv2.imread(str(img_path))
    if img is None:
        print(f"[WARN] Failed to read {img_path}")
        continue
    results = model(img, conf=CONF_THRES)
    r = results[0]
    h, w = img.shape[:2]
    annotations = {
        "image": img_path.name,
        "width": w,
        "height": h,
        "model_key": MODEL_KEY,
        "conf_threshold": CONF_THRES,
        "supported_classes": COCO_CLASS_MAP,
        "selected_class_ids": TARGET_CLASS_IDS,
        "instances": []
    }
    if r.boxes is not None and r.masks is not None:
        for i, box in enumerate(r.boxes):
            cls_id = int(box.cls[0])
            if TARGET_CLASS_IDS is not None and cls_id not in TARGET_CLASS_IDS:
                continue
            conf = float(box.conf[0])
            x1, y1, x2, y2 = map(float, box.xyxy[0])
            label = COCO_CLASS_MAP[cls_id]
            # Binarize and resize the instance mask to the image resolution.
            mask = r.masks.data[i].cpu().numpy()
            mask = (mask > 0.5).astype(np.uint8)
            mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
            # Alpha-blend the class color onto the masked pixels.
            color = get_color_by_class_id(cls_id)
            img[mask == 1] = (
                img[mask == 1] * 0.5 + np.array(color) * 0.5
            ).astype(np.uint8)
            # BUG FIX: this was a hard-coded `if True:` that ignored the
            # DRAW_BBOX configuration flag declared above.
            if DRAW_BBOX:
                cv2.rectangle(
                    img,
                    (int(x1), int(y1)),
                    (int(x2), int(y2)),
                    color,
                    2
                )
                cv2.putText(
                    img,
                    f"{label} {conf:.2f}",
                    (int(x1), max(int(y1) - 5, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color,
                    1
                )
            polygons = mask_to_polygons(mask)
            annotations["instances"].append({
                "label": label,
                "class_id": cls_id,
                "confidence": round(conf, 4),
                "bbox_xyxy": [x1, y1, x2, y2],
                "segmentation": polygons
            })
    out_img_path = os.path.join(OUT_IMG_DIR, img_path.name)
    out_json_path = os.path.join(OUT_JSON_DIR, img_path.stem + ".json")
    cv2.imwrite(out_img_path, img)
    with open(out_json_path, "w", encoding="utf-8") as f:
        json.dump(annotations, f, indent=2, ensure_ascii=False)
    print(f"[OK] {img_path.name}")
print("Segmentation batch finished.")

View File

@@ -31,4 +31,5 @@ dependencies = [
"sqlalchemy>=2.0.44",
"xmltodict>=1.0.2",
"zhconv>=1.4.3",
"ultralytics>=8.0.0",
]

View File

@@ -0,0 +1,603 @@
# -*- coding: utf-8 -*-
"""Simple background worker for auto-annotation tasks.
This module runs inside the datamate-runtime container (operator_runtime service).
It polls `t_dm_auto_annotation_tasks` for pending tasks and performs YOLO
inference using the ImageObjectDetectionBoundingBox operator, updating
progress back to the same table so that the datamate-python backend and
frontend can display real-time status.
设计目标(最小可用版本):
- 单实例 worker,串行处理 `pending` 状态的任务。
- 对指定数据集下的所有已完成文件逐张执行目标检测。
- 按已处理图片数更新 `processed_images`、`progress`、`detected_objects`、`status` 等字段。
- 失败时将任务标记为 `failed` 并记录 `error_message`。
注意:
- 为了保持简单,目前不处理 "running" 状态的恢复逻辑;容器重启时,
已处于 running 的任务不会被重新拉起,需要后续扩展。
"""
from __future__ import annotations
import json
import os
import sys
import threading
import time
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from loguru import logger
from sqlalchemy import text
from datamate.sql_manager.sql_manager import SQLManager
# Try multiple import paths for the YOLO operator to support different
# packaging/installation layouts. On total failure the symbol stays None and
# tasks are failed gracefully at runtime instead of crashing the module import.
ImageObjectDetectionBoundingBox = None  # type: ignore
try:
    # Preferred path: datamate.ops (sources COPY'd to /opt/runtime/datamate/ops).
    from datamate.ops.annotation.image_object_detection_bounding_box.process import (  # type: ignore
        ImageObjectDetectionBoundingBox,
    )
    logger.info(
        "Imported ImageObjectDetectionBoundingBox from datamate.ops.annotation.image_object_detection_bounding_box",
    )
except Exception as e1:  # pragma: no cover - log import failures only, never crash
    logger.error(
        "Failed to import ImageObjectDetectionBoundingBox via datamate.ops: {}",
        e1,
    )
    try:
        # Fallback: top-level `ops` package install (exposed through ops.pth).
        from ops.annotation.image_object_detection_bounding_box.process import (  # type: ignore
            ImageObjectDetectionBoundingBox,
        )
        logger.info(
            "Imported ImageObjectDetectionBoundingBox from top-level ops.annotation.image_object_detection_bounding_box",
        )
    except Exception as e2:
        logger.error(
            "Failed to import ImageObjectDetectionBoundingBox via top-level ops package: {}",
            e2,
        )
        ImageObjectDetectionBoundingBox = None
# Last resort: load the operator straight from the local runtime/ops directory
# (the common development-environment layout).
if ImageObjectDetectionBoundingBox is None:
    try:
        project_root = Path(__file__).resolve().parents[2]
        ops_root = project_root / "ops"
        if ops_root.is_dir():
            # Make sure the parent of `ops` is on sys.path so "ops.xxx" imports resolve.
            if str(project_root) not in sys.path:
                sys.path.insert(0, str(project_root))
            from ops.annotation.image_object_detection_bounding_box.process import (  # type: ignore
                ImageObjectDetectionBoundingBox,
            )
            logger.info(
                "Imported ImageObjectDetectionBoundingBox from local runtime/ops.annotation.image_object_detection_bounding_box",
            )
        else:
            logger.warning(
                "Local runtime/ops directory not found when trying to import ImageObjectDetectionBoundingBox: {}",
                ops_root,
            )
    except Exception as e3:  # pragma: no cover - last-resort failure is logged only
        logger.error(
            "Failed to import ImageObjectDetectionBoundingBox from local runtime/ops: {}",
            e3,
        )
        ImageObjectDetectionBoundingBox = None
# Seconds to sleep between polls of the task table when no pending work exists.
POLL_INTERVAL_SECONDS = float(os.getenv("AUTO_ANNOTATION_POLL_INTERVAL", "5"))
# Root directory under which one output folder per generated dataset is created.
DEFAULT_OUTPUT_ROOT = os.getenv(
    "AUTO_ANNOTATION_OUTPUT_ROOT", "/dataset"
)
def _fetch_pending_task() -> Optional[Dict[str, Any]]:
    """Pop the oldest 'pending', non-deleted row from t_dm_auto_annotation_tasks.

    Returns the row as a dict with `config` parsed to a dict and `file_ids`
    parsed to a list (or None), or None when no pending task exists. Malformed
    JSON degrades to {} / None rather than raising.
    """
    sql = text(
        """
        SELECT id, name, dataset_id, dataset_name, config, file_ids, status,
               total_images, processed_images, detected_objects, output_path
        FROM t_dm_auto_annotation_tasks
        WHERE status = 'pending' AND deleted_at IS NULL
        ORDER BY created_at ASC
        LIMIT 1
        """
    )
    with SQLManager.create_connect() as conn:
        record = conn.execute(sql).fetchone()
    if record is None:
        return None
    task = dict(record._mapping)  # type: ignore[attr-defined]
    raw_config = task.get("config")
    try:
        task["config"] = json.loads(raw_config) if raw_config else {}
    except Exception:
        task["config"] = {}
    raw_ids = task.get("file_ids")
    try:
        if not raw_ids:
            task["file_ids"] = None
        elif isinstance(raw_ids, str):
            task["file_ids"] = json.loads(raw_ids)
        else:
            task["file_ids"] = raw_ids
    except Exception:
        task["file_ids"] = None
    return task
def _update_task_status(
    task_id: str,
    *,
    status: str,
    progress: Optional[int] = None,
    processed_images: Optional[int] = None,
    detected_objects: Optional[int] = None,
    total_images: Optional[int] = None,
    output_path: Optional[str] = None,
    error_message: Optional[str] = None,
    completed: bool = False,
) -> None:
    """Persist the status plus any explicitly supplied counters for one task row.

    Only keyword arguments that are not None produce SET clauses, so callers
    can update a subset of columns without clobbering the rest.
    """
    assignments: List[str] = ["status = :status", "updated_at = :updated_at"]
    params: Dict[str, Any] = {
        "task_id": task_id,
        "status": status,
        "updated_at": datetime.now(),
    }
    # Optional integer counters, written only when explicitly supplied.
    counters = {
        "progress": progress,
        "processed_images": processed_images,
        "detected_objects": detected_objects,
        "total_images": total_images,
    }
    for column, value in counters.items():
        if value is not None:
            assignments.append(f"{column} = :{column}")
            params[column] = int(value)
    if output_path is not None:
        assignments.append("output_path = :output_path")
        params["output_path"] = output_path
    if error_message is not None:
        assignments.append("error_message = :error_message")
        # Keep messages bounded even though the column is TEXT.
        params["error_message"] = error_message[:2000]
    if completed:
        assignments.append("completed_at = :completed_at")
        params["completed_at"] = datetime.now()
    sql = text(
        f"""
        UPDATE t_dm_auto_annotation_tasks
        SET {', '.join(assignments)}
        WHERE id = :task_id
        """
    )
    with SQLManager.create_connect() as conn:
        conn.execute(sql, params)
def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
    """Return (id, file_path, file_name) for every ACTIVE file of a dataset,
    ordered by creation time."""
    sql = text(
        """
        SELECT id, file_path, file_name
        FROM t_dm_dataset_files
        WHERE dataset_id = :dataset_id
          AND status = 'ACTIVE'
        ORDER BY created_at ASC
        """
    )
    with SQLManager.create_connect() as conn:
        records = conn.execute(sql, {"dataset_id": dataset_id}).fetchall()
    return [(str(file_id), str(path), str(name)) for file_id, path, name in records]
def _load_files_by_ids(file_ids: List[str]) -> List[Tuple[str, str, str]]:
    """Return (id, file_path, file_name) rows for the given ids.

    Works across datasets; only ACTIVE files are returned. An empty id list
    yields an empty result without touching the database.
    """
    if not file_ids:
        return []
    # Bind each id to its own named parameter (dicts preserve insertion order).
    params = {f"id{index}": str(file_id) for index, file_id in enumerate(file_ids)}
    placeholders = ", ".join(f":{key}" for key in params)
    sql = text(
        f"""
        SELECT id, file_path, file_name
        FROM t_dm_dataset_files
        WHERE id IN ({placeholders})
          AND status = 'ACTIVE'
        ORDER BY created_at ASC
        """
    )
    with SQLManager.create_connect() as conn:
        records = conn.execute(sql, params).fetchall()
    return [(str(file_id), str(path), str(name)) for file_id, path, name in records]
def _ensure_output_dir(output_dir: str) -> str:
"""确保输出目录及其 images/、annotations/ 子目录存在。"""
os.makedirs(output_dir, exist_ok=True)
os.makedirs(os.path.join(output_dir, "images"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "annotations"), exist_ok=True)
return output_dir
def _create_output_dataset(
    source_dataset_id: str,
    source_dataset_name: str,
    output_dataset_name: str,
) -> Tuple[str, str]:
    """Insert a new ACTIVE IMAGE dataset row for the auto-annotation output.

    Returns (new_dataset_id, output_dir) where output_dir is
    DEFAULT_OUTPUT_ROOT/<new_dataset_id>. The directory itself is not created
    here; callers run _ensure_output_dir afterwards.
    """
    new_dataset_id = str(uuid.uuid4())
    root = DEFAULT_OUTPUT_ROOT.rstrip("/") or "/dataset"
    target_dir = os.path.join(root, new_dataset_id)
    # Truncated to fit the description column.
    description = (
        f"Auto annotations for dataset {source_dataset_name or source_dataset_id}"[:255]
    )
    insert_sql = text(
        """
        INSERT INTO t_dm_datasets (id, name, description, dataset_type, path, status)
        VALUES (:id, :name, :description, :dataset_type, :path, :status)
        """
    )
    row = {
        "id": new_dataset_id,
        "name": output_dataset_name,
        "description": description,
        "dataset_type": "IMAGE",
        "path": target_dir,
        "status": "ACTIVE",
    }
    with SQLManager.create_connect() as conn:
        conn.execute(insert_sql, row)
    return new_dataset_id, target_dir
def _register_output_dataset(
    task_id: str,
    output_dataset_id: str,
    output_dir: str,
    output_dataset_name: str,
    total_images: int,
) -> None:
    """Register the auto-annotation output files under the new dataset.

    Scans `<output_dir>/images` and `<output_dir>/annotations`, inserts one
    t_dm_dataset_files row per regular file, then bumps the dataset's
    file_count / size_bytes aggregates in a single statement.

    Note: `total_images` is currently unused; it is kept to preserve the
    call signature.
    """

    def _scan_dir(directory: str) -> List[Tuple[str, str, int]]:
        # One (file_name, file_path, size_bytes) per regular file, sorted by name.
        entries: List[Tuple[str, str, int]] = []
        if not os.path.isdir(directory):
            return entries
        for file_name in sorted(os.listdir(directory)):
            file_path = os.path.join(directory, file_name)
            if not os.path.isfile(file_path):
                continue
            try:
                file_size = os.path.getsize(file_path)
            except OSError:
                file_size = 0
            entries.append((file_name, file_path, int(file_size)))
        return entries

    images_dir = os.path.join(output_dir, "images")
    if not os.path.isdir(images_dir):
        logger.warning(
            "Auto-annotation images directory not found for task {}: {}",
            task_id,
            images_dir,
        )
        return
    image_files = _scan_dir(images_dir)
    annotation_files = _scan_dir(os.path.join(output_dir, "annotations"))
    total_size = sum(size for _, _, size in image_files) + sum(
        size for _, _, size in annotation_files
    )
    if not image_files:
        logger.warning(
            "No image files found in auto-annotation output for task {}: {}",
            task_id,
            images_dir,
        )
        return
    insert_file_sql = text(
        """
        INSERT INTO t_dm_dataset_files (
            id, dataset_id, file_name, file_path, file_type, file_size, status
        ) VALUES (
            :id, :dataset_id, :file_name, :file_path, :file_type, :file_size, :status
        )
        """
    )
    update_dataset_stat_sql = text(
        """
        UPDATE t_dm_datasets
        SET file_count = COALESCE(file_count, 0) + :add_count,
            size_bytes = COALESCE(size_bytes, 0) + :add_size
        WHERE id = :dataset_id
        """
    )
    with SQLManager.create_connect() as conn:
        added_count = 0
        # Images first, then annotations, mirroring the scan order above.
        for file_name, file_path, file_size in image_files + annotation_files:
            # File type is the uppercased extension, or NULL when absent.
            ext = os.path.splitext(file_name)[1].lstrip(".").upper() or None
            conn.execute(
                insert_file_sql,
                {
                    "id": str(uuid.uuid4()),
                    "dataset_id": output_dataset_id,
                    "file_name": file_name,
                    "file_path": file_path,
                    "file_type": ext,
                    "file_size": int(file_size),
                    "status": "ACTIVE",
                },
            )
            added_count += 1
        if added_count > 0:
            conn.execute(
                update_dataset_stat_sql,
                {
                    "dataset_id": output_dataset_id,
                    "add_count": added_count,
                    "add_size": int(total_size),
                },
            )
    logger.info(
        "Registered auto-annotation output into dataset: dataset_id={}, name={}, added_files={}, added_size_bytes={}, task_id={}, output_dir={}",
        output_dataset_id,
        output_dataset_name,
        len(image_files) + len(annotation_files),
        total_size,
        task_id,
        output_dir,
    )
def _process_single_task(task: Dict[str, Any]) -> None:
    """Run one auto-annotation task end to end.

    Reads the task's config, runs the YOLO detector over every selected image,
    streams progress back into t_dm_auto_annotation_tasks after each image,
    and finally registers the output files as a new dataset. All failure paths
    mark the task 'failed' with an error_message instead of raising.
    """
    # Fail fast when none of the import fallbacks at module load time worked.
    if ImageObjectDetectionBoundingBox is None:
        logger.error(
            "YOLO operator not available (import failed earlier), skip auto-annotation task: {}",
            task["id"],
        )
        _update_task_status(
            task["id"],
            status="failed",
            error_message="YOLO operator not available in runtime container",
        )
        return
    task_id = str(task["id"])
    dataset_id = str(task["dataset_id"])
    task_name = str(task.get("name") or "")
    source_dataset_name = str(task.get("dataset_name") or "")
    cfg: Dict[str, Any] = task.get("config") or {}
    # Optional explicit file selection; falls back to the whole dataset below.
    selected_file_ids: Optional[List[str]] = task.get("file_ids") or None
    model_size = cfg.get("modelSize", "l")
    conf_threshold = float(cfg.get("confThreshold", 0.7))
    target_classes = cfg.get("targetClasses", []) or []
    output_dataset_name = cfg.get("outputDatasetName")
    if not output_dataset_name:
        # Derive a stable default name from the source dataset / task ids.
        base_name = source_dataset_name or task_name or f"dataset-{dataset_id[:8]}"
        output_dataset_name = f"{base_name}_auto_{task_id[:8]}"
    logger.info(
        "Start processing auto-annotation task: id={}, dataset_id={}, model_size={}, conf_threshold={}, target_classes={}, output_dataset_name={}",
        task_id,
        dataset_id,
        model_size,
        conf_threshold,
        target_classes,
        output_dataset_name,
    )
    _update_task_status(task_id, status="running", progress=0)
    if selected_file_ids:
        all_files = _load_files_by_ids(selected_file_ids)
    else:
        all_files = _load_dataset_files(dataset_id)
    # Keep only (path, name); the file id is not needed for detection.
    files = [(path, name) for _, path, name in all_files]
    total_images = len(files)
    if total_images == 0:
        # Nothing to do: complete immediately without creating an output dataset.
        logger.warning("No files found for dataset {} when running auto-annotation task {}", dataset_id, task_id)
        _update_task_status(
            task_id,
            status="completed",
            progress=100,
            total_images=0,
            processed_images=0,
            detected_objects=0,
            completed=True,
            output_path=None,
        )
        return
    # Create the destination dataset row first, then its directory layout.
    output_dataset_id, output_dir = _create_output_dataset(
        source_dataset_id=dataset_id,
        source_dataset_name=source_dataset_name,
        output_dataset_name=output_dataset_name,
    )
    output_dir = _ensure_output_dir(output_dir)
    try:
        detector = ImageObjectDetectionBoundingBox(
            modelSize=model_size,
            confThreshold=conf_threshold,
            targetClasses=target_classes,
            outputDir=output_dir,
        )
    except Exception as e:
        logger.error("Failed to init YOLO detector for task {}: {}", task_id, e)
        _update_task_status(
            task_id,
            status="failed",
            total_images=total_images,
            processed_images=0,
            detected_objects=0,
            error_message=f"Init YOLO detector failed: {e}",
        )
        return
    processed = 0
    detected_total = 0
    for file_path, file_name in files:
        try:
            sample = {
                "image": file_path,
                "filename": file_name,
            }
            result = detector.execute(sample)
            # Defensive: tolerate a None/partial result from the operator.
            annotations = (result or {}).get("annotations", {})
            detections = annotations.get("detections", [])
            detected_total += len(detections)
            processed += 1
            progress = int(processed * 100 / total_images) if total_images > 0 else 100
            # Progress is written after every image so the UI can poll live status.
            _update_task_status(
                task_id,
                status="running",
                progress=progress,
                processed_images=processed,
                detected_objects=detected_total,
                total_images=total_images,
                output_path=output_dir,
            )
        except Exception as e:
            # A single bad image is skipped; it does not count as processed.
            logger.error(
                "Failed to process image for task {}: file_path={}, error={}",
                task_id,
                file_path,
                e,
            )
            continue
    # Task completes even if some images failed; processed may be < total_images.
    _update_task_status(
        task_id,
        status="completed",
        progress=100,
        processed_images=processed,
        detected_objects=detected_total,
        total_images=total_images,
        output_path=output_dir,
        completed=True,
    )
    logger.info(
        "Completed auto-annotation task: id={}, total_images={}, processed={}, detected_objects={}, output_path={}",
        task_id,
        total_images,
        processed,
        detected_total,
        output_dir,
    )
    if output_dataset_name and output_dataset_id:
        try:
            _register_output_dataset(
                task_id=task_id,
                output_dataset_id=output_dataset_id,
                output_dir=output_dir,
                output_dataset_name=output_dataset_name,
                total_images=total_images,
            )
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error(
                "Failed to register auto-annotation output as dataset for task {}: {}",
                task_id,
                e,
            )
def _worker_loop() -> None:
    """Worker main loop; intended to run forever on a background thread.

    Polls for a pending task, processes it, and sleeps POLL_INTERVAL_SECONDS
    whenever the queue is empty or an error occurs.
    """
    logger.info(
        "Auto-annotation worker started with poll interval {} seconds, output root {}",
        POLL_INTERVAL_SECONDS,
        DEFAULT_OUTPUT_ROOT,
    )
    while True:
        try:
            pending = _fetch_pending_task()
            if not pending:
                time.sleep(POLL_INTERVAL_SECONDS)
                continue
            _process_single_task(pending)
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("Auto-annotation worker loop error: {}", e)
            time.sleep(POLL_INTERVAL_SECONDS)
def start_auto_annotation_worker() -> None:
    """Spawn the auto-annotation worker loop on a daemon thread.

    The daemon flag lets the hosting process exit without joining the loop.
    """
    worker = threading.Thread(
        target=_worker_loop,
        name="auto-annotation-worker",
        daemon=True,
    )
    worker.start()
    logger.info("Auto-annotation worker thread started: {}", worker.name)

View File

@@ -1,163 +1,174 @@
import os
from typing import Optional, Dict, Any, List
import uvicorn
import yaml
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from jsonargparse import ArgumentParser
from loguru import logger
from pydantic import BaseModel
from datamate.common.error_code import ErrorCode
from datamate.scheduler import cmd_scheduler
from datamate.scheduler import func_scheduler
from datamate.wrappers import WRAPPERS
# 日志配置
LOG_DIR = "/var/log/datamate/runtime"
os.makedirs(LOG_DIR, exist_ok=True)
logger.add(
f"{LOG_DIR}/runtime.log",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} - {message}",
level="DEBUG",
enqueue=True
)
app = FastAPI()
class APIException(Exception):
"""自定义API异常"""
def __init__(self, error_code: ErrorCode, detail: Optional[str] = None,
extra_data: Optional[Dict] = None):
self.error_code = error_code
self.detail = detail or error_code.value[1]
self.code = error_code.value[0]
self.extra_data = extra_data
super().__init__(self.detail)
def to_dict(self) -> Dict[str, Any]:
result = {
"code": self.code,
"message": self.detail,
"success": False
}
if self.extra_data:
result["data"] = self.extra_data
return result
@app.exception_handler(APIException)
async def api_exception_handler(request: Request, exc: APIException):
return JSONResponse(
status_code=200, # 业务错误返回 200,错误信息在响应体中
content=exc.to_dict()
)
class QueryTaskRequest(BaseModel):
task_ids: List[str]
@app.post("/api/task/list")
async def query_task_info(request: QueryTaskRequest):
try:
return [{task_id: cmd_scheduler.get_task_status(task_id)} for task_id in request.task_ids]
except Exception as e:
raise APIException(ErrorCode.UNKNOWN_ERROR)
@app.post("/api/task/{task_id}/submit")
async def submit_task(task_id):
config_path = f"/flow/{task_id}/process.yaml"
logger.info("Start submitting job...")
dataset_path = get_from_cfg(task_id, "dataset_path")
if not check_valid_path(dataset_path):
logger.error(f"dataset_path is not existed! please check this path.")
raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
try:
executor_type = get_from_cfg(task_id, "executor_type")
await WRAPPERS.get(executor_type).submit(task_id, config_path)
except Exception as e:
logger.error(f"Error happens during submitting task. Error Info following: {e}")
raise APIException(ErrorCode.SUBMIT_TASK_ERROR)
logger.info(f"task id: {task_id} has been submitted.")
success_json_info = JSONResponse(
content={"status": "Success", "message": f"{task_id} has been submitted"},
status_code=200
)
return success_json_info
@app.post("/api/task/{task_id}/stop")
async def stop_task(task_id):
logger.info("Start stopping ray job...")
success_json_info = JSONResponse(
content={"status": "Success", "message": f"{task_id} has been stopped"},
status_code=200
)
try:
executor_type = get_from_cfg(task_id, "executor_type")
if not WRAPPERS.get(executor_type).cancel(task_id):
raise APIException(ErrorCode.CANCEL_TASK_ERROR)
except Exception as e:
if isinstance(e, APIException):
raise e
raise APIException(ErrorCode.UNKNOWN_ERROR)
logger.info(f"{task_id} has been stopped.")
return success_json_info
def check_valid_path(file_path):
full_path = os.path.abspath(file_path)
return os.path.exists(full_path)
def get_from_cfg(task_id, key):
config_path = f"/flow/{task_id}/process.yaml"
if not check_valid_path(config_path):
logger.error(f"config_path is not existed! please check this path.")
raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
with open(config_path, "r", encoding='utf-8') as f:
content = f.read()
cfg = yaml.safe_load(content)
return cfg[key]
def parse_args():
parser = ArgumentParser(description="Create API for Submitting Job to Data-juicer")
parser.add_argument(
'--ip',
type=str,
default="0.0.0.0",
help='Service ip for this API, default to use 0.0.0.0.'
)
parser.add_argument(
'--port',
type=int,
default=8080,
help='Service port for this API, default to use 8600.'
)
return parser.parse_args()
if __name__ == '__main__':
p_args = parse_args()
uvicorn.run(
app,
host=p_args.ip,
port=p_args.port
)
import os
from typing import Optional, Dict, Any, List
import uvicorn
import yaml
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from jsonargparse import ArgumentParser
from loguru import logger
from pydantic import BaseModel
from datamate.common.error_code import ErrorCode
from datamate.scheduler import cmd_scheduler
from datamate.scheduler import func_scheduler
from datamate.wrappers import WRAPPERS
from datamate.auto_annotation_worker import start_auto_annotation_worker
# Logging configuration: mirror loguru output into a rotatable runtime log file.
LOG_DIR = "/var/log/datamate/runtime"
os.makedirs(LOG_DIR, exist_ok=True)
logger.add(
    f"{LOG_DIR}/runtime.log",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} - {message}",
    level="DEBUG",
    enqueue=True
)
app = FastAPI()
class APIException(Exception):
    """Business-level API error carrying an ErrorCode plus optional detail/data.

    `to_dict` produces the JSON body returned to clients by the exception
    handler registered on the FastAPI app.
    """

    def __init__(self, error_code: ErrorCode, detail: Optional[str] = None,
                 extra_data: Optional[Dict] = None):
        self.error_code = error_code
        code, default_message = error_code.value[0], error_code.value[1]
        self.detail = detail or default_message
        self.code = code
        self.extra_data = extra_data
        super().__init__(self.detail)

    def to_dict(self) -> Dict[str, Any]:
        payload: Dict[str, Any] = {
            "code": self.code,
            "message": self.detail,
            "success": False,
        }
        if self.extra_data:
            payload["data"] = self.extra_data
        return payload
@app.on_event("startup")
async def startup_event():
    """Start the background auto-annotation worker when the API boots."""
    try:
        start_auto_annotation_worker()
    except Exception as e:  # pragma: no cover - defensive logging
        logger.error("Failed to start auto-annotation worker: {}", e)
@app.exception_handler(APIException)
async def api_exception_handler(request: Request, exc: APIException):
    # Business errors are returned with HTTP 200; the error detail lives in the body.
    return JSONResponse(
        status_code=200,  # business error still returns HTTP 200; details are in the body
        content=exc.to_dict()
    )
class QueryTaskRequest(BaseModel):
    """Request body for /api/task/list: ids of the tasks to look up."""
    task_ids: List[str]
@app.post("/api/task/list")
async def query_task_info(request: QueryTaskRequest):
    """Return the scheduler status for each requested task id.

    Response is a list of one-entry dicts {task_id: status}.
    """
    try:
        return [{task_id: cmd_scheduler.get_task_status(task_id)} for task_id in request.task_ids]
    except Exception as e:
        # Log and chain the root cause instead of silently discarding it.
        logger.error(f"Failed to query task status: {e}")
        raise APIException(ErrorCode.UNKNOWN_ERROR) from e
@app.post("/api/task/{task_id}/submit")
async def submit_task(task_id):
    """Validate the task's dataset path and hand the job to its executor.

    Raises APIException(FILE_NOT_FOUND_ERROR) when the configured dataset path
    is missing and APIException(SUBMIT_TASK_ERROR) when submission fails.
    """
    config_path = f"/flow/{task_id}/process.yaml"
    logger.info("Start submitting job...")
    dataset_path = get_from_cfg(task_id, "dataset_path")
    if not check_valid_path(dataset_path):
        # Include the offending path so operators can diagnose the failure.
        logger.error(f"dataset_path does not exist: {dataset_path}")
        raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
    try:
        executor_type = get_from_cfg(task_id, "executor_type")
        await WRAPPERS.get(executor_type).submit(task_id, config_path)
    except Exception as e:
        logger.error(f"Error happens during submitting task. Error Info following: {e}")
        # Chain the cause so the original traceback is preserved.
        raise APIException(ErrorCode.SUBMIT_TASK_ERROR) from e
    logger.info(f"task id: {task_id} has been submitted.")
    success_json_info = JSONResponse(
        content={"status": "Success", "message": f"{task_id} has been submitted"},
        status_code=200
    )
    return success_json_info
@app.post("/api/task/{task_id}/stop")
async def stop_task(task_id):
    """Ask the task's executor to cancel the running job.

    Raises APIException(CANCEL_TASK_ERROR) when the executor reports failure
    and APIException(UNKNOWN_ERROR) for any unexpected exception.
    """
    logger.info("Start stopping ray job...")
    try:
        executor_type = get_from_cfg(task_id, "executor_type")
        if not WRAPPERS.get(executor_type).cancel(task_id):
            raise APIException(ErrorCode.CANCEL_TASK_ERROR)
    except APIException:
        # Business errors pass through unchanged.
        raise
    except Exception as e:
        # Wrap unexpected failures, preserving the original cause.
        raise APIException(ErrorCode.UNKNOWN_ERROR) from e
    logger.info(f"{task_id} has been stopped.")
    return JSONResponse(
        content={"status": "Success", "message": f"{task_id} has been stopped"},
        status_code=200
    )
def check_valid_path(file_path):
    """Return True when the (absolutised) path exists on disk."""
    return os.path.exists(os.path.abspath(file_path))
def get_from_cfg(task_id, key):
    """Read one key from the task's /flow/<task_id>/process.yaml config.

    Raises APIException(FILE_NOT_FOUND_ERROR) when the config file is missing;
    a missing key propagates as KeyError, matching previous behavior.
    """
    config_path = f"/flow/{task_id}/process.yaml"
    if not check_valid_path(config_path):
        # Include the offending path so operators can diagnose the failure.
        logger.error(f"config_path does not exist: {config_path}")
        raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
    with open(config_path, "r", encoding='utf-8') as f:
        content = f.read()
    cfg = yaml.safe_load(content)
    return cfg[key]
def parse_args():
    """Parse service CLI arguments: bind ip (default 0.0.0.0) and port (default 8080)."""
    parser = ArgumentParser(description="Create API for Submitting Job to Data-juicer")
    parser.add_argument(
        '--ip',
        type=str,
        default="0.0.0.0",
        help='Service ip for this API, default to use 0.0.0.0.'
    )
    parser.add_argument(
        '--port',
        type=int,
        default=8080,
        # Help text previously claimed 8600 while the actual default is 8080.
        help='Service port for this API, default to use 8080.'
    )
    return parser.parse_args()
if __name__ == '__main__':
    # Entry point: parse CLI args and serve the FastAPI app with uvicorn.
    p_args = parse_args()
    uvicorn.run(
        app,
        host=p_args.ip,
        port=p_args.port
    )

View File

@@ -1,473 +1,497 @@
use datamate;
CREATE TABLE t_dm_annotation_templates (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
name VARCHAR(100) NOT NULL COMMENT '模板名称',
description VARCHAR(500) COMMENT '模板描述',
data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries',
labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc',
configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)',
style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',
category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system',
built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',
version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
INDEX idx_data_type (data_type),
INDEX idx_labeling_type (labeling_type),
INDEX idx_category (category),
INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';
CREATE TABLE t_dm_labeling_projects (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
name VARCHAR(100) NOT NULL COMMENT '项目名称',
labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
progress JSON COMMENT '项目进度信息',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
INDEX idx_dataset_id (dataset_id),
INDEX idx_template_id (template_id),
INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';
-- 内置标注模板初始化数据
-- 这些模板将在系统首次启动时自动创建
-- 使用 INSERT ... ON DUPLICATE KEY UPDATE 来覆盖已存在的记录
-- 1. 图像分类模板
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-image-classification-001',
'图像分类',
'简单的多标签图像分类模板',
'图像',
'分类',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'choice',
'toName', 'image',
'type', 'Choices',
'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'),
'required', true,
'description', '选择最符合图像内容的标签'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'image',
'type', 'Image',
'value', '$image'
)
)
),
'horizontal',
'计算机视觉',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 2. 目标检测模板(矩形框)
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-object-detection-001',
'目标检测(边界框)',
'使用矩形边界框进行目标检测',
'图像',
'目标检测',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'label',
'toName', 'image',
'type', 'RectangleLabels',
'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'),
'required', false,
'description', '在图像中框出目标并标注类别'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'image',
'type', 'Image',
'value', '$image'
)
)
),
'horizontal',
'计算机视觉',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 3. 图像分割模板(多边形)
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-image-segmentation-001',
'图像分割(多边形)',
'使用多边形标注进行语义分割',
'图像',
'分割',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'label',
'toName', 'image',
'type', 'PolygonLabels',
'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'),
'required', false,
'description', '使用多边形框选需要分割的区域'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'image',
'type', 'Image',
'value', '$image'
)
)
),
'horizontal',
'计算机视觉',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 4. 文本分类模板
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-text-classification-001',
'文本情感分类',
'将文本中表达的情感划分到预定义的类别',
'文本',
'分类',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'choice',
'toName', 'text',
'type', 'Choices',
'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'),
'required', true,
'description', '对文本的情感或类别进行选择'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
'type', 'Text',
'value', '$text'
)
)
),
'vertical',
'自然语言处理',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 5. 命名实体识别(NER)
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-ner-001',
'命名实体识别',
'从文本中抽取并标注命名实体',
'文本',
'实体识别',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'label',
'toName', 'text',
'type', 'Labels',
'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'),
'required', false,
'description', '在文本中标注人物、地点等实体'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
'type', 'Text',
'value', '$text'
)
)
),
'vertical',
'自然语言处理',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 6. 音频分类模板
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-audio-classification-001',
'音频分类',
'将音频片段划分到不同类别',
'音频',
'分类',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'choice',
'toName', 'audio',
'type', 'Choices',
'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'),
'required', true,
'description', '选择音频片段对应的类别'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'audio',
'type', 'Audio',
'value', '$audio'
)
)
),
'horizontal',
'音频',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 7. 文本多标签分类模板
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-text-multilabel-001',
'文本多标签分类',
'可为文本选择多个标签,适用于主题、内容类别等多标签任务',
'文本',
'分类',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'labels',
'toName', 'text',
'type', 'Choices',
'options', JSON_ARRAY('Sports','Politics','Tech','Entertainment'),
'required', true,
'choice', 'multiple',
'description', '可选择多个标签'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
'type', 'Text',
'value', '$text'
)
)
),
'vertical',
'自然语言处理',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 8. 文本摘要模板
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-text-summarization-001',
'文本摘要',
'根据原文撰写简要摘要',
'文本',
'摘要',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'summary',
'toName', 'text',
'type', 'TextArea',
'required', true,
'description', '在此填写摘要内容'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
'type', 'Text',
'value', '$text'
)
)
),
'vertical',
'自然语言处理',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
-- 9. 关键词抽取模板
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, style, category, built_in, version, created_at
) VALUES (
'tpl-keyword-extract-001',
'关键词抽取',
'从文本中选出关键词或关键短语',
'文本',
'实体识别',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT(
'fromName', 'kw',
'toName', 'text',
'type', 'Labels',
'labels', JSON_ARRAY('Keyword'),
'required', false,
'description', '高亮文本并标注关键词'
)
),
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
'type', 'Text',
'value', '$text'
)
)
),
'vertical',
'自然语言处理',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();
use datamate;
CREATE TABLE t_dm_annotation_templates (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
name VARCHAR(100) NOT NULL COMMENT '模板名称',
description VARCHAR(500) COMMENT '模板描述',
data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries',
labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc',
configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)',
style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',
category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system',
built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',
version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
INDEX idx_data_type (data_type),
INDEX idx_labeling_type (labeling_type),
INDEX idx_category (category),
INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';
-- Labeling projects: links a dataset to an external Label Studio project and
-- (optionally) to the annotation template it was created from.
-- IF NOT EXISTS keeps this DDL re-runnable alongside the idempotent seed data.
CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
    name VARCHAR(100) NOT NULL COMMENT '项目名称',
    labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
    template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
    configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
    progress JSON COMMENT '项目进度信息',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
    -- Deleting a template must not cascade-delete projects; they just lose the link.
    FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_template_id (template_id),
    INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';
-- Auto-annotation task table: tracks YOLO auto-labeling jobs over a dataset,
-- including per-task progress counters and the final output location.
-- IF NOT EXISTS keeps this DDL re-runnable alongside the idempotent seed data.
CREATE TABLE IF NOT EXISTS t_dm_auto_annotation_tasks (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    name VARCHAR(255) NOT NULL COMMENT '任务名称',
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
    dataset_name VARCHAR(255) COMMENT '数据集名称(冗余字段,方便查询)',
    config JSON NOT NULL COMMENT '任务配置(模型规模、置信度等)',
    file_ids JSON COMMENT '要处理的文件ID列表,为空则处理数据集所有图像',
    status VARCHAR(50) NOT NULL DEFAULT 'pending' COMMENT '任务状态: pending/running/completed/failed',
    progress INT DEFAULT 0 COMMENT '任务进度 0-100',
    total_images INT DEFAULT 0 COMMENT '总图片数',
    processed_images INT DEFAULT 0 COMMENT '已处理图片数',
    detected_objects INT DEFAULT 0 COMMENT '检测到的对象总数',
    output_path VARCHAR(500) COMMENT '输出路径',
    error_message TEXT COMMENT '错误信息',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    completed_at TIMESTAMP NULL COMMENT '完成时间',
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_status (status),
    INDEX idx_created_at (created_at)
) COMMENT='自动标注任务表';
-- Built-in annotation template seed data.
-- These templates are created automatically on first system startup.
-- INSERT ... ON DUPLICATE KEY UPDATE overwrites any existing row with the
-- same primary key, so this seeding section can be re-run safely.
-- 1. Image classification template
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-image-classification-001',
    '图像分类',
    '简单的多标签图像分类模板',
    '图像',
    '分类',
    -- configuration: Label-Studio-style layout — 'labels' holds the annotation
    -- controls, 'objects' holds the data sources they attach to (via toName).
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'image',
                'type', 'Choices',
                'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'),
                'required', true,
                'description', '选择最符合图像内容的标签'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'image',
                'type', 'Image',
                'value', '$image'  -- bound to the 'image' field of the task data
            )
        )
    ),
    'horizontal',
    '计算机视觉',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 2. Object detection template (rectangular bounding boxes).
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-object-detection-001',
    '目标检测(边界框)',
    '使用矩形边界框进行目标检测',
    '图像',
    '目标检测',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'image',
                'type', 'RectangleLabels',  -- bounding-box control
                'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'),
                'required', false,
                'description', '在图像中框出目标并标注类别'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'image',
                'type', 'Image',
                'value', '$image'  -- bound to the 'image' field of the task data
            )
        )
    ),
    'horizontal',
    '计算机视觉',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 3. Image segmentation template (polygons).
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-image-segmentation-001',
    '图像分割(多边形)',
    '使用多边形标注进行语义分割',
    '图像',
    '分割',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'image',
                'type', 'PolygonLabels',  -- polygon outline control
                'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'),
                'required', false,
                'description', '使用多边形框选需要分割的区域'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'image',
                'type', 'Image',
                'value', '$image'  -- bound to the 'image' field of the task data
            )
        )
    ),
    'horizontal',
    '计算机视觉',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 4. Text sentiment classification template.
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-text-classification-001',
    '文本情感分类',
    '将文本中表达的情感划分到预定义的类别',
    '文本',
    '分类',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'text',
                'type', 'Choices',
                'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'),
                'required', true,
                'description', '对文本的情感或类别进行选择'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'  -- bound to the 'text' field of the task data
            )
        )
    ),
    'vertical',
    '自然语言处理',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 5. Named entity recognition (NER) template.
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-ner-001',
    '命名实体识别',
    '从文本中抽取并标注命名实体',
    '文本',
    '实体识别',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'text',
                'type', 'Labels',  -- span-highlighting control for text
                'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'),
                'required', false,
                'description', '在文本中标注人物、地点等实体'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'  -- bound to the 'text' field of the task data
            )
        )
    ),
    'vertical',
    '自然语言处理',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 6. Audio classification template.
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-audio-classification-001',
    '音频分类',
    '将音频片段划分到不同类别',
    '音频',
    '分类',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'audio',
                'type', 'Choices',
                'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'),
                'required', true,
                'description', '选择音频片段对应的类别'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'audio',
                'type', 'Audio',
                'value', '$audio'  -- bound to the 'audio' field of the task data
            )
        )
    ),
    'horizontal',
    '音频',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 7. Multi-label text classification template.
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-text-multilabel-001',
    '文本多标签分类',
    '可为文本选择多个标签,适用于主题、内容类别等多标签任务',
    '文本',
    '分类',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'labels',
                'toName', 'text',
                'type', 'Choices',
                'options', JSON_ARRAY('Sports','Politics','Tech','Entertainment'),
                'required', true,
                'choice', 'multiple',  -- allows selecting more than one option
                'description', '可选择多个标签'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'  -- bound to the 'text' field of the task data
            )
        )
    ),
    'vertical',
    '自然语言处理',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 8. Text summarization template (free-form text input).
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-text-summarization-001',
    '文本摘要',
    '根据原文撰写简要摘要',
    '文本',
    '摘要',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'summary',
                'toName', 'text',
                'type', 'TextArea',  -- free-text input; no predefined options
                'required', true,
                'description', '在此填写摘要内容'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'  -- bound to the 'text' field of the task data
            )
        )
    ),
    'vertical',
    '自然语言处理',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert
-- 9. Keyword extraction template (single 'Keyword' span label).
-- Re-runnable: ON DUPLICATE KEY UPDATE overwrites a pre-existing row.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, style, category, built_in, version, created_at
) VALUES (
    'tpl-keyword-extract-001',
    '关键词抽取',
    '从文本中选出关键词或关键短语',
    '文本',
    '实体识别',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'kw',
                'toName', 'text',
                'type', 'Labels',  -- span-highlighting control for text
                'labels', JSON_ARRAY('Keyword'),
                'required', false,
                'description', '高亮文本并标注关键词'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'  -- bound to the 'text' field of the task data
            )
        )
    ),
    'vertical',
    '自然语言处理',
    1,          -- built_in = TRUE (system template)
    '1.0.0',
    NOW()
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name),
    description = VALUES(description),
    data_type = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style),
    category = VALUES(category),
    built_in = VALUES(built_in),
    version = VALUES(version),
    updated_at = NOW();  -- created_at is intentionally preserved from first insert

View File

@@ -1,5 +1,4 @@
FROM maven:3-eclipse-temurin-21 AS builder
COPY backend/ /opt/backend
RUN cd /opt/backend/services && \

View File

@@ -24,7 +24,6 @@ WORKDIR /opt/runtime
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e .[all] --system \
&& uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
&& uv pip uninstall torch torchvision --system \
&& python -m spacy download zh_core_web_sm \
&& echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth