feat(auto-annotation): integrate YOLO auto-labeling and enhance data management (#223)

* feat(auto-annotation): initial setup

* chore: remove package-lock.json

* chore: 清理本地测试脚本与 Maven 设置

* chore: change package-lock.json
This commit is contained in:
Kecheng Sha
2026-01-05 14:22:44 +08:00
committed by GitHub
parent ccfb84c034
commit 3f1ad6a872
44 changed files with 8503 additions and 5238 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,54 +1,63 @@
package com.datamate.datamanagement.domain.model.dataset; package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName; import com.baomidou.mybatisplus.annotation.TableId;
import com.fasterxml.jackson.core.type.TypeReference; import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.core.type.TypeReference;
import lombok.*; import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j; import lombok.*;
import lombok.extern.slf4j.Slf4j;
import java.time.LocalDateTime;
import java.util.Collections; import java.time.LocalDateTime;
import java.util.List; import java.util.Collections;
import java.util.List;
/**
* 数据集文件实体(与数据库表 t_dm_dataset_files 对齐) /**
*/ * 数据集文件实体(与数据库表 t_dm_dataset_files 对齐)
@Getter */
@Setter @Getter
@Builder @Setter
@Slf4j @Builder
@NoArgsConstructor @Slf4j
@AllArgsConstructor @NoArgsConstructor
@TableName("t_dm_dataset_files") @AllArgsConstructor
public class DatasetFile { @TableName("t_dm_dataset_files")
@TableId public class DatasetFile {
private String id; // UUID @TableId
private String datasetId; // UUID private String id; // UUID
private String fileName; private String datasetId; // UUID
private String filePath; private String fileName;
private String fileType; // JPG/PNG/DCM/TXT private String filePath;
private Long fileSize; // bytes private String fileType; // JPG/PNG/DCM/TXT
private String checkSum; private Long fileSize; // bytes
private String tags; private String checkSum;
private String metadata; private String tags;
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR private String metadata;
private LocalDateTime uploadTime; private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
private LocalDateTime lastAccessTime; private LocalDateTime uploadTime;
private LocalDateTime createdAt; private LocalDateTime lastAccessTime;
private LocalDateTime updatedAt; private LocalDateTime createdAt;
private LocalDateTime updatedAt;
/**
* 解析标签 /** 标记是否为目录(非持久化字段) */
* @TableField(exist = false)
* @return 标签列表 private Boolean directory;
*/
public List<FileTag> analyzeTag() { /** 目录包含的文件数量(非持久化字段) */
try { @TableField(exist = false)
ObjectMapper mapper = new ObjectMapper(); private Long fileCount;
return mapper.readValue(tags, new TypeReference<List<FileTag>>() {});
} catch (Exception e) { /**
return Collections.emptyList(); * 解析标签
} *
} * @return 标签列表
} */
public List<FileTag> analyzeTag() {
try {
ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(tags, new TypeReference<List<FileTag>>() {});
} catch (Exception e) {
return Collections.emptyList();
}
}
}

View File

@@ -1,21 +1,24 @@
package com.datamate.datamanagement.domain.model.dataset; package com.datamate.datamanagement.domain.model.dataset;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.Setter; import lombok.Setter;
/** /**
* 数据集文件上传检查信息 * 数据集文件上传检查信息
*/ */
@Getter @Getter
@Setter @Setter
@NoArgsConstructor @NoArgsConstructor
@AllArgsConstructor @AllArgsConstructor
public class DatasetFileUploadCheckInfo { public class DatasetFileUploadCheckInfo {
/** 数据集id */ /** 数据集id */
private String datasetId; private String datasetId;
/** 是否为压缩包上传 */ /** 是否为压缩包上传 */
private boolean hasArchive; private boolean hasArchive;
}
/** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */
private String prefix;
}

View File

@@ -1,43 +1,47 @@
package com.datamate.datamanagement.infrastructure.exception; package com.datamate.datamanagement.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode; import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;
/** /**
* 数据管理模块错误码 * 数据管理模块错误码
* *
* @author dallas * @author dallas
* @since 2025-10-20 * @since 2025-10-20
*/ */
@Getter @Getter
@AllArgsConstructor @AllArgsConstructor
public enum DataManagementErrorCode implements ErrorCode { public enum DataManagementErrorCode implements ErrorCode {
/** /**
* 数据集不存在 * 数据集不存在
*/ */
DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), DATASET_NOT_FOUND("data_management.0001", "数据集不存在"),
/** /**
* 数据集已存在 * 数据集已存在
*/ */
DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"),
/** /**
* 数据集状态错误 * 数据集状态错误
*/ */
DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"),
/** /**
* 数据集标签不存在 * 数据集标签不存在
*/ */
DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"),
/** /**
* 数据集标签已存在 * 数据集标签已存在
*/ */
DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"),
/** /**
* 数据集标签已存在 * 数据集文件已存在
*/ */
DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"); DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"),
/**
private final String code; * 目录不存在
private final String message; */
} DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在");
private final String code;
private final String message;
}

View File

@@ -0,0 +1,20 @@
package com.datamate.datamanagement.interfaces.dto;
import jakarta.validation.constraints.NotBlank;
import lombok.Getter;
import lombok.Setter;
/**
* Request body for creating a sub-directory inside a dataset.
* <p>
* Lombok generates the getters and setters for both fields.
*/
@Getter
@Setter
public class CreateDirectoryRequest {
/**
* Parent prefix path, e.g. "images/"; empty means the dataset root directory.
* NOTE(review): no validation constraint is declared on this field — confirm that the
* consuming service normalizes it and rejects traversal segments such as "../".
*/
private String parentPrefix;
/** Name of the directory to create; constrained by {@code @NotBlank}. */
@NotBlank
private String directoryName;
}

View File

@@ -1,36 +1,40 @@
package com.datamate.datamanagement.interfaces.dto; package com.datamate.datamanagement.interfaces.dto;
import lombok.Getter; import lombok.Getter;
import lombok.Setter; import lombok.Setter;
import java.time.LocalDateTime; import java.time.LocalDateTime;
/** /**
* 数据集文件响应DTO * 数据集文件响应DTO
*/ */
@Getter @Getter
@Setter @Setter
public class DatasetFileResponse { public class DatasetFileResponse {
/** 文件ID */ /** 文件ID */
private String id; private String id;
/** 文件名 */ /** 文件名 */
private String fileName; private String fileName;
/** 原始文件名 */ /** 原始文件名 */
private String originalName; private String originalName;
/** 文件类型 */ /** 文件类型 */
private String fileType; private String fileType;
/** 文件大小(字节) */ /** 文件大小(字节) */
private Long fileSize; private Long fileSize;
/** 文件状态 */ /** 文件状态 */
private String status; private String status;
/** 文件描述 */ /** 文件描述 */
private String description; private String description;
/** 文件路径 */ /** 文件路径 */
private String filePath; private String filePath;
/** 上传时间 */ /** 上传时间 */
private LocalDateTime uploadTime; private LocalDateTime uploadTime;
/** 最后更新时间 */ /** 最后更新时间 */
private LocalDateTime lastAccessTime; private LocalDateTime lastAccessTime;
/** 上传者 */ /** 上传者 */
private String uploadedBy; private String uploadedBy;
} /** 是否为目录 */
private Boolean directory;
/** 目录文件数量 */
private Long fileCount;
}

View File

@@ -1,22 +1,25 @@
package com.datamate.datamanagement.interfaces.dto; package com.datamate.datamanagement.interfaces.dto;
import jakarta.validation.constraints.Min; import jakarta.validation.constraints.Min;
import lombok.Getter; import lombok.Getter;
import lombok.Setter; import lombok.Setter;
/** /**
* 切片上传预上传请求 * 切片上传预上传请求
*/ */
@Getter @Getter
@Setter @Setter
public class UploadFilesPreRequest { public class UploadFilesPreRequest {
/** 是否为压缩包上传 */ /** 是否为压缩包上传 */
private boolean hasArchive; private boolean hasArchive;
/** 总文件数量 */ /** 总文件数量 */
@Min(1) @Min(1)
private int totalFileNum; private int totalFileNum;
/** 总文件大小 */ /** 总文件大小 */
private long totalSize; private long totalSize;
}
/** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */
private String prefix;
}

View File

@@ -1,165 +1,197 @@
package com.datamate.datamanagement.interfaces.rest; package com.datamate.datamanagement.interfaces.rest;
import com.datamate.common.infrastructure.common.IgnoreResponseWrap; import com.datamate.common.infrastructure.common.IgnoreResponseWrap;
import com.datamate.common.infrastructure.common.Response; import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.infrastructure.exception.SystemErrorCode; import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse; import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery; import com.datamate.common.interfaces.PagingQuery;
import com.datamate.datamanagement.application.DatasetFileApplicationService; import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile; import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse; import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import jakarta.servlet.http.HttpServletResponse; import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
import jakarta.validation.Valid; import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j; import jakarta.validation.Valid;
import org.springframework.beans.factory.annotation.Autowired; import lombok.extern.slf4j.Slf4j;
import org.springframework.core.io.Resource; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpHeaders; import org.springframework.core.io.Resource;
import org.springframework.http.HttpStatus; import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType; import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity; import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.*; import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
import java.util.List;
/**
* 数据集文件 REST 控制器(UUID 模式) /**
*/ * 数据集文件 REST 控制器(UUID 模式)
@Slf4j */
@RestController @Slf4j
@RequestMapping("/data-management/datasets/{datasetId}/files") @RestController
public class DatasetFileController { @RequestMapping("/data-management/datasets/{datasetId}/files")
public class DatasetFileController {
private final DatasetFileApplicationService datasetFileApplicationService;
private final DatasetFileApplicationService datasetFileApplicationService;
@Autowired
public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) { @Autowired
this.datasetFileApplicationService = datasetFileApplicationService; public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) {
} this.datasetFileApplicationService = datasetFileApplicationService;
}
@GetMapping
public Response<PagedResponse<DatasetFile>> getDatasetFiles( @GetMapping
@PathVariable("datasetId") String datasetId, public Response<PagedResponse<DatasetFile>> getDatasetFiles(
@RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory, @PathVariable("datasetId") String datasetId,
@RequestParam(value = "page", required = false, defaultValue = "0") Integer page, @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
@RequestParam(value = "size", required = false, defaultValue = "20") Integer size, @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) { @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
PagingQuery pagingQuery = new PagingQuery(page, size); @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
PagedResponse<DatasetFile> filesPage; PagingQuery pagingQuery = new PagingQuery(page, size);
if (isWithDirectory) { PagedResponse<DatasetFile> filesPage;
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery); if (isWithDirectory) {
} else { filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery); } else {
} filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery);
return Response.ok(filesPage); }
} return Response.ok(filesPage);
}
@GetMapping("/{fileId}")
public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById( @GetMapping("/{fileId}")
@PathVariable("datasetId") String datasetId, public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(
@PathVariable("fileId") String fileId) { @PathVariable("datasetId") String datasetId,
try { @PathVariable("fileId") String fileId) {
DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); try {
return ResponseEntity.ok(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile))); DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
} catch (IllegalArgumentException e) { return ResponseEntity.ok(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile)));
return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null)); } catch (IllegalArgumentException e) {
} return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
} }
}
@DeleteMapping("/{fileId}")
public ResponseEntity<Response<Void>> deleteDatasetFile( @DeleteMapping("/{fileId}")
@PathVariable("datasetId") String datasetId, public ResponseEntity<Response<Void>> deleteDatasetFile(
@PathVariable("fileId") String fileId) { @PathVariable("datasetId") String datasetId,
try { @PathVariable("fileId") String fileId) {
datasetFileApplicationService.deleteDatasetFile(datasetId, fileId); try {
return ResponseEntity.ok().build(); datasetFileApplicationService.deleteDatasetFile(datasetId, fileId);
} catch (IllegalArgumentException e) { return ResponseEntity.ok().build();
return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null)); } catch (IllegalArgumentException e) {
} return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
} }
}
@IgnoreResponseWrap
@GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8") @IgnoreResponseWrap
public ResponseEntity<Resource> downloadDatasetFileById(@PathVariable("datasetId") String datasetId, @GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8")
@PathVariable("fileId") String fileId) { public ResponseEntity<Resource> downloadDatasetFileById(@PathVariable("datasetId") String datasetId,
try { @PathVariable("fileId") String fileId) {
DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); try {
Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId); DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
return ResponseEntity.ok()
.contentType(MediaType.APPLICATION_OCTET_STREAM) return ResponseEntity.ok()
.header(HttpHeaders.CONTENT_DISPOSITION, .contentType(MediaType.APPLICATION_OCTET_STREAM)
"attachment; filename=\"" + datasetFile.getFileName() + "\"") .header(HttpHeaders.CONTENT_DISPOSITION,
.body(resource); "attachment; filename=\"" + datasetFile.getFileName() + "\"")
} catch (IllegalArgumentException e) { .body(resource);
return ResponseEntity.status(HttpStatus.NOT_FOUND).build(); } catch (IllegalArgumentException e) {
} catch (Exception e) { return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); } catch (Exception e) {
} return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
} }
}
@IgnoreResponseWrap
@GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE) @IgnoreResponseWrap
public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) { @GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)
datasetFileApplicationService.downloadDatasetFileAsZip(datasetId, response); public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) {
} datasetFileApplicationService.downloadDatasetFileAsZip(datasetId, response);
}
/**
* 文件上传请求 /**
* * 文件上传请求
* @param request 批量文件上传请求 *
* @return 批量上传请求id * @param request 批量文件上传请求
*/ * @return 批量上传请求id
@PostMapping("/upload/pre-upload") */
public ResponseEntity<Response<String>> preUpload(@PathVariable("datasetId") String datasetId, @PostMapping("/upload/pre-upload")
@RequestBody @Valid UploadFilesPreRequest request) { public ResponseEntity<Response<String>> preUpload(@PathVariable("datasetId") String datasetId,
return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId))); @RequestBody @Valid UploadFilesPreRequest request) {
} return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId)));
}
/**
* 分块上传 /**
* * 分块上传
* @param uploadFileRequest 上传文件请求 *
*/ * @param uploadFileRequest 上传文件请求
@PostMapping("/upload/chunk") */
public ResponseEntity<Void> chunkUpload(@PathVariable("datasetId") String datasetId, @PostMapping("/upload/chunk")
@Valid UploadFileRequest uploadFileRequest) { public ResponseEntity<Void> chunkUpload(@PathVariable("datasetId") String datasetId,
log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}", @Valid UploadFileRequest uploadFileRequest) {
uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(), log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}",
uploadFileRequest.getChunkNo()); uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(),
datasetFileApplicationService.chunkUpload(datasetId, uploadFileRequest); uploadFileRequest.getChunkNo());
return ResponseEntity.ok().build(); datasetFileApplicationService.chunkUpload(datasetId, uploadFileRequest);
} return ResponseEntity.ok().build();
}
/**
* 将指定路径中的文件拷贝到数据集目录下 /**
* * 将指定路径中的文件拷贝到数据集目录下
* @param datasetId 数据集ID *
* @param req 源文件路径列表 * @param datasetId 数据集ID
* @return 数据集文件响应DTO列表 * @param req 源文件路径列表
*/ * @return 数据集文件响应DTO列表
@PostMapping("/upload/copy") */
public List<DatasetFileResponse> copyFilesToDatasetDir(@PathVariable("datasetId") String datasetId, @PostMapping("/upload/copy")
@RequestBody @Valid CopyFilesRequest req) { public List<DatasetFileResponse> copyFilesToDatasetDir(@PathVariable("datasetId") String datasetId,
List<DatasetFile> datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req); @RequestBody @Valid CopyFilesRequest req) {
return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); List<DatasetFile> datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req);
} return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
}
/**
* 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) /**
* * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作)
* @param datasetId 数据集ID *
* @param req 添加文件请求(包含源文件路径列表和softAdd标志) * @param datasetId 数据集ID
* @return 数据集文件响应DTO列表 * @param req 添加文件请求(包含源文件路径列表和softAdd标志)
*/ * @return 数据集文件响应DTO列表
@PostMapping("/upload/add") */
public List<DatasetFileResponse> addFilesToDataset(@PathVariable("datasetId") String datasetId, @PostMapping("/upload/add")
@RequestBody @Valid AddFilesRequest req) { public List<DatasetFileResponse> addFilesToDataset(@PathVariable("datasetId") String datasetId,
List<DatasetFile> datasetFiles = datasetFileApplicationService.addFilesToDataset(datasetId, req); @RequestBody @Valid AddFilesRequest req) {
return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); List<DatasetFile> datasetFiles = datasetFileApplicationService.addFilesToDataset(datasetId, req);
} return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
} }
/**
* Creates a sub-directory under a dataset.
* <p>
* Delegates to {@code datasetFileApplicationService.createDirectory} and then returns an
* empty 200 OK response.
*
* @param datasetId dataset ID taken from the request path
* @param req request body (bean-validated via {@code @Valid}) describing the directory to create
* @return an empty response with HTTP 200 once the service call has returned
*/
@PostMapping("/directories")
public ResponseEntity<Void> createDirectory(@PathVariable("datasetId") String datasetId,
@RequestBody @Valid CreateDirectoryRequest req) {
datasetFileApplicationService.createDirectory(datasetId, req);
return ResponseEntity.ok().build();
}
/**
* Downloads a directory (compressed as ZIP).
* <p>
* This method returns nothing itself; it hands the servlet response to
* {@code datasetFileApplicationService.downloadDirectory}, which presumably writes the
* archive to it — confirm in the service.
*
* @param datasetId dataset ID taken from the request path
* @param prefix optional {@code prefix} query parameter; bound to "" when omitted.
*        NOTE(review): "" looks like it denotes the dataset root — confirm against the service.
* @param response servlet response passed through to the service
*/
@IgnoreResponseWrap
@GetMapping(value = "/directories/download", produces = "application/zip")
public void downloadDirectory(@PathVariable("datasetId") String datasetId,
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
HttpServletResponse response) {
datasetFileApplicationService.downloadDirectory(datasetId, prefix, response);
}
/**
* Deletes a directory and all of its contents.
* <p>
* Delegates to {@code datasetFileApplicationService.deleteDirectory} and then returns an
* empty 200 OK response.
* <p>
* NOTE(review): {@code prefix} is optional and bound to "" when omitted. If "" denotes the
* dataset root, a DELETE without a prefix could target the whole dataset — confirm that the
* service rejects or safely handles an empty prefix.
*
* @param datasetId dataset ID taken from the request path
* @param prefix optional {@code prefix} query parameter identifying the directory; "" when omitted
* @return an empty response with HTTP 200 once the service call has returned
*/
@DeleteMapping("/directories")
public ResponseEntity<Void> deleteDirectory(@PathVariable("datasetId") String datasetId,
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
datasetFileApplicationService.deleteDirectory(datasetId, prefix);
return ResponseEntity.ok().build();
}
}

View File

@@ -1,17 +1,18 @@
package com.datamate.common.infrastructure.exception; package com.datamate.common.infrastructure.exception;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;
/** /**
* CommonErrorCode * CommonErrorCode
* *
* @since 2025/12/5 * @since 2025/12/5
*/ */
@Getter @Getter
@AllArgsConstructor @AllArgsConstructor
public enum CommonErrorCode implements ErrorCode{ public enum CommonErrorCode implements ErrorCode{
PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在"); PARAM_ERROR("common.0001", "参数错误"),
private final String code; PRE_UPLOAD_REQUEST_NOT_EXIST("common.0101", "预上传请求不存在");
private final String message; private final String code;
} private final String message;
}

View File

@@ -199,15 +199,11 @@ function CardView<T extends BaseCardDataType>(props: CardViewProps<T>) {
? "" ? ""
: "bg-gradient-to-br from-sky-300 to-blue-500 text-white" : "bg-gradient-to-br from-sky-300 to-blue-500 text-white"
}`} }`}
style={{ style={
...(item?.iconColor item?.iconColor
? { backgroundColor: item.iconColor } ? { backgroundColor: item.iconColor }
: {}), : {}
backgroundImage: }
"linear-gradient(180deg, rgba(255,255,255,0.35), rgba(255,255,255,0.05))",
boxShadow:
"inset 0 0 0 1px rgba(255,255,255,0.25)",
}}
> >
<div className="w-[2.1rem] h-[2.1rem] text-gray-50">{item?.icon}</div> <div className="w-[2.1rem] h-[2.1rem] text-gray-50">{item?.icon}</div>
</div> </div>

View File

@@ -1,331 +1,406 @@
import React, { useCallback, useEffect } from "react"; import React, { useCallback, useEffect } from "react";
import { Button, Input, Table } from "antd"; import { Button, Input, Table, message } from "antd";
import { RightOutlined } from "@ant-design/icons"; import { RightOutlined } from "@ant-design/icons";
import { mapDataset } from "@/pages/DataManagement/dataset.const"; import { mapDataset } from "@/pages/DataManagement/dataset.const";
import { import {
Dataset, Dataset,
DatasetFile, DatasetFile,
DatasetType, DatasetType,
} from "@/pages/DataManagement/dataset.model"; } from "@/pages/DataManagement/dataset.model";
import { import {
queryDatasetFilesUsingGet, queryDatasetFilesUsingGet,
queryDatasetsUsingGet, queryDatasetsUsingGet,
} from "@/pages/DataManagement/dataset.api"; } from "@/pages/DataManagement/dataset.api";
import { formatBytes } from "@/utils/unit"; import { formatBytes } from "@/utils/unit";
import { useDebouncedEffect } from "@/hooks/useDebouncedEffect"; import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
interface DatasetFileTransferProps interface DatasetFileTransferProps
extends React.HTMLAttributes<HTMLDivElement> { extends React.HTMLAttributes<HTMLDivElement> {
open: boolean; open: boolean;
selectedFilesMap: { [key: string]: DatasetFile }; selectedFilesMap: { [key: string]: DatasetFile };
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void; onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
onDatasetSelect?: (dataset: Dataset | null) => void; onDatasetSelect?: (dataset: Dataset | null) => void;
} datasetTypeFilter?: DatasetType;
}
const fileCols = [
{ const fileCols = [
title: "所属数据集", {
dataIndex: "datasetName", title: "所属数据集",
key: "datasetName", dataIndex: "datasetName",
ellipsis: true, key: "datasetName",
}, ellipsis: true,
{ },
title: "文件名", {
dataIndex: "fileName", title: "文件名",
key: "fileName", dataIndex: "fileName",
ellipsis: true, key: "fileName",
}, ellipsis: true,
{ },
title: "大小", {
dataIndex: "fileSize", title: "大小",
key: "fileSize", dataIndex: "fileSize",
ellipsis: true, key: "fileSize",
render: formatBytes, ellipsis: true,
}, render: formatBytes,
]; },
];
// Customize Table Transfer
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({ // Customize Table Transfer
open, const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
selectedFilesMap, open,
onSelectedFilesChange, selectedFilesMap,
onDatasetSelect, onSelectedFilesChange,
...props onDatasetSelect,
}) => { datasetTypeFilter = DatasetType.TEXT,
const [datasets, setDatasets] = React.useState<Dataset[]>([]); ...props
const [datasetSearch, setDatasetSearch] = React.useState<string>(""); }) => {
const [datasetPagination, setDatasetPagination] = React.useState<{ const [datasets, setDatasets] = React.useState<Dataset[]>([]);
current: number; const [datasetSearch, setDatasetSearch] = React.useState<string>("");
pageSize: number; const [datasetPagination, setDatasetPagination] = React.useState<{
total: number; current: number;
}>({ current: 1, pageSize: 10, total: 0 }); pageSize: number;
total: number;
const [files, setFiles] = React.useState<DatasetFile[]>([]); }>({ current: 1, pageSize: 10, total: 0 });
const [filesSearch, setFilesSearch] = React.useState<string>("");
const [filesPagination, setFilesPagination] = React.useState<{ const [files, setFiles] = React.useState<DatasetFile[]>([]);
current: number; const [filesSearch, setFilesSearch] = React.useState<string>("");
pageSize: number; const [filesPagination, setFilesPagination] = React.useState<{
total: number; current: number;
}>({ current: 1, pageSize: 10, total: 0 }); pageSize: number;
total: number;
const [showFiles, setShowFiles] = React.useState<boolean>(false); }>({ current: 1, pageSize: 10, total: 0 });
const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
null const [showFiles, setShowFiles] = React.useState<boolean>(false);
); const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
const [datasetSelections, setDatasetSelections] = React.useState<Dataset[]>( null
[] );
); const [datasetSelections, setDatasetSelections] = React.useState<Dataset[]>(
[]
const fetchDatasets = async () => { );
const { data } = await queryDatasetsUsingGet({ const [selectingAll, setSelectingAll] = React.useState<boolean>(false);
// Ant Design Table pagination.current is 1-based; ensure backend also receives 1-based value
page: datasetPagination.current, const fetchDatasets = async () => {
size: datasetPagination.pageSize, const { data } = await queryDatasetsUsingGet({
keyword: datasetSearch, // Ant Design Table pagination.current is 1-based; ensure backend also receives 1-based value
type: DatasetType.TEXT, page: datasetPagination.current,
}); size: datasetPagination.pageSize,
setDatasets(data.content.map(mapDataset) || []); keyword: datasetSearch,
setDatasetPagination((prev) => ({ type: datasetTypeFilter,
...prev, });
total: data.totalElements, setDatasets(data.content.map(mapDataset) || []);
})); setDatasetPagination((prev) => ({
}; ...prev,
total: data.totalElements,
useDebouncedEffect( }));
() => { };
fetchDatasets();
}, useDebouncedEffect(
[datasetSearch, datasetPagination.pageSize, datasetPagination.current], () => {
300 fetchDatasets();
); },
[datasetSearch, datasetPagination.pageSize, datasetPagination.current],
const fetchFiles = useCallback( 300
async ( );
options?: Partial<{ page: number; pageSize: number; keyword: string }>
) => { const fetchFiles = useCallback(
if (!selectedDataset) return; async (
const page = options?.page ?? filesPagination.current; options?: Partial<{ page: number; pageSize: number; keyword: string }>
const pageSize = options?.pageSize ?? filesPagination.pageSize; ) => {
const keyword = options?.keyword ?? filesSearch; if (!selectedDataset) return;
const page = options?.page ?? filesPagination.current;
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, { const pageSize = options?.pageSize ?? filesPagination.pageSize;
page, const keyword = options?.keyword ?? filesSearch;
size: pageSize,
keyword, const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
}); page,
setFiles( size: pageSize,
(data.content || []).map((item: DatasetFile) => ({ keyword,
...item, });
key: item.id, setFiles(
datasetName: selectedDataset.name, (data.content || []).map((item: DatasetFile) => ({
})) ...item,
); id: item.id,
setFilesPagination((prev) => ({ key: String(item.id), // rowKey 使用字符串,确保与 selectedRowKeys 类型一致
...prev, datasetName: selectedDataset.name,
current: page, }))
pageSize, );
total: data.totalElements, setFilesPagination((prev) => ({
})); ...prev,
}, current: page,
[selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch] pageSize,
); total: data.totalElements,
}));
useEffect(() => { },
// 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求 [selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
if (selectedDataset) { );
setFilesPagination({ current: 1, pageSize: 10, total: 0 });
fetchFiles({ page: 1, pageSize: 10 }).catch(() => {}); useEffect(() => {
} else { // 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求
setFiles([]); if (selectedDataset) {
setFilesPagination({ current: 1, pageSize: 10, total: 0 }); setFilesPagination({ current: 1, pageSize: 10, total: 0 });
} // 后端 page 参数为 0-based,这里传 0 获取第一页
// 只在 selectedDataset 变化时触发 fetchFiles({ page: 0, pageSize: 10 }).catch(() => {});
// eslint-disable-next-line react-hooks/exhaustive-deps } else {
}, [selectedDataset]); setFiles([]);
setFilesPagination({ current: 1, pageSize: 10, total: 0 });
useEffect(() => { }
onDatasetSelect?.(selectedDataset); // 只在 selectedDataset 变化时触发
}, [selectedDataset, onDatasetSelect]); // eslint-disable-next-line react-hooks/exhaustive-deps
}, [selectedDataset]);
const toggleSelectFile = (record: DatasetFile) => {
if (!selectedFilesMap[record.id]) { useEffect(() => {
onSelectedFilesChange({ onDatasetSelect?.(selectedDataset);
...selectedFilesMap, }, [selectedDataset, onDatasetSelect]);
[record.id]: record,
}); const handleSelectAllInDataset = useCallback(async () => {
} else { if (!selectedDataset) {
const newSelectedFiles = { ...selectedFilesMap }; message.warning("请先选择一个数据集");
delete newSelectedFiles[record.id]; return;
onSelectedFilesChange(newSelectedFiles); }
}
}; try {
setSelectingAll(true);
useEffect(() => {
if (!open) { const pageSize = 1000; // 分批拉取,避免后端单页限制
// 重置状态 let page = 0; // 后端 page 参数为 0-based,从 0 开始
setDatasets([]); let total = 0;
setDatasetSearch(""); const allFiles: DatasetFile[] = [];
setDatasetPagination({ current: 1, pageSize: 10, total: 0 });
setFiles([]); while (true) {
setFilesSearch(""); const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
setFilesPagination({ current: 1, pageSize: 10, total: 0 }); page,
setShowFiles(false); size: pageSize,
setSelectedDataset(null); });
setDatasetSelections([]);
onDatasetSelect?.(null); const content: DatasetFile[] = (data.content || []).map(
} (item: DatasetFile) => ({
}, [open, onDatasetSelect]); ...item,
key: item.id,
const datasetCols = [ datasetName: selectedDataset.name,
{ }),
title: "数据集名称", );
dataIndex: "name",
key: "name", if (!content.length) {
ellipsis: true, break;
}, }
{
title: "文件数", allFiles.push(...content);
dataIndex: "fileCount", // 优先用后端的 totalElements,否则用当前累积数
key: "fileCount", total = typeof data.totalElements === "number" ? data.totalElements : allFiles.length;
ellipsis: true,
}, // 如果这一页数量小于 pageSize,说明已经拿完;否则继续下一页
{ if (content.length < pageSize) {
title: "大小", break;
dataIndex: "totalSize", }
key: "totalSize",
ellipsis: true, page += 1;
render: formatBytes, }
},
]; const newMap: { [key: string]: DatasetFile } = { ...selectedFilesMap };
allFiles.forEach((file) => {
return ( if (file && file.id != null) {
<div {...props}> newMap[String(file.id)] = file;
<div className="grid grid-cols-25 gap-4 w-full"> }
<div className="border-card flex flex-col col-span-12"> });
<div className="border-bottom p-2 font-bold"></div>
<div className="p-2"> onSelectedFilesChange(newMap);
<Input
placeholder="搜索数据集名称..." const count = total || allFiles.length;
value={datasetSearch} if (count > 0) {
allowClear message.success(`已选中当前数据集的全部 ${count} 个文件`);
onChange={(e) => setDatasetSearch(e.target.value)} } else {
/> message.info("当前数据集下没有可选文件");
</div> }
<Table } catch (error) {
scroll={{ y: 400 }} console.error("Failed to select all files in dataset", error);
rowKey="id" message.error("全选整个数据集失败,请稍后重试");
size="small" } finally {
rowClassName={(record) => setSelectingAll(false);
selectedDataset?.id === record.id ? "bg-blue-100" : "" }
} }, [selectedDataset, selectedFilesMap, onSelectedFilesChange]);
onRow={(record: Dataset) => ({
onClick: () => { const toggleSelectFile = (record: DatasetFile) => {
setSelectedDataset(record); if (!selectedFilesMap[record.id]) {
if (!datasetSelections.find((d) => d.id === record.id)) { onSelectedFilesChange({
setDatasetSelections([...datasetSelections, record]); ...selectedFilesMap,
} else { [record.id]: record,
setDatasetSelections( });
datasetSelections.filter((d) => d.id !== record.id) } else {
); const newSelectedFiles = { ...selectedFilesMap };
} delete newSelectedFiles[record.id];
}, onSelectedFilesChange(newSelectedFiles);
})} }
dataSource={datasets} };
columns={datasetCols}
pagination={{ useEffect(() => {
...datasetPagination, if (!open) {
onChange: (page, pageSize) => // 重置状态
setDatasetPagination({ setDatasets([]);
current: page, setDatasetSearch("");
pageSize: pageSize || datasetPagination.pageSize, setDatasetPagination({ current: 1, pageSize: 10, total: 0 });
total: datasetPagination.total, setFiles([]);
}), setFilesSearch("");
}} setFilesPagination({ current: 1, pageSize: 10, total: 0 });
/> setShowFiles(false);
</div> setSelectedDataset(null);
<RightOutlined /> setDatasetSelections([]);
<div className="border-card flex flex-col col-span-12"> onDatasetSelect?.(null);
<div className="border-bottom p-2 font-bold"></div> }
<div className="p-2"> }, [open, onDatasetSelect]);
<Input
placeholder="搜索文件名称..." const datasetCols = [
value={filesSearch} {
onChange={(e) => setFilesSearch(e.target.value)} title: "数据集名称",
/> dataIndex: "name",
</div> key: "name",
<Table ellipsis: true,
scroll={{ y: 400 }} },
rowKey="id" {
size="small" title: "文件数",
dataSource={files} dataIndex: "fileCount",
columns={fileCols.slice(1, fileCols.length)} key: "fileCount",
pagination={{ ellipsis: true,
...filesPagination, },
onChange: (page, pageSize) => { {
const nextPageSize = pageSize || filesPagination.pageSize; title: "大小",
setFilesPagination((prev) => ({ dataIndex: "totalSize",
...prev, key: "totalSize",
current: page, ellipsis: true,
pageSize: nextPageSize, render: formatBytes,
})); },
fetchFiles({ page, pageSize: nextPageSize }).catch(() => {}); ];
},
}} return (
onRow={(record: DatasetFile) => ({ <div {...props}>
onClick: () => toggleSelectFile(record), <div className="grid grid-cols-25 gap-4 w-full">
})} <div className="border-card flex flex-col col-span-12">
rowSelection={{ <div className="border-bottom p-2 font-bold"></div>
type: "checkbox", <div className="p-2">
selectedRowKeys: Object.keys(selectedFilesMap), <Input
placeholder="搜索数据集名称..."
// 单选 value={datasetSearch}
onSelect: (record: DatasetFile) => { allowClear
toggleSelectFile(record); onChange={(e) => setDatasetSearch(e.target.value)}
}, />
</div>
// 全选 <Table
onSelectAll: (selected, selectedRows: DatasetFile[]) => { scroll={{ y: 400 }}
if (selected) { rowKey="id"
// ✔ 全选 -> 将 files 列表全部加入 selectedFilesMap size="small"
const newMap: Record<string, DatasetFile> = { ...selectedFilesMap }; rowClassName={(record) =>
selectedRows.forEach((f) => { selectedDataset?.id === record.id ? "bg-blue-100" : ""
newMap[f.id] = f; }
}); onRow={(record: Dataset) => ({
onSelectedFilesChange(newMap); onClick: () => {
} else { setSelectedDataset(record);
// ✘ 取消全选 -> 清空 map if (!datasetSelections.find((d) => d.id === record.id)) {
const newMap = { ...selectedFilesMap }; setDatasetSelections([...datasetSelections, record]);
Object.keys(newMap).forEach((id) => { } else {
if (files.some((f) => String(f.id) === id)) { setDatasetSelections(
// 仅移除当前页对应文件 datasetSelections.filter((d) => d.id !== record.id)
delete newMap[id]; );
} }
}); },
onSelectedFilesChange(newMap); })}
} dataSource={datasets}
}, columns={datasetCols}
pagination={{
getCheckboxProps: (record: DatasetFile) => ({ ...datasetPagination,
name: record.fileName, onChange: (page, pageSize) =>
}), setDatasetPagination({
}} current: page,
/> pageSize: pageSize || datasetPagination.pageSize,
</div> total: datasetPagination.total,
</div> }),
<Button className="mt-4" onClick={() => setShowFiles(!showFiles)}> }}
{showFiles ? "取消预览" : "预览"} />
</Button> </div>
<div hidden={!showFiles}> <RightOutlined />
<Table <div className="border-card flex flex-col col-span-12">
scroll={{ y: 400 }} <div className="border-bottom p-2 font-bold flex justify-between items-center">
rowKey="id" <span></span>
size="small" <Button
dataSource={Object.values(selectedFilesMap)} type="link"
columns={fileCols} size="small"
/> onClick={handleSelectAllInDataset}
</div> disabled={!selectedDataset}
</div> loading={selectingAll}
); >
};
</Button>
export default DatasetFileTransfer; </div>
<div className="p-2">
<Input
placeholder="搜索文件名称..."
value={filesSearch}
onChange={(e) => setFilesSearch(e.target.value)}
/>
</div>
<Table
scroll={{ y: 400 }}
rowKey={(record) => String(record.id)}
size="small"
dataSource={files}
columns={fileCols.slice(1, fileCols.length)}
pagination={{
...filesPagination,
onChange: (page, pageSize) => {
const nextPageSize = pageSize || filesPagination.pageSize;
setFilesPagination((prev) => ({
...prev,
current: page,
pageSize: nextPageSize,
}));
// 前端分页是 1-based,后端是 0-based,所以这里传 page - 1
fetchFiles({ page: page - 1, pageSize: nextPageSize }).catch(() => {});
},
}}
onRow={(record: DatasetFile) => ({
onClick: () => toggleSelectFile(record),
})}
rowSelection={{
type: "checkbox",
selectedRowKeys: Object.keys(selectedFilesMap),
preserveSelectedRowKeys: true,
// 单选
onSelect: (record: DatasetFile) => {
toggleSelectFile(record);
},
// 全选 - 改为全选整个数据集而不是当前页
onSelectAll: (selected, selectedRows: DatasetFile[]) => {
if (selected) {
// 点击表头“全选”时,改为一键全选当前数据集的全部文件
// 而不是只选中当前页
handleSelectAllInDataset();
} else {
// 取消表头“全选”时,清空当前已选文件
onSelectedFilesChange({});
}
},
getCheckboxProps: (record: DatasetFile) => ({
name: record.fileName,
}),
}}
/>
</div>
</div>
<Button className="mt-4" onClick={() => setShowFiles(!showFiles)}>
{showFiles ? "取消预览" : "预览"}
</Button>
<div hidden={!showFiles}>
<Table
scroll={{ y: 400 }}
rowKey="id"
size="small"
dataSource={Object.values(selectedFilesMap)}
columns={fileCols}
/>
</div>
</div>
);
};
export default DatasetFileTransfer;

View File

@@ -1,187 +1,198 @@
import { TaskItem } from "@/pages/DataManagement/dataset.model"; import { TaskItem } from "@/pages/DataManagement/dataset.model";
import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util"; import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util";
import { App } from "antd"; import { App } from "antd";
import { useRef, useState } from "react"; import { useRef, useState } from "react";
export function useFileSliceUpload( export function useFileSliceUpload(
{ {
preUpload, preUpload,
uploadChunk, uploadChunk,
cancelUpload, cancelUpload,
}: { }: {
preUpload: (id: string, params: any) => Promise<{ data: number }>; preUpload: (id: string, params: any) => Promise<{ data: number }>;
uploadChunk: (id: string, formData: FormData, config: any) => Promise<any>; uploadChunk: (id: string, formData: FormData, config: any) => Promise<any>;
cancelUpload: ((reqId: number) => Promise<any>) | null; cancelUpload: ((reqId: number) => Promise<any>) | null;
}, },
showTaskCenter = true // 上传时是否显示任务中心 showTaskCenter = true // 上传时是否显示任务中心
) { ) {
const { message } = App.useApp(); const { message } = App.useApp();
const [taskList, setTaskList] = useState<TaskItem[]>([]); const [taskList, setTaskList] = useState<TaskItem[]>([]);
const taskListRef = useRef<TaskItem[]>([]); // 用于固定任务顺序 const taskListRef = useRef<TaskItem[]>([]); // 用于固定任务顺序
const createTask = (detail: any = {}) => { const createTask = (detail: any = {}) => {
const { dataset } = detail; const { dataset } = detail;
const title = `上传数据集: ${dataset.name} `; const title = `上传数据集: ${dataset.name} `;
const controller = new AbortController(); const controller = new AbortController();
const task: TaskItem = { const task: TaskItem = {
key: dataset.id, key: dataset.id,
title, title,
percent: 0, percent: 0,
reqId: -1, reqId: -1,
controller, controller,
size: 0, size: 0,
updateEvent: detail.updateEvent, updateEvent: detail.updateEvent,
hasArchive: detail.hasArchive, hasArchive: detail.hasArchive,
}; prefix: detail.prefix,
taskListRef.current = [task, ...taskListRef.current]; };
taskListRef.current = [task, ...taskListRef.current];
setTaskList(taskListRef.current);
return task; setTaskList(taskListRef.current);
}; return task;
};
const updateTaskList = (task: TaskItem) => {
taskListRef.current = taskListRef.current.map((item) => const updateTaskList = (task: TaskItem) => {
item.key === task.key ? task : item taskListRef.current = taskListRef.current.map((item) =>
); item.key === task.key ? task : item
setTaskList(taskListRef.current); );
}; setTaskList(taskListRef.current);
};
const removeTask = (task: TaskItem) => {
const { key } = task; const removeTask = (task: TaskItem) => {
taskListRef.current = taskListRef.current.filter( const { key } = task;
(item) => item.key !== key taskListRef.current = taskListRef.current.filter(
); (item) => item.key !== key
setTaskList(taskListRef.current); );
if (task.isCancel && task.cancelFn) { setTaskList(taskListRef.current);
task.cancelFn(); if (task.isCancel && task.cancelFn) {
} task.cancelFn();
if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent)); }
if (showTaskCenter) { if (task.updateEvent) {
window.dispatchEvent( // 携带前缀信息,便于刷新后仍停留在当前目录
new CustomEvent("show:task-popover", { detail: { show: false } }) window.dispatchEvent(
); new CustomEvent(task.updateEvent, {
} detail: { prefix: (task as any).prefix },
}; })
);
async function buildFormData({ file, reqId, i, j }) { }
const formData = new FormData(); if (showTaskCenter) {
const { slices, name, size } = file; window.dispatchEvent(
const checkSum = await calculateSHA256(slices[j]); new CustomEvent("show:task-popover", { detail: { show: false } })
formData.append("file", slices[j]); );
formData.append("reqId", reqId.toString()); }
formData.append("fileNo", (i + 1).toString()); };
formData.append("chunkNo", (j + 1).toString());
formData.append("fileName", name); async function buildFormData({ file, reqId, i, j }) {
formData.append("fileSize", size.toString()); const formData = new FormData();
formData.append("totalChunkNum", slices.length.toString()); const { slices, name, size } = file;
formData.append("checkSumHex", checkSum); const checkSum = await calculateSHA256(slices[j]);
return formData; formData.append("file", slices[j]);
} formData.append("reqId", reqId.toString());
formData.append("fileNo", (i + 1).toString());
async function uploadSlice(task: TaskItem, fileInfo) { formData.append("chunkNo", (j + 1).toString());
if (!task) { formData.append("fileName", name);
return; formData.append("fileSize", size.toString());
} formData.append("totalChunkNum", slices.length.toString());
const { reqId, key } = task; formData.append("checkSumHex", checkSum);
const { loaded, i, j, files, totalSize } = fileInfo; return formData;
const formData = await buildFormData({ }
file: files[i],
i, async function uploadSlice(task: TaskItem, fileInfo) {
j, if (!task) {
reqId, return;
}); }
const { reqId, key } = task;
let newTask = { ...task }; const { loaded, i, j, files, totalSize } = fileInfo;
await uploadChunk(key, formData, { const formData = await buildFormData({
onUploadProgress: (e) => { file: files[i],
const loadedSize = loaded + e.loaded; i,
const curPercent = Number((loadedSize / totalSize) * 100).toFixed(2); j,
reqId,
newTask = { });
...newTask,
...taskListRef.current.find((item) => item.key === key), let newTask = { ...task };
size: loadedSize, await uploadChunk(key, formData, {
percent: curPercent >= 100 ? 99.99 : curPercent, onUploadProgress: (e) => {
}; const loadedSize = loaded + e.loaded;
updateTaskList(newTask); const curPercent = Number((loadedSize / totalSize) * 100).toFixed(2);
},
}); newTask = {
} ...newTask,
...taskListRef.current.find((item) => item.key === key),
async function uploadFile({ task, files, totalSize }) { size: loadedSize,
const { data: reqId } = await preUpload(task.key, { percent: curPercent >= 100 ? 99.99 : curPercent,
totalFileNum: files.length, };
totalSize, updateTaskList(newTask);
datasetId: task.key, },
hasArchive: task.hasArchive, });
}); }
const newTask: TaskItem = { async function uploadFile({ task, files, totalSize }) {
...task, console.log('[useSliceUpload] Calling preUpload with prefix:', task.prefix);
reqId, const { data: reqId } = await preUpload(task.key, {
isCancel: false, totalFileNum: files.length,
cancelFn: () => { totalSize,
task.controller.abort(); datasetId: task.key,
cancelUpload?.(reqId); hasArchive: task.hasArchive,
if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent)); prefix: task.prefix,
}, });
}; console.log('[useSliceUpload] PreUpload response reqId:', reqId);
updateTaskList(newTask);
if (showTaskCenter) { const newTask: TaskItem = {
window.dispatchEvent( ...task,
new CustomEvent("show:task-popover", { detail: { show: true } }) reqId,
); isCancel: false,
} cancelFn: () => {
// // 更新数据状态 task.controller.abort();
if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent)); cancelUpload?.(reqId);
if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
let loaded = 0; },
for (let i = 0; i < files.length; i++) { };
const { slices } = files[i]; updateTaskList(newTask);
for (let j = 0; j < slices.length; j++) { if (showTaskCenter) {
await uploadSlice(newTask, { window.dispatchEvent(
loaded, new CustomEvent("show:task-popover", { detail: { show: true } })
i, );
j, }
files, // // 更新数据状态
totalSize, if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
});
loaded += slices[j].size; let loaded = 0;
} for (let i = 0; i < files.length; i++) {
} const { slices } = files[i];
removeTask(newTask); for (let j = 0; j < slices.length; j++) {
} await uploadSlice(newTask, {
loaded,
const handleUpload = async ({ task, files }) => { i,
const isErrorFile = await checkIsFilesExist(files); j,
if (isErrorFile) { files,
message.error("文件被修改或删除,请重新选择文件上传"); totalSize,
removeTask({ });
...task, loaded += slices[j].size;
isCancel: false, }
...taskListRef.current.find((item) => item.key === task.key), }
}); removeTask(newTask);
return; }
}
const handleUpload = async ({ task, files }) => {
try { const isErrorFile = await checkIsFilesExist(files);
const totalSize = files.reduce((acc, file) => acc + file.size, 0); if (isErrorFile) {
await uploadFile({ task, files, totalSize }); message.error("文件被修改或删除,请重新选择文件上传");
} catch (err) { removeTask({
console.error(err); ...task,
message.error("文件上传失败,请稍后重试"); isCancel: false,
removeTask({ ...taskListRef.current.find((item) => item.key === task.key),
...task, });
isCancel: true, return;
...taskListRef.current.find((item) => item.key === task.key), }
});
} try {
}; const totalSize = files.reduce((acc, file) => acc + file.size, 0);
await uploadFile({ task, files, totalSize });
return { } catch (err) {
taskList, console.error(err);
createTask, message.error("文件上传失败,请稍后重试");
removeTask, removeTask({
handleUpload, ...task,
}; isCancel: true,
} ...taskListRef.current.find((item) => item.key === task.key),
});
}
};
return {
taskList,
createTask,
removeTask,
handleUpload,
};
}

View File

@@ -0,0 +1,302 @@
import { useState, useEffect } from "react";
import { Card, Button, Table, message, Modal, Tag, Progress, Space, Tooltip } from "antd";
import {
PlusOutlined,
DeleteOutlined,
DownloadOutlined,
ReloadOutlined,
EyeOutlined,
} from "@ant-design/icons";
import type { ColumnType } from "antd/es/table";
import type { AutoAnnotationTask, AutoAnnotationStatus } from "../annotation.model";
import {
queryAutoAnnotationTasksUsingGet,
deleteAutoAnnotationTaskByIdUsingDelete,
downloadAutoAnnotationResultUsingGet,
} from "../annotation.api";
import CreateAutoAnnotationDialog from "./components/CreateAutoAnnotationDialog";
// Maps each task status to the string passed as the `color` prop of the
// status <Tag> in the task table.
const STATUS_COLORS: Record<AutoAnnotationStatus, string> = {
pending: "default",
running: "processing",
completed: "success",
failed: "error",
cancelled: "default",
};
// Maps each task status to the (Chinese) text shown inside the status <Tag>:
// pending / running / completed / failed / cancelled.
const STATUS_LABELS: Record<AutoAnnotationStatus, string> = {
pending: "等待中",
running: "处理中",
completed: "已完成",
failed: "失败",
cancelled: "已取消",
};
// Display names for the `config.modelSize` codes shown in the model column.
// The column falls back to the raw code when a size is not listed here.
// Parenthesised hints read: fastest / recommended / most accurate.
const MODEL_SIZE_LABELS: Record<string, string> = {
n: "YOLOv8n (最快)",
s: "YOLOv8s",
m: "YOLOv8m",
l: "YOLOv8l (推荐)",
x: "YOLOv8x (最精确)",
};
export default function AutoAnnotation() {
const [loading, setLoading] = useState(false);
const [tasks, setTasks] = useState<AutoAnnotationTask[]>([]);
const [showCreateDialog, setShowCreateDialog] = useState(false);
const [selectedRowKeys, setSelectedRowKeys] = useState<string[]>([]);
// Fetch once with the loading indicator, then refresh silently every 3 seconds
// until the component unmounts.
useEffect(() => {
  fetchTasks();
  const pollTimer = setInterval(() => {
    fetchTasks(true);
  }, 3000);
  return () => {
    clearInterval(pollTimer);
  };
}, []);
/**
 * Loads the auto-annotation task list into state.
 *
 * @param silent when true (used by the background poll) the loading flag is
 *   left untouched and failures are only logged, not shown to the user.
 */
const fetchTasks = async (silent = false) => {
  if (!silent) setLoading(true);
  try {
    const response = await queryAutoAnnotationTasksUsingGet();
    // The API helper may return either the list itself or an envelope with the
    // list under `data`. Only ever hand an array to the table: an envelope whose
    // `data` is null must become an empty list, not the envelope object.
    const payload = response?.data ?? response;
    setTasks(Array.isArray(payload) ? payload : []);
  } catch (error) {
    console.error("Failed to fetch auto annotation tasks:", error);
    if (!silent) message.error("获取任务列表失败");
  } finally {
    if (!silent) setLoading(false);
  }
};
/**
 * Asks for confirmation and, if confirmed, deletes the given task, reloads the
 * list and drops the task from the current row selection.
 */
const handleDelete = (task: AutoAnnotationTask) => {
  // Runs when the user confirms; the returned promise keeps the dialog's OK
  // button busy until the request settles.
  const deleteConfirmed = async () => {
    try {
      await deleteAutoAnnotationTaskByIdUsingDelete(task.id);
      message.success("任务删除成功");
      fetchTasks();
      setSelectedRowKeys((prevKeys) =>
        prevKeys.filter((rowKey) => rowKey !== task.id)
      );
    } catch (err) {
      console.error(err);
      message.error("删除失败,请稍后重试");
    }
  };

  Modal.confirm({
    title: `确认删除自动标注任务「${task.name}」吗?`,
    content: "删除任务后,已生成的标注结果不会被删除。",
    okText: "删除",
    okType: "danger",
    cancelText: "取消",
    onOk: deleteConfirmed,
  });
};
/**
 * Triggers the result download for a task, showing a persistent "preparing"
 * toast while the request is in flight.
 */
const handleDownload = async (task: AutoAnnotationTask) => {
  // Duration 0 keeps the toast open until closed explicitly. Keep the returned
  // closer so only this toast is dismissed; a bare message.destroy() would also
  // wipe unrelated messages that happen to be on screen.
  const hideLoading = message.loading("正在准备下载...", 0);
  try {
    await downloadAutoAnnotationResultUsingGet(task.id);
    hideLoading();
    message.success("下载已开始");
  } catch (error) {
    hideLoading();
    console.error(error);
    message.error("下载失败");
  }
};
/**
 * Opens an info dialog with the task's output path, detected-object count and
 * processed/total image counts. Does nothing when the task has no output path.
 */
const handleViewResult = (task: AutoAnnotationTask) => {
  const { outputPath, detectedObjects, processedImages, totalImages } = task;
  if (!outputPath) {
    return;
  }

  const resultSummary = (
    <div>
      <p>{outputPath}</p>
      <p>{detectedObjects}</p>
      <p>
        {processedImages} / {totalImages}
      </p>
    </div>
  );

  Modal.info({
    title: "标注结果路径",
    content: resultSummary,
  });
};
/**
 * Column definitions for the auto-annotation task table.
 *
 * Cell renderers tolerate missing values (a row without `config`,
 * `detectedObjects` or `createdAt`) so that one incomplete row cannot throw
 * during render and take the whole table down.
 */
const columns: ColumnType<AutoAnnotationTask>[] = [
  { title: "任务名称", dataIndex: "name", key: "name", width: 200 },
  {
    title: "数据集",
    dataIndex: "datasetName",
    key: "datasetName",
    width: 220,
    render: (_: any, record: AutoAnnotationTask) => {
      // Prefer the list of source datasets; fall back to the single name.
      const list =
        record.sourceDatasets && record.sourceDatasets.length > 0
          ? record.sourceDatasets
          : record.datasetName
            ? [record.datasetName]
            : [];
      if (list.length === 0) return "-";
      const text = list.join(",");
      return (
        <Tooltip title={text}>
          <span>{text}</span>
        </Tooltip>
      );
    },
  },
  {
    title: "模型",
    dataIndex: ["config", "modelSize"],
    key: "modelSize",
    width: 120,
    // Unknown size codes are shown as-is.
    render: (size: string) => MODEL_SIZE_LABELS[size] || size,
  },
  {
    title: "置信度",
    dataIndex: ["config", "confThreshold"],
    key: "confThreshold",
    width: 100,
    // Avoid rendering "NaN%" when the threshold is absent.
    render: (threshold: number) =>
      typeof threshold === "number" ? `${(threshold * 100).toFixed(0)}%` : "-",
  },
  {
    title: "目标类别",
    dataIndex: ["config", "targetClasses"],
    key: "targetClasses",
    width: 120,
    render: (classes: number[]) => {
      // A missing list is treated like an empty one: "all classes".
      const classList = classes ?? [];
      return (
        <Tooltip
          title={classList.length > 0 ? classList.join(", ") : "全部类别"}
        >
          <span>
            {classList.length > 0
              ? `${classList.length} 个类别`
              : "全部类别"}
          </span>
        </Tooltip>
      );
    },
  },
  {
    title: "状态",
    dataIndex: "status",
    key: "status",
    width: 100,
    // Fall back to the raw status string for values without a label.
    render: (status: AutoAnnotationStatus) => (
      <Tag color={STATUS_COLORS[status]}>{STATUS_LABELS[status] ?? status}</Tag>
    ),
  },
  {
    title: "进度",
    dataIndex: "progress",
    key: "progress",
    width: 150,
    render: (progress: number, record: AutoAnnotationTask) => (
      <div>
        <Progress percent={progress} size="small" />
        <div style={{ fontSize: "12px", color: "#999" }}>
          {record.processedImages} / {record.totalImages}
        </div>
      </div>
    ),
  },
  {
    title: "检测对象数",
    dataIndex: "detectedObjects",
    key: "detectedObjects",
    width: 100,
    // Calling toLocaleString on null/undefined would throw; show 0 instead.
    render: (count: number) => (count ?? 0).toLocaleString(),
  },
  {
    title: "创建时间",
    dataIndex: "createdAt",
    key: "createdAt",
    width: 150,
    // Avoid rendering "Invalid Date" when the timestamp is absent.
    render: (time: string) => (time ? new Date(time).toLocaleString() : "-"),
  },
  {
    title: "操作",
    key: "actions",
    width: 180,
    fixed: "right",
    render: (_: any, record: AutoAnnotationTask) => (
      <Space size="small">
        {/* View/download are only offered once the task has completed. */}
        {record.status === "completed" && (
          <>
            <Tooltip title="查看结果">
              <Button
                type="link"
                size="small"
                icon={<EyeOutlined />}
                onClick={() => handleViewResult(record)}
              />
            </Tooltip>
            <Tooltip title="下载结果">
              <Button
                type="link"
                size="small"
                icon={<DownloadOutlined />}
                onClick={() => handleDownload(record)}
              />
            </Tooltip>
          </>
        )}
        <Tooltip title="删除">
          <Button
            type="link"
            size="small"
            danger
            icon={<DeleteOutlined />}
            onClick={() => handleDelete(record)}
          />
        </Tooltip>
      </Space>
    ),
  },
];
return (
<div>
<Card
title="自动标注任务"
extra={
<Space>
<Button
type="primary"
icon={<PlusOutlined />}
onClick={() => setShowCreateDialog(true)}
>
</Button>
<Button
icon={<ReloadOutlined />}
loading={loading}
onClick={() => fetchTasks()}
>
</Button>
</Space>
}
>
<Table
rowKey="id"
loading={loading}
columns={columns}
dataSource={tasks}
rowSelection={{
selectedRowKeys,
onChange: (keys) => setSelectedRowKeys(keys as string[]),
}}
pagination={{ pageSize: 10 }}
scroll={{ x: 1000 }}
/>
</Card>
<CreateAutoAnnotationDialog
visible={showCreateDialog}
onCancel={() => setShowCreateDialog(false)}
onSuccess={() => {
setShowCreateDialog(false);
fetchTasks();
}}
/>
</div>
);
}

View File

@@ -0,0 +1,286 @@
import { useState, useEffect } from "react";
import { Modal, Form, Input, Select, Slider, message, Checkbox } from "antd";
import { createAutoAnnotationTaskUsingPost } from "../../annotation.api";
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
import { mapDataset } from "@/pages/DataManagement/dataset.const";
import { DatasetType, type DatasetFile, type Dataset } from "@/pages/DataManagement/dataset.model";
import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
const { Option } = Select;
/** Props of the "create auto-annotation task" dialog. */
interface CreateAutoAnnotationDialogProps {
/** Whether the dialog is shown; forwarded to the Modal's `open` prop. */
visible: boolean;
/** Invoked when the Modal is cancelled. */
onCancel: () => void;
/** Invoked after the create-task request has resolved successfully. */
onSuccess: () => void;
}
// Options for the target-class multi-select: `id` is the submitted option
// value, `label` (Chinese) and `name` (English) are shown as "label (name)".
// NOTE(review): presumably these are the 80 COCO class indices the YOLO models
// predict — confirm the ids against the backend's class mapping.
const COCO_CLASSES = [
{ id: 0, name: "person", label: "人" },
{ id: 1, name: "bicycle", label: "自行车" },
{ id: 2, name: "car", label: "汽车" },
{ id: 3, name: "motorcycle", label: "摩托车" },
{ id: 4, name: "airplane", label: "飞机" },
{ id: 5, name: "bus", label: "公交车" },
{ id: 6, name: "train", label: "火车" },
{ id: 7, name: "truck", label: "卡车" },
{ id: 8, name: "boat", label: "船" },
{ id: 9, name: "traffic light", label: "红绿灯" },
{ id: 10, name: "fire hydrant", label: "消防栓" },
{ id: 11, name: "stop sign", label: "停止标志" },
{ id: 12, name: "parking meter", label: "停车计时器" },
{ id: 13, name: "bench", label: "长椅" },
{ id: 14, name: "bird", label: "鸟" },
{ id: 15, name: "cat", label: "猫" },
{ id: 16, name: "dog", label: "狗" },
{ id: 17, name: "horse", label: "马" },
{ id: 18, name: "sheep", label: "羊" },
{ id: 19, name: "cow", label: "牛" },
{ id: 20, name: "elephant", label: "大象" },
{ id: 21, name: "bear", label: "熊" },
{ id: 22, name: "zebra", label: "斑马" },
{ id: 23, name: "giraffe", label: "长颈鹿" },
{ id: 24, name: "backpack", label: "背包" },
{ id: 25, name: "umbrella", label: "雨伞" },
{ id: 26, name: "handbag", label: "手提包" },
{ id: 27, name: "tie", label: "领带" },
{ id: 28, name: "suitcase", label: "行李箱" },
{ id: 29, name: "frisbee", label: "飞盘" },
{ id: 30, name: "skis", label: "滑雪板" },
{ id: 31, name: "snowboard", label: "滑雪板" },
{ id: 32, name: "sports ball", label: "球类" },
{ id: 33, name: "kite", label: "风筝" },
{ id: 34, name: "baseball bat", label: "棒球棒" },
{ id: 35, name: "baseball glove", label: "棒球手套" },
{ id: 36, name: "skateboard", label: "滑板" },
{ id: 37, name: "surfboard", label: "冲浪板" },
{ id: 38, name: "tennis racket", label: "网球拍" },
{ id: 39, name: "bottle", label: "瓶子" },
{ id: 40, name: "wine glass", label: "酒杯" },
{ id: 41, name: "cup", label: "杯子" },
{ id: 42, name: "fork", label: "叉子" },
{ id: 43, name: "knife", label: "刀" },
{ id: 44, name: "spoon", label: "勺子" },
{ id: 45, name: "bowl", label: "碗" },
{ id: 46, name: "banana", label: "香蕉" },
{ id: 47, name: "apple", label: "苹果" },
{ id: 48, name: "sandwich", label: "三明治" },
{ id: 49, name: "orange", label: "橙子" },
{ id: 50, name: "broccoli", label: "西兰花" },
{ id: 51, name: "carrot", label: "胡萝卜" },
{ id: 52, name: "hot dog", label: "热狗" },
{ id: 53, name: "pizza", label: "披萨" },
{ id: 54, name: "donut", label: "甜甜圈" },
{ id: 55, name: "cake", label: "蛋糕" },
{ id: 56, name: "chair", label: "椅子" },
{ id: 57, name: "couch", label: "沙发" },
{ id: 58, name: "potted plant", label: "盆栽" },
{ id: 59, name: "bed", label: "床" },
{ id: 60, name: "dining table", label: "餐桌" },
{ id: 61, name: "toilet", label: "马桶" },
{ id: 62, name: "tv", label: "电视" },
{ id: 63, name: "laptop", label: "笔记本电脑" },
{ id: 64, name: "mouse", label: "鼠标" },
{ id: 65, name: "remote", label: "遥控器" },
{ id: 66, name: "keyboard", label: "键盘" },
{ id: 67, name: "cell phone", label: "手机" },
{ id: 68, name: "microwave", label: "微波炉" },
{ id: 69, name: "oven", label: "烤箱" },
{ id: 70, name: "toaster", label: "烤面包机" },
{ id: 71, name: "sink", label: "水槽" },
{ id: 72, name: "refrigerator", label: "冰箱" },
{ id: 73, name: "book", label: "书" },
{ id: 74, name: "clock", label: "钟表" },
{ id: 75, name: "vase", label: "花瓶" },
{ id: 76, name: "scissors", label: "剪刀" },
{ id: 77, name: "teddy bear", label: "玩具熊" },
{ id: 78, name: "hair drier", label: "吹风机" },
{ id: 79, name: "toothbrush", label: "牙刷" },
];
export default function CreateAutoAnnotationDialog({
visible,
onCancel,
onSuccess,
}: CreateAutoAnnotationDialogProps) {
const [form] = Form.useForm();
const [loading, setLoading] = useState(false);
const [datasets, setDatasets] = useState<any[]>([]);
const [selectAllClasses, setSelectAllClasses] = useState(true);
const [selectedFilesMap, setSelectedFilesMap] = useState<Record<string, DatasetFile>>({});
const [selectedDataset, setSelectedDataset] = useState<Dataset | null>(null);
const [imageFileCount, setImageFileCount] = useState(0);
// Every time the dialog opens, start from a clean slate: reload datasets,
// restore the form defaults and clear the selection made in a previous session.
useEffect(() => {
  if (visible) {
    fetchDatasets();
    form.resetFields();
    form.setFieldsValue({
      modelSize: "l",
      confThreshold: 0.7,
      targetClasses: [],
    });
    // These pieces of state live in this component, which stays mounted while
    // the Modal is closed (destroyOnClose only unmounts the Modal's children).
    // Without an explicit reset the previous file selection and class mode
    // would leak into the new session while the form itself is back to defaults.
    setSelectAllClasses(true);
    setSelectedFilesMap({});
    setSelectedDataset(null);
  }
}, [visible, form]);
/**
 * Requests up to 1000 datasets, keeps those whose mapped type is IMAGE and
 * stores them in state. Failures are logged and reported with an error toast.
 */
const fetchDatasets = async () => {
  try {
    const { data } = await queryDatasetsUsingGet({ page: 0, pageSize: 1000 });
    const mappedDatasets = (data.content || []).map(mapDataset);
    const imageOnly = mappedDatasets.filter(
      (ds: any) => ds.datasetType === DatasetType.IMAGE
    );
    setDatasets(imageOnly);
  } catch (err) {
    console.error("Failed to fetch datasets:", err);
    message.error("获取数据集列表失败");
  }
};
// Keep the image-file counter in sync with the current selection: a file counts
// when the lower-cased extension of its name is one of the listed image types.
useEffect(() => {
  const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
  let matched = 0;
  for (const file of Object.values(selectedFilesMap)) {
    const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
    if (imageExtensions.includes(ext)) {
      matched += 1;
    }
  }
  setImageFileCount(matched);
}, [selectedFilesMap]);
/**
 * Validates the form, then creates an auto-annotation task for the selected
 * image files and notifies the parent through `onSuccess`.
 *
 * Only files whose extension is a known image type are submitted. Validation
 * errors on visible fields are shown inline by the form; an error on the
 * hidden `datasetId` field is surfaced as a toast because it has no visible
 * place to render.
 */
const handleSubmit = async () => {
  try {
    const values = await form.validateFields();
    if (imageFileCount === 0) {
      message.error("请至少选择一个图像文件");
      return;
    }
    setLoading(true);
    const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
    const imageFileIds = Object.values(selectedFilesMap)
      .filter((file) => {
        const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
        return imageExtensions.includes(ext);
      })
      .map((file) => file.id);
    const payload = {
      name: values.name,
      datasetId: values.datasetId,
      fileIds: imageFileIds,
      config: {
        modelSize: values.modelSize,
        confThreshold: values.confThreshold,
        // An empty list means "all classes".
        targetClasses: selectAllClasses ? [] : values.targetClasses || [],
        outputDatasetName: values.outputDatasetName || undefined,
      },
    };
    await createAutoAnnotationTaskUsingPost(payload);
    message.success("自动标注任务创建成功");
    onSuccess();
  } catch (error: any) {
    if (error?.errorFields) {
      // Form validation failed. The datasetId item is hidden, so its message is
      // never rendered; without this toast the OK button would appear dead when
      // no dataset has been selected.
      const datasetError = error.errorFields.find(
        (field: any) => field?.name?.[0] === "datasetId"
      );
      if (datasetError) {
        message.error(datasetError.errors?.[0] || "请选择数据集");
      }
      return;
    }
    console.error("Failed to create auto annotation task:", error);
    message.error(error?.message || "创建任务失败");
  } finally {
    setLoading(false);
  }
};
/**
 * Records whether "all classes" is ticked; ticking it also clears any
 * explicitly chosen target classes in the form.
 */
const handleClassSelectionChange = (checked: boolean) => {
  setSelectAllClasses(checked);
  if (!checked) {
    return;
  }
  form.setFieldsValue({ targetClasses: [] });
};
return (
<Modal
title="创建自动标注任务"
open={visible}
onCancel={onCancel}
onOk={handleSubmit}
confirmLoading={loading}
width={600}
destroyOnClose
>
<Form form={form} layout="vertical" preserve={false}>
<Form.Item
name="name"
label="任务名称"
rules={[
{ required: true, message: "请输入任务名称" },
{ max: 100, message: "任务名称不能超过100个字符" },
]}
>
<Input placeholder="请输入任务名称" />
</Form.Item>
<Form.Item label="选择数据集和图像文件" required>
<DatasetFileTransfer
open
selectedFilesMap={selectedFilesMap}
onSelectedFilesChange={setSelectedFilesMap}
onDatasetSelect={(dataset) => {
setSelectedDataset(dataset);
form.setFieldsValue({ datasetId: dataset?.id ?? "" });
}}
datasetTypeFilter={DatasetType.IMAGE}
/>
{selectedDataset && (
<div className="mt-2 p-2 bg-blue-50 rounded border border-blue-200 text-xs">
<span className="font-medium">{selectedDataset.name}</span> -
<span className="font-medium text-blue-600"> {imageFileCount} </span>
</div>
)}
</Form.Item>
<Form.Item hidden name="datasetId" rules={[{ required: true, message: "请选择数据集" }]}>
<Input type="hidden" />
</Form.Item>
<Form.Item name="modelSize" label="模型规模" rules={[{ required: true, message: "请选择模型规模" }]}>
<Select>
<Option value="n">YOLOv8n ()</Option>
<Option value="s">YOLOv8s</Option>
<Option value="m">YOLOv8m</Option>
<Option value="l">YOLOv8l ()</Option>
<Option value="x">YOLOv8x ()</Option>
</Select>
</Form.Item>
{/* Confidence threshold slider. The tooltip rounds the percentage because raw
    float multiplication leaks artifacts (0.55 * 100 === 55.00000000000001). */}
<Form.Item
  name="confThreshold"
  label="置信度阈值"
  rules={[{ required: true, message: "请选择置信度阈值" }]}
>
  <Slider
    min={0.1}
    max={0.9}
    step={0.05}
    tooltip={{ formatter: (v) => `${Math.round((v || 0) * 100)}%` }}
  />
</Form.Item>
<Form.Item label="目标类别">
<Checkbox checked={selectAllClasses} onChange={(e) => handleClassSelectionChange(e.target.checked)}>
</Checkbox>
{!selectAllClasses && (
<Form.Item name="targetClasses" noStyle>
<Select mode="multiple" placeholder="选择目标类别" style={{ marginTop: 8 }}>
{COCO_CLASSES.map((cls) => (
<Option key={cls.id} value={cls.id}>
{cls.label} ({cls.name})
</Option>
))}
</Select>
</Form.Item>
)}
</Form.Item>
<Form.Item name="outputDatasetName" label="输出数据集名称 (可选)">
<Input placeholder="留空则将结果写入原数据集的标签中" />
</Form.Item>
</Form>
</Modal>
);
}

View File

@@ -0,0 +1 @@
export { default } from "./AutoAnnotation";

View File

@@ -1,192 +1,489 @@
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api"; import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
import { mapDataset } from "@/pages/DataManagement/dataset.const"; import { mapDataset } from "@/pages/DataManagement/dataset.const";
import { Button, Form, Input, Modal, Select, message } from "antd"; import { Button, Form, Input, Modal, Select, message, Tabs, Slider, Checkbox } from "antd";
import TextArea from "antd/es/input/TextArea"; import TextArea from "antd/es/input/TextArea";
import { useEffect, useState } from "react"; import { useEffect, useState } from "react";
import { createAnnotationTaskUsingPost, queryAnnotationTemplatesUsingGet } from "../../annotation.api"; import {
import { Dataset } from "@/pages/DataManagement/dataset.model"; createAnnotationTaskUsingPost,
import type { AnnotationTemplate } from "../../annotation.model"; queryAnnotationTemplatesUsingGet,
createAutoAnnotationTaskUsingPost,
export default function CreateAnnotationTask({ } from "../../annotation.api";
open, import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
onClose, import { DatasetType, type Dataset, type DatasetFile } from "@/pages/DataManagement/dataset.model";
onRefresh, import type { AnnotationTemplate } from "../../annotation.model";
}: {
open: boolean; const { Option } = Select;
onClose: () => void;
onRefresh: () => void; const COCO_CLASSES = [
}) { { id: 0, name: "person", label: "人" },
const [form] = Form.useForm(); { id: 1, name: "bicycle", label: "自行车" },
const [datasets, setDatasets] = useState<Dataset[]>([]); { id: 2, name: "car", label: "汽车" },
const [templates, setTemplates] = useState<AnnotationTemplate[]>([]); { id: 3, name: "motorcycle", label: "摩托车" },
const [submitting, setSubmitting] = useState(false); { id: 4, name: "airplane", label: "飞机" },
const [nameManuallyEdited, setNameManuallyEdited] = useState(false); { id: 5, name: "bus", label: "公交车" },
{ id: 6, name: "train", label: "火车" },
useEffect(() => { { id: 7, name: "truck", label: "卡车" },
if (!open) return; { id: 8, name: "boat", label: "船" },
const fetchData = async () => { { id: 9, name: "traffic light", label: "红绿灯" },
try { { id: 10, name: "fire hydrant", label: "消防栓" },
// Fetch datasets { id: 11, name: "stop sign", label: "停止标志" },
const { data: datasetData } = await queryDatasetsUsingGet({ { id: 12, name: "parking meter", label: "停车计时器" },
page: 0, { id: 13, name: "bench", label: "长椅" },
pageSize: 1000, // Use camelCase for HTTP params { id: 14, name: "bird", label: "鸟" },
}); { id: 15, name: "cat", label: "猫" },
setDatasets(datasetData.content.map(mapDataset) || []); { id: 16, name: "dog", label: "狗" },
{ id: 17, name: "horse", label: "马" },
// Fetch templates { id: 18, name: "sheep", label: "羊" },
const templateResponse = await queryAnnotationTemplatesUsingGet({ { id: 19, name: "cow", label: "牛" },
page: 1, { id: 20, name: "elephant", label: "大象" },
size: 100, // Backend max is 100 (template API uses 'size' not 'pageSize') { id: 21, name: "bear", label: "熊" },
}); { id: 22, name: "zebra", label: "斑马" },
{ id: 23, name: "giraffe", label: "长颈鹿" },
// The API returns: {code, message, data: {content, total, page, ...}} { id: 24, name: "backpack", label: "背包" },
if (templateResponse.code === 200 && templateResponse.data) { { id: 25, name: "umbrella", label: "雨伞" },
const fetchedTemplates = templateResponse.data.content || []; { id: 26, name: "handbag", label: "手提包" },
console.log("Fetched templates:", fetchedTemplates); { id: 27, name: "tie", label: "领带" },
setTemplates(fetchedTemplates); { id: 28, name: "suitcase", label: "行李箱" },
} else { { id: 29, name: "frisbee", label: "飞盘" },
console.error("Failed to fetch templates:", templateResponse); { id: 30, name: "skis", label: "滑雪板" },
setTemplates([]); { id: 31, name: "snowboard", label: "滑雪板" },
} { id: 32, name: "sports ball", label: "球类" },
} catch (error) { { id: 33, name: "kite", label: "风筝" },
console.error("Error fetching data:", error); { id: 34, name: "baseball bat", label: "棒球棒" },
setTemplates([]); { id: 35, name: "baseball glove", label: "棒球手套" },
} { id: 36, name: "skateboard", label: "滑板" },
}; { id: 37, name: "surfboard", label: "冲浪板" },
fetchData(); { id: 38, name: "tennis racket", label: "网球拍" },
}, [open]); { id: 39, name: "bottle", label: "瓶子" },
{ id: 40, name: "wine glass", label: "酒杯" },
// Reset form and manual-edit flag when modal opens { id: 41, name: "cup", label: "杯子" },
useEffect(() => { { id: 42, name: "fork", label: "叉子" },
if (open) { { id: 43, name: "knife", label: "刀" },
form.resetFields(); { id: 44, name: "spoon", label: "勺子" },
setNameManuallyEdited(false); { id: 45, name: "bowl", label: "碗" },
} { id: 46, name: "banana", label: "香蕉" },
}, [open, form]); { id: 47, name: "apple", label: "苹果" },
{ id: 48, name: "sandwich", label: "三明治" },
const handleSubmit = async () => { { id: 49, name: "orange", label: "橙子" },
try { { id: 50, name: "broccoli", label: "西兰花" },
const values = await form.validateFields(); { id: 51, name: "carrot", label: "胡萝卜" },
setSubmitting(true); { id: 52, name: "hot dog", label: "热狗" },
// Send templateId instead of labelingConfig { id: 53, name: "pizza", label: "披萨" },
const requestData = { { id: 54, name: "donut", label: "甜甜圈" },
name: values.name, { id: 55, name: "cake", label: "蛋糕" },
description: values.description, { id: 56, name: "chair", label: "椅子" },
datasetId: values.datasetId, { id: 57, name: "couch", label: "沙发" },
templateId: values.templateId, { id: 58, name: "potted plant", label: "盆栽" },
}; { id: 59, name: "bed", label: "床" },
await createAnnotationTaskUsingPost(requestData); { id: 60, name: "dining table", label: "餐桌" },
message?.success?.("创建标注任务成功"); { id: 61, name: "toilet", label: "马桶" },
onClose(); { id: 62, name: "tv", label: "电视" },
onRefresh(); { id: 63, name: "laptop", label: "笔记本电脑" },
} catch (err: any) { { id: 64, name: "mouse", label: "鼠标" },
console.error("Create annotation task failed", err); { id: 65, name: "remote", label: "遥控器" },
const msg = err?.message || err?.data?.message || "创建失败,请稍后重试"; { id: 66, name: "keyboard", label: "键盘" },
(message as any)?.error?.(msg); { id: 67, name: "cell phone", label: "手机" },
} finally { { id: 68, name: "microwave", label: "微波炉" },
setSubmitting(false); { id: 69, name: "oven", label: "烤箱" },
} { id: 70, name: "toaster", label: "烤面包机" },
}; { id: 71, name: "sink", label: "水槽" },
{ id: 72, name: "refrigerator", label: "冰箱" },
return ( { id: 73, name: "book", label: "书" },
<Modal { id: 74, name: "clock", label: "钟表" },
open={open} { id: 75, name: "vase", label: "花瓶" },
onCancel={onClose} { id: 76, name: "scissors", label: "剪刀" },
title="创建标注任务" { id: 77, name: "teddy bear", label: "玩具熊" },
footer={ { id: 78, name: "hair drier", label: "吹风机" },
<> { id: 79, name: "toothbrush", label: "牙刷" },
<Button onClick={onClose} disabled={submitting}> ];
</Button> export default function CreateAnnotationTask({
<Button type="primary" onClick={handleSubmit} loading={submitting}> open,
onClose,
</Button> onRefresh,
</> }: {
} open: boolean;
width={800} onClose: () => void;
> onRefresh: () => void;
<Form form={form} layout="vertical"> }) {
{/* 数据集 与 标注工程名称 并排显示(数据集在左) */} const [manualForm] = Form.useForm();
<div className="grid grid-cols-2 gap-4"> const [autoForm] = Form.useForm();
<Form.Item const [datasets, setDatasets] = useState<Dataset[]>([]);
label="数据集" const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
name="datasetId" const [submitting, setSubmitting] = useState(false);
rules={[{ required: true, message: "请选择数据集" }]} const [nameManuallyEdited, setNameManuallyEdited] = useState(false);
> const [activeMode, setActiveMode] = useState<"manual" | "auto">("manual");
<Select
placeholder="请选择数据集" const [selectAllClasses, setSelectAllClasses] = useState(true);
options={datasets.map((dataset) => { const [selectedFilesMap, setSelectedFilesMap] = useState<Record<string, DatasetFile>>({});
return { const [selectedDataset, setSelectedDataset] = useState<Dataset | null>(null);
label: ( const [imageFileCount, setImageFileCount] = useState(0);
<div className="flex items-center justify-between gap-3 py-2">
<div className="flex items-center font-sm text-gray-900"> useEffect(() => {
<span className="mr-2">{(dataset as any).icon}</span> if (!open) return;
<span>{dataset.name}</span> const fetchData = async () => {
</div> try {
<div className="text-xs text-gray-500">{dataset.size}</div> // Fetch datasets
</div> const { data: datasetData } = await queryDatasetsUsingGet({
), page: 0,
value: dataset.id, pageSize: 1000, // Use camelCase for HTTP params
}; });
})} setDatasets(datasetData.content.map(mapDataset) || []);
onChange={(value) => {
// 如果用户未手动修改名称,则用数据集名称作为默认任务名 // Fetch templates
if (!nameManuallyEdited) { const templateResponse = await queryAnnotationTemplatesUsingGet({
const ds = datasets.find((d) => d.id === value); page: 1,
if (ds) { size: 100, // Backend max is 100 (template API uses 'size' not 'pageSize')
form.setFieldsValue({ name: ds.name }); });
}
} // The API returns: {code, message, data: {content, total, page, ...}}
}} if (templateResponse.code === 200 && templateResponse.data) {
/> const fetchedTemplates = templateResponse.data.content || [];
</Form.Item> console.log("Fetched templates:", fetchedTemplates);
setTemplates(fetchedTemplates);
<Form.Item } else {
label="标注工程名称" console.error("Failed to fetch templates:", templateResponse);
name="name" setTemplates([]);
rules={[{ required: true, message: "请输入任务名称" }]} }
> } catch (error) {
<Input console.error("Error fetching data:", error);
placeholder="输入标注工程名称" setTemplates([]);
onChange={() => setNameManuallyEdited(true)} }
/> };
</Form.Item> fetchData();
</div> }, [open]);
{/* 描述变为可选 */}
<Form.Item label="描述" name="description"> // Reset form and manual-edit flag when modal opens
<TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={3} /> useEffect(() => {
</Form.Item> if (open) {
manualForm.resetFields();
{/* 标注模板选择 */} autoForm.resetFields();
<Form.Item setNameManuallyEdited(false);
label="标注模板" setActiveMode("manual");
name="templateId" setSelectAllClasses(true);
rules={[{ required: true, message: "请选择标注模板" }]} setSelectedFilesMap({});
> setSelectedDataset(null);
<Select setImageFileCount(0);
placeholder={templates.length === 0 ? "暂无可用模板,请先创建模板" : "请选择标注模板"} }
showSearch }, [open, manualForm, autoForm]);
optionFilterProp="label"
notFoundContent={templates.length === 0 ? "暂无模板,请前往「标注模板」页面创建" : "未找到匹配的模板"} useEffect(() => {
options={templates.map((template) => ({ const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
label: template.name, const count = Object.values(selectedFilesMap).filter((file) => {
value: template.id, const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
// Add description as subtitle return imageExtensions.includes(ext);
title: template.description, }).length;
}))} setImageFileCount(count);
optionRender={(option) => ( }, [selectedFilesMap]);
<div>
<div style={{ fontWeight: 500 }}>{option.label}</div> const handleManualSubmit = async () => {
{option.data.title && ( try {
<div style={{ fontSize: 12, color: '#999', marginTop: 2 }}> const values = await manualForm.validateFields();
{option.data.title} setSubmitting(true);
</div> // Send templateId instead of labelingConfig
)} const requestData = {
</div> name: values.name,
)} description: values.description,
/> datasetId: values.datasetId,
</Form.Item> templateId: values.templateId,
</Form> };
</Modal> await createAnnotationTaskUsingPost(requestData);
); message?.success?.("创建标注任务成功");
} onClose();
onRefresh();
} catch (err: any) {
console.error("Create annotation task failed", err);
const msg = err?.message || err?.data?.message || "创建失败,请稍后重试";
(message as any)?.error?.(msg);
} finally {
setSubmitting(false);
}
};
/**
 * Validates the auto-annotation form and creates an auto-annotation task.
 *
 * Only entries of `selectedFilesMap` whose file name ends with one of the
 * listed image extensions are submitted. When none qualify, an error toast is
 * shown and no request is made. On success the parent is asked to refresh
 * (with the "auto" hint) and the modal is closed.
 */
const handleAutoSubmit = async () => {
  try {
    const values = await autoForm.validateFields();

    // Derive the image file ids straight from the current selection so the
    // emptiness guard and the request payload are always based on the same
    // data, instead of relying on the effect-maintained `imageFileCount`.
    const imageExtensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"];
    const imageFileIds = Object.values(selectedFilesMap)
      .filter((file) => {
        // Extract the trailing ".ext" (lower-cased); files without one yield "".
        const ext = file.fileName?.toLowerCase().match(/\.[^.]+$/)?.[0] || "";
        return imageExtensions.includes(ext);
      })
      .map((file) => file.id);

    if (imageFileIds.length === 0) {
      message.error("请至少选择一个图像文件");
      return;
    }

    setSubmitting(true);
    const payload = {
      name: values.name,
      datasetId: values.datasetId,
      fileIds: imageFileIds,
      config: {
        modelSize: values.modelSize,
        confThreshold: values.confThreshold,
        // An empty list is sent when "all classes" is checked.
        targetClasses: selectAllClasses ? [] : values.targetClasses || [],
        outputDatasetName: values.outputDatasetName || undefined,
      },
    };
    await createAutoAnnotationTaskUsingPost(payload);
    message.success("自动标注任务创建成功");
    // Ask the parent to refresh the auto-annotation task list.
    (onRefresh as any)?.("auto");
    onClose();
  } catch (error: any) {
    // Rejections carrying `errorFields` are form-validation failures; they are
    // skipped here without a toast. Optional chaining guards against
    // rejections that are null/undefined.
    if (error?.errorFields) return;
    console.error("Failed to create auto annotation task:", error);
    // Same message fallback chain as the manual-submit handler.
    message.error(error?.message || error?.data?.message || "创建自动标注任务失败");
  } finally {
    setSubmitting(false);
  }
};
/**
 * Handles toggling of the "all classes" checkbox.
 *
 * Stores the new checked state and, when it is turned on, resets the
 * `targetClasses` field of the auto-annotation form to an empty list.
 */
const handleClassSelectionChange = (checked: boolean) => {
  setSelectAllClasses(checked);
  // Nothing to reset when the user switches to picking classes manually.
  if (!checked) {
    return;
  }
  autoForm.setFieldsValue({ targetClasses: [] });
};
return (
<Modal
open={open}
onCancel={onClose}
title="创建标注任务"
footer={
<>
<Button onClick={onClose} disabled={submitting}>
</Button>
<Button
type="primary"
onClick={activeMode === "manual" ? handleManualSubmit : handleAutoSubmit}
loading={submitting}
>
</Button>
</>
}
width={800}
>
<Tabs
activeKey={activeMode}
onChange={(key) => setActiveMode(key as "manual" | "auto")}
items={[
{
key: "manual",
label: "手动标注",
children: (
<Form form={manualForm} layout="vertical">
{/* 数据集 与 标注工程名称 并排显示(数据集在左) */}
<div className="grid grid-cols-2 gap-4">
<Form.Item
label="数据集"
name="datasetId"
rules={[{ required: true, message: "请选择数据集" }]}
>
<Select
placeholder="请选择数据集"
options={datasets.map((dataset) => {
return {
label: (
<div className="flex items-center justify-between gap-3 py-2">
<div className="flex items-center font-sm text-gray-900">
<span className="mr-2">{(dataset as any).icon}</span>
<span>{dataset.name}</span>
</div>
<div className="text-xs text-gray-500">{dataset.size}</div>
</div>
),
value: dataset.id,
};
})}
onChange={(value) => {
// 如果用户未手动修改名称,则用数据集名称作为默认任务名
if (!nameManuallyEdited) {
const ds = datasets.find((d) => d.id === value);
if (ds) {
let defaultName = ds.name || "";
if (defaultName.length < 3) {
defaultName = `${defaultName}-标注`;
}
manualForm.setFieldsValue({ name: defaultName });
}
}
}}
/>
</Form.Item>
<Form.Item
label="标注工程名称"
name="name"
rules={[
{
validator: (_rule, value) => {
const trimmed = (value || "").trim();
if (!trimmed) {
return Promise.reject(new Error("请输入任务名称"));
}
if (trimmed.length < 3) {
return Promise.reject(
new Error("任务名称至少需要 3 个字符(不含首尾空格,Label Studio 限制)"),
);
}
return Promise.resolve();
},
},
]}
>
<Input
placeholder="输入标注工程名称"
onChange={() => setNameManuallyEdited(true)}
/>
</Form.Item>
</div>
{/* 描述变为可选 */}
<Form.Item label="描述" name="description">
<TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={3} />
</Form.Item>
{/* 标注模板选择 */}
<Form.Item
label="标注模板"
name="templateId"
rules={[{ required: true, message: "请选择标注模板" }]}
>
<Select
placeholder={templates.length === 0 ? "暂无可用模板,请先创建模板" : "请选择标注模板"}
showSearch
optionFilterProp="label"
notFoundContent={templates.length === 0 ? "暂无模板,请前往「标注模板」页面创建" : "未找到匹配的模板"}
options={templates.map((template) => ({
label: template.name,
value: template.id,
// Add description as subtitle
title: template.description,
}))}
optionRender={(option) => (
<div>
<div style={{ fontWeight: 500 }}>{option.label}</div>
{option.data.title && (
<div style={{ fontSize: 12, color: '#999', marginTop: 2 }}>
{option.data.title}
</div>
)}
</div>
)}
/>
</Form.Item>
</Form>
),
},
{
key: "auto",
label: "自动标注",
children: (
<Form form={autoForm} layout="vertical" preserve={false}>
<Form.Item
name="name"
label="任务名称"
rules={[
{ required: true, message: "请输入任务名称" },
{ max: 100, message: "任务名称不能超过100个字符" },
]}
>
<Input placeholder="请输入任务名称" />
</Form.Item>
<Form.Item label="选择数据集和图像文件" required>
<DatasetFileTransfer
open
selectedFilesMap={selectedFilesMap}
onSelectedFilesChange={setSelectedFilesMap}
onDatasetSelect={(dataset) => {
setSelectedDataset(dataset as Dataset | null);
autoForm.setFieldsValue({ datasetId: dataset?.id ?? "" });
}}
datasetTypeFilter={DatasetType.IMAGE}
/>
{selectedDataset && (
<div className="mt-2 p-2 bg-blue-50 rounded border border-blue-200 text-xs">
<span className="font-medium">{selectedDataset.name}</span> -
<span className="font-medium text-blue-600"> {imageFileCount} </span>
</div>
)}
</Form.Item>
<Form.Item
hidden
name="datasetId"
rules={[{ required: true, message: "请选择数据集" }]}
>
<Input type="hidden" />
</Form.Item>
<Form.Item
name="modelSize"
label="模型规模"
rules={[{ required: true, message: "请选择模型规模" }]}
initialValue="l"
>
<Select>
<Option value="n">YOLOv8n ()</Option>
<Option value="s">YOLOv8s</Option>
<Option value="m">YOLOv8m</Option>
<Option value="l">YOLOv8l ()</Option>
<Option value="x">YOLOv8x ()</Option>
</Select>
</Form.Item>
<Form.Item
name="confThreshold"
label="置信度阈值"
rules={[{ required: true, message: "请选择置信度阈值" }]}
initialValue={0.7}
>
<Slider
min={0.1}
max={0.9}
step={0.05}
tooltip={{ formatter: (v) => `${(v || 0) * 100}%` }}
/>
</Form.Item>
<Form.Item label="目标类别">
<Checkbox
checked={selectAllClasses}
onChange={(e) => handleClassSelectionChange(e.target.checked)}
>
</Checkbox>
{!selectAllClasses && (
<Form.Item name="targetClasses" noStyle>
<Select mode="multiple" placeholder="选择目标类别" style={{ marginTop: 8 }}>
{COCO_CLASSES.map((cls) => (
<Option key={cls.id} value={cls.id}>
{cls.label} ({cls.name})
</Option>
))}
</Select>
</Form.Item>
)}
</Form.Item>
<Form.Item name="outputDatasetName" label="输出数据集名称 (可选)">
<Input placeholder="留空则将结果写入原数据集的标签中" />
</Form.Item>
</Form>
),
},
]}
/>
</Modal>
);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +1,67 @@
import { get, post, put, del } from "@/utils/request"; import { get, post, put, del, download } from "@/utils/request";
// 标注任务管理相关接口 // 标注任务管理相关接口
export function queryAnnotationTasksUsingGet(params?: any) { export function queryAnnotationTasksUsingGet(params?: any) {
return get("/api/annotation/project", params); return get("/api/annotation/project", params);
} }
export function createAnnotationTaskUsingPost(data: any) { export function createAnnotationTaskUsingPost(data: any) {
return post("/api/annotation/project", data); return post("/api/annotation/project", data);
} }
export function syncAnnotationTaskUsingPost(data: any) { export function syncAnnotationTaskUsingPost(data: any) {
return post(`/api/annotation/task/sync`, data); return post(`/api/annotation/task/sync`, data);
} }
export function deleteAnnotationTaskByIdUsingDelete(mappingId: string) { export function deleteAnnotationTaskByIdUsingDelete(mappingId: string) {
// Backend expects mapping UUID as path parameter // Backend expects mapping UUID as path parameter
return del(`/api/annotation/project/${mappingId}`); return del(`/api/annotation/project/${mappingId}`);
} }
export function loginAnnotationUsingGet(mappingId: string) { // 标签配置管理
return get("/api/annotation/project/${mappingId}/login"); export function getTagConfigUsingGet() {
} return get("/api/annotation/tags/config");
}
// 标签配置管理
export function getTagConfigUsingGet() { // 标注模板管理
return get("/api/annotation/tags/config"); export function queryAnnotationTemplatesUsingGet(params?: any) {
} return get("/api/annotation/template", params);
}
// 标注模板管理
export function queryAnnotationTemplatesUsingGet(params?: any) { export function createAnnotationTemplateUsingPost(data: any) {
return get("/api/annotation/template", params); return post("/api/annotation/template", data);
} }
export function createAnnotationTemplateUsingPost(data: any) { export function updateAnnotationTemplateByIdUsingPut(
return post("/api/annotation/template", data); templateId: string | number,
} data: any
) {
export function updateAnnotationTemplateByIdUsingPut( return put(`/api/annotation/template/${templateId}`, data);
templateId: string | number, }
data: any
) { export function deleteAnnotationTemplateByIdUsingDelete(
return put(`/api/annotation/template/${templateId}`, data); templateId: string | number
} ) {
return del(`/api/annotation/template/${templateId}`);
export function deleteAnnotationTemplateByIdUsingDelete( }
templateId: string | number
) { // 自动标注任务管理
return del(`/api/annotation/template/${templateId}`); export function queryAutoAnnotationTasksUsingGet(params?: any) {
} return get("/api/annotation/auto", params);
}
/**
 * Creates an auto-annotation task.
 *
 * Forwards `data` to the shared `post` request helper for the
 * `/api/annotation/auto` endpoint and returns whatever that helper returns.
 *
 * @param data request body for the new task (shape is not constrained here)
 */
export function createAutoAnnotationTaskUsingPost(data: any) {
return post("/api/annotation/auto", data);
}
/**
 * Deletes an auto-annotation task by id.
 *
 * Calls the shared `del` request helper on `/api/annotation/auto/{taskId}`;
 * the id is interpolated into the path as-is.
 *
 * @param taskId identifier of the task to delete
 */
export function deleteAutoAnnotationTaskByIdUsingDelete(taskId: string) {
return del(`/api/annotation/auto/${taskId}`);
}
/**
 * Fetches the status of an auto-annotation task.
 *
 * Calls the shared `get` request helper on
 * `/api/annotation/auto/{taskId}/status`.
 *
 * @param taskId identifier of the task to query
 */
export function getAutoAnnotationTaskStatusUsingGet(taskId: string) {
return get(`/api/annotation/auto/${taskId}/status`);
}
/**
 * Downloads the result of an auto-annotation task.
 *
 * Delegates to the shared `download` request helper with
 * `/api/annotation/auto/{taskId}/download`.
 * NOTE(review): how `download` delivers the file (blob, browser save, ...)
 * is not visible here — confirm against `@/utils/request`.
 *
 * @param taskId identifier of the task whose result is downloaded
 */
export function downloadAutoAnnotationResultUsingGet(taskId: string) {
return download(`/api/annotation/auto/${taskId}/download`);
}

View File

@@ -1,245 +1,260 @@
import { useEffect, useMemo, useState } from "react"; import { useEffect, useMemo, useState } from "react";
import { Breadcrumb, App, Tabs } from "antd"; import { Breadcrumb, App, Tabs } from "antd";
import { import {
ReloadOutlined, ReloadOutlined,
DownloadOutlined, DownloadOutlined,
UploadOutlined, UploadOutlined,
EditOutlined, EditOutlined,
DeleteOutlined, DeleteOutlined,
} from "@ant-design/icons"; } from "@ant-design/icons";
import DetailHeader from "@/components/DetailHeader"; import DetailHeader from "@/components/DetailHeader";
import { mapDataset, datasetTypeMap } from "../dataset.const"; import { mapDataset, datasetTypeMap } from "../dataset.const";
import type { Dataset } from "@/pages/DataManagement/dataset.model"; import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { Link, useNavigate, useParams } from "react-router"; import { Link, useNavigate, useParams } from "react-router";
import { useFilesOperation } from "./useFilesOperation"; import { useFilesOperation } from "./useFilesOperation";
import { import {
createDatasetTagUsingPost, createDatasetTagUsingPost,
deleteDatasetByIdUsingDelete, deleteDatasetByIdUsingDelete,
downloadDatasetUsingGet, downloadDatasetUsingGet,
queryDatasetByIdUsingGet, queryDatasetByIdUsingGet,
queryDatasetTagsUsingGet, queryDatasetTagsUsingGet,
updateDatasetByIdUsingPut, updateDatasetByIdUsingPut,
} from "../dataset.api"; } from "../dataset.api";
import DataQuality from "./components/DataQuality"; import DataQuality from "./components/DataQuality";
import DataLineageFlow from "./components/DataLineageFlow"; import DataLineageFlow from "./components/DataLineageFlow";
import Overview from "./components/Overview"; import Overview from "./components/Overview";
import { Activity, Clock, File, FileType } from "lucide-react"; import { Activity, Clock, File, FileType } from "lucide-react";
import EditDataset from "../Create/EditDataset"; import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "./components/ImportConfiguration"; import ImportConfiguration from "./components/ImportConfiguration";
const tabList = [ const tabList = [
{ {
key: "overview", key: "overview",
label: "概览", label: "概览",
}, },
{ {
key: "lineage", key: "lineage",
label: "数据血缘", label: "数据血缘",
}, },
{ {
key: "quality", key: "quality",
label: "数据质量", label: "数据质量",
}, },
]; ];
export default function DatasetDetail() { export default function DatasetDetail() {
const { id } = useParams(); // 获取动态路由参数 const { id } = useParams(); // 获取动态路由参数
const navigate = useNavigate(); const navigate = useNavigate();
const [activeTab, setActiveTab] = useState("overview"); const [activeTab, setActiveTab] = useState("overview");
const { message } = App.useApp(); const { message } = App.useApp();
const [showEditDialog, setShowEditDialog] = useState(false); const [showEditDialog, setShowEditDialog] = useState(false);
const [dataset, setDataset] = useState<Dataset>({} as Dataset); const [dataset, setDataset] = useState<Dataset>({} as Dataset);
const filesOperation = useFilesOperation(dataset); const filesOperation = useFilesOperation(dataset);
const [showUploadDialog, setShowUploadDialog] = useState(false); const [showUploadDialog, setShowUploadDialog] = useState(false);
const navigateItems = useMemo( const navigateItems = useMemo(
() => [ () => [
{ {
title: <Link to="/data/management"></Link>, title: <Link to="/data/management"></Link>,
}, },
{ {
title: dataset.name || "数据集详情", title: dataset.name || "数据集详情",
}, },
], ],
[dataset] [dataset]
); );
const fetchDataset = async () => { const fetchDataset = async () => {
const { data } = await queryDatasetByIdUsingGet(id as unknown as number); const { data } = await queryDatasetByIdUsingGet(id as unknown as number);
setDataset(mapDataset(data)); setDataset(mapDataset(data));
}; };
useEffect(() => { useEffect(() => {
fetchDataset(); fetchDataset();
filesOperation.fetchFiles(); filesOperation.fetchFiles('', 1, 10); // 从根目录开始,第一页
}, []); }, []);
const handleRefresh = async (showMessage = true) => { const handleRefresh = async (showMessage = true, prefixOverride?: string) => {
fetchDataset(); fetchDataset();
filesOperation.fetchFiles(); // 刷新当前目录,保持在当前页
if (showMessage) message.success({ content: "数据刷新成功" }); const targetPrefix =
}; prefixOverride !== undefined
? prefixOverride
const handleDownload = async () => { : filesOperation.pagination.prefix;
await downloadDatasetUsingGet(dataset.id); filesOperation.fetchFiles(
message.success("文件下载成功"); targetPrefix,
}; filesOperation.pagination.current,
filesOperation.pagination.pageSize
const handleDeleteDataset = async () => { );
await deleteDatasetByIdUsingDelete(dataset.id); if (showMessage) message.success({ content: "数据刷新成功" });
navigate("/data/management"); };
message.success("数据集删除成功");
}; const handleDownload = async () => {
await downloadDatasetUsingGet(dataset.id);
useEffect(() => { message.success("文件下载成功");
const refreshData = () => { };
handleRefresh(false);
}; const handleDeleteDataset = async () => {
window.addEventListener("update:dataset", refreshData); await deleteDatasetByIdUsingDelete(dataset.id);
return () => { navigate("/data/management");
window.removeEventListener("update:dataset", refreshData); message.success("数据集删除成功");
}; };
}, []);
useEffect(() => {
// 基本信息描述项 const refreshData = (e: Event) => {
const statistics = [ const custom = e as CustomEvent<{ prefix?: string }>;
{ const prefixOverride = custom.detail?.prefix;
icon: <File className="text-blue-400 w-4 h-4" />, handleRefresh(false, prefixOverride);
key: "file", };
value: dataset?.fileCount || 0, window.addEventListener("update:dataset", refreshData as EventListener);
}, return () => {
{ window.removeEventListener(
icon: <Activity className="text-blue-400 w-4 h-4" />, "update:dataset",
key: "size", refreshData as EventListener
value: dataset?.size || "0 B", );
}, };
{ }, []);
icon: <FileType className="text-blue-400 w-4 h-4" />,
key: "type", // 基本信息描述项
value: const statistics = [
datasetTypeMap[dataset?.datasetType as keyof typeof datasetTypeMap] {
?.label || icon: <File className="text-blue-400 w-4 h-4" />,
dataset?.type || key: "file",
"未知", value: dataset?.fileCount || 0,
}, },
{ {
icon: <Clock className="text-blue-400 w-4 h-4" />, icon: <Activity className="text-blue-400 w-4 h-4" />,
key: "time", key: "size",
value: dataset?.updatedAt, value: dataset?.size || "0 B",
}, },
]; {
icon: <FileType className="text-blue-400 w-4 h-4" />,
// 数据集操作列表 key: "type",
const operations = [ value:
{ datasetTypeMap[dataset?.datasetType as keyof typeof datasetTypeMap]
key: "edit", ?.label ||
label: "编辑", dataset?.type ||
icon: <EditOutlined />, "未知",
onClick: () => { },
setShowEditDialog(true); {
}, icon: <Clock className="text-blue-400 w-4 h-4" />,
}, key: "time",
value: dataset?.updatedAt,
{ },
key: "upload", ];
label: "导入数据",
icon: <UploadOutlined />, // 数据集操作列表
onClick: () => setShowUploadDialog(true), const operations = [
}, {
{ key: "edit",
key: "export", label: "编辑",
label: "导出", icon: <EditOutlined />,
icon: <DownloadOutlined />, onClick: () => {
// isDropdown: true, setShowEditDialog(true);
// items: [ },
// { key: "alpaca", label: "Alpaca 格式", icon: <FileTextOutlined /> }, },
// { key: "jsonl", label: "JSONL 格式", icon: <DatabaseOutlined /> },
// { key: "csv", label: "CSV 格式", icon: <FileTextOutlined /> }, {
// { key: "coco", label: "COCO 格式", icon: <FileImageOutlined /> }, key: "upload",
// ], label: "导入数据",
onClick: () => handleDownload(), icon: <UploadOutlined />,
}, onClick: () => setShowUploadDialog(true),
{ },
key: "refresh", {
label: "刷新", key: "export",
icon: <ReloadOutlined />, label: "导出",
onClick: handleRefresh, icon: <DownloadOutlined />,
}, // isDropdown: true,
{ // items: [
key: "delete", // { key: "alpaca", label: "Alpaca 格式", icon: <FileTextOutlined /> },
label: "删除", // { key: "jsonl", label: "JSONL 格式", icon: <DatabaseOutlined /> },
danger: true, // { key: "csv", label: "CSV 格式", icon: <FileTextOutlined /> },
confirm: { // { key: "coco", label: "COCO 格式", icon: <FileImageOutlined /> },
title: "确认删除该数据集?", // ],
description: "删除后该数据集将无法恢复,请谨慎操作。", onClick: () => handleDownload(),
okText: "删除", },
cancelText: "取消", {
okType: "danger", key: "refresh",
}, label: "刷新",
icon: <DeleteOutlined />, icon: <ReloadOutlined />,
onClick: handleDeleteDataset, onClick: handleRefresh,
}, },
]; {
key: "delete",
return ( label: "删除",
<div className="h-full flex flex-col gap-4"> danger: true,
<Breadcrumb items={navigateItems} /> confirm: {
{/* Header */} title: "确认删除该数据集?",
<DetailHeader description: "删除后该数据集将无法恢复,请谨慎操作。",
data={dataset} okText: "删除",
statistics={statistics} cancelText: "取消",
operations={operations} okType: "danger",
tagConfig={{ },
showAdd: true, icon: <DeleteOutlined />,
tags: dataset.tags || [], onClick: handleDeleteDataset,
onFetchTags: async () => { },
const res = await queryDatasetTagsUsingGet({ ];
page: 0,
pageSize: 1000, return (
}); <div className="h-full flex flex-col gap-4">
return res.data || []; <Breadcrumb items={navigateItems} />
}, {/* Header */}
onCreateAndTag: async (tagName) => { <DetailHeader
const res = await createDatasetTagUsingPost({ name: tagName }); data={dataset}
if (res.data) { statistics={statistics}
await updateDatasetByIdUsingPut(dataset.id, { operations={operations}
tags: [...dataset.tags.map((tag) => tag.name), res.data.name], tagConfig={{
}); showAdd: true,
handleRefresh(); tags: dataset.tags || [],
} onFetchTags: async () => {
}, const res = await queryDatasetTagsUsingGet({
onAddTag: async (tag) => { page: 0,
const res = await updateDatasetByIdUsingPut(dataset.id, { pageSize: 1000,
tags: [...dataset.tags.map((tag) => tag.name), tag], });
}); return res.data || [];
if (res.data) { },
handleRefresh(); onCreateAndTag: async (tagName) => {
} const res = await createDatasetTagUsingPost({ name: tagName });
}, if (res.data) {
}} await updateDatasetByIdUsingPut(dataset.id, {
/> tags: [...dataset.tags.map((tag) => tag.name), res.data.name],
<div className="flex-overflow-auto p-6 pt-2 bg-white rounded-md shadow"> });
<Tabs activeKey={activeTab} items={tabList} onChange={setActiveTab} /> handleRefresh();
<div className="h-full overflow-auto"> }
{activeTab === "overview" && ( },
<Overview dataset={dataset} filesOperation={filesOperation} fetchDataset={fetchDataset}/> onAddTag: async (tag) => {
)} const res = await updateDatasetByIdUsingPut(dataset.id, {
{activeTab === "lineage" && <DataLineageFlow dataset={dataset} />} tags: [...dataset.tags.map((tag) => tag.name), tag],
{activeTab === "quality" && <DataQuality />} });
</div> if (res.data) {
</div> handleRefresh();
<ImportConfiguration }
data={dataset} },
open={showUploadDialog} }}
onClose={() => setShowUploadDialog(false)} />
updateEvent="update:dataset" <div className="flex-overflow-auto p-6 pt-2 bg-white rounded-md shadow">
/> <Tabs activeKey={activeTab} items={tabList} onChange={setActiveTab} />
<EditDataset <div className="h-full overflow-auto">
data={dataset} {activeTab === "overview" && (
open={showEditDialog} <Overview dataset={dataset} filesOperation={filesOperation} fetchDataset={fetchDataset}/>
onClose={() => setShowEditDialog(false)} )}
onRefresh={handleRefresh} {activeTab === "lineage" && <DataLineageFlow dataset={dataset} />}
/> {activeTab === "quality" && <DataQuality />}
</div> </div>
); </div>
} <ImportConfiguration
data={dataset}
open={showUploadDialog}
onClose={() => setShowUploadDialog(false)}
prefix={filesOperation.pagination.prefix}
updateEvent="update:dataset"
/>
<EditDataset
data={dataset}
open={showEditDialog}
onClose={() => setShowEditDialog(false)}
onRefresh={handleRefresh}
/>
</div>
);
}

View File

@@ -1,277 +1,294 @@
import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch } from "antd"; import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch } from "antd";
import { InboxOutlined } from "@ant-design/icons"; import { InboxOutlined } from "@ant-design/icons";
import { dataSourceOptions } from "../../dataset.const"; import { dataSourceOptions } from "../../dataset.const";
import { Dataset, DataSource } from "../../dataset.model"; import { Dataset, DataSource } from "../../dataset.model";
import { useEffect, useMemo, useState } from "react"; import { useEffect, useMemo, useState } from "react";
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis"; import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
import { updateDatasetByIdUsingPut } from "../../dataset.api"; import { updateDatasetByIdUsingPut } from "../../dataset.api";
import { sliceFile } from "@/utils/file.util"; import { sliceFile } from "@/utils/file.util";
import Dragger from "antd/es/upload/Dragger"; import Dragger from "antd/es/upload/Dragger";
export default function ImportConfiguration({ export default function ImportConfiguration({
data, data,
open, open,
onClose, onClose,
updateEvent = "update:dataset", updateEvent = "update:dataset",
}: { prefix,
data: Dataset | null; }: {
open: boolean; data: Dataset | null;
onClose: () => void; open: boolean;
updateEvent?: string; onClose: () => void;
}) { updateEvent?: string;
const [form] = Form.useForm(); prefix?: string;
const [collectionOptions, setCollectionOptions] = useState([]); }) {
const [importConfig, setImportConfig] = useState<any>({ const [form] = Form.useForm();
source: DataSource.UPLOAD, const [collectionOptions, setCollectionOptions] = useState([]);
}); const [importConfig, setImportConfig] = useState<any>({
source: DataSource.UPLOAD,
const [fileList, setFileList] = useState<UploadFile[]>([]); });
const fileSliceList = useMemo(() => { const [currentPrefix, setCurrentPrefix] = useState<string>("");
const sliceList = fileList.map((file) => {
const slices = sliceFile(file); const [fileList, setFileList] = useState<UploadFile[]>([]);
return { originFile: file, slices, name: file.name, size: file.size }; const fileSliceList = useMemo(() => {
}); const sliceList = fileList.map((file) => {
return sliceList; const slices = sliceFile(file);
}, [fileList]); return { originFile: file, slices, name: file.name, size: file.size };
});
// 本地上传文件相关逻辑 return sliceList;
}, [fileList]);
const resetFiles = () => {
setFileList([]); // 本地上传文件相关逻辑
};
const resetFiles = () => {
const handleUpload = async (dataset: Dataset) => { setFileList([]);
const formData = new FormData(); };
fileList.forEach((file) => {
formData.append("file", file); const handleUpload = async (dataset: Dataset) => {
}); const formData = new FormData();
window.dispatchEvent( fileList.forEach((file) => {
new CustomEvent("upload:dataset", { formData.append("file", file);
detail: { });
dataset, console.log('[ImportConfiguration] Uploading with currentPrefix:', currentPrefix);
files: fileSliceList, window.dispatchEvent(
updateEvent, new CustomEvent("upload:dataset", {
hasArchive: importConfig.hasArchive, detail: {
}, dataset,
}) files: fileSliceList,
); updateEvent,
resetFiles(); hasArchive: importConfig.hasArchive,
}; prefix: currentPrefix,
},
const handleBeforeUpload = (_, files: UploadFile[]) => { })
setFileList([...fileList, ...files]); );
return false; resetFiles();
}; };
const handleRemoveFile = (file: UploadFile) => { const handleBeforeUpload = (_, files: UploadFile[]) => {
setFileList((prev) => prev.filter((f) => f.uid !== file.uid)); setFileList([...fileList, ...files]);
}; return false;
};
const fetchCollectionTasks = async () => {
if (importConfig.source !== DataSource.COLLECTION) return; const handleRemoveFile = (file: UploadFile) => {
try { setFileList((prev) => prev.filter((f) => f.uid !== file.uid));
const res = await queryTasksUsingGet({ page: 0, size: 100 }); };
const options = res.data.content.map((task: any) => ({
label: task.name, const fetchCollectionTasks = async () => {
value: task.id, if (importConfig.source !== DataSource.COLLECTION) return;
})); try {
setCollectionOptions(options); const res = await queryTasksUsingGet({ page: 0, size: 100 });
} catch (error) { const options = res.data.content.map((task: any) => ({
console.error("Error fetching collection tasks:", error); label: task.name,
} value: task.id,
}; }));
setCollectionOptions(options);
const resetState = () => { } catch (error) {
form.resetFields(); console.error("Error fetching collection tasks:", error);
setFileList([]); }
form.setFieldsValue({ files: null }); };
setImportConfig({ source: importConfig.source ? importConfig.source : DataSource.UPLOAD });
}; const resetState = () => {
console.log('[ImportConfiguration] resetState called, preserving currentPrefix:', currentPrefix);
const handleImportData = async () => { form.resetFields();
if (!data) return; setFileList([]);
if (importConfig.source === DataSource.UPLOAD) { form.setFieldsValue({ files: null });
await handleUpload(data); setImportConfig({ source: importConfig.source ? importConfig.source : DataSource.UPLOAD });
} else if (importConfig.source === DataSource.COLLECTION) { console.log('[ImportConfiguration] resetState done, currentPrefix still:', currentPrefix);
await updateDatasetByIdUsingPut(data.id, { };
...importConfig,
}); const handleImportData = async () => {
} if (!data) return;
onClose(); console.log('[ImportConfiguration] handleImportData called, currentPrefix:', currentPrefix);
}; if (importConfig.source === DataSource.UPLOAD) {
await handleUpload(data);
useEffect(() => { } else if (importConfig.source === DataSource.COLLECTION) {
if (open) { await updateDatasetByIdUsingPut(data.id, {
resetState(); ...importConfig,
fetchCollectionTasks(); });
} }
}, [open, importConfig.source]); onClose();
};
return (
<Modal useEffect(() => {
title="导入数据" if (open) {
open={open} setCurrentPrefix(prefix || "");
width={600} console.log('[ImportConfiguration] Modal opened with prefix:', prefix);
onCancel={() => { resetState();
onClose(); fetchCollectionTasks();
resetState(); }
}} }, [open]);
maskClosable={false}
footer={ // Separate effect for fetching collection tasks when source changes
<> useEffect(() => {
<Button onClick={onClose}></Button> if (open && importConfig.source === DataSource.COLLECTION) {
<Button fetchCollectionTasks();
type="primary" }
disabled={!fileList?.length && !importConfig.dataSource} }, [importConfig.source]);
onClick={handleImportData}
> return (
<Modal
</Button> title="导入数据"
</> open={open}
} width={600}
> onCancel={() => {
<Form onClose();
form={form} resetState();
layout="vertical" }}
initialValues={importConfig || {}} maskClosable={false}
onValuesChange={(_, allValues) => setImportConfig(allValues)} footer={
> <>
<Form.Item <Button onClick={onClose}></Button>
label="数据源" <Button
name="source" type="primary"
rules={[{ required: true, message: "请选择数据源" }]} disabled={!fileList?.length && !importConfig.dataSource}
> onClick={handleImportData}
<Radio.Group >
buttonStyle="solid"
options={dataSourceOptions} </Button>
optionType="button" </>
/> }
</Form.Item> >
{importConfig?.source === DataSource.COLLECTION && ( <Form
<Form.Item name="dataSource" label="归集任务" required> form={form}
<Select placeholder="请选择归集任务" options={collectionOptions} /> layout="vertical"
</Form.Item> initialValues={importConfig || {}}
)} onValuesChange={(_, allValues) => setImportConfig(allValues)}
>
{/* obs import */} <Form.Item
{importConfig?.source === DataSource.OBS && ( label="数据源"
<div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg"> name="source"
<Form.Item rules={[{ required: true, message: "请选择数据源" }]}
name="endpoint" >
rules={[{ required: true }]} <Radio.Group
label="Endpoint" buttonStyle="solid"
> options={dataSourceOptions}
<Input optionType="button"
className="h-8 text-xs" />
placeholder="obs.cn-north-4.myhuaweicloud.com" </Form.Item>
/> {importConfig?.source === DataSource.COLLECTION && (
</Form.Item> <Form.Item name="dataSource" label="归集任务" required>
<Form.Item <Select placeholder="请选择归集任务" options={collectionOptions} />
name="bucket" </Form.Item>
rules={[{ required: true }]} )}
label="Bucket"
> {/* obs import */}
<Input className="h-8 text-xs" placeholder="my-bucket" /> {importConfig?.source === DataSource.OBS && (
</Form.Item> <div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg">
<Form.Item <Form.Item
name="accessKey" name="endpoint"
rules={[{ required: true }]} rules={[{ required: true }]}
label="Access Key" label="Endpoint"
> >
<Input className="h-8 text-xs" placeholder="Access Key" /> <Input
</Form.Item> className="h-8 text-xs"
<Form.Item placeholder="obs.cn-north-4.myhuaweicloud.com"
name="secretKey" />
rules={[{ required: true }]} </Form.Item>
label="Secret Key" <Form.Item
> name="bucket"
<Input rules={[{ required: true }]}
type="password" label="Bucket"
className="h-8 text-xs" >
placeholder="Secret Key" <Input className="h-8 text-xs" placeholder="my-bucket" />
/> </Form.Item>
</Form.Item> <Form.Item
</div> name="accessKey"
)} rules={[{ required: true }]}
label="Access Key"
{/* Local Upload Component */} >
{importConfig?.source === DataSource.UPLOAD && ( <Input className="h-8 text-xs" placeholder="Access Key" />
<> </Form.Item>
<Form.Item <Form.Item
label="自动解压上传的压缩包" name="secretKey"
name="hasArchive" rules={[{ required: true }]}
valuePropName="checked" label="Secret Key"
initialValue={true} >
> <Input
<Switch /> type="password"
</Form.Item> className="h-8 text-xs"
<Form.Item placeholder="Secret Key"
label="上传文件" />
name="files" </Form.Item>
rules={[ </div>
{ )}
required: true,
message: "请上传文件", {/* Local Upload Component */}
}, {importConfig?.source === DataSource.UPLOAD && (
]} <>
> <Form.Item
<Dragger label="自动解压上传的压缩包"
className="w-full" name="hasArchive"
onRemove={handleRemoveFile} valuePropName="checked"
beforeUpload={handleBeforeUpload} initialValue={true}
multiple >
> <Switch />
<p className="ant-upload-drag-icon"> </Form.Item>
<InboxOutlined /> <Form.Item
</p> label="上传文件"
<p className="ant-upload-text"></p> name="files"
<p className="ant-upload-hint"></p> rules={[
</Dragger> {
</Form.Item> required: true,
</> message: "请上传文件",
)} },
]}
{/* Target Configuration */} >
{importConfig?.target && importConfig?.target !== DataSource.UPLOAD && ( <Dragger
<div className="space-y-3 p-4 bg-blue-50 rounded-lg"> className="w-full"
{importConfig?.target === DataSource.DATABASE && ( onRemove={handleRemoveFile}
<div className="grid grid-cols-2 gap-3"> beforeUpload={handleBeforeUpload}
<Form.Item multiple
name="databaseType" >
rules={[{ required: true }]} <p className="ant-upload-drag-icon">
label="数据库类型" <InboxOutlined />
> </p>
<Select <p className="ant-upload-text"></p>
className="w-full" <p className="ant-upload-hint"></p>
options={[ </Dragger>
{ label: "MySQL", value: "mysql" }, </Form.Item>
{ label: "PostgreSQL", value: "postgresql" }, </>
{ label: "MongoDB", value: "mongodb" }, )}
]}
></Select> {/* Target Configuration */}
</Form.Item> {importConfig?.target && importConfig?.target !== DataSource.UPLOAD && (
<Form.Item <div className="space-y-3 p-4 bg-blue-50 rounded-lg">
name="tableName" {importConfig?.target === DataSource.DATABASE && (
rules={[{ required: true }]} <div className="grid grid-cols-2 gap-3">
label="表名" <Form.Item
> name="databaseType"
<Input className="h-8 text-xs" placeholder="dataset_table" /> rules={[{ required: true }]}
</Form.Item> label="数据库类型"
<Form.Item >
name="connectionString" <Select
rules={[{ required: true }]} className="w-full"
label="连接字符串" options={[
> { label: "MySQL", value: "mysql" },
<Input { label: "PostgreSQL", value: "postgresql" },
className="h-8 text-xs col-span-2" { label: "MongoDB", value: "mongodb" },
placeholder="数据库连接字符串" ]}
/> ></Select>
</Form.Item> </Form.Item>
</div> <Form.Item
)} name="tableName"
</div> rules={[{ required: true }]}
)} label="表名"
</Form> >
</Modal> <Input className="h-8 text-xs" placeholder="dataset_table" />
); </Form.Item>
} <Form.Item
name="connectionString"
rules={[{ required: true }]}
label="连接字符串"
>
<Input
className="h-8 text-xs col-span-2"
placeholder="数据库连接字符串"
/>
</Form.Item>
</div>
)}
</div>
)}
</Form>
</Modal>
);
}

View File

@@ -1,316 +1,399 @@
import { Button, Descriptions, DescriptionsProps, Modal, Table } from "antd"; import { App, Button, Descriptions, DescriptionsProps, Modal, Table, Input } from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit"; import { formatBytes, formatDateTime } from "@/utils/unit";
import { Download, Trash2, Folder, File } from "lucide-react"; import { Download, Trash2, Folder, File } from "lucide-react";
import { datasetTypeMap } from "../../dataset.const"; import { datasetTypeMap } from "../../dataset.const";
export default function Overview({ dataset, filesOperation, fetchDataset }) { export default function Overview({ dataset, filesOperation, fetchDataset }) {
const { const { modal, message } = App.useApp();
fileList, const {
pagination, fileList,
selectedFiles, pagination,
setSelectedFiles, selectedFiles,
previewVisible, setSelectedFiles,
previewFileName, previewVisible,
previewContent, previewFileName,
setPreviewVisible, previewContent,
handleDeleteFile, setPreviewVisible,
handleDownloadFile, handleDeleteFile,
handleBatchDeleteFiles, handleDownloadFile,
handleBatchExport, handleBatchDeleteFiles,
} = filesOperation; handleBatchExport,
handleCreateDirectory,
// 文件列表多选配置 handleDownloadDirectory,
const rowSelection = { handleDeleteDirectory,
onChange: (selectedRowKeys: React.Key[], selectedRows: any[]) => { } = filesOperation;
setSelectedFiles(selectedRowKeys as number[]);
console.log( // 文件列表多选配置
`selectedRowKeys: ${selectedRowKeys}`, const rowSelection = {
"selectedRows: ", onChange: (selectedRowKeys: React.Key[], selectedRows: any[]) => {
selectedRows setSelectedFiles(selectedRowKeys as number[]);
); console.log(
}, `selectedRowKeys: ${selectedRowKeys}`,
}; "selectedRows: ",
// 基本信息 selectedRows
const items: DescriptionsProps["items"] = [ );
{ },
key: "id", };
label: "ID", // 基本信息
children: dataset.id, const items: DescriptionsProps["items"] = [
}, {
{ key: "id",
key: "name", label: "ID",
label: "名称", children: dataset.id,
children: dataset.name, },
}, {
{ key: "name",
key: "fileCount", label: "名称",
label: "文件数", children: dataset.name,
children: dataset.fileCount || 0, },
}, {
{ key: "fileCount",
key: "size", label: "文件数",
label: "数据大小", children: dataset.fileCount || 0,
children: dataset.size || "0 B", },
}, {
key: "size",
{ label: "数据大小",
key: "datasetType", children: dataset.size || "0 B",
label: "类型", },
children: datasetTypeMap[dataset?.datasetType]?.label || "未知",
}, {
{ key: "datasetType",
key: "status", label: "类型",
label: "状态", children: datasetTypeMap[dataset?.datasetType]?.label || "未知",
children: dataset?.status?.label || "未知", },
}, {
{ key: "status",
key: "createdBy", label: "状态",
label: "创建者", children: dataset?.status?.label || "未知",
children: dataset.createdBy || "未知", },
}, {
{ key: "createdBy",
key: "targetLocation", label: "创建者",
label: "存储路径", children: dataset.createdBy || "未知",
children: dataset.targetLocation || "未知", },
}, {
{ key: "targetLocation",
key: "pvcName", label: "存储路径",
label: "存储名称", children: dataset.targetLocation || "未知",
children: dataset.pvcName || "未知", },
}, {
{ key: "pvcName",
key: "createdAt", label: "存储名称",
label: "创建时间", children: dataset.pvcName || "未知",
children: dataset.createdAt, },
}, {
{ key: "createdAt",
key: "updatedAt", label: "创建时间",
label: "更新时间", children: dataset.createdAt,
children: dataset.updatedAt, },
}, {
{ key: "updatedAt",
key: "description", label: "更新时间",
label: "描述", children: dataset.updatedAt,
children: dataset.description || "无", },
}, {
]; key: "description",
label: "描述",
// 文件列表列定义 children: dataset.description || "无",
const columns = [ },
{ ];
title: "文件名",
dataIndex: "fileName", // 文件列表列定义
key: "fileName", const columns = [
fixed: "left", {
render: (text: string, record: any) => { title: "文件名",
const isDirectory = record.id.startsWith('directory-'); dataIndex: "fileName",
const iconSize = 16; key: "fileName",
fixed: "left",
const content = ( render: (text: string, record: any) => {
<div className="flex items-center"> const isDirectory = record.id.startsWith('directory-');
{isDirectory ? ( const iconSize = 16;
<Folder className="mr-2 text-blue-500" size={iconSize} />
) : ( const content = (
<File className="mr-2 text-black" size={iconSize} /> <div className="flex items-center">
)} {isDirectory ? (
<span className="truncate text-black">{text}</span> <Folder className="mr-2 text-blue-500" size={iconSize} />
</div> ) : (
); <File className="mr-2 text-black" size={iconSize} />
)}
if (isDirectory) { <span className="truncate text-black">{text}</span>
return ( </div>
<Button );
type="link"
onClick={(e) => { if (isDirectory) {
const currentPath = filesOperation.pagination.prefix || ''; return (
const newPath = `${currentPath}${record.fileName}`; <Button
filesOperation.fetchFiles(newPath); type="link"
}} onClick={(e) => {
> const currentPath = filesOperation.pagination.prefix || '';
{content} // 文件夹路径必须以斜杠结尾
</Button> const newPath = `${currentPath}${record.fileName}/`;
); filesOperation.fetchFiles(newPath, 1, filesOperation.pagination.pageSize);
} }}
>
return ( {content}
<Button </Button>
type="link" );
onClick={(e) => {}} }
>
{content} return (
</Button> <Button
); type="link"
}, onClick={(e) => {}}
}, >
{ {content}
title: "大小", </Button>
dataIndex: "fileSize", );
key: "fileSize", },
width: 150, },
render: (text: number, record: any) => { {
const isDirectory = record.id.startsWith('directory-'); title: "大小",
if (isDirectory) { dataIndex: "fileSize",
return "-"; key: "fileSize",
} width: 150,
return formatBytes(text) render: (text: number, record: any) => {
}, const isDirectory = record.id.startsWith('directory-');
}, if (isDirectory) {
{ return formatBytes(record.fileSize || 0);
title: "上传时间", }
dataIndex: "uploadTime", return formatBytes(text)
key: "uploadTime", },
width: 200, },
render: (text) => formatDateTime(text), {
}, title: "包含文件数",
{ dataIndex: "fileCount",
title: "操作", key: "fileCount",
key: "action", width: 120,
width: 180, render: (text: number, record: any) => {
fixed: "right", const isDirectory = record.id.startsWith('directory-');
render: (_, record) => { if (!isDirectory) {
const isDirectory = record.id.startsWith('directory-'); return "-";
if (isDirectory) { }
return <div className="flex"/>; return record.fileCount ?? 0;
} },
return ( },
<div className="flex"> {
<Button title: "上传时间",
size="small" dataIndex: "uploadTime",
type="link" key: "uploadTime",
onClick={() => handleDownloadFile(record)} width: 200,
> render: (text) => formatDateTime(text),
},
</Button> {
<Button title: "操作",
size="small" key: "action",
type="link" width: 180,
onClick={async () => { fixed: "right",
await handleDeleteFile(record); render: (_, record) => {
fetchDataset() const isDirectory = record.id.startsWith('directory-');
}
} if (isDirectory) {
> const currentPath = filesOperation.pagination.prefix || '';
const fullPath = `${currentPath}${record.fileName}/`;
</Button>
</div> return (
)}, <div className="flex">
}, <Button
]; size="small"
type="link"
return ( onClick={() => handleDownloadDirectory(fullPath, record.fileName)}
<> >
<div className=" flex flex-col gap-4">
{/* 基本信息 */} </Button>
<Descriptions <Button
title="基本信息" size="small"
layout="vertical" type="link"
size="small" onClick={() => {
items={items} modal.confirm({
column={5} title: '确认删除文件夹?',
/> content: `删除文件夹 "${record.fileName}" 将同时删除其中的所有文件和子文件夹,此操作不可恢复。`,
okText: '删除',
{/* 文件列表 */} okType: 'danger',
<h2 className="text-base font-semibold mt-8"></h2> cancelText: '取消',
{selectedFiles.length > 0 && ( onOk: async () => {
<div className="flex items-center gap-2 p-3 bg-blue-50 rounded-lg border border-blue-200"> await handleDeleteDirectory(fullPath, record.fileName);
<span className="text-sm text-blue-700 font-medium"> fetchDataset();
{selectedFiles.length} },
</span> });
<Button }}
onClick={handleBatchExport} >
className="ml-auto bg-transparent"
> </Button>
<Download className="w-4 h-4 mr-2" /> </div>
);
</Button> }
<Button
onClick={handleBatchDeleteFiles} return (
className="text-red-600 hover:text-red-700 hover:bg-red-50 bg-transparent" <div className="flex">
> <Button
<Trash2 className="w-4 h-4 mr-2" /> size="small"
type="link"
</Button> onClick={() => handleDownloadFile(record)}
</div> >
)}
<div className="overflow-x-auto"> </Button>
<div className="mb-2"> <Button
{(filesOperation.pagination.prefix || '') !== '' && ( size="small"
<Button type="link"
type="link" onClick={async () => {
onClick={() => { await handleDeleteFile(record);
// 获取上一级目录 fetchDataset()
const currentPath = filesOperation.pagination.prefix || ''; }
const pathParts = currentPath.split('/').filter(Boolean); }
pathParts.pop(); // 移除最后一个目录 >
const parentPath = pathParts.length > 0 ? `${pathParts.join('/')}/` : '';
filesOperation.fetchFiles(parentPath); </Button>
}} </div>
className="p-0" )},
> },
<span className="flex items-center text-blue-500"> ];
<svg
className="w-4 h-4 mr-1" return (
fill="none" <>
stroke="currentColor" <div className=" flex flex-col gap-4">
viewBox="0 0 24 24" {/* 基本信息 */}
xmlns="http://www.w3.org/2000/svg" <Descriptions
> title="基本信息"
<path layout="vertical"
strokeLinecap="round" size="small"
strokeLinejoin="round" items={items}
strokeWidth={2} column={5}
d="M10 19l-7-7m0 0l7-7m-7 7h18" />
/>
</svg> {/* 文件列表 */}
<div className="flex items-center justify-between mt-8 mb-2">
</span> <h2 className="text-base font-semibold"></h2>
</Button> <Button
)} type="primary"
{filesOperation.pagination.prefix && ( size="small"
<span className="ml-2 text-gray-600">: {filesOperation.pagination.prefix}</span> onClick={() => {
)} let dirName = "";
</div> modal.confirm({
<Table title: "新建文件夹",
size="middle" content: (
rowKey="id" <Input
columns={columns} autoFocus
dataSource={fileList} placeholder="请输入文件夹名称"
// rowSelection={rowSelection} onChange={(e) => {
scroll={{ x: "max-content", y: 600 }} dirName = e.target.value?.trim();
pagination={{ }}
...pagination, />
showTotal: (total) => `${total}`, ),
onChange: (page, pageSize) => { okText: "确定",
filesOperation.setPagination(prev => ({ cancelText: "取消",
...prev, onOk: async () => {
current: page, if (!dirName) {
pageSize: pageSize message.warning("请输入文件夹名称");
})); return Promise.reject();
filesOperation.fetchFiles(pagination.prefix, page, pageSize); }
} await handleCreateDirectory(dirName);
}} },
/> });
</div> }}
</div> >
{/* 文件预览弹窗 */}
<Modal </Button>
title={`文件预览:${previewFileName}`} </div>
open={previewVisible} {selectedFiles.length > 0 && (
onCancel={() => setPreviewVisible(false)} <div className="flex items-center gap-2 p-3 bg-blue-50 rounded-lg border border-blue-200">
footer={null} <span className="text-sm text-blue-700 font-medium">
width={700} {selectedFiles.length}
> </span>
<pre <Button
style={{ onClick={handleBatchExport}
whiteSpace: "pre-wrap", className="ml-auto bg-transparent"
wordBreak: "break-all", >
fontSize: 14, <Download className="w-4 h-4 mr-2" />
color: "#222",
}} </Button>
> <Button
{previewContent} onClick={handleBatchDeleteFiles}
</pre> className="text-red-600 hover:text-red-700 hover:bg-red-50 bg-transparent"
</Modal> >
</> <Trash2 className="w-4 h-4 mr-2" />
);
} </Button>
</div>
)}
<div className="overflow-x-auto">
<div className="mb-2">
{(filesOperation.pagination.prefix || '') !== '' && (
<Button
type="link"
onClick={() => {
// 获取上一级目录
const currentPath = filesOperation.pagination.prefix || '';
// 移除末尾的斜杠,然后按斜杠分割
const trimmedPath = currentPath.replace(/\/$/, '');
const pathParts = trimmedPath.split('/');
// 移除最后一个目录名
pathParts.pop();
// 重新组合路径,如果还有内容则加斜杠,否则为空
const parentPath = pathParts.length > 0 ? `${pathParts.join('/')}/` : '';
filesOperation.fetchFiles(parentPath, 1, filesOperation.pagination.pageSize);
}}
className="p-0"
>
<span className="flex items-center text-blue-500">
<svg
className="w-4 h-4 mr-1"
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
xmlns="http://www.w3.org/2000/svg"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth={2}
d="M10 19l-7-7m0 0l7-7m-7 7h18"
/>
</svg>
</span>
</Button>
)}
{filesOperation.pagination.prefix && (
<span className="ml-2 text-gray-600">: {filesOperation.pagination.prefix}</span>
)}
</div>
<Table
size="middle"
rowKey="id"
columns={columns}
dataSource={fileList}
// rowSelection={rowSelection}
scroll={{ x: "max-content", y: 600 }}
pagination={{
...pagination,
showTotal: (total) => `${total}`,
onChange: (page, pageSize) => {
filesOperation.fetchFiles(filesOperation.pagination.prefix, page, pageSize);
}
}}
/>
</div>
</div>
{/* 文件预览弹窗 */}
<Modal
title={`文件预览:${previewFileName}`}
open={previewVisible}
onCancel={() => setPreviewVisible(false)}
footer={null}
width={700}
>
<pre
style={{
whiteSpace: "pre-wrap",
wordBreak: "break-all",
fontSize: 14,
color: "#222",
}}
>
{previewContent}
</pre>
</Modal>
</>
);
}

View File

@@ -1,149 +1,186 @@
import type { import type {
Dataset, Dataset,
DatasetFile, DatasetFile,
} from "@/pages/DataManagement/dataset.model"; } from "@/pages/DataManagement/dataset.model";
import { App } from "antd"; import { App } from "antd";
import { useState } from "react"; import { useState } from "react";
import { import {
deleteDatasetFileUsingDelete, deleteDatasetFileUsingDelete,
downloadFileByIdUsingGet, downloadFileByIdUsingGet,
exportDatasetUsingPost, exportDatasetUsingPost,
queryDatasetFilesUsingGet, queryDatasetFilesUsingGet,
} from "../dataset.api"; createDatasetDirectoryUsingPost,
import { useParams } from "react-router"; downloadDirectoryUsingGet,
deleteDirectoryUsingDelete,
export function useFilesOperation(dataset: Dataset) { } from "../dataset.api";
const { message } = App.useApp(); import { useParams } from "react-router";
const { id } = useParams(); // 获取动态路由参数
export function useFilesOperation(dataset: Dataset) {
// 文件相关状态 const { message } = App.useApp();
const [fileList, setFileList] = useState<DatasetFile[]>([]); const { id } = useParams(); // 获取动态路由参数
const [selectedFiles, setSelectedFiles] = useState<number[]>([]);
const [pagination, setPagination] = useState<{ // 文件相关状态
current: number; const [fileList, setFileList] = useState<DatasetFile[]>([]);
pageSize: number; const [selectedFiles, setSelectedFiles] = useState<number[]>([]);
total: number; const [pagination, setPagination] = useState<{
prefix?: string; current: number;
}>({ current: 1, pageSize: 10, total: 0, prefix: '' }); pageSize: number;
total: number;
// 文件预览相关状态 prefix?: string;
const [previewVisible, setPreviewVisible] = useState(false); }>({ current: 1, pageSize: 10, total: 0, prefix: '' });
const [previewContent, setPreviewContent] = useState("");
const [previewFileName, setPreviewFileName] = useState(""); // 文件预览相关状态
const [previewVisible, setPreviewVisible] = useState(false);
const fetchFiles = async (prefix: string = '', current, pageSize) => { const [previewContent, setPreviewContent] = useState("");
const params: any = { const [previewFileName, setPreviewFileName] = useState("");
page: current ? current : pagination.current,
size: pageSize ? pageSize : pagination.pageSize, const fetchFiles = async (prefix?: string, current?, pageSize?) => {
isWithDirectory: true, // 如果明确传了 prefix(包括空字符串),使用传入的值;否则使用当前 pagination.prefix
}; const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
if (prefix !== undefined) { const params: any = {
params.prefix = prefix; page: current !== undefined ? current : pagination.current,
} else if (pagination.prefix) { size: pageSize !== undefined ? pageSize : pagination.pageSize,
params.prefix = pagination.prefix; isWithDirectory: true,
} prefix: targetPrefix,
};
const { data } = await queryDatasetFilesUsingGet(id!, params);
setFileList(data.content || []); const { data } = await queryDatasetFilesUsingGet(id!, params);
setFileList(data.content || []);
// Update pagination with current prefix
setPagination(prev => ({ // Update pagination with current prefix
...prev, setPagination(prev => ({
prefix: prefix !== undefined ? prefix : prev.prefix, ...prev,
total: data.totalElements || 0, current: params.page,
})); pageSize: params.size,
}; prefix: targetPrefix,
total: data.totalElements || 0,
const handleBatchDeleteFiles = () => { }));
if (selectedFiles.length === 0) { };
message.warning({ content: "请先选择要删除的文件" });
return; const handleBatchDeleteFiles = () => {
} if (selectedFiles.length === 0) {
// 执行批量删除逻辑 message.warning({ content: "请先选择要删除的文件" });
selectedFiles.forEach(async (fileId) => { return;
await fetch(`/api/datasets/${dataset.id}/files/${fileId}`, { }
method: "DELETE", // 执行批量删除逻辑
}); selectedFiles.forEach(async (fileId) => {
}); await fetch(`/api/datasets/${dataset.id}/files/${fileId}`, {
fetchFiles(); // 刷新文件列表 method: "DELETE",
setSelectedFiles([]); // 清空选中状态 });
message.success({ });
content: `已删除 ${selectedFiles.length} 个文件`, fetchFiles(); // 刷新文件列表
}); setSelectedFiles([]); // 清空选中状态
}; message.success({
content: `已删除 ${selectedFiles.length} 个文件`,
const handleDownloadFile = async (file: DatasetFile) => { });
// 实际导出逻辑 };
await downloadFileByIdUsingGet(dataset.id, file.id, file.fileName);
// 假设导出成功 const handleDownloadFile = async (file: DatasetFile) => {
message.success({ // 实际导出逻辑
content: `已导出 1 个文件`, await downloadFileByIdUsingGet(dataset.id, file.id, file.fileName);
}); // 假设导出成功
setSelectedFiles([]); // 清空选中状态 message.success({
}; content: `已导出 1 个文件`,
});
const handleShowFile = (file: any) => async () => { setSelectedFiles([]); // 清空选中状态
// 请求文件内容并弹窗预览 };
try {
const res = await fetch(`/api/datasets/${dataset.id}/file/${file.id}`); const handleShowFile = (file: any) => async () => {
const data = await res.text(); // 请求文件内容并弹窗预览
setPreviewFileName(file.fileName); try {
setPreviewContent(data); const res = await fetch(`/api/datasets/${dataset.id}/file/${file.id}`);
setPreviewVisible(true); const data = await res.text();
} catch (err) { setPreviewFileName(file.fileName);
message.error({ content: "文件预览失败" }); setPreviewContent(data);
} setPreviewVisible(true);
}; } catch (err) {
message.error({ content: "文件预览失败" });
const handleDeleteFile = async (file) => { }
try { };
await deleteDatasetFileUsingDelete(dataset.id, file.id);
fetchFiles(); // 刷新文件列表 const handleDeleteFile = async (file) => {
message.success({ content: `文件 ${file.fileName} 已删除` }); try {
} catch (error) { await deleteDatasetFileUsingDelete(dataset.id, file.id);
message.error({ content: `文件 ${file.fileName} 删除失败` }); fetchFiles(); // 刷新文件列表
} message.success({ content: `文件 ${file.fileName} 已删除` });
}; } catch (error) {
message.error({ content: `文件 ${file.fileName} 删除失败` });
const handleBatchExport = () => { }
if (selectedFiles.length === 0) { };
message.warning({ content: "请先选择要导出的文件" });
return; const handleBatchExport = () => {
} if (selectedFiles.length === 0) {
// 执行批量导出逻辑 message.warning({ content: "请先选择要导出的文件" });
exportDatasetUsingPost(dataset.id, { fileIds: selectedFiles }) return;
.then(() => { }
message.success({ // 执行批量导出逻辑
content: `已导出 ${selectedFiles.length} 个文件`, exportDatasetUsingPost(dataset.id, { fileIds: selectedFiles })
}); .then(() => {
setSelectedFiles([]); // 清空选中状态 message.success({
}) content: `已导出 ${selectedFiles.length} 个文件`,
.catch(() => { });
message.error({ setSelectedFiles([]); // 清空选中状态
content: "导出失败,请稍后再试", })
}); .catch(() => {
}); message.error({
}; content: "导出失败,请稍后再试",
});
return { });
fileList, };
selectedFiles,
setSelectedFiles, return {
pagination, fileList,
setPagination, selectedFiles,
previewVisible, setSelectedFiles,
setPreviewVisible, pagination,
previewContent, setPagination,
previewFileName, previewVisible,
setPreviewContent, setPreviewVisible,
setPreviewFileName, previewContent,
fetchFiles, previewFileName,
setFileList, setPreviewContent,
handleBatchDeleteFiles, setPreviewFileName,
handleDownloadFile, fetchFiles,
handleShowFile, setFileList,
handleDeleteFile, handleBatchDeleteFiles,
handleBatchExport, handleDownloadFile,
}; handleShowFile,
} handleDeleteFile,
handleBatchExport,
/**
 * Creates a sub-directory under the prefix that is currently being browsed
 * and then reloads page 1 of that prefix.
 *
 * The create call and the list refresh are handled separately so that a
 * failed refresh is not reported to the user as a failed creation.
 *
 * @param directoryName name of the directory to create
 * @throws re-throws the error of the create request after showing the
 *         failure message, so callers can react to a failed creation
 */
handleCreateDirectory: async (directoryName: string) => {
  const currentPrefix = pagination.prefix || "";

  try {
    await createDatasetDirectoryUsingPost(dataset.id, {
      parentPrefix: currentPrefix,
      directoryName,
    });
  } catch (error) {
    message.error({ content: `文件夹 ${directoryName} 创建失败` });
    throw error;
  }

  // The directory exists from here on; report success before refreshing.
  message.success({ content: `文件夹 ${directoryName} 创建成功` });

  try {
    // Refresh the current directory, resetting to the first page.
    await fetchFiles(currentPrefix, 1, pagination.pageSize);
  } catch (refreshError) {
    // A stale list is not a creation failure: warn instead of re-throwing.
    message.warning({ content: "文件列表刷新失败,请手动刷新" });
  }
},
/**
 * Downloads a directory of the current dataset and reports the outcome
 * through a toast message. Errors are shown to the user, not re-thrown.
 *
 * @param directoryPath path/prefix passed to the download request
 * @param directoryName name used only in the toast messages
 */
handleDownloadDirectory: async (directoryPath: string, directoryName: string) => {
  // Single place that turns the outcome into the matching toast.
  const notify = (succeeded: boolean) =>
    succeeded
      ? message.success({ content: `文件夹 ${directoryName} 下载成功` })
      : message.error({ content: `文件夹 ${directoryName} 下载失败` });

  try {
    await downloadDirectoryUsingGet(dataset.id, directoryPath);
    notify(true);
  } catch (error) {
    notify(false);
  }
},
/**
 * Deletes a directory of the current dataset and then reloads page 1 of
 * the prefix that is currently being browsed.
 *
 * The delete call and the list refresh are handled separately so that a
 * failed refresh is not reported to the user as a failed deletion.
 * Errors are surfaced as toast messages and are not re-thrown.
 *
 * @param directoryPath path/prefix passed to the delete request
 * @param directoryName name used only in the toast messages
 */
handleDeleteDirectory: async (directoryPath: string, directoryName: string) => {
  try {
    await deleteDirectoryUsingDelete(dataset.id, directoryPath);
  } catch (error) {
    message.error({ content: `文件夹 ${directoryName} 删除失败` });
    return;
  }

  // The directory is gone from here on; report success before refreshing.
  message.success({ content: `文件夹 ${directoryName} 已删除` });

  try {
    // Refresh the current directory, resetting to the first page.
    const currentPrefix = pagination.prefix || "";
    await fetchFiles(currentPrefix, 1, pagination.pageSize);
  } catch (refreshError) {
    // A stale list is not a deletion failure: warn instead.
    message.warning({ content: "文件列表刷新失败,请手动刷新" });
  }
},
};
}

View File

@@ -1,398 +1,399 @@
import { Card, Button, Statistic, Table, Tooltip, Tag, App } from "antd"; import { Card, Button, Statistic, Table, Tooltip, Tag, App } from "antd";
import { import {
DownloadOutlined, DownloadOutlined,
EditOutlined, EditOutlined,
DeleteOutlined, DeleteOutlined,
PlusOutlined, PlusOutlined,
UploadOutlined, UploadOutlined,
} from "@ant-design/icons"; } from "@ant-design/icons";
import TagManager from "@/components/business/TagManagement"; import TagManager from "@/components/business/TagManagement";
import { Link, useNavigate } from "react-router"; import { Link, useNavigate } from "react-router";
import { useEffect, useMemo, useState } from "react"; import { useEffect, useMemo, useState } from "react";
import { SearchControls } from "@/components/SearchControls"; import { SearchControls } from "@/components/SearchControls";
import CardView from "@/components/CardView"; import CardView from "@/components/CardView";
import type { Dataset } from "@/pages/DataManagement/dataset.model"; import type { Dataset } from "@/pages/DataManagement/dataset.model";
import { datasetStatusMap, datasetTypeMap, mapDataset } from "../dataset.const"; import { datasetStatusMap, datasetTypeMap, mapDataset } from "../dataset.const";
import useFetchData from "@/hooks/useFetchData"; import useFetchData from "@/hooks/useFetchData";
import { import {
downloadDatasetUsingGet, downloadDatasetUsingGet,
getDatasetStatisticsUsingGet, getDatasetStatisticsUsingGet,
queryDatasetsUsingGet, queryDatasetsUsingGet,
deleteDatasetByIdUsingDelete, deleteDatasetByIdUsingDelete,
createDatasetTagUsingPost, createDatasetTagUsingPost,
queryDatasetTagsUsingGet, queryDatasetTagsUsingGet,
deleteDatasetTagUsingDelete, deleteDatasetTagUsingDelete,
updateDatasetTagUsingPut, updateDatasetTagUsingPut,
} from "../dataset.api"; } from "../dataset.api";
import { formatBytes } from "@/utils/unit"; import { formatBytes } from "@/utils/unit";
import EditDataset from "../Create/EditDataset"; import EditDataset from "../Create/EditDataset";
import ImportConfiguration from "../Detail/components/ImportConfiguration"; import ImportConfiguration from "../Detail/components/ImportConfiguration";
export default function DatasetManagementPage() { export default function DatasetManagementPage() {
const navigate = useNavigate(); const navigate = useNavigate();
const { message } = App.useApp(); const { message } = App.useApp();
const [viewMode, setViewMode] = useState<"card" | "list">("card"); const [viewMode, setViewMode] = useState<"card" | "list">("card");
const [editDatasetOpen, setEditDatasetOpen] = useState(false); const [editDatasetOpen, setEditDatasetOpen] = useState(false);
const [currentDataset, setCurrentDataset] = useState<Dataset | null>(null); const [currentDataset, setCurrentDataset] = useState<Dataset | null>(null);
const [showUploadDialog, setShowUploadDialog] = useState(false); const [showUploadDialog, setShowUploadDialog] = useState(false);
const [statisticsData, setStatisticsData] = useState<any>({ const [statisticsData, setStatisticsData] = useState<any>({
count: {}, count: {},
size: {}, size: {},
}); });
async function fetchStatistics() { async function fetchStatistics() {
const { data } = await getDatasetStatisticsUsingGet(); const { data } = await getDatasetStatisticsUsingGet();
const statistics = { const statistics = {
size: [ size: [
{ {
title: "数据集总数", title: "数据集总数",
value: data?.totalDatasets || 0, value: data?.totalDatasets || 0,
}, },
{ {
title: "文件总数", title: "文件总数",
value: data?.totalFiles || 0, value: data?.totalFiles || 0,
}, },
{ {
title: "总大小", title: "总大小",
value: formatBytes(data?.totalSize) || '0 B', value: formatBytes(data?.totalSize) || '0 B',
}, },
], ],
count: [ count: [
{ {
title: "文本", title: "文本",
value: data?.count?.text || 0, value: data?.count?.text || 0,
}, },
{ {
title: "图像", title: "图像",
value: data?.count?.image || 0, value: data?.count?.image || 0,
}, },
{ {
title: "音频", title: "音频",
value: data?.count?.audio || 0, value: data?.count?.audio || 0,
}, },
{ {
title: "视频", title: "视频",
value: data?.count?.video || 0, value: data?.count?.video || 0,
}, },
], ],
}; };
setStatisticsData(statistics); setStatisticsData(statistics);
} }
const [tags, setTags] = useState<string[]>([]); const [tags, setTags] = useState<string[]>([]);
useEffect(() => { useEffect(() => {
const fetchTags = async () => { const fetchTags = async () => {
const { data } = await queryDatasetTagsUsingGet(); const { data } = await queryDatasetTagsUsingGet();
setTags(data.map((tag) => tag.name)); setTags(data.map((tag) => tag.name));
}; };
fetchTags(); fetchTags();
}, []); }, []);
const filterOptions = useMemo( const filterOptions = useMemo(
() => [ () => [
{ {
key: "type", key: "type",
label: "类型", label: "类型",
options: [...Object.values(datasetTypeMap)], options: [...Object.values(datasetTypeMap)],
}, },
{ {
key: "status", key: "status",
label: "状态", label: "状态",
options: [...Object.values(datasetStatusMap)], options: [...Object.values(datasetStatusMap)],
}, },
{ {
key: "tags", key: "tags",
label: "标签", label: "标签",
mode: "multiple", mode: "multiple",
options: tags.map((tag) => ({ label: tag, value: tag })), options: tags.map((tag) => ({ label: tag, value: tag })),
}, },
], ],
[tags] [tags]
); );
const { const {
loading, loading,
tableData, tableData,
searchParams, searchParams,
pagination, pagination,
fetchData, fetchData,
setSearchParams, setSearchParams,
handleFiltersChange, handleFiltersChange,
handleKeywordChange, handleKeywordChange,
} = useFetchData<Dataset>( } = useFetchData<Dataset>(
queryDatasetsUsingGet, queryDatasetsUsingGet,
mapDataset, mapDataset,
30000, // 30秒轮询间隔 30000, // 30秒轮询间隔
true, // 自动刷新 true, // 自动刷新
[fetchStatistics], // 额外的轮询函数 [fetchStatistics], // 额外的轮询函数
0 0
); );
const handleDownloadDataset = async (dataset: Dataset) => { const handleDownloadDataset = async (dataset: Dataset) => {
await downloadDatasetUsingGet(dataset.id, dataset.name); await downloadDatasetUsingGet(dataset.id, dataset.name);
message.success("数据集下载成功"); message.success("数据集下载成功");
}; };
const handleDeleteDataset = async (id: number) => { const handleDeleteDataset = async (id: number) => {
if (!id) return; if (!id) return;
await deleteDatasetByIdUsingDelete(id); await deleteDatasetByIdUsingDelete(id);
fetchData({ pageOffset: 0 }); fetchData({ pageOffset: 0 });
message.success("数据删除成功"); message.success("数据删除成功");
}; };
const handleImportData = (dataset: Dataset) => { const handleImportData = (dataset: Dataset) => {
setCurrentDataset(dataset); setCurrentDataset(dataset);
setShowUploadDialog(true); setShowUploadDialog(true);
}; };
const handleRefresh = async (showMessage = true) => { const handleRefresh = async (showMessage = true) => {
await fetchData({ pageOffset: 0 }); await fetchData({ pageOffset: 0 });
if (showMessage) { if (showMessage) {
message.success("数据已刷新"); message.success("数据已刷新");
} }
}; };
const operations = [ const operations = [
{ {
key: "edit", key: "edit",
label: "编辑", label: "编辑",
icon: <EditOutlined />, icon: <EditOutlined />,
onClick: (item: Dataset) => { onClick: (item: Dataset) => {
setCurrentDataset(item); setCurrentDataset(item);
setEditDatasetOpen(true); setEditDatasetOpen(true);
}, },
}, },
{ {
key: "import", key: "import",
label: "导入", label: "导入",
icon: <UploadOutlined />, icon: <UploadOutlined />,
onClick: (item: Dataset) => { onClick: (item: Dataset) => {
handleImportData(item); handleImportData(item);
}, },
}, },
{ {
key: "download", key: "download",
label: "下载", label: "下载",
icon: <DownloadOutlined />, icon: <DownloadOutlined />,
onClick: (item: Dataset) => { onClick: (item: Dataset) => {
if (!item.id) return; if (!item.id) return;
handleDownloadDataset(item); handleDownloadDataset(item);
}, },
}, },
{ {
key: "delete", key: "delete",
label: "删除", label: "删除",
danger: true, danger: true,
confirm: { confirm: {
title: "确认删除该数据集?", title: "确认删除该数据集?",
description: "删除后该数据集将无法恢复,请谨慎操作。", description: "删除后该数据集将无法恢复,请谨慎操作。",
okText: "删除", okText: "删除",
cancelText: "取消", cancelText: "取消",
okType: "danger", okType: "danger",
}, },
icon: <DeleteOutlined />, icon: <DeleteOutlined />,
onClick: (item: Dataset) => handleDeleteDataset(item.id), onClick: (item: Dataset) => handleDeleteDataset(item.id),
}, },
]; ];
const columns = [ const columns = [
{ {
title: "名称", title: "名称",
dataIndex: "name", dataIndex: "name",
key: "name", key: "name",
fixed: "left", fixed: "left",
render: (name, record) => ( render: (name, record) => (
<Button <Button
type="link" type="link"
onClick={() => navigate(`/data/management/detail/${record.id}`)} onClick={() => navigate(`/data/management/detail/${record.id}`)}
> >
{name} {name}
</Button> </Button>
), ),
}, },
{ {
title: "类型", title: "类型",
dataIndex: "type", dataIndex: "type",
key: "type", key: "type",
width: 100, width: 100,
}, },
{ {
title: "状态", title: "状态",
dataIndex: "status", dataIndex: "status",
key: "status", key: "status",
render: (status: any) => { render: (status: any) => {
return ( return (
<Tag icon={status?.icon} color={status?.color}> <Tag icon={status?.icon} color={status?.color}>
{status?.label} {status?.label}
</Tag> </Tag>
); );
}, },
width: 120, width: 120,
}, },
{ {
title: "大小", title: "大小",
dataIndex: "size", dataIndex: "size",
key: "size", key: "size",
width: 120, width: 120,
}, },
{ {
title: "文件数", title: "文件数",
dataIndex: "fileCount", dataIndex: "fileCount",
key: "fileCount", key: "fileCount",
width: 100, width: 100,
}, },
// { // {
// title: "创建者", // title: "创建者",
// dataIndex: "createdBy", // dataIndex: "createdBy",
// key: "createdBy", // key: "createdBy",
// width: 120, // width: 120,
// }, // },
{ {
title: "存储路径", title: "存储路径",
dataIndex: "targetLocation", dataIndex: "targetLocation",
key: "targetLocation", key: "targetLocation",
width: 200, width: 200,
ellipsis: true, ellipsis: true,
}, },
{ {
title: "创建时间", title: "创建时间",
dataIndex: "createdAt", dataIndex: "createdAt",
key: "createdAt", key: "createdAt",
width: 180, width: 180,
}, },
{ {
title: "更新时间", title: "更新时间",
dataIndex: "updatedAt", dataIndex: "updatedAt",
key: "updatedAt", key: "updatedAt",
width: 180, width: 180,
}, },
{ {
title: "操作", title: "操作",
key: "actions", key: "actions",
width: 200, width: 200,
fixed: "right", fixed: "right",
render: (_: any, record: Dataset) => ( render: (_: any, record: Dataset) => (
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
{operations.map((op) => ( {operations.map((op) => (
<Tooltip key={op.key} title={op.label}> <Tooltip key={op.key} title={op.label}>
<Button <Button
type="text" type="text"
icon={op.icon} icon={op.icon}
onClick={() => op.onClick(record)} onClick={() => op.onClick(record)}
/> />
</Tooltip> </Tooltip>
))} ))}
</div> </div>
), ),
}, },
]; ];
const renderCardView = () => ( const renderCardView = () => (
<CardView <CardView
loading={loading} loading={loading}
data={tableData} data={tableData}
pageSize={9} pageSize={9}
operations={operations} operations={operations}
pagination={pagination} pagination={pagination}
onView={(dataset) => { onView={(dataset) => {
navigate("/data/management/detail/" + dataset.id); navigate("/data/management/detail/" + dataset.id);
}} }}
/> />
); );
const renderListView = () => ( const renderListView = () => (
<Card> <Card>
<Table <Table
columns={columns} columns={columns}
dataSource={tableData} dataSource={tableData}
pagination={pagination} pagination={pagination}
rowKey="id" rowKey="id"
scroll={{ x: "max-content", y: "calc(100vh - 30rem)" }} scroll={{ x: "max-content", y: "calc(100vh - 30rem)" }}
/> />
</Card> </Card>
); );
useEffect(() => { useEffect(() => {
const refresh = () => { const refresh = () => {
handleRefresh(true); handleRefresh(true);
}; };
window.addEventListener("update:datasets", refresh); window.addEventListener("update:datasets", refresh);
return () => { return () => {
window.removeEventListener("update:datasets", refresh); window.removeEventListener("update:datasets", refresh);
}; };
}, []); }, []);
return ( return (
<div className="gap-4 h-full flex flex-col"> <div className="gap-4 h-full flex flex-col">
{/* Header */} {/* Header */}
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
<h1 className="text-xl font-bold"></h1> <h1 className="text-xl font-bold"></h1>
<div className="flex gap-2 items-center"> <div className="flex gap-2 items-center">
{/* tasks */} {/* tasks */}
<TagManager <TagManager
onCreate={createDatasetTagUsingPost} onCreate={createDatasetTagUsingPost}
onDelete={(ids: string) => deleteDatasetTagUsingDelete({ ids })} onDelete={(ids: string) => deleteDatasetTagUsingDelete({ ids })}
onUpdate={updateDatasetTagUsingPut} onUpdate={updateDatasetTagUsingPut}
onFetch={queryDatasetTagsUsingGet} onFetch={queryDatasetTagsUsingGet}
/> />
<Link to="/data/management/create"> <Link to="/data/management/create">
<Button <Button
type="primary" type="primary"
icon={<PlusOutlined className="w-4 h-4 mr-2" />} icon={<PlusOutlined className="w-4 h-4 mr-2" />}
> >
</Button> </Button>
</Link> </Link>
</div> </div>
</div> </div>
{/* Statistics */} {/* Statistics */}
<div className="grid grid-cols-1 gap-4"> <div className="grid grid-cols-1 gap-4">
<Card> <Card>
<div className="grid grid-cols-3"> <div className="grid grid-cols-3">
{statisticsData.size?.map?.((item) => ( {statisticsData.size?.map?.((item) => (
<Statistic <Statistic
title={item.title} title={item.title}
key={item.title} key={item.title}
value={`${item.value}`} value={`${item.value}`}
/> />
))} ))}
</div> </div>
</Card> </Card>
</div> </div>
<SearchControls <SearchControls
searchTerm={searchParams.keyword} searchTerm={searchParams.keyword}
onSearchChange={handleKeywordChange} onSearchChange={handleKeywordChange}
searchPlaceholder="搜索数据集名称、描述" searchPlaceholder="搜索数据集名称、描述"
filters={filterOptions} filters={filterOptions}
onFiltersChange={handleFiltersChange} onFiltersChange={handleFiltersChange}
onClearFilters={() => setSearchParams({ ...searchParams, filter: {} })} onClearFilters={() => setSearchParams({ ...searchParams, filter: {} })}
viewMode={viewMode} viewMode={viewMode}
onViewModeChange={setViewMode} onViewModeChange={setViewMode}
showViewToggle showViewToggle
onReload={handleRefresh} onReload={handleRefresh}
/> />
{viewMode === "card" ? renderCardView() : renderListView()} {viewMode === "card" ? renderCardView() : renderListView()}
<EditDataset <EditDataset
open={editDatasetOpen} open={editDatasetOpen}
data={currentDataset} data={currentDataset}
onClose={() => { onClose={() => {
setCurrentDataset(null); setCurrentDataset(null);
setEditDatasetOpen(false); setEditDatasetOpen(false);
}} }}
onRefresh={handleRefresh} onRefresh={handleRefresh}
/> />
<ImportConfiguration <ImportConfiguration
data={currentDataset} data={currentDataset}
open={showUploadDialog} open={showUploadDialog}
onClose={() => { onClose={() => {
setCurrentDataset(null); setCurrentDataset(null);
setShowUploadDialog(false); setShowUploadDialog(false);
}} }}
updateEvent="update:datasets" prefix=""
/> updateEvent="update:datasets"
</div> />
); </div>
} );
}

View File

@@ -1,191 +1,220 @@
import { get, post, put, del, download } from "@/utils/request"; import { get, post, put, del, download } from "@/utils/request";
// 数据集统计接口 // 数据集统计接口
export function getDatasetStatisticsUsingGet() { export function getDatasetStatisticsUsingGet() {
return get("/api/data-management/datasets/statistics"); return get("/api/data-management/datasets/statistics");
} }
export function queryDatasetStatisticsByIdUsingGet(id: string | number) { export function queryDatasetStatisticsByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}/statistics`); return get(`/api/data-management/datasets/${id}/statistics`);
} }
// 查询数据集列表 // 查询数据集列表
export function queryDatasetsUsingGet(params?: any) { export function queryDatasetsUsingGet(params?: any) {
return get("/api/data-management/datasets", params); return get("/api/data-management/datasets", params);
} }
// 创建数据集 // 创建数据集
export function createDatasetUsingPost(data: any) { export function createDatasetUsingPost(data: any) {
return post("/api/data-management/datasets", data); return post("/api/data-management/datasets", data);
} }
// 根据ID获取数据集详情 // 根据ID获取数据集详情
export function queryDatasetByIdUsingGet(id: string | number) { export function queryDatasetByIdUsingGet(id: string | number) {
return get(`/api/data-management/datasets/${id}`); return get(`/api/data-management/datasets/${id}`);
} }
// 更新数据集 // 更新数据集
export function updateDatasetByIdUsingPut(id: string | number, data: any) { export function updateDatasetByIdUsingPut(id: string | number, data: any) {
return put(`/api/data-management/datasets/${id}`, data); return put(`/api/data-management/datasets/${id}`, data);
} }
// 删除数据集 // 删除数据集
export function deleteDatasetByIdUsingDelete(id: string | number) { export function deleteDatasetByIdUsingDelete(id: string | number) {
return del(`/api/data-management/datasets/${id}`); return del(`/api/data-management/datasets/${id}`);
} }
// 下载数据集 // 下载数据集
export function downloadDatasetUsingGet(id: string | number) { export function downloadDatasetUsingGet(id: string | number) {
return download(`/api/data-management/datasets/${id}/files/download`); return download(`/api/data-management/datasets/${id}/files/download`);
} }
// 验证数据集 // 验证数据集
export function validateDatasetUsingPost(id: string | number, data?: any) { export function validateDatasetUsingPost(id: string | number, data?: any) {
return post(`/api/data-management/datasets/${id}/validate`, data); return post(`/api/data-management/datasets/${id}/validate`, data);
} }
// 获取数据集文件列表 // 获取数据集文件列表
export function queryDatasetFilesUsingGet(id: string | number, params?: any) { export function queryDatasetFilesUsingGet(id: string | number, params?: any) {
return get(`/api/data-management/datasets/${id}/files`, params); return get(`/api/data-management/datasets/${id}/files`, params);
} }
// 上传数据集文件 // 上传数据集文件
export function uploadDatasetFileUsingPost(id: string | number, data: any) { export function uploadDatasetFileUsingPost(id: string | number, data: any) {
return post(`/api/data-management/datasets/${id}/files`, data); return post(`/api/data-management/datasets/${id}/files`, data);
} }
export function downloadFileByIdUsingGet( // 新建数据集文件夹
id: string | number, export function createDatasetDirectoryUsingPost(
fileId: string | number, id: string | number,
fileName: string data: { parentPrefix?: string; directoryName: string }
) { ) {
return download( return post(`/api/data-management/datasets/${id}/files/directories`, data);
`/api/data-management/datasets/${id}/files/${fileId}/download`, }
null,
fileName // 下载文件夹(打包为zip)
); export function downloadDirectoryUsingGet(
} id: string | number,
directoryPath: string
// 删除数据集文件 ) {
export function deleteDatasetFileUsingDelete( const dirName = directoryPath.split('/').filter(Boolean).pop() || 'folder';
datasetId: string | number, return download(
fileId: string | number `/api/data-management/datasets/${id}/files/directories/download?prefix=${encodeURIComponent(directoryPath)}`,
) { null,
return del(`/api/data-management/datasets/${datasetId}/files/${fileId}`); `${dirName}.zip`
} );
}
// 文件预览
export function previewDatasetUsingGet(id: string | number, params?: any) { // 删除文件夹(递归删除)
return get(`/api/data-management/datasets/${id}/preview`, params); export function deleteDirectoryUsingDelete(
} id: string | number,
directoryPath: string
// 获取数据集标签 ) {
export function queryDatasetTagsUsingGet(params?: any) { return del(`/api/data-management/datasets/${id}/files/directories?prefix=${encodeURIComponent(directoryPath)}`);
return get("/api/data-management/tags", params); }
}
export function downloadFileByIdUsingGet(
// 创建数据集标签 id: string | number,
export function createDatasetTagUsingPost(data: any) { fileId: string | number,
return post("/api/data-management/tags", data); fileName: string
} ) {
return download(
// 更新数据集标签 `/api/data-management/datasets/${id}/files/${fileId}/download`,
export function updateDatasetTagUsingPut(data: any) { null,
return put(`/api/data-management/tags`, data); fileName
} );
}
// 删除数据集标签
export function deleteDatasetTagUsingDelete(data: any) { // 删除数据集文件
return del(`/api/data-management/tags`, data); export function deleteDatasetFileUsingDelete(
} datasetId: string | number,
fileId: string | number
// 数据集质量检查 ) {
export function checkDatasetQualityUsingPost(id: string | number, data?: any) { return del(`/api/data-management/datasets/${datasetId}/files/${fileId}`);
return post(`/api/data-management/datasets/${id}/quality-check`, data); }
}
// 文件预览
// 获取数据集质量报告 export function previewDatasetUsingGet(id: string | number, params?: any) {
export function getDatasetQualityReportUsingGet(id: string | number) { return get(`/api/data-management/datasets/${id}/preview`, params);
return get(`/api/data-management/datasets/${id}/quality-report`); }
}
// 获取数据集标签
// 数据集分析 export function queryDatasetTagsUsingGet(params?: any) {
export function analyzeDatasetUsingPost(id: string | number, data?: any) { return get("/api/data-management/tags", params);
return post(`/api/data-management/datasets/${id}/analyze`, data); }
}
// 创建数据集标签
// 获取数据集分析结果 export function createDatasetTagUsingPost(data: any) {
export function getDatasetAnalysisUsingGet(id: string | number) { return post("/api/data-management/tags", data);
return get(`/api/data-management/datasets/${id}/analysis`); }
}
// 更新数据集标签
// 导出数据集 export function updateDatasetTagUsingPut(data: any) {
export function exportDatasetUsingPost(id: string | number, data: any) { return put(`/api/data-management/tags`, data);
return post(`/api/data-management/datasets/${id}/export`, data); }
}
// 删除数据集标签
// 复制数据集 export function deleteDatasetTagUsingDelete(data: any) {
export function copyDatasetUsingPost(id: string | number, data: any) { return del(`/api/data-management/tags`, data);
return post(`/api/data-management/datasets/${id}/copy`, data); }
}
// 数据集质量检查
// 获取数据集版本列表 export function checkDatasetQualityUsingPost(id: string | number, data?: any) {
export function queryDatasetVersionsUsingGet( return post(`/api/data-management/datasets/${id}/quality-check`, data);
id: string | number, }
params?: any
) { // 获取数据集质量报告
return get(`/api/data-management/datasets/${id}/versions`, params); export function getDatasetQualityReportUsingGet(id: string | number) {
} return get(`/api/data-management/datasets/${id}/quality-report`);
}
// 创建数据集版本
export function createDatasetVersionUsingPost(id: string | number, data: any) { // 数据集分析
return post(`/api/data-management/datasets/${id}/versions`, data); export function analyzeDatasetUsingPost(id: string | number, data?: any) {
} return post(`/api/data-management/datasets/${id}/analyze`, data);
}
// 切换数据集版本
export function switchDatasetVersionUsingPut( // 获取数据集分析结果
id: string | number, export function getDatasetAnalysisUsingGet(id: string | number) {
versionId: string | number return get(`/api/data-management/datasets/${id}/analysis`);
) { }
return put(
`/api/data-management/datasets/${id}/versions/${versionId}/switch` // 导出数据集
); export function exportDatasetUsingPost(id: string | number, data: any) {
} return post(`/api/data-management/datasets/${id}/export`, data);
}
// 删除数据集版本
export function deleteDatasetVersionUsingDelete( // 复制数据集
id: string | number, export function copyDatasetUsingPost(id: string | number, data: any) {
versionId: string | number return post(`/api/data-management/datasets/${id}/copy`, data);
) { }
return del(`/api/data-management/datasets/${id}/versions/${versionId}`);
} // 获取数据集版本列表
export function queryDatasetVersionsUsingGet(
/** id: string | number,
* 文件上传相关接口 params?: any
*/ ) {
return get(`/api/data-management/datasets/${id}/versions`, params);
export function preUploadUsingPost(id: string | number, data: any) { }
return post(
`/api/data-management/datasets/${id}/files/upload/pre-upload`, // 创建数据集版本
data export function createDatasetVersionUsingPost(id: string | number, data: any) {
); return post(`/api/data-management/datasets/${id}/versions`, data);
} }
export function cancelUploadUsingPut(id) { // 切换数据集版本
return put( export function switchDatasetVersionUsingPut(
`/api/data-management/datasets/upload/cancel-upload/${id}`, id: string | number,
{}, versionId: string | number
{ showLoading: false } ) {
); return put(
} `/api/data-management/datasets/${id}/versions/${versionId}/switch`
);
export function uploadFileChunkUsingPost(id: string | number, params, config) { }
return post(
`/api/data-management/datasets/${id}/files/upload/chunk`, // 删除数据集版本
params, export function deleteDatasetVersionUsingDelete(
{ id: string | number,
showLoading: false, versionId: string | number
...config, ) {
} return del(`/api/data-management/datasets/${id}/versions/${versionId}`);
); }
}
/**
* 文件上传相关接口
*/
/**
 * Sends the pre-upload request for a dataset.
 *
 * @param id   dataset id interpolated into the request path
 * @param data request body forwarded unchanged to `post`
 * @returns whatever the shared `post` helper returns
 */
export function preUploadUsingPost(id: string | number, data: any) {
  const endpoint = `/api/data-management/datasets/${id}/files/upload/pre-upload`;
  return post(endpoint, data);
}
/**
 * Sends the cancel-upload request with an empty body and the global
 * loading indicator disabled.
 *
 * @param id identifier appended to the cancel-upload path
 *           (NOTE(review): the path is not scoped to a dataset, so this is
 *           presumably an upload/request id rather than a dataset id; confirm)
 * @returns whatever the shared `put` helper returns
 */
export function cancelUploadUsingPut(id) {
  const endpoint = `/api/data-management/datasets/upload/cancel-upload/${id}`;
  const requestOptions = { showLoading: false };
  return put(endpoint, {}, requestOptions);
}
/**
 * Uploads one file chunk for a dataset.
 *
 * @param id     dataset id interpolated into the request path
 * @param params request body forwarded unchanged to `post`
 * @param config extra request options; spread after the default so a
 *               caller-supplied `showLoading` overrides `false`
 * @returns whatever the shared `post` helper returns
 */
export function uploadFileChunkUsingPost(id: string | number, params, config) {
  const endpoint = `/api/data-management/datasets/${id}/files/upload/chunk`;
  // Loading indicator is off by default for chunk requests.
  const requestOptions = { showLoading: false, ...config };
  return post(endpoint, params, requestOptions);
}

View File

@@ -1,67 +1,69 @@
import { import {
cancelUploadUsingPut, cancelUploadUsingPut,
preUploadUsingPost, preUploadUsingPost,
uploadFileChunkUsingPost, uploadFileChunkUsingPost,
} from "@/pages/DataManagement/dataset.api"; } from "@/pages/DataManagement/dataset.api";
import { Button, Empty, Progress } from "antd"; import { Button, Empty, Progress } from "antd";
import { DeleteOutlined } from "@ant-design/icons"; import { DeleteOutlined } from "@ant-design/icons";
import { useEffect } from "react"; import { useEffect } from "react";
import { useFileSliceUpload } from "@/hooks/useSliceUpload"; import { useFileSliceUpload } from "@/hooks/useSliceUpload";
export default function TaskUpload() { export default function TaskUpload() {
const { createTask, taskList, removeTask, handleUpload } = useFileSliceUpload( const { createTask, taskList, removeTask, handleUpload } = useFileSliceUpload(
{ {
preUpload: preUploadUsingPost, preUpload: preUploadUsingPost,
uploadChunk: uploadFileChunkUsingPost, uploadChunk: uploadFileChunkUsingPost,
cancelUpload: cancelUploadUsingPut, cancelUpload: cancelUploadUsingPut,
} }
); );
useEffect(() => { useEffect(() => {
const uploadHandler = (e: any) => { const uploadHandler = (e: any) => {
const { files } = e.detail; console.log('[TaskUpload] Received upload event detail:', e.detail);
const task = createTask(e.detail); const { files } = e.detail;
handleUpload({ task, files }); const task = createTask(e.detail);
}; console.log('[TaskUpload] Created task with prefix:', task.prefix);
window.addEventListener("upload:dataset", uploadHandler); handleUpload({ task, files });
return () => { };
window.removeEventListener("upload:dataset", uploadHandler); window.addEventListener("upload:dataset", uploadHandler);
}; return () => {
}, []); window.removeEventListener("upload:dataset", uploadHandler);
};
return ( }, []);
<div
className="w-90 max-w-90 max-h-96 overflow-y-auto p-2" return (
id="header-task-popover" <div
> className="w-90 max-w-90 max-h-96 overflow-y-auto p-2"
{taskList.length > 0 && id="header-task-popover"
taskList.map((task) => ( >
<div key={task.key} className="border-b border-gray-200 pb-2"> {taskList.length > 0 &&
<div className="flex items-center justify-between"> taskList.map((task) => (
<div>{task.title}</div> <div key={task.key} className="border-b border-gray-200 pb-2">
<Button <div className="flex items-center justify-between">
type="text" <div>{task.title}</div>
danger <Button
disabled={!task?.cancelFn} type="text"
onClick={() => danger
removeTask({ disabled={!task?.cancelFn}
...task, onClick={() =>
isCancel: true, removeTask({
}) ...task,
} isCancel: true,
icon={<DeleteOutlined />} })
></Button> }
</div> icon={<DeleteOutlined />}
></Button>
<Progress size="small" percent={task.percent} /> </div>
</div>
))} <Progress size="small" percent={task.percent} />
{taskList.length === 0 && ( </div>
<Empty ))}
image={Empty.PRESENTED_IMAGE_SIMPLE} {taskList.length === 0 && (
description="暂无上传任务" <Empty
/> image={Empty.PRESENTED_IMAGE_SIMPLE}
)} description="暂无上传任务"
</div> />
); )}
} </div>
);
}

View File

@@ -1,60 +1,95 @@
""" """Tables of Annotation Management Module"""
Tables of Annotation Management Module
""" import uuid
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey
import uuid from sqlalchemy.sql import func
from sqlalchemy import Column, String, BigInteger, Boolean, TIMESTAMP, Text, Integer, JSON, Date, ForeignKey
from sqlalchemy.sql import func from app.db.session import Base
from app.db.session import Base class AnnotationTemplate(Base):
"""标注配置模板模型"""
class AnnotationTemplate(Base):
"""标注配置模板模型""" __tablename__ = "t_dm_annotation_templates"
__tablename__ = "t_dm_annotation_templates" id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(100), nullable=False, comment="模板名称")
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") description = Column(String(500), nullable=True, comment="模板描述")
name = Column(String(100), nullable=False, comment="模板名称") data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries")
description = Column(String(500), nullable=True, comment="模板描述") labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc")
data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries") configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)")
labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc") style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)") category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system")
style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical") built_in = Column(Boolean, default=False, comment="是否系统内置模板")
category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system") version = Column(String(20), default='1.0', comment="模板版本")
built_in = Column(Boolean, default=False, comment="是否系统内置模板") created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
version = Column(String(20), default='1.0', comment="模板版本") updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)") def __repr__(self):
return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>"
def __repr__(self):
return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>" @property
def is_deleted(self) -> bool:
@property """检查是否已被软删除"""
def is_deleted(self) -> bool: return self.deleted_at is not None
"""检查是否已被软删除"""
return self.deleted_at is not None class LabelingProject(Base):
"""标注项目模型"""
class LabelingProject(Base):
"""标注项目模型""" __tablename__ = "t_dm_labeling_projects"
__tablename__ = "t_dm_labeling_projects" id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") name = Column(String(100), nullable=False, comment="项目名称")
dataset_id = Column(String(36), nullable=False, comment="数据集ID") labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
name = Column(String(100), nullable=False, comment="项目名称") template_id = Column(String(36), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID") configuration = Column(JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)")
template_id = Column(String(36), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID") progress = Column(JSON, nullable=True, comment="项目进度信息")
configuration = Column(JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)") created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
progress = Column(JSON, nullable=True, comment="项目进度信息") updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)") def __repr__(self):
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
def __repr__(self):
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>" @property
def is_deleted(self) -> bool:
@property """检查是否已被软删除"""
def is_deleted(self) -> bool: return self.deleted_at is not None
"""检查是否已被软删除"""
class AutoAnnotationTask(Base):
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
__tablename__ = "t_dm_auto_annotation_tasks"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(255), nullable=False, comment="任务名称")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
dataset_name = Column(String(255), nullable=True, comment="数据集名称(冗余字段,方便查询)")
config = Column(JSON, nullable=False, comment="任务配置(模型规模、置信度等)")
file_ids = Column(JSON, nullable=True, comment="要处理的文件ID列表,为空则处理数据集所有图像")
status = Column(String(50), nullable=False, default="pending", comment="任务状态: pending/running/completed/failed")
progress = Column(Integer, default=0, comment="任务进度 0-100")
total_images = Column(Integer, default=0, comment="总图片数")
processed_images = Column(Integer, default=0, comment="已处理图片数")
detected_objects = Column(Integer, default=0, comment="检测到的对象总数")
output_path = Column(String(500), nullable=True, comment="输出路径")
error_message = Column(Text, nullable=True, comment="错误信息")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(
TIMESTAMP,
server_default=func.current_timestamp(),
onupdate=func.current_timestamp(),
comment="更新时间",
)
completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
def __repr__(self) -> str: # pragma: no cover - repr 简单返回
return f"<AutoAnnotationTask(id={self.id}, name={self.name}, status={self.status})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None return self.deleted_at is not None

View File

@@ -1,16 +1,18 @@
from fastapi import APIRouter from fastapi import APIRouter
from .config import router as about_router from .config import router as about_router
from .project import router as project_router from .project import router as project_router
from .task import router as task_router from .task import router as task_router
from .template import router as template_router from .template import router as template_router
from .auto import router as auto_router
router = APIRouter(
prefix="/annotation", router = APIRouter(
tags = ["annotation"] prefix="/annotation",
) tags = ["annotation"]
)
router.include_router(about_router)
router.include_router(project_router) router.include_router(about_router)
router.include_router(task_router) router.include_router(project_router)
router.include_router(template_router) router.include_router(task_router)
router.include_router(template_router)
router.include_router(auto_router)

View File

@@ -0,0 +1,196 @@
"""FastAPI routes for Auto Annotation tasks.
These routes back the frontend AutoAnnotation module:
- GET /api/annotation/auto
- POST /api/annotation/auto
- DELETE /api/annotation/auto/{task_id}
- GET /api/annotation/auto/{task_id}/status (simple wrapper)
- GET /api/annotation/auto/{task_id}/download (result ZIP)
"""
from __future__ import annotations
from typing import List
from fastapi import APIRouter, Depends, HTTPException, Path
from fastapi.responses import StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from ..schema.auto import (
CreateAutoAnnotationTaskRequest,
AutoAnnotationTaskResponse,
)
from ..service.auto import AutoAnnotationTaskService
# Sub-router for the auto-annotation endpoints; every route below is relative to "/auto".
router = APIRouter(
    prefix="/auto",
    tags=["annotation/auto"],
)
# Module-level logger and a single service instance shared by all route handlers in this module.
logger = get_logger(__name__)
service = AutoAnnotationTaskService()
@router.get("", response_model=StandardResponse[List[AutoAnnotationTaskResponse]])
async def list_auto_annotation_tasks(db: AsyncSession = Depends(get_db)):
    """Return the list of auto-annotation tasks.

    No pagination parameters are accepted; everything produced by
    ``service.list_tasks`` (tasks that are not soft-deleted) is returned
    wrapped in a ``StandardResponse``.
    """
    envelope = {
        "code": 200,
        "message": "success",
        "data": await service.list_tasks(db),
    }
    return StandardResponse(**envelope)
@router.post("", response_model=StandardResponse[AutoAnnotationTaskResponse])
async def create_auto_annotation_task(
    request: CreateAutoAnnotationTaskRequest,
    db: AsyncSession = Depends(get_db),
):
    """Create an auto-annotation task.

    Only the task record is created here (the service stores it with status
    ``pending``); the actual execution is left to a later scheduler/worker.
    """
    logger.info(
        "Creating auto annotation task: name=%s, dataset_id=%s, config=%s, file_ids=%s",
        request.name,
        request.dataset_id,
        request.config.model_dump(by_alias=True),
        request.file_ids,
    )

    # The dataset name and image count are denormalised onto the task row.
    # Looking them up is best effort: a failure must not block task creation.
    dataset_name = None
    total_images = 0
    try:
        dataset = await DatasetManagementService(db).get_dataset(request.dataset_id)
        if dataset is not None:
            dataset_name = dataset.name
            # Explicit file selection wins; otherwise fall back to the
            # dataset's own file count.
            total_images = (
                len(request.file_ids)
                if request.file_ids
                else (getattr(dataset, "fileCount", 0) or 0)
            )
    except Exception as exc:  # pragma: no cover - best-effort lookup
        logger.warning("Failed to fetch dataset name for auto task: %s", exc)

    created = await service.create_task(
        db,
        request,
        dataset_name=dataset_name,
        total_images=total_images,
    )
    return StandardResponse(code=200, message="success", data=created)
@router.get("/{task_id}/status", response_model=StandardResponse[AutoAnnotationTaskResponse])
async def get_auto_annotation_task_status(
    task_id: str = Path(..., description="任务ID"),
    db: AsyncSession = Depends(get_db),
):
    """Return the status of a single auto-annotation task.

    Complements the list endpoint with a lookup by ID.

    Raises:
        HTTPException: 404 when the service finds no matching task.
    """
    found = await service.get_task(db, task_id)
    if found:
        return StandardResponse(code=200, message="success", data=found)
    raise HTTPException(status_code=404, detail="Task not found")
@router.delete("/{task_id}", response_model=StandardResponse[bool])
async def delete_auto_annotation_task(
    task_id: str = Path(..., description="任务ID"),
    db: AsyncSession = Depends(get_db),
):
    """Soft-delete an auto-annotation task (only ``deleted_at`` is set).

    Raises:
        HTTPException: 404 when the service reports that no task was deleted.
    """
    removed = await service.soft_delete_task(db, task_id)
    if removed:
        return StandardResponse(code=200, message="success", data=True)
    raise HTTPException(status_code=404, detail="Task not found")
@router.get("/{task_id}/download")
async def download_auto_annotation_result(
    task_id: str = Path(..., description="任务ID"),
    db: AsyncSession = Depends(get_db),
):
    """Download the results of an auto-annotation task as a ZIP archive.

    Every file under the task's ``output_path`` is packed into a temporary
    ZIP file that is streamed back to the client in 8 KiB chunks. The
    temporary file is removed once streaming finishes (or is aborted), and
    also on any failure while building the archive.

    Raises:
        HTTPException: 404 if the task or its output directory is missing,
            400 if the task has no output path, 500 if the archive is empty.
    """
    import os
    import tempfile
    import zipfile
    from urllib.parse import quote

    # Reuse the service layer to load the task.
    task = await service.get_task(db, task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")
    if not task.output_path:
        raise HTTPException(status_code=400, detail="Task has no output path")

    output_dir = task.output_path
    if not os.path.isdir(output_dir):
        raise HTTPException(status_code=404, detail="Output directory not found")

    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".zip")
    os.close(tmp_fd)

    def _remove_tmp():
        # Idempotent cleanup: the file may already be gone.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    try:
        with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zf:
            for root, _, files in os.walk(output_dir):
                for entry in files:
                    file_path = os.path.join(root, entry)
                    # Store paths relative to the output directory.
                    zf.write(file_path, os.path.relpath(file_path, output_dir))
        file_size = os.path.getsize(tmp_path)
    except Exception:
        # Do not leave a half-written archive behind.
        _remove_tmp()
        raise

    if file_size == 0:
        _remove_tmp()
        raise HTTPException(status_code=500, detail="Generated ZIP is empty")

    def iterfile():
        try:
            with open(tmp_path, "rb") as f:
                while True:
                    chunk = f.read(8192)
                    if not chunk:
                        break
                    yield chunk
        finally:
            # Runs when the stream is exhausted or the generator is closed
            # early, so the temp file does not accumulate on disk.
            _remove_tmp()

    filename = f"{task.name}_annotations.zip"
    # HTTP header values must be latin-1 encodable and the quoted filename
    # must not contain quotes/backslashes, so send a sanitised ASCII fallback
    # plus the exact name in the RFC 6266 ``filename*`` parameter.
    ascii_name = "".join(
        ch if (ch.isascii() and ch.isprintable() and ch not in '"\\') else "_"
        for ch in filename
    )
    encoded_name = quote(filename, safe="")
    headers = {
        "Content-Disposition": (
            f"attachment; filename=\"{ascii_name}\"; filename*=UTF-8''{encoded_name}"
        ),
        "Content-Length": str(file_size),
    }
    return StreamingResponse(iterfile(), media_type="application/zip", headers=headers)

View File

@@ -0,0 +1,73 @@
"""Schemas for Auto Annotation tasks"""
from __future__ import annotations
from typing import List, Optional, Dict, Any
from datetime import datetime
from pydantic import BaseModel, Field, ConfigDict
class AutoAnnotationConfig(BaseModel):
    """Auto-annotation task configuration (aligned with the frontend payload)."""

    model_size: str = Field(alias="modelSize", description="模型规模: n/s/m/l/x")
    # The threshold is documented as a 0-1 value; enforce that at the API
    # boundary instead of handing an out-of-range value to the detector.
    conf_threshold: float = Field(
        alias="confThreshold",
        ge=0.0,
        le=1.0,
        description="置信度阈值 0-1",
    )
    target_classes: List[int] = Field(
        default_factory=list,
        alias="targetClasses",
        description="目标类别ID列表,空表示全部类别",
    )
    output_dataset_name: Optional[str] = Field(
        default=None,
        alias="outputDatasetName",
        description="自动标注结果要写入的新数据集名称(可选)",
    )

    # ``protected_namespaces=()`` silences pydantic's warning about the
    # ``model_size`` field name starting with the reserved ``model_`` prefix.
    model_config = ConfigDict(populate_by_name=True, protected_namespaces=())
class CreateAutoAnnotationTaskRequest(BaseModel):
    """Request body for creating an auto-annotation task.

    Mirrors the structure sent by the frontend CreateAutoAnnotationDialog.
    """

    model_config = ConfigDict(populate_by_name=True)

    name: str = Field(min_length=1, max_length=255, description="任务名称")
    dataset_id: str = Field(alias="datasetId", description="数据集ID")
    config: AutoAnnotationConfig = Field(description="任务配置")
    # Optional explicit file selection.
    file_ids: Optional[List[str]] = Field(
        default=None,
        alias="fileIds",
        description="要处理的文件ID列表,为空则处理数据集中所有图像",
    )
class AutoAnnotationTaskResponse(BaseModel):
    """Response model for an auto-annotation task (reused for list and detail)."""

    # ``from_attributes`` allows validating directly from an ORM object.
    model_config = ConfigDict(populate_by_name=True, from_attributes=True)

    id: str = Field(description="任务ID")
    name: str = Field(description="任务名称")
    dataset_id: str = Field(alias="datasetId", description="数据集ID")
    dataset_name: Optional[str] = Field(
        default=None, alias="datasetName", description="数据集名称"
    )
    source_datasets: Optional[List[str]] = Field(
        default=None,
        alias="sourceDatasets",
        description="本任务实际处理涉及到的所有数据集名称列表",
    )
    config: Dict[str, Any] = Field(description="任务配置")
    status: str = Field(description="任务状态")
    progress: int = Field(description="任务进度 0-100")
    total_images: int = Field(alias="totalImages", description="总图片数")
    processed_images: int = Field(alias="processedImages", description="已处理图片数")
    detected_objects: int = Field(alias="detectedObjects", description="检测到的对象总数")
    output_path: Optional[str] = Field(
        default=None, alias="outputPath", description="输出路径"
    )
    error_message: Optional[str] = Field(
        default=None, alias="errorMessage", description="错误信息"
    )
    created_at: datetime = Field(alias="createdAt", description="创建时间")
    updated_at: Optional[datetime] = Field(
        default=None, alias="updatedAt", description="更新时间"
    )
    completed_at: Optional[datetime] = Field(
        default=None, alias="completedAt", description="完成时间"
    )
class AutoAnnotationTaskListResponse(BaseModel):
    """Paginated list wrapper for auto-annotation tasks.

    Reserved for future pagination; per the original note the frontend
    currently consumes a plain array instead.
    """

    model_config = ConfigDict(populate_by_name=True)

    content: List[AutoAnnotationTaskResponse] = Field(description="任务列表")
    total: int = Field(description="总数")

View File

@@ -0,0 +1,154 @@
"""Service layer for Auto Annotation tasks"""
from __future__ import annotations
from typing import List, Optional
from datetime import datetime
from uuid import uuid4
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models.annotation_management import AutoAnnotationTask
from app.db.models.dataset_management import Dataset, DatasetFiles
from ..schema.auto import (
CreateAutoAnnotationTaskRequest,
AutoAnnotationTaskResponse,
)
class AutoAnnotationTaskService:
    """Service for auto-annotation tasks.

    Only task metadata is managed here; per the original note the actual
    execution is the runtime's responsibility.
    """

    async def create_task(
        self,
        db: AsyncSession,
        request: CreateAutoAnnotationTaskRequest,
        dataset_name: Optional[str] = None,
        total_images: int = 0,
    ) -> AutoAnnotationTaskResponse:
        """Insert a new task row with status ``pending`` and return it.

        No inference is run here; the row is only persisted so that a
        scheduler/worker can pick it up and update its progress later.
        """
        stamp = datetime.now()
        record = AutoAnnotationTask(
            id=str(uuid4()),
            name=request.name,
            dataset_id=request.dataset_id,
            dataset_name=dataset_name,
            config=request.config.model_dump(by_alias=True),
            file_ids=request.file_ids,  # file IDs selected by the user
            status="pending",
            progress=0,
            total_images=total_images,
            processed_images=0,
            detected_objects=0,
            created_at=stamp,
            updated_at=stamp,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return await self._to_response(db, record)

    async def list_tasks(self, db: AsyncSession) -> List[AutoAnnotationTaskResponse]:
        """Return all tasks that are not soft-deleted, newest first."""
        query = (
            select(AutoAnnotationTask)
            .where(AutoAnnotationTask.deleted_at.is_(None))
            .order_by(AutoAnnotationTask.created_at.desc())
        )
        rows: List[AutoAnnotationTask] = list((await db.execute(query)).scalars().all())
        responses: List[AutoAnnotationTaskResponse] = []
        for row in rows:
            responses.append(await self._to_response(db, row))
        return responses

    async def get_task(self, db: AsyncSession, task_id: str) -> Optional[AutoAnnotationTaskResponse]:
        """Return the non-deleted task with ``task_id``, or ``None`` if absent."""
        record = await self._find_active(db, task_id)
        if not record:
            return None
        return await self._to_response(db, record)

    async def _find_active(self, db: AsyncSession, task_id: str):
        """Load the task row with ``task_id`` that has no ``deleted_at`` set."""
        outcome = await db.execute(
            select(AutoAnnotationTask).where(
                AutoAnnotationTask.id == task_id,
                AutoAnnotationTask.deleted_at.is_(None),
            )
        )
        return outcome.scalar_one_or_none()

    async def _to_response(
        self,
        db: AsyncSession,
        task: AutoAnnotationTask,
    ) -> AutoAnnotationTaskResponse:
        """Convert a task row to its response model and attach ``source_datasets``."""
        resp = AutoAnnotationTaskResponse.model_validate(task)
        try:
            resp.source_datasets = await self._compute_source_datasets(db, task)
        except Exception:
            # Degrade to the single dataset name (or ID) stored on the task.
            resp.source_datasets = (
                [resp.dataset_name] if resp.dataset_name else [resp.dataset_id]
            )
        return resp

    async def _compute_source_datasets(
        self,
        db: AsyncSession,
        task: AutoAnnotationTask,
    ) -> List[str]:
        """Work out the names of all datasets the task's files belong to.

        - With ``file_ids``: join the dataset-files table to the datasets
          table and collect the distinct, non-empty dataset names.
        - Without ``file_ids`` (or when the lookup yields nothing): fall back
          to the task's stored ``dataset_name``, then ``dataset_id``.
        """
        selected = task.file_ids or []
        if selected:
            query = (
                select(Dataset.name)
                .join(DatasetFiles, Dataset.id == DatasetFiles.dataset_id)
                .where(DatasetFiles.id.in_(selected))
                .distinct()
            )
            fetched = (await db.execute(query)).fetchall()
            names = [row[0] for row in fetched if row[0]]
            if names:
                return names

        # Fallback: report a single dataset.
        single = task.dataset_name or task.dataset_id
        return [single] if single else []

    async def soft_delete_task(self, db: AsyncSession, task_id: str) -> bool:
        """Mark the task as deleted; return ``False`` when no active task matches."""
        record = await self._find_active(db, task_id)
        if not record:
            return False
        record.deleted_at = datetime.now()
        await db.commit()
        return True

View File

@@ -1,8 +1,8 @@
#!/bin/bash #!/bin/bash
set -e set -e
if [-d $LOCAL_FILES_DOCUMENT_ROOT ] && $LOCAL_FILES_SERVING_ENABLED; then if [ -d "${LOCAL_FILES_DOCUMENT_ROOT}" ] && [ "${LOCAL_FILES_SERVING_ENABLED}" = "true" ]; then
echo "Using local document root: $LOCAL_FILES_DOCUMENT_ROOT" echo "Using local document root: ${LOCAL_FILES_DOCUMENT_ROOT}"
fi fi
# 启动应用 # 启动应用

17
runtime/ops/__init__.py Normal file
View File

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
"""Datamate built-in operators package.
This package contains built-in operators for filtering, slicing, annotation, etc.
It is mounted into the runtime container under ``datamate.ops`` so that
``from datamate.ops.annotation...`` imports work correctly.
"""
__all__ = [
"annotation",
"filter",
"formatter",
"llms",
"mapper",
"slicer",
"user",
]

View File

@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
"""Annotation-related operators (e.g. YOLO detection)."""
__all__ = [
"image_object_detection_bounding_box",
]

View File

@@ -0,0 +1,9 @@
"""Image object detection (YOLOv8) operator package.
This package exposes the ImageObjectDetectionBoundingBox annotator so that
the auto-annotation worker can import it via different module paths.
"""
from .process import ImageObjectDetectionBoundingBox
__all__ = ["ImageObjectDetectionBoundingBox"]

View File

@@ -0,0 +1,3 @@
name: image_object_detection_bounding_box
version: 0.1.0
description: "YOLOv8-based object detection operator for auto annotation"

View File

@@ -0,0 +1,214 @@
#!/usr/bin/python
# -- encoding: utf-8 --
"""
Description: 图像目标检测算子
Create: 2025/12/17
"""
import os
import json
import time
from typing import Dict, Any
import cv2
import numpy as np
from loguru import logger
try:
from ultralytics import YOLO
except ImportError:
logger.warning("ultralytics not installed. Please install it using: pip install ultralytics")
YOLO = None
from datamate.core.base_op import Mapper
# COCO 80 类别映射
COCO_CLASS_MAP = {
0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow",
20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 24: "backpack",
25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 29: "frisbee",
30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite",
34: "baseball bat", 35: "baseball glove", 36: "skateboard",
37: "surfboard", 38: "tennis racket", 39: "bottle",
40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon",
45: "bowl", 46: "banana", 47: "apple", 48: "sandwich", 49: "orange",
50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza",
54: "donut", 55: "cake", 56: "chair", 57: "couch",
58: "potted plant", 59: "bed", 60: "dining table", 61: "toilet",
62: "tv", 63: "laptop", 64: "mouse", 65: "remote",
66: "keyboard", 67: "cell phone", 68: "microwave", 69: "oven",
70: "toaster", 71: "sink", 72: "refrigerator", 73: "book",
74: "clock", 75: "vase", 76: "scissors", 77: "teddy bear",
78: "hair drier", 79: "toothbrush"
}
class ImageObjectDetectionBoundingBox(Mapper):
    """Image object-detection operator backed by a YOLOv8 model.

    For each sample the image is read, detections are filtered by the
    configured target classes, boxes and labels are drawn onto the image,
    and both the annotated image and a JSON annotation file are written.
    """

    # Maps the ``modelSize`` option to the YOLOv8 weights file name.
    MODEL_MAP = {
        "n": "yolov8n.pt",
        "s": "yolov8s.pt",
        "m": "yolov8m.pt",
        "l": "yolov8l.pt",
        "x": "yolov8x.pt",
    }

    def __init__(self, *args, **kwargs):
        """Read operator options from ``kwargs`` and load the YOLO model.

        Recognised keyword options: ``modelSize`` (default ``"l"``),
        ``confThreshold`` (default ``0.7``), ``targetClasses`` (default
        ``[]``, meaning all classes) and ``outputDir`` (default ``None``).

        Raises:
            ImportError: if ``ultralytics`` could not be imported.
        """
        super(ImageObjectDetectionBoundingBox, self).__init__(*args, **kwargs)

        self._model_size = kwargs.get("modelSize", "l")
        self._conf_threshold = kwargs.get("confThreshold", 0.7)
        self._target_classes = kwargs.get("targetClasses", [])
        self._output_dir = kwargs.get("outputDir", None)  # output directory

        # An empty class list means "detect every class".
        if not self._target_classes:
            self._target_classes = None
        else:
            # Normalise to a list of ints.
            self._target_classes = [int(cls_id) for cls_id in self._target_classes]

        # Unknown sizes fall back to the "l" weights; the weights file is
        # looked up next to this module first.
        model_filename = self.MODEL_MAP.get(self._model_size, "yolov8l.pt")
        current_dir = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_dir, model_filename)

        if YOLO is None:
            raise ImportError("ultralytics is not installed. Please install it.")

        if not os.path.exists(model_path):
            logger.warning(f"Model file {model_path} not found. Downloading from ultralytics...")
            # Passing only the file name lets ultralytics fetch the weights.
            self.model = YOLO(model_filename)
        else:
            self.model = YOLO(model_path)

        logger.info(f"Loaded YOLOv8 model: {model_filename}, "
                    f"conf_threshold: {self._conf_threshold}, "
                    f"target_classes: {self._target_classes}")

    @staticmethod
    def _get_color_by_class_id(class_id: int):
        """Return a stable colour tuple for ``class_id`` (BGR order for OpenCV).

        A private ``RandomState`` seeded with the class id is used instead of
        ``np.random.seed`` so that the process-wide NumPy RNG is not reseeded
        on every detection; the produced colours are the same as before.
        """
        rng = np.random.RandomState(class_id)
        color = rng.randint(0, 255, size=3).tolist()
        return tuple(color)

    def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
        """Run detection on the image referenced by ``sample`` and save the results.

        The image path is read from ``sample[self.image_key]`` (``image_key``
        is presumably provided by the ``Mapper`` base class - confirm). When
        the image is missing or unreadable the sample is returned unchanged.
        Otherwise ``detection_count``, ``output_image``, ``annotations_file``
        and ``annotations`` are added to the sample.
        """
        start = time.time()

        image_path = sample.get(self.image_key)
        if not image_path or not os.path.exists(image_path):
            logger.warning(f"Image file not found: {image_path}")
            return sample

        img = cv2.imread(image_path)
        if img is None:
            logger.warning(f"Failed to read image: {image_path}")
            return sample

        # Run the detector; only the first result is used (single image).
        results = self.model(img, conf=self._conf_threshold)
        r = results[0]

        h, w = img.shape[:2]
        annotations = {
            "image": os.path.basename(image_path),
            "width": w,
            "height": h,
            "model_size": self._model_size,
            "conf_threshold": self._conf_threshold,
            "selected_class_ids": self._target_classes,
            "detections": []
        }

        if r.boxes is not None:
            for box in r.boxes:
                cls_id = int(box.cls[0])

                # Skip classes that were not requested.
                if self._target_classes is not None and cls_id not in self._target_classes:
                    continue

                conf = float(box.conf[0])
                x1, y1, x2, y2 = map(float, box.xyxy[0])
                label = COCO_CLASS_MAP.get(cls_id, f"class_{cls_id}")

                annotations["detections"].append({
                    "label": label,
                    "class_id": cls_id,
                    "confidence": round(conf, 4),
                    "bbox_xyxy": [x1, y1, x2, y2],
                    "bbox_xywh": [x1, y1, x2 - x1, y2 - y1]
                })

                # Draw the box and its label onto the image.
                color = self._get_color_by_class_id(cls_id)
                cv2.rectangle(
                    img,
                    (int(x1), int(y1)),
                    (int(x2), int(y2)),
                    color,
                    2
                )
                cv2.putText(
                    img,
                    f"{label} {conf:.2f}",
                    # Keep the label inside the image when the box touches the top edge.
                    (int(x1), max(int(y1) - 5, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color,
                    1
                )

        # Use the configured output directory when it exists, otherwise the
        # directory of the source image.
        if self._output_dir and os.path.exists(self._output_dir):
            output_dir = self._output_dir
        else:
            output_dir = os.path.dirname(image_path)

        images_dir = os.path.join(output_dir, "images")
        annotations_dir = os.path.join(output_dir, "annotations")
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(annotations_dir, exist_ok=True)

        # Keep the original file name so image and JSON map one-to-one.
        base_name = os.path.basename(image_path)
        name_without_ext = os.path.splitext(base_name)[0]

        output_filename = base_name
        output_path = os.path.join(images_dir, output_filename)
        # cv2.imwrite signals failure through its return value; surface it
        # instead of silently reporting a file that was never written.
        if not cv2.imwrite(output_path, img):
            logger.warning(f"Failed to write annotated image: {output_path}")

        json_filename = f"{name_without_ext}.json"
        json_path = os.path.join(annotations_dir, json_filename)
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(annotations, f, indent=2, ensure_ascii=False)

        sample["detection_count"] = len(annotations["detections"])
        sample["output_image"] = output_path
        sample["annotations_file"] = json_path
        sample["annotations"] = annotations

        logger.info(f"Image: {os.path.basename(image_path)}, "
                    f"Detections: {len(annotations['detections'])}, "
                    f"Time: {(time.time() - start):.4f}s")

        return sample

View File

@@ -0,0 +1,166 @@
import os
import json
from pathlib import Path
from ultralytics import YOLO
import cv2
import numpy as np
def get_color_by_class_id(class_id: int):
    """Return a stable colour tuple (BGR) for ``class_id``.

    A private ``RandomState`` seeded with the class id is used rather than
    ``np.random.seed``, so calling this function no longer reseeds the
    process-wide NumPy RNG. The colours are the same as those produced by
    the previous seed-then-randint sequence.
    """
    rng = np.random.RandomState(class_id)
    color = rng.randint(0, 255, size=3).tolist()
    return tuple(color)
def mask_to_polygons(mask: np.ndarray):
    """Convert a binary mask into a list of COCO-style flat polygons.

    External contours are extracted with OpenCV; contours with fewer than
    three points are dropped and the rest are flattened into plain lists.
    """
    outlines, _hierarchy = cv2.findContours(
        mask,
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )
    return [
        outline.flatten().tolist()
        for outline in outlines
        if outline.shape[0] >= 3
    ]
IMAGE_DIR = "C:/Users/meta/Desktop/Datamate/yolo/Photos"
OUT_IMG_DIR = "outputs_seg/images"
OUT_JSON_DIR = "outputs_seg/annotations"
MODEL_MAP = {
"n": "yolov8n-seg.pt",
"s": "yolov8s-seg.pt",
"m": "yolov8m-seg.pt",
"l": "yolov8l-seg.pt",
"x": "yolov8x-seg.pt",
}
MODEL_KEY = "x"
MODEL_PATH = MODEL_MAP[MODEL_KEY]
CONF_THRES = 0.7
DRAW_BBOX = True
COCO_CLASS_MAP = {
0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow",
20: "elephant", 21: "bear", 22: "zebra", 23: "giraffe", 24: "backpack",
25: "umbrella", 26: "handbag", 27: "tie", 28: "suitcase", 29: "frisbee",
30: "skis", 31: "snowboard", 32: "sports ball", 33: "kite",
34: "baseball bat", 35: "baseball glove", 36: "skateboard",
37: "surfboard", 38: "tennis racket", 39: "bottle",
40: "wine glass", 41: "cup", 42: "fork", 43: "knife", 44: "spoon",
45: "bowl", 46: "banana", 47: "apple", 48: "sandwich", 49: "orange",
50: "broccoli", 51: "carrot", 52: "hot dog", 53: "pizza",
54: "donut", 55: "cake", 56: "chair", 57: "couch",
58: "potted plant", 59: "bed", 60: "dining table", 61: "toilet",
62: "tv", 63: "laptop", 64: "mouse", 65: "remote",
66: "keyboard", 67: "cell phone", 68: "microwave", 69: "oven",
70: "toaster", 71: "sink", 72: "refrigerator", 73: "book",
74: "clock", 75: "vase", 76: "scissors", 77: "teddy bear",
78: "hair drier", 79: "toothbrush"
}
TARGET_CLASS_IDS = [0, 2, 5]
os.makedirs(OUT_IMG_DIR, exist_ok=True)
os.makedirs(OUT_JSON_DIR, exist_ok=True)
if TARGET_CLASS_IDS is not None:
for cid in TARGET_CLASS_IDS:
if cid not in COCO_CLASS_MAP:
raise ValueError(f"Invalid class id: {cid}")
model = YOLO(MODEL_PATH)
image_paths = list(Path(IMAGE_DIR).glob("*.*"))
# Batch loop: run segmentation on every image, overlay the instance masks
# (and optionally the boxes), then write the annotated image and a JSON file.
for img_path in image_paths:
    img = cv2.imread(str(img_path))
    if img is None:
        print(f"[WARN] Failed to read {img_path}")
        continue

    results = model(img, conf=CONF_THRES)
    r = results[0]  # single image in, so only the first result is used

    h, w = img.shape[:2]
    annotations = {
        "image": img_path.name,
        "width": w,
        "height": h,
        "model_key": MODEL_KEY,
        "conf_threshold": CONF_THRES,
        "supported_classes": COCO_CLASS_MAP,
        "selected_class_ids": TARGET_CLASS_IDS,
        "instances": []
    }

    if r.boxes is not None and r.masks is not None:
        for i, box in enumerate(r.boxes):
            cls_id = int(box.cls[0])
            # Skip classes that were not selected.
            if TARGET_CLASS_IDS is not None and cls_id not in TARGET_CLASS_IDS:
                continue

            conf = float(box.conf[0])
            x1, y1, x2, y2 = map(float, box.xyxy[0])
            label = COCO_CLASS_MAP[cls_id]

            # Binarise the i-th mask and scale it to the image size.
            mask = r.masks.data[i].cpu().numpy()
            mask = (mask > 0.5).astype(np.uint8)
            mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

            # Blend the class colour 50/50 into the masked pixels.
            color = get_color_by_class_id(cls_id)
            img[mask == 1] = (
                img[mask == 1] * 0.5 + np.array(color) * 0.5
            ).astype(np.uint8)

            # Honour the DRAW_BBOX switch (previously a hard-coded ``if True``).
            if DRAW_BBOX:
                cv2.rectangle(
                    img,
                    (int(x1), int(y1)),
                    (int(x2), int(y2)),
                    color,
                    2
                )
                cv2.putText(
                    img,
                    f"{label} {conf:.2f}",
                    (int(x1), max(int(y1) - 5, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color,
                    1
                )

            polygons = mask_to_polygons(mask)
            annotations["instances"].append({
                "label": label,
                "class_id": cls_id,
                "confidence": round(conf, 4),
                "bbox_xyxy": [x1, y1, x2, y2],
                "segmentation": polygons
            })

    out_img_path = os.path.join(OUT_IMG_DIR, img_path.name)
    out_json_path = os.path.join(OUT_JSON_DIR, img_path.stem + ".json")
    cv2.imwrite(out_img_path, img)
    with open(out_json_path, "w", encoding="utf-8") as f:
        json.dump(annotations, f, indent=2, ensure_ascii=False)
    print(f"[OK] {img_path.name}")

print("Segmentation batch finished.")

View File

@@ -31,4 +31,5 @@ dependencies = [
"sqlalchemy>=2.0.44", "sqlalchemy>=2.0.44",
"xmltodict>=1.0.2", "xmltodict>=1.0.2",
"zhconv>=1.4.3", "zhconv>=1.4.3",
"ultralytics>=8.0.0",
] ]

View File

@@ -0,0 +1,603 @@
# -*- coding: utf-8 -*-
"""Simple background worker for auto-annotation tasks.
This module runs inside the datamate-runtime container (operator_runtime service).
It polls `t_dm_auto_annotation_tasks` for pending tasks and performs YOLO
inference using the ImageObjectDetectionBoundingBox operator, updating
progress back to the same table so that the datamate-python backend and
frontend can display real-time status.
设计目标(最小可用版本):
- 单实例 worker,串行处理 `pending` 状态的任务。
- 对指定数据集下的所有已完成文件逐张执行目标检测。
- 按已处理图片数更新 `processed_images`、`progress`、`detected_objects`、`status` 等字段。
- 失败时将任务标记为 `failed` 并记录 `error_message`。
注意:
- 为了保持简单,目前不处理 "running" 状态的恢复逻辑;容器重启时,
已处于 running 的任务不会被重新拉起,需要后续扩展。
"""
from __future__ import annotations
import json
import os
import sys
import threading
import time
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from loguru import logger
from sqlalchemy import text
from datamate.sql_manager.sql_manager import SQLManager
# Resolve the YOLO detection operator class. Several import paths are tried so
# that different packaging/installation layouts are supported. If every attempt
# fails the name stays None and the rest of the module must check for that.
ImageObjectDetectionBoundingBox = None  # type: ignore
try:
    # Prefer the datamate.ops path (the case where sources are COPY'd to /opt/runtime/datamate/ops).
    from datamate.ops.annotation.image_object_detection_bounding_box.process import (  # type: ignore
        ImageObjectDetectionBoundingBox,
    )
    logger.info(
        "Imported ImageObjectDetectionBoundingBox from datamate.ops.annotation.image_object_detection_bounding_box",
    )
except Exception as e1:  # pragma: no cover - on import failure only log, so the module as a whole does not crash
    logger.error(
        "Failed to import ImageObjectDetectionBoundingBox via datamate.ops: {}",
        e1,
    )
    try:
        # Also support a top-level ops package install (exposed via ops.pth).
        from ops.annotation.image_object_detection_bounding_box.process import (  # type: ignore
            ImageObjectDetectionBoundingBox,
        )
        logger.info(
            "Imported ImageObjectDetectionBoundingBox from top-level ops.annotation.image_object_detection_bounding_box",
        )
    except Exception as e2:
        logger.error(
            "Failed to import ImageObjectDetectionBoundingBox via top-level ops package: {}",
            e2,
        )
        ImageObjectDetectionBoundingBox = None
# Further fallback: load the operator straight from the local runtime/ops
# directory (a common situation in development environments).
if ImageObjectDetectionBoundingBox is None:
    try:
        # NOTE(review): assumes this file sits two directory levels below the
        # directory that contains "ops" - confirm against the repository layout.
        project_root = Path(__file__).resolve().parents[2]
        ops_root = project_root / "ops"
        if ops_root.is_dir():
            # Make sure the parent of ops is on sys.path so "ops.xxx" imports resolve.
            if str(project_root) not in sys.path:
                sys.path.insert(0, str(project_root))
            from ops.annotation.image_object_detection_bounding_box.process import (  # type: ignore
                ImageObjectDetectionBoundingBox,
            )
            logger.info(
                "Imported ImageObjectDetectionBoundingBox from local runtime/ops.annotation.image_object_detection_bounding_box",
            )
        else:
            logger.warning(
                "Local runtime/ops directory not found when trying to import ImageObjectDetectionBoundingBox: {}",
                ops_root,
            )
    except Exception as e3:  # pragma: no cover - if the fallback fails, only log
        logger.error(
            "Failed to import ImageObjectDetectionBoundingBox from local runtime/ops: {}",
            e3,
        )
        ImageObjectDetectionBoundingBox = None
# Poll interval in seconds, read from the AUTO_ANNOTATION_POLL_INTERVAL
# environment variable (default "5"). A non-numeric value makes float() raise
# at import time.
POLL_INTERVAL_SECONDS = float(os.getenv("AUTO_ANNOTATION_POLL_INTERVAL", "5"))
# Output root path, read from the AUTO_ANNOTATION_OUTPUT_ROOT environment
# variable (default "/dataset").
DEFAULT_OUTPUT_ROOT = os.getenv(
    "AUTO_ANNOTATION_OUTPUT_ROOT", "/dataset"
)
def _fetch_pending_task() -> Optional[Dict[str, Any]]:
    """Fetch the oldest pending task from t_dm_auto_annotation_tasks.

    Returns:
        None when no non-deleted task is in the ``pending`` state. Otherwise a
        dict of the selected columns in which

        * ``config`` is always a dict (empty when the column is NULL, cannot be
          parsed, or does not hold a JSON object), and
        * ``file_ids`` is a non-empty list, or None when the column is empty,
          cannot be parsed, or does not hold a JSON array.

    Both JSON columns are accepted either as raw JSON text or as a value the
    database driver has already decoded, so an already-decoded ``config`` is no
    longer discarded.
    """
    sql = text(
        """
        SELECT id, name, dataset_id, dataset_name, config, file_ids, status,
        total_images, processed_images, detected_objects, output_path
        FROM t_dm_auto_annotation_tasks
        WHERE status = 'pending' AND deleted_at IS NULL
        ORDER BY created_at ASC
        LIMIT 1
        """
    )
    with SQLManager.create_connect() as conn:
        result = conn.execute(sql).fetchone()
        if not result:
            return None
        row = dict(result._mapping)  # type: ignore[attr-defined]

    task_id = row.get("id")
    config = _decode_json_column(row.get("config"), "config", task_id)
    # Downstream code calls config.get(...), so anything but a JSON object is unusable.
    row["config"] = config if isinstance(config, dict) else {}
    file_ids = _decode_json_column(row.get("file_ids"), "file_ids", task_id)
    # None means "process every file of the dataset".
    row["file_ids"] = file_ids if isinstance(file_ids, list) and file_ids else None
    return row


def _decode_json_column(raw: Any, column: str, task_id: Any) -> Any:
    """Decode a JSON column value given as text/bytes or already decoded.

    Returns None for empty values and for text that is not valid JSON; the
    parse failure is logged instead of being silently swallowed.
    """
    if not raw:
        return None
    if isinstance(raw, (str, bytes, bytearray)):
        try:
            return json.loads(raw)
        except (TypeError, ValueError) as exc:
            logger.warning(
                "Ignoring unparsable JSON in column {} of auto-annotation task {}: {}",
                column,
                task_id,
                exc,
            )
            return None
    return raw
def _update_task_status(
    task_id: str,
    *,
    status: str,
    progress: Optional[int] = None,
    processed_images: Optional[int] = None,
    detected_objects: Optional[int] = None,
    total_images: Optional[int] = None,
    output_path: Optional[str] = None,
    error_message: Optional[str] = None,
    completed: bool = False,
) -> None:
    """Write a task's status and any supplied statistics back to the task table.

    ``status`` and ``updated_at`` are always written. Every other column is
    only included in the UPDATE when its argument is not None.

    Args:
        task_id: Primary key of the row in t_dm_auto_annotation_tasks.
        status: New status value.
        progress: Progress value, stored as int.
        processed_images: Number of processed images, stored as int.
        detected_objects: Number of detected objects, stored as int.
        total_images: Total number of images, stored as int.
        output_path: Output directory of the task.
        error_message: Error text; truncated to 2000 characters.
        completed: When true, ``completed_at`` is set to the current time.
    """
    assignments: List[str] = ["status = :status", "updated_at = :updated_at"]
    bind: Dict[str, Any] = {
        "task_id": task_id,
        "status": status,
        "updated_at": datetime.now(),
    }

    # Integer counters share the same "skip when None, coerce to int" handling.
    counters = (
        ("progress", progress),
        ("processed_images", processed_images),
        ("detected_objects", detected_objects),
        ("total_images", total_images),
    )
    for column, value in counters:
        if value is None:
            continue
        assignments.append(f"{column} = :{column}")
        bind[column] = int(value)

    if output_path is not None:
        assignments.append("output_path = :output_path")
        bind["output_path"] = output_path
    if error_message is not None:
        assignments.append("error_message = :error_message")
        bind["error_message"] = error_message[:2000]
    if completed:
        assignments.append("completed_at = :completed_at")
        bind["completed_at"] = datetime.now()

    statement = text(
        f"""
        UPDATE t_dm_auto_annotation_tasks
        SET {', '.join(assignments)}
        WHERE id = :task_id
        """
    )
    with SQLManager.create_connect() as conn:
        conn.execute(statement, bind)
def _load_dataset_files(dataset_id: str) -> List[Tuple[str, str, str]]:
    """Load every ACTIVE file record of one dataset, oldest first.

    Returns:
        A list of ``(id, file_path, file_name)`` tuples, each value converted
        with ``str()``.
    """
    query = text(
        """
        SELECT id, file_path, file_name
        FROM t_dm_dataset_files
        WHERE dataset_id = :dataset_id
        AND status = 'ACTIVE'
        ORDER BY created_at ASC
        """
    )
    with SQLManager.create_connect() as conn:
        records = conn.execute(query, {"dataset_id": dataset_id}).fetchall()

    loaded: List[Tuple[str, str, str]] = []
    for record in records:
        loaded.append((str(record[0]), str(record[1]), str(record[2])))
    return loaded
def _load_files_by_ids(file_ids: List[str]) -> List[Tuple[str, str, str]]:
    """Load ACTIVE file records by id; the ids may belong to several datasets.

    Returns:
        A list of ``(id, file_path, file_name)`` tuples ordered by
        ``created_at``; an empty list when ``file_ids`` is empty.
    """
    if not file_ids:
        return []

    # One named bind parameter per id: only the parameter names are
    # interpolated into the SQL text, the values are always bound.
    bind: Dict[str, Any] = {}
    for index, file_id in enumerate(file_ids):
        bind[f"id{index}"] = str(file_id)
    placeholders = ", ".join(f":id{index}" for index in range(len(file_ids)))

    query = text(
        f"""
        SELECT id, file_path, file_name
        FROM t_dm_dataset_files
        WHERE id IN ({placeholders})
        AND status = 'ACTIVE'
        ORDER BY created_at ASC
        """
    )
    with SQLManager.create_connect() as conn:
        records = conn.execute(query, bind).fetchall()

    loaded: List[Tuple[str, str, str]] = []
    for record in records:
        loaded.append((str(record[0]), str(record[1]), str(record[2])))
    return loaded
def _ensure_output_dir(output_dir: str) -> str:
"""确保输出目录及其 images/、annotations/ 子目录存在。"""
os.makedirs(output_dir, exist_ok=True)
os.makedirs(os.path.join(output_dir, "images"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "annotations"), exist_ok=True)
return output_dir
def _create_output_dataset(
    source_dataset_id: str,
    source_dataset_name: str,
    output_dataset_name: str,
) -> Tuple[str, str]:
    """Insert a new dataset row that will hold the auto-annotation results.

    The new dataset gets a fresh UUID, type ``IMAGE`` and status ``ACTIVE``;
    its path is ``<output root>/<new id>``. Only the database row is created
    here, not the directory.

    Args:
        source_dataset_id: Id of the source dataset; used in the description
            when no source name is available.
        source_dataset_name: Name of the source dataset, used in the description.
        output_dataset_name: Name stored for the new dataset.

    Returns:
        ``(dataset_id, path)`` of the new dataset.
    """
    dataset_id = str(uuid.uuid4())
    # Fall back to "/dataset" when the configured root is empty or just "/".
    root = DEFAULT_OUTPUT_ROOT.rstrip("/") or "/dataset"
    dataset_path = os.path.join(root, dataset_id)

    source_label = source_dataset_name or source_dataset_id
    description = f"Auto annotations for dataset {source_label}"[:255]

    statement = text(
        """
        INSERT INTO t_dm_datasets (id, name, description, dataset_type, path, status)
        VALUES (:id, :name, :description, :dataset_type, :path, :status)
        """
    )
    with SQLManager.create_connect() as conn:
        conn.execute(
            statement,
            {
                "id": dataset_id,
                "name": output_dataset_name,
                "description": description,
                "dataset_type": "IMAGE",
                "path": dataset_path,
                "status": "ACTIVE",
            },
        )
    return dataset_id, dataset_path
def _register_output_dataset(
    task_id: str,
    output_dataset_id: str,
    output_dir: str,
    output_dataset_name: str,
    total_images: int,
) -> None:
    """Register the files produced by a task in the newly created dataset.

    Every regular file under ``<output_dir>/images`` and, when that directory
    exists, ``<output_dir>/annotations`` gets one row in t_dm_dataset_files.
    The dataset's ``file_count`` and ``size_bytes`` are then increased
    accordingly. Nothing is written when the images directory is missing or
    holds no files.

    Args:
        task_id: Task id, used for logging only.
        output_dataset_id: Dataset the files are attached to.
        output_dir: Root output directory of the task.
        output_dataset_name: Dataset name, used for logging only.
        total_images: Accepted for interface compatibility; not used here.
    """

    def _scan(directory: str) -> List[Tuple[str, str, int]]:
        """List regular files of a directory as (name, path, size), sorted by name."""
        found: List[Tuple[str, str, int]] = []
        for entry_name in sorted(os.listdir(directory)):
            entry_path = os.path.join(directory, entry_name)
            if not os.path.isfile(entry_path):
                continue
            try:
                entry_size = int(os.path.getsize(entry_path))
            except OSError:
                # An unreadable size is recorded as 0 instead of aborting the scan.
                entry_size = 0
            found.append((entry_name, entry_path, entry_size))
        return found

    images_dir = os.path.join(output_dir, "images")
    if not os.path.isdir(images_dir):
        logger.warning(
            "Auto-annotation images directory not found for task {}: {}",
            task_id,
            images_dir,
        )
        return

    image_files = _scan(images_dir)
    annotations_dir = os.path.join(output_dir, "annotations")
    annotation_files = _scan(annotations_dir) if os.path.isdir(annotations_dir) else []
    total_size = sum(size for _, _, size in image_files + annotation_files)

    if not image_files:
        logger.warning(
            "No image files found in auto-annotation output for task {}: {}",
            task_id,
            images_dir,
        )
        return

    insert_file_sql = text(
        """
        INSERT INTO t_dm_dataset_files (
        id, dataset_id, file_name, file_path, file_type, file_size, status
        ) VALUES (
        :id, :dataset_id, :file_name, :file_path, :file_type, :file_size, :status
        )
        """
    )
    update_dataset_stat_sql = text(
        """
        UPDATE t_dm_datasets
        SET file_count = COALESCE(file_count, 0) + :add_count,
        size_bytes = COALESCE(size_bytes, 0) + :add_size
        WHERE id = :dataset_id
        """
    )

    with SQLManager.create_connect() as conn:
        added_count = 0
        # Images are inserted first, annotation files afterwards.
        for file_name, file_path, file_size in image_files + annotation_files:
            # File type is the upper-cased extension without the dot, or None.
            extension = os.path.splitext(file_name)[1].lstrip(".").upper() or None
            conn.execute(
                insert_file_sql,
                {
                    "id": str(uuid.uuid4()),
                    "dataset_id": output_dataset_id,
                    "file_name": file_name,
                    "file_path": file_path,
                    "file_type": extension,
                    "file_size": int(file_size),
                    "status": "ACTIVE",
                },
            )
            added_count += 1
        if added_count > 0:
            conn.execute(
                update_dataset_stat_sql,
                {
                    "dataset_id": output_dataset_id,
                    "add_count": added_count,
                    "add_size": int(total_size),
                },
            )

    logger.info(
        "Registered auto-annotation output into dataset: dataset_id={}, name={}, added_files={}, added_size_bytes={}, task_id={}, output_dir={}",
        output_dataset_id,
        output_dataset_name,
        len(image_files) + len(annotation_files),
        total_size,
        task_id,
        output_dir,
    )
def _process_single_task(task: Dict[str, Any]) -> None:
    """Run one auto-annotation task and never leave it in a non-terminal state.

    When the YOLO operator could not be imported the task is marked ``failed``
    right away. Otherwise the task is executed; if anything raises outside the
    per-image error handling (for example an invalid ``confThreshold``, a
    non-object config, or a database error while loading files or creating the
    output dataset) the task is marked ``failed`` and the exception is
    re-raised. Without this the task would stay ``pending`` and be re-fetched
    forever by the worker loop, blocking every later task, or stay ``running``
    forever.

    Args:
        task: Task row as returned by ``_fetch_pending_task``.

    Raises:
        Exception: Whatever the task execution raised, after the task has been
            marked ``failed``.
    """
    if ImageObjectDetectionBoundingBox is None:
        logger.error(
            "YOLO operator not available (import failed earlier), skip auto-annotation task: {}",
            task["id"],
        )
        _update_task_status(
            task["id"],
            status="failed",
            error_message="YOLO operator not available in runtime container",
        )
        return

    task_id = str(task["id"])
    try:
        _run_auto_annotation_task(task_id, task)
    except Exception as e:
        logger.error(
            "Auto-annotation task {} aborted by unexpected error: {}",
            task_id,
            e,
        )
        try:
            _update_task_status(
                task_id,
                status="failed",
                error_message=f"Auto-annotation task failed: {e}",
            )
        except Exception as update_error:  # pragma: no cover - defensive logging
            logger.error(
                "Failed to mark auto-annotation task {} as failed: {}",
                task_id,
                update_error,
            )
        # Re-raise so the caller still sees the error, exactly as before.
        raise


def _run_auto_annotation_task(task_id: str, task: Dict[str, Any]) -> None:
    """Execute detection for every file of a task and record progress and results."""
    dataset_id = str(task["dataset_id"])
    task_name = str(task.get("name") or "")
    source_dataset_name = str(task.get("dataset_name") or "")
    cfg: Dict[str, Any] = task.get("config") or {}
    selected_file_ids: Optional[List[str]] = task.get("file_ids") or None
    model_size = cfg.get("modelSize", "l")
    conf_threshold = float(cfg.get("confThreshold", 0.7))
    target_classes = cfg.get("targetClasses", []) or []
    output_dataset_name = cfg.get("outputDatasetName")
    if not output_dataset_name:
        # Derive a name from the source dataset, the task name, or the dataset id.
        base_name = source_dataset_name or task_name or f"dataset-{dataset_id[:8]}"
        output_dataset_name = f"{base_name}_auto_{task_id[:8]}"
    logger.info(
        "Start processing auto-annotation task: id={}, dataset_id={}, model_size={}, conf_threshold={}, target_classes={}, output_dataset_name={}",
        task_id,
        dataset_id,
        model_size,
        conf_threshold,
        target_classes,
        output_dataset_name,
    )
    _update_task_status(task_id, status="running", progress=0)

    # An explicit file selection takes precedence over "all files of the dataset".
    if selected_file_ids:
        all_files = _load_files_by_ids(selected_file_ids)
    else:
        all_files = _load_dataset_files(dataset_id)
    files = [(path, name) for _, path, name in all_files]
    total_images = len(files)
    if total_images == 0:
        logger.warning("No files found for dataset {} when running auto-annotation task {}", dataset_id, task_id)
        _update_task_status(
            task_id,
            status="completed",
            progress=100,
            total_images=0,
            processed_images=0,
            detected_objects=0,
            completed=True,
            output_path=None,
        )
        return

    output_dataset_id, output_dir = _create_output_dataset(
        source_dataset_id=dataset_id,
        source_dataset_name=source_dataset_name,
        output_dataset_name=output_dataset_name,
    )
    output_dir = _ensure_output_dir(output_dir)

    try:
        detector = ImageObjectDetectionBoundingBox(
            modelSize=model_size,
            confThreshold=conf_threshold,
            targetClasses=target_classes,
            outputDir=output_dir,
        )
    except Exception as e:
        logger.error("Failed to init YOLO detector for task {}: {}", task_id, e)
        _update_task_status(
            task_id,
            status="failed",
            total_images=total_images,
            processed_images=0,
            detected_objects=0,
            error_message=f"Init YOLO detector failed: {e}",
        )
        return

    processed = 0
    detected_total = 0
    for file_path, file_name in files:
        # A failure on one image is logged and skipped; the task keeps going.
        try:
            sample = {
                "image": file_path,
                "filename": file_name,
            }
            result = detector.execute(sample)
            annotations = (result or {}).get("annotations", {})
            detections = annotations.get("detections", [])
            detected_total += len(detections)
            processed += 1
            progress = int(processed * 100 / total_images) if total_images > 0 else 100
            _update_task_status(
                task_id,
                status="running",
                progress=progress,
                processed_images=processed,
                detected_objects=detected_total,
                total_images=total_images,
                output_path=output_dir,
            )
        except Exception as e:
            logger.error(
                "Failed to process image for task {}: file_path={}, error={}",
                task_id,
                file_path,
                e,
            )
            continue

    _update_task_status(
        task_id,
        status="completed",
        progress=100,
        processed_images=processed,
        detected_objects=detected_total,
        total_images=total_images,
        output_path=output_dir,
        completed=True,
    )
    logger.info(
        "Completed auto-annotation task: id={}, total_images={}, processed={}, detected_objects={}, output_path={}",
        task_id,
        total_images,
        processed,
        detected_total,
        output_dir,
    )

    if output_dataset_name and output_dataset_id:
        # Registration is best effort: the task is already marked completed.
        try:
            _register_output_dataset(
                task_id=task_id,
                output_dataset_id=output_dataset_id,
                output_dir=output_dir,
                output_dataset_name=output_dataset_name,
                total_images=total_images,
            )
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error(
                "Failed to register auto-annotation output as dataset for task {}: {}",
                task_id,
                e,
            )
def _worker_loop() -> None:
    """Main loop of the worker; never returns.

    Each iteration fetches one pending task and processes it. When there is no
    task, or when an iteration raises, the loop sleeps for the poll interval
    before trying again.
    """
    logger.info(
        "Auto-annotation worker started with poll interval {} seconds, output root {}",
        POLL_INTERVAL_SECONDS,
        DEFAULT_OUTPUT_ROOT,
    )
    while True:
        try:
            pending = _fetch_pending_task()
            if pending:
                _process_single_task(pending)
            else:
                time.sleep(POLL_INTERVAL_SECONDS)
        except Exception as exc:  # pragma: no cover - defensive logging
            logger.error("Auto-annotation worker loop error: {}", exc)
            time.sleep(POLL_INTERVAL_SECONDS)
def start_auto_annotation_worker() -> None:
    """Start the auto-annotation worker loop in a daemon thread.

    Every call starts a new thread named ``auto-annotation-worker``.
    """
    worker = threading.Thread(
        name="auto-annotation-worker",
        target=_worker_loop,
        daemon=True,
    )
    worker.start()
    logger.info("Auto-annotation worker thread started: {}", worker.name)

View File

@@ -1,163 +1,174 @@
import os import os
from typing import Optional, Dict, Any, List from typing import Optional, Dict, Any, List
import uvicorn import uvicorn
import yaml import yaml
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from jsonargparse import ArgumentParser from jsonargparse import ArgumentParser
from loguru import logger from loguru import logger
from pydantic import BaseModel from pydantic import BaseModel
from datamate.common.error_code import ErrorCode from datamate.common.error_code import ErrorCode
from datamate.scheduler import cmd_scheduler from datamate.scheduler import cmd_scheduler
from datamate.scheduler import func_scheduler from datamate.scheduler import func_scheduler
from datamate.wrappers import WRAPPERS from datamate.wrappers import WRAPPERS
from datamate.auto_annotation_worker import start_auto_annotation_worker
# 日志配置
LOG_DIR = "/var/log/datamate/runtime" # 日志配置
os.makedirs(LOG_DIR, exist_ok=True) LOG_DIR = "/var/log/datamate/runtime"
logger.add( os.makedirs(LOG_DIR, exist_ok=True)
f"{LOG_DIR}/runtime.log", logger.add(
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} - {message}", f"{LOG_DIR}/runtime.log",
level="DEBUG", format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} - {message}",
enqueue=True level="DEBUG",
) enqueue=True
)
app = FastAPI()
app = FastAPI()
class APIException(Exception):
"""自定义API异常""" class APIException(Exception):
"""自定义API异常"""
def __init__(self, error_code: ErrorCode, detail: Optional[str] = None,
extra_data: Optional[Dict] = None): def __init__(self, error_code: ErrorCode, detail: Optional[str] = None,
self.error_code = error_code extra_data: Optional[Dict] = None):
self.detail = detail or error_code.value[1] self.error_code = error_code
self.code = error_code.value[0] self.detail = detail or error_code.value[1]
self.extra_data = extra_data self.code = error_code.value[0]
super().__init__(self.detail) self.extra_data = extra_data
super().__init__(self.detail)
def to_dict(self) -> Dict[str, Any]:
result = { def to_dict(self) -> Dict[str, Any]:
"code": self.code, result = {
"message": self.detail, "code": self.code,
"success": False "message": self.detail,
} "success": False
if self.extra_data: }
result["data"] = self.extra_data if self.extra_data:
return result result["data"] = self.extra_data
return result
@app.exception_handler(APIException)
async def api_exception_handler(request: Request, exc: APIException): @app.on_event("startup")
return JSONResponse( async def startup_event():
status_code=200, # 业务错误返回 200,错误信息在响应体中 """FastAPI 启动时初始化后台自动标注 worker。"""
content=exc.to_dict()
) try:
start_auto_annotation_worker()
except Exception as e: # pragma: no cover - 防御性日志
class QueryTaskRequest(BaseModel): logger.error("Failed to start auto-annotation worker: {}", e)
task_ids: List[str]
@app.exception_handler(APIException)
@app.post("/api/task/list") async def api_exception_handler(request: Request, exc: APIException):
async def query_task_info(request: QueryTaskRequest): return JSONResponse(
try: status_code=200, # 业务错误返回 200,错误信息在响应体中
return [{task_id: cmd_scheduler.get_task_status(task_id)} for task_id in request.task_ids] content=exc.to_dict()
except Exception as e: )
raise APIException(ErrorCode.UNKNOWN_ERROR)
class QueryTaskRequest(BaseModel):
@app.post("/api/task/{task_id}/submit") task_ids: List[str]
async def submit_task(task_id):
config_path = f"/flow/{task_id}/process.yaml"
logger.info("Start submitting job...") @app.post("/api/task/list")
async def query_task_info(request: QueryTaskRequest):
dataset_path = get_from_cfg(task_id, "dataset_path") try:
if not check_valid_path(dataset_path): return [{task_id: cmd_scheduler.get_task_status(task_id)} for task_id in request.task_ids]
logger.error(f"dataset_path is not existed! please check this path.") except Exception as e:
raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR) raise APIException(ErrorCode.UNKNOWN_ERROR)
try:
executor_type = get_from_cfg(task_id, "executor_type") @app.post("/api/task/{task_id}/submit")
await WRAPPERS.get(executor_type).submit(task_id, config_path) async def submit_task(task_id):
config_path = f"/flow/{task_id}/process.yaml"
except Exception as e: logger.info("Start submitting job...")
logger.error(f"Error happens during submitting task. Error Info following: {e}")
raise APIException(ErrorCode.SUBMIT_TASK_ERROR) dataset_path = get_from_cfg(task_id, "dataset_path")
if not check_valid_path(dataset_path):
logger.info(f"task id: {task_id} has been submitted.") logger.error(f"dataset_path is not existed! please check this path.")
success_json_info = JSONResponse( raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
content={"status": "Success", "message": f"{task_id} has been submitted"},
status_code=200 try:
) executor_type = get_from_cfg(task_id, "executor_type")
return success_json_info await WRAPPERS.get(executor_type).submit(task_id, config_path)
except Exception as e:
@app.post("/api/task/{task_id}/stop") logger.error(f"Error happens during submitting task. Error Info following: {e}")
async def stop_task(task_id): raise APIException(ErrorCode.SUBMIT_TASK_ERROR)
logger.info("Start stopping ray job...")
success_json_info = JSONResponse( logger.info(f"task id: {task_id} has been submitted.")
content={"status": "Success", "message": f"{task_id} has been stopped"}, success_json_info = JSONResponse(
status_code=200 content={"status": "Success", "message": f"{task_id} has been submitted"},
) status_code=200
)
try: return success_json_info
executor_type = get_from_cfg(task_id, "executor_type")
if not WRAPPERS.get(executor_type).cancel(task_id):
raise APIException(ErrorCode.CANCEL_TASK_ERROR) @app.post("/api/task/{task_id}/stop")
except Exception as e: async def stop_task(task_id):
if isinstance(e, APIException): logger.info("Start stopping ray job...")
raise e success_json_info = JSONResponse(
raise APIException(ErrorCode.UNKNOWN_ERROR) content={"status": "Success", "message": f"{task_id} has been stopped"},
status_code=200
logger.info(f"{task_id} has been stopped.") )
return success_json_info
try:
executor_type = get_from_cfg(task_id, "executor_type")
def check_valid_path(file_path): if not WRAPPERS.get(executor_type).cancel(task_id):
full_path = os.path.abspath(file_path) raise APIException(ErrorCode.CANCEL_TASK_ERROR)
return os.path.exists(full_path) except Exception as e:
if isinstance(e, APIException):
raise e
def get_from_cfg(task_id, key): raise APIException(ErrorCode.UNKNOWN_ERROR)
config_path = f"/flow/{task_id}/process.yaml"
if not check_valid_path(config_path): logger.info(f"{task_id} has been stopped.")
logger.error(f"config_path is not existed! please check this path.") return success_json_info
raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
with open(config_path, "r", encoding='utf-8') as f: def check_valid_path(file_path):
content = f.read() full_path = os.path.abspath(file_path)
cfg = yaml.safe_load(content) return os.path.exists(full_path)
return cfg[key]
def get_from_cfg(task_id, key):
def parse_args(): config_path = f"/flow/{task_id}/process.yaml"
parser = ArgumentParser(description="Create API for Submitting Job to Data-juicer") if not check_valid_path(config_path):
logger.error(f"config_path is not existed! please check this path.")
parser.add_argument( raise APIException(ErrorCode.FILE_NOT_FOUND_ERROR)
'--ip',
type=str, with open(config_path, "r", encoding='utf-8') as f:
default="0.0.0.0", content = f.read()
help='Service ip for this API, default to use 0.0.0.0.' cfg = yaml.safe_load(content)
) return cfg[key]
parser.add_argument(
'--port', def parse_args():
type=int, parser = ArgumentParser(description="Create API for Submitting Job to Data-juicer")
default=8080,
help='Service port for this API, default to use 8600.' parser.add_argument(
) '--ip',
type=str,
return parser.parse_args() default="0.0.0.0",
help='Service ip for this API, default to use 0.0.0.0.'
)
if __name__ == '__main__':
p_args = parse_args() parser.add_argument(
'--port',
uvicorn.run( type=int,
app, default=8080,
host=p_args.ip, help='Service port for this API, default to use 8600.'
port=p_args.port )
)
return parser.parse_args()
if __name__ == '__main__':
p_args = parse_args()
uvicorn.run(
app,
host=p_args.ip,
port=p_args.port
)

View File

@@ -1,473 +1,497 @@
use datamate; use datamate;
CREATE TABLE t_dm_annotation_templates ( CREATE TABLE t_dm_annotation_templates (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
name VARCHAR(100) NOT NULL COMMENT '模板名称', name VARCHAR(100) NOT NULL COMMENT '模板名称',
description VARCHAR(500) COMMENT '模板描述', description VARCHAR(500) COMMENT '模板描述',
data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries', data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries',
labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc', labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc',
configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)', configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)',
style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical', style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',
category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system', category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system',
built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板', built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',
version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本', version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)', deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
INDEX idx_data_type (data_type), INDEX idx_data_type (data_type),
INDEX idx_labeling_type (labeling_type), INDEX idx_labeling_type (labeling_type),
INDEX idx_category (category), INDEX idx_category (category),
INDEX idx_built_in (built_in) INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表'; ) COMMENT='标注配置模板表';
CREATE TABLE t_dm_labeling_projects ( CREATE TABLE t_dm_labeling_projects (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID', dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
name VARCHAR(100) NOT NULL COMMENT '项目名称', name VARCHAR(100) NOT NULL COMMENT '项目名称',
labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID', labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
template_id VARCHAR(36) NULL COMMENT '使用的模板ID', template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)', configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
progress JSON COMMENT '项目进度信息', progress JSON COMMENT '项目进度信息',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)', deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL, FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
INDEX idx_dataset_id (dataset_id), INDEX idx_dataset_id (dataset_id),
INDEX idx_template_id (template_id), INDEX idx_template_id (template_id),
INDEX idx_labeling_project_id (labeling_project_id) INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表'; ) COMMENT='标注项目表';
-- 自动标注任务表
-- 内置标注模板初始化数据 CREATE TABLE t_dm_auto_annotation_tasks (
-- 这些模板将在系统首次启动时自动创建 id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
-- 使用 INSERT ... ON DUPLICATE KEY UPDATE 来覆盖已存在的记录 name VARCHAR(255) NOT NULL COMMENT '任务名称',
dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
-- 1. 图像分类模板 dataset_name VARCHAR(255) COMMENT '数据集名称(冗余字段,方便查询)',
INSERT INTO t_dm_annotation_templates ( config JSON NOT NULL COMMENT '任务配置(模型规模、置信度等)',
id, name, description, data_type, labeling_type, file_ids JSON COMMENT '要处理的文件ID列表,为空则处理数据集所有图像',
configuration, style, category, built_in, version, created_at status VARCHAR(50) NOT NULL DEFAULT 'pending' COMMENT '任务状态: pending/running/completed/failed',
) VALUES ( progress INT DEFAULT 0 COMMENT '任务进度 0-100',
'tpl-image-classification-001', total_images INT DEFAULT 0 COMMENT '总图片数',
'图像分类', processed_images INT DEFAULT 0 COMMENT '已处理图片数',
'简单的多标签图像分类模板', detected_objects INT DEFAULT 0 COMMENT '检测到的对象总数',
'图像', output_path VARCHAR(500) COMMENT '输出路径',
'分类', error_message TEXT COMMENT '错误信息',
JSON_OBJECT( created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
'labels', JSON_ARRAY( updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
JSON_OBJECT( completed_at TIMESTAMP NULL COMMENT '完成时间',
'fromName', 'choice', deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
'toName', 'image', INDEX idx_dataset_id (dataset_id),
'type', 'Choices', INDEX idx_status (status),
'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'), INDEX idx_created_at (created_at)
'required', true, ) COMMENT='自动标注任务表';
'description', '选择最符合图像内容的标签'
)
), -- 内置标注模板初始化数据
'objects', JSON_ARRAY( -- 这些模板将在系统首次启动时自动创建
JSON_OBJECT( -- 使用 INSERT ... ON DUPLICATE KEY UPDATE 来覆盖已存在的记录
'name', 'image',
'type', 'Image', -- 1. 图像分类模板
'value', '$image' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'horizontal', 'tpl-image-classification-001',
'计算机视觉', '图像分类',
1, '简单的多标签图像分类模板',
'1.0.0', '图像',
NOW() '分类',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'choice',
data_type = VALUES(data_type), 'toName', 'image',
labeling_type = VALUES(labeling_type), 'type', 'Choices',
configuration = VALUES(configuration), 'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'),
style = VALUES(style), 'required', true,
category = VALUES(category), 'description', '选择最符合图像内容的标签'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'image',
-- 2. 目标检测模板(矩形框) 'type', 'Image',
INSERT INTO t_dm_annotation_templates ( 'value', '$image'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-object-detection-001', 'horizontal',
'目标检测(边界框)', '计算机视觉',
'使用矩形边界框进行目标检测', 1,
'图像', '1.0.0',
'目标检测', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'label', description = VALUES(description),
'toName', 'image', data_type = VALUES(data_type),
'type', 'RectangleLabels', labeling_type = VALUES(labeling_type),
'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'), configuration = VALUES(configuration),
'required', false, style = VALUES(style),
'description', '在图像中框出目标并标注类别' category = VALUES(category),
) built_in = VALUES(built_in),
), version = VALUES(version),
'objects', JSON_ARRAY( updated_at = NOW();
JSON_OBJECT(
'name', 'image',
'type', 'Image', -- 2. 目标检测模板(矩形框)
'value', '$image' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'horizontal', 'tpl-object-detection-001',
'计算机视觉', '目标检测(边界框)',
1, '使用矩形边界框进行目标检测',
'1.0.0', '图像',
NOW() '目标检测',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'label',
data_type = VALUES(data_type), 'toName', 'image',
labeling_type = VALUES(labeling_type), 'type', 'RectangleLabels',
configuration = VALUES(configuration), 'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'),
style = VALUES(style), 'required', false,
category = VALUES(category), 'description', '在图像中框出目标并标注类别'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'image',
-- 3. 图像分割模板(多边形) 'type', 'Image',
INSERT INTO t_dm_annotation_templates ( 'value', '$image'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-image-segmentation-001', 'horizontal',
'图像分割(多边形)', '计算机视觉',
'使用多边形标注进行语义分割', 1,
'图像', '1.0.0',
'分割', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'label', description = VALUES(description),
'toName', 'image', data_type = VALUES(data_type),
'type', 'PolygonLabels', labeling_type = VALUES(labeling_type),
'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'), configuration = VALUES(configuration),
'required', false, style = VALUES(style),
'description', '使用多边形框选需要分割的区域' category = VALUES(category),
) built_in = VALUES(built_in),
), version = VALUES(version),
'objects', JSON_ARRAY( updated_at = NOW();
JSON_OBJECT(
'name', 'image',
'type', 'Image', -- 3. 图像分割模板(多边形)
'value', '$image' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'horizontal', 'tpl-image-segmentation-001',
'计算机视觉', '图像分割(多边形)',
1, '使用多边形标注进行语义分割',
'1.0.0', '图像',
NOW() '分割',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'label',
data_type = VALUES(data_type), 'toName', 'image',
labeling_type = VALUES(labeling_type), 'type', 'PolygonLabels',
configuration = VALUES(configuration), 'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'),
style = VALUES(style), 'required', false,
category = VALUES(category), 'description', '使用多边形框选需要分割的区域'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'image',
-- 4. 文本分类模板 'type', 'Image',
INSERT INTO t_dm_annotation_templates ( 'value', '$image'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-text-classification-001', 'horizontal',
'文本情感分类', '计算机视觉',
'将文本中表达的情感划分到预定义的类别', 1,
'文本', '1.0.0',
'分类', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'choice', description = VALUES(description),
'toName', 'text', data_type = VALUES(data_type),
'type', 'Choices', labeling_type = VALUES(labeling_type),
'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'), configuration = VALUES(configuration),
'required', true, style = VALUES(style),
'description', '对文本的情感或类别进行选择' category = VALUES(category),
) built_in = VALUES(built_in),
), version = VALUES(version),
'objects', JSON_ARRAY( updated_at = NOW();
JSON_OBJECT(
'name', 'text',
'type', 'Text', -- 4. 文本分类模板
'value', '$text' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'vertical', 'tpl-text-classification-001',
'自然语言处理', '文本情感分类',
1, '将文本中表达的情感划分到预定义的类别',
'1.0.0', '文本',
NOW() '分类',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'choice',
data_type = VALUES(data_type), 'toName', 'text',
labeling_type = VALUES(labeling_type), 'type', 'Choices',
configuration = VALUES(configuration), 'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'),
style = VALUES(style), 'required', true,
category = VALUES(category), 'description', '对文本的情感或类别进行选择'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
-- 5. 命名实体识别(NER) 'type', 'Text',
INSERT INTO t_dm_annotation_templates ( 'value', '$text'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-ner-001', 'vertical',
'命名实体识别', '自然语言处理',
'从文本中抽取并标注命名实体', 1,
'文本', '1.0.0',
'实体识别', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'label', description = VALUES(description),
'toName', 'text', data_type = VALUES(data_type),
'type', 'Labels', labeling_type = VALUES(labeling_type),
'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'), configuration = VALUES(configuration),
'required', false, style = VALUES(style),
'description', '在文本中标注人物、地点等实体' category = VALUES(category),
) built_in = VALUES(built_in),
), version = VALUES(version),
'objects', JSON_ARRAY( updated_at = NOW();
JSON_OBJECT(
'name', 'text',
'type', 'Text', -- 5. 命名实体识别(NER)
'value', '$text' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'vertical', 'tpl-ner-001',
'自然语言处理', '命名实体识别',
1, '从文本中抽取并标注命名实体',
'1.0.0', '文本',
NOW() '实体识别',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'label',
data_type = VALUES(data_type), 'toName', 'text',
labeling_type = VALUES(labeling_type), 'type', 'Labels',
configuration = VALUES(configuration), 'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'),
style = VALUES(style), 'required', false,
category = VALUES(category), 'description', '在文本中标注人物、地点等实体'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
-- 6. 音频分类模板 'type', 'Text',
INSERT INTO t_dm_annotation_templates ( 'value', '$text'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-audio-classification-001', 'vertical',
'音频分类', '自然语言处理',
'将音频片段划分到不同类别', 1,
'音频', '1.0.0',
'分类', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'choice', description = VALUES(description),
'toName', 'audio', data_type = VALUES(data_type),
'type', 'Choices', labeling_type = VALUES(labeling_type),
'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'), configuration = VALUES(configuration),
'required', true, style = VALUES(style),
'description', '选择音频片段对应的类别' category = VALUES(category),
) built_in = VALUES(built_in),
), version = VALUES(version),
'objects', JSON_ARRAY( updated_at = NOW();
JSON_OBJECT(
'name', 'audio',
'type', 'Audio', -- 6. 音频分类模板
'value', '$audio' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'horizontal', 'tpl-audio-classification-001',
'音频', '音频分类',
1, '将音频片段划分到不同类别',
'1.0.0', '音频',
NOW() '分类',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'choice',
data_type = VALUES(data_type), 'toName', 'audio',
labeling_type = VALUES(labeling_type), 'type', 'Choices',
configuration = VALUES(configuration), 'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'),
style = VALUES(style), 'required', true,
category = VALUES(category), 'description', '选择音频片段对应的类别'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'audio',
-- 7. 文本多标签分类模板 'type', 'Audio',
INSERT INTO t_dm_annotation_templates ( 'value', '$audio'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-text-multilabel-001', 'horizontal',
'文本多标签分类', '音频',
'可为文本选择多个标签,适用于主题、内容类别等多标签任务', 1,
'文本', '1.0.0',
'分类', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'labels', description = VALUES(description),
'toName', 'text', data_type = VALUES(data_type),
'type', 'Choices', labeling_type = VALUES(labeling_type),
'options', JSON_ARRAY('Sports','Politics','Tech','Entertainment'), configuration = VALUES(configuration),
'required', true, style = VALUES(style),
'choice', 'multiple', category = VALUES(category),
'description', '可选择多个标签' built_in = VALUES(built_in),
) version = VALUES(version),
), updated_at = NOW();
'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text', -- 7. 文本多标签分类模板
'type', 'Text', INSERT INTO t_dm_annotation_templates (
'value', '$text' id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
) ) VALUES (
), 'tpl-text-multilabel-001',
'vertical', '文本多标签分类',
'自然语言处理', '可为文本选择多个标签,适用于主题、内容类别等多标签任务',
1, '文本',
'1.0.0', '分类',
NOW() JSON_OBJECT(
) 'labels', JSON_ARRAY(
ON DUPLICATE KEY UPDATE JSON_OBJECT(
name = VALUES(name), 'fromName', 'labels',
description = VALUES(description), 'toName', 'text',
data_type = VALUES(data_type), 'type', 'Choices',
labeling_type = VALUES(labeling_type), 'options', JSON_ARRAY('Sports','Politics','Tech','Entertainment'),
configuration = VALUES(configuration), 'required', true,
style = VALUES(style), 'choice', 'multiple',
category = VALUES(category), 'description', '可选择多个标签'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
-- 8. 文本摘要模板 'type', 'Text',
INSERT INTO t_dm_annotation_templates ( 'value', '$text'
id, name, description, data_type, labeling_type, )
configuration, style, category, built_in, version, created_at )
) VALUES ( ),
'tpl-text-summarization-001', 'vertical',
'文本摘要', '自然语言处理',
'根据原文撰写简要摘要', 1,
'文本', '1.0.0',
'摘要', NOW()
JSON_OBJECT( )
'labels', JSON_ARRAY( ON DUPLICATE KEY UPDATE
JSON_OBJECT( name = VALUES(name),
'fromName', 'summary', description = VALUES(description),
'toName', 'text', data_type = VALUES(data_type),
'type', 'TextArea', labeling_type = VALUES(labeling_type),
'required', true, configuration = VALUES(configuration),
'description', '在此填写摘要内容' style = VALUES(style),
) category = VALUES(category),
), built_in = VALUES(built_in),
'objects', JSON_ARRAY( version = VALUES(version),
JSON_OBJECT( updated_at = NOW();
'name', 'text',
'type', 'Text',
'value', '$text' -- 8. 文本摘要模板
) INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
), configuration, style, category, built_in, version, created_at
'vertical', ) VALUES (
'自然语言处理', 'tpl-text-summarization-001',
1, '文本摘要',
'1.0.0', '根据原文撰写简要摘要',
NOW() '文本',
) '摘要',
ON DUPLICATE KEY UPDATE JSON_OBJECT(
name = VALUES(name), 'labels', JSON_ARRAY(
description = VALUES(description), JSON_OBJECT(
data_type = VALUES(data_type), 'fromName', 'summary',
labeling_type = VALUES(labeling_type), 'toName', 'text',
configuration = VALUES(configuration), 'type', 'TextArea',
style = VALUES(style), 'required', true,
category = VALUES(category), 'description', '在此填写摘要内容'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
-- 9. 关键词抽取模板 'name', 'text',
INSERT INTO t_dm_annotation_templates ( 'type', 'Text',
id, name, description, data_type, labeling_type, 'value', '$text'
configuration, style, category, built_in, version, created_at )
) VALUES ( )
'tpl-keyword-extract-001', ),
'关键词抽取', 'vertical',
'从文本中选出关键词或关键短语', '自然语言处理',
'文本', 1,
'实体识别', '1.0.0',
JSON_OBJECT( NOW()
'labels', JSON_ARRAY( )
JSON_OBJECT( ON DUPLICATE KEY UPDATE
'fromName', 'kw', name = VALUES(name),
'toName', 'text', description = VALUES(description),
'type', 'Labels', data_type = VALUES(data_type),
'labels', JSON_ARRAY('Keyword'), labeling_type = VALUES(labeling_type),
'required', false, configuration = VALUES(configuration),
'description', '高亮文本并标注关键词' style = VALUES(style),
) category = VALUES(category),
), built_in = VALUES(built_in),
'objects', JSON_ARRAY( version = VALUES(version),
JSON_OBJECT( updated_at = NOW();
'name', 'text',
'type', 'Text', -- 9. 关键词抽取模板
'value', '$text' INSERT INTO t_dm_annotation_templates (
) id, name, description, data_type, labeling_type,
) configuration, style, category, built_in, version, created_at
), ) VALUES (
'vertical', 'tpl-keyword-extract-001',
'自然语言处理', '关键词抽取',
1, '从文本中选出关键词或关键短语',
'1.0.0', '文本',
NOW() '实体识别',
) JSON_OBJECT(
ON DUPLICATE KEY UPDATE 'labels', JSON_ARRAY(
name = VALUES(name), JSON_OBJECT(
description = VALUES(description), 'fromName', 'kw',
data_type = VALUES(data_type), 'toName', 'text',
labeling_type = VALUES(labeling_type), 'type', 'Labels',
configuration = VALUES(configuration), 'labels', JSON_ARRAY('Keyword'),
style = VALUES(style), 'required', false,
category = VALUES(category), 'description', '高亮文本并标注关键词'
built_in = VALUES(built_in), )
version = VALUES(version), ),
updated_at = NOW(); 'objects', JSON_ARRAY(
JSON_OBJECT(
'name', 'text',
'type', 'Text',
'value', '$text'
)
)
),
'vertical',
'自然语言处理',
1,
'1.0.0',
NOW()
)
ON DUPLICATE KEY UPDATE
name = VALUES(name),
description = VALUES(description),
data_type = VALUES(data_type),
labeling_type = VALUES(labeling_type),
configuration = VALUES(configuration),
style = VALUES(style),
category = VALUES(category),
built_in = VALUES(built_in),
version = VALUES(version),
updated_at = NOW();

View File

@@ -1,5 +1,4 @@
FROM maven:3-eclipse-temurin-21 AS builder FROM maven:3-eclipse-temurin-21 AS builder
COPY backend/ /opt/backend COPY backend/ /opt/backend
RUN cd /opt/backend/services && \ RUN cd /opt/backend/services && \

View File

@@ -24,7 +24,6 @@ WORKDIR /opt/runtime
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e .[all] --system \ uv pip install -e .[all] --system \
&& uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \ && uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
&& uv pip uninstall torch torchvision --system \
&& python -m spacy download zh_core_web_sm \ && python -m spacy download zh_core_web_sm \
&& echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth && echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth