Compare commits

...

10 Commits

Author SHA1 Message Date
76f70a6847 feat(knowledge-base): 添加知识库文件全库检索功能
- 新增相对路径字段替代原有的metadata存储方式
- 实现跨知识库文件检索接口searchFiles
- 添加前端全库检索页面和相关API调用
- 优化文件路径处理和数据库索引配置
- 统一请求参数类型定义为RequestPayload和RequestParams
- 简化RagFile模型中的元数据结构设计
2026-01-30 22:24:12 +08:00
cbad129ce4 feat(rag): 添加相对路径搜索功能并优化文件显示
- 在RagFileRepositoryImpl中新增relativePath字段和路径模式构建方法
- 实现buildRelativePathPattern方法用于构建相对路径搜索模式
- 修改page方法添加相对路径模糊查询支持
- 在RagFileReq DTO中添加relativePath参数字段
- 优化KnowledgeBaseDetail页面中的文件名显示逻辑
- 添加normalizePath函数处理文件路径规范化显示
2026-01-30 21:55:29 +08:00
ca7ff56610 feat(rag): 添加文件相对路径支持功能
- 在FileInfo DTO中新增relativePath字段
- 实现文件相对路径的规范化处理逻辑
- 将文件相对路径存储到元数据中
- 前端添加文件路径解析和显示功能
- 优化路径分隔符统一处理机制
- 更新文件列表展示逻辑以支持路径层级结构
2026-01-30 21:46:03 +08:00
a00a6ed3c3 feat(knowledge-base): 实现知识库文件夹功能和优化文件管理
- 添加 datasetId 和 filePath 字段到 DatasetFile 接口
- 实现 resolveRelativeFileName 函数用于解析相对文件名
- 在 AddDataDialog 中使用 resolveRelativeFileName 处理文件名
- 添加文件夹浏览功能,支持目录导航和层级显示
- 实现文件夹删除功能,可批量删除目录下所有文件
- 集成 Folder 和 File 图标组件用于目录和文件区分
- 优化文件列表加载逻辑,使用分页和关键词搜索
- 添加文件夹状态显示和相应操作按钮
- 实现文件路径前缀管理和子目录过滤
- 重构文件列表渲染逻辑,支持目录和文件混合展示
2026-01-30 21:30:54 +08:00
9a205919d7 refactor(data-import): 优化数据源文件扫描和复制逻辑
- 修改数据源文件扫描方法,直接在主流程中获取任务详情和路径
- 移除独立的getFilePaths方法,将路径扫描逻辑整合到scanFilePaths方法中
- 新增copyFilesToDatasetDirWithSourceRoot方法支持保留相对路径的文件复制
- 更新数据集文件应用服务中的文件复制逻辑,支持相对路径处理
- 修改Python后端项目接口中的文件查询逻辑,移除注释掉的编辑器服务引用
- 调整文件过滤逻辑,基于元数据中的派生源ID进行文件筛选
- 移除编辑器服务中已废弃的源文档过滤条件
2026-01-30 18:58:34 +08:00
8b2a19f09a feat(annotation): 添加标注项目文件快照功能
- 新增 LabelingProjectFile 模型用于存储标注项目的文件快照
- 在创建标注项目时记录关联的文件快照数据
- 更新查询逻辑以基于项目快照过滤文件列表
- 优化导出统计功能使用快照数据进行计算
- 添加数据库表结构支持项目文件快照关系
2026-01-30 18:10:13 +08:00
3c3ca130b3 feat(annotation): 添加文本文件内容读取和多类型标签导出功能
- 新增异步函数 _read_file_content 用于安全读取文本文件内容
- 实现在导出时包含文本文件的实际内容数据
- 扩展 CSV 导出格式支持多种标注类型标签提取
- 添加对矩形标签、多边形标签、画笔标签等多种标注类型的支持
- 更新 COCO 格式导出文档说明bbox坐标转换注意事项
2026-01-30 17:35:22 +08:00
a4cdaecf8a refactor(annotation): 简化注释数据导出下载逻辑
- 移除前端手动创建 a 标签下载文件的方式
- 将文件名参数传递给后端 API 函数
- 利用 download 函数内置的下载处理机制
- 简化 ExportAnnotationDialog 组件中的导出流程
- 更新 annotation.api.ts 中的 downloadAnnotationsUsingGet 函数签名
- 直接通过 API 调用完成文件下载和命名
2026-01-30 17:33:14 +08:00
6dfed934a5 feat(file-preview): 增加PDF文件预览功能并优化预览逻辑
- 引入统一的文件预览工具函数和类型定义
- 添加PDF文件类型的识别和预览支持
- 使用iframe实现PDF文件在线预览
- 重构文件预览逻辑,统一处理不同文件类型的预览
- 优化文本内容预览的长度截取机制
- 更新预览按钮加载状态显示
- 统一预览窗口的最大高度配置
- 修改API调用路径为专门的预览接口
2026-01-30 17:32:36 +08:00
bd37858ccc refactor(dataset): 优化数据集路径管理和关联关系处理
- 移除Dataset类中initCreateParam方法的parentPath参数
- 简化handleParentChange方法中的路径构建逻辑
- 更新错误消息将"子数据集"改为"关联数据集"
- 修改前端界面将"父数据集"相关术语统一为"关联数据集"
- 在导入配置组件中添加类型定义和改进文件处理逻辑
- 限制数据源选项排除COLLECTION类型避免错误选择
2026-01-30 16:48:39 +08:00
40 changed files with 1375 additions and 340 deletions

View File

@@ -73,7 +73,7 @@ public class DatasetApplicationService {
Dataset dataset = DatasetConverter.INSTANCE.convertToDataset(createDatasetRequest);
Dataset parentDataset = resolveParentDataset(createDatasetRequest.getParentDatasetId(), dataset.getId());
dataset.setParentDatasetId(parentDataset == null ? null : parentDataset.getId());
dataset.initCreateParam(datasetBasePath, parentDataset == null ? null : parentDataset.getPath());
dataset.initCreateParam(datasetBasePath);
// 处理标签
Set<Tag> processedTags = Optional.ofNullable(createDatasetRequest.getTags())
.filter(CollectionUtils::isNotEmpty)
@@ -291,7 +291,9 @@ public class DatasetApplicationService {
private void handleParentChange(Dataset dataset, String parentDatasetId) {
String normalized = normalizeParentId(parentDatasetId);
if (Objects.equals(dataset.getParentDatasetId(), normalized)) {
String expectedPath = buildDatasetPath(datasetBasePath, dataset.getId());
if (Objects.equals(dataset.getParentDatasetId(), normalized)
&& Objects.equals(dataset.getPath(), expectedPath)) {
return;
}
long childCount = datasetRepository.countByParentId(dataset.getId());
@@ -299,8 +301,7 @@ public class DatasetApplicationService {
throw BusinessException.of(DataManagementErrorCode.DATASET_HAS_CHILDREN);
}
Dataset parent = normalized == null ? null : resolveParentDataset(normalized, dataset.getId());
String newPath = buildDatasetPath(parent == null ? datasetBasePath : parent.getPath(), dataset.getId());
moveDatasetPath(dataset, newPath);
moveDatasetPath(dataset, expectedPath);
dataset.setParentDatasetId(parent == null ? null : parent.getId());
}
@@ -413,33 +414,32 @@ public class DatasetApplicationService {
public void processDataSourceAsync(String datasetId, String dataSourceId) {
try {
log.info("Initiating data source file scanning, dataset ID: {}, collection task ID: {}", datasetId, dataSourceId);
List<String> filePaths = getFilePaths(dataSourceId);
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
if (taskDetail == null) {
log.warn("Fail to get collection task detail, task ID: {}", dataSourceId);
return;
}
Path targetPath = Paths.get(taskDetail.getTargetPath());
if (!Files.exists(targetPath) || !Files.isDirectory(targetPath)) {
log.warn("Target path not exists or is not a directory: {}", taskDetail.getTargetPath());
return;
}
List<String> filePaths = scanFilePaths(targetPath);
if (CollectionUtils.isEmpty(filePaths)) {
return;
}
datasetFileApplicationService.copyFilesToDatasetDir(datasetId, new CopyFilesRequest(filePaths));
datasetFileApplicationService.copyFilesToDatasetDirWithSourceRoot(datasetId, targetPath, filePaths);
log.info("Success file scan, total files: {}", filePaths.size());
} catch (Exception e) {
log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e);
}
}
private List<String> getFilePaths(String dataSourceId) {
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
if (taskDetail == null) {
log.warn("Fail to get collection task detail, task ID: {}", dataSourceId);
return Collections.emptyList();
}
Path targetPath = Paths.get(taskDetail.getTargetPath());
if (!Files.exists(targetPath) || !Files.isDirectory(targetPath)) {
log.warn("Target path not exists or is not a directory: {}", taskDetail.getTargetPath());
return Collections.emptyList();
}
try (Stream<Path> paths = Files.walk(targetPath, 1)) {
private List<String> scanFilePaths(Path targetPath) {
try (Stream<Path> paths = Files.walk(targetPath)) {
return paths
.filter(Files::isRegularFile) // 只保留文件,排除目录
.map(Path::toString) // 转换为字符串路径
.filter(Files::isRegularFile)
.map(Path::toString)
.collect(Collectors.toList());
} catch (IOException e) {
log.error("Fail to scan directory: {}", targetPath, e);

View File

@@ -739,6 +739,71 @@ public class DatasetFileApplicationService {
return copiedFiles;
}
/**
* Registers files from a data-source directory in the dataset while preserving each file's
* path relative to {@code sourceRoot}, then hands the physical copy off to an asynchronous task.
*
* <p>Entries that are null/blank are skipped silently; entries that resolve outside
* {@code sourceRoot}, do not exist, or are not regular files are skipped with a warning.
* The returned records are persisted before this method returns, but the files themselves
* may not have been copied yet because the copy runs via {@code CompletableFuture.runAsync}.
*
* @param datasetId id of the target dataset
* @param sourceRoot root directory of the data source; relative paths are computed against it
* @param sourcePaths paths of the source files to import
* @return the dataset file records created for the accepted source files (empty if none qualified)
*/
@Transactional
public List<DatasetFile> copyFilesToDatasetDirWithSourceRoot(String datasetId, Path sourceRoot, List<String> sourcePaths) {
Dataset dataset = datasetRepository.getById(datasetId);
// presumably raises a business exception when the dataset is missing — confirm against BusinessAssert
BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
// Absolute + normalized so the startsWith/relativize checks below compare like with like.
Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
List<DatasetFile> copiedFiles = new ArrayList<>();
// Load the dataset's current files onto the entity before new records are added to it.
List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
dataset.setFiles(existDatasetFiles);
// Insertion-ordered map from the original source path string to the record created for it.
Map<String, DatasetFile> copyTargets = new LinkedHashMap<>();
for (String sourceFilePath : sourcePaths) {
if (sourceFilePath == null || sourceFilePath.isBlank()) {
continue;
}
Path sourcePath = Paths.get(sourceFilePath).toAbsolutePath().normalize();
// Reject anything that escapes the declared source root (e.g. via "..").
if (!sourcePath.startsWith(normalizedRoot)) {
log.warn("Source file path is out of root: {}", sourceFilePath);
continue;
}
if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
log.warn("Source file does not exist or is not a regular file: {}", sourceFilePath);
continue;
}
Path relativePath = normalizedRoot.relativize(sourcePath);
// Only the last path segment is stored as the file name; the directory part lives in filePath.
String fileName = sourcePath.getFileName().toString();
File sourceFile = sourcePath.toFile();
LocalDateTime currentTime = LocalDateTime.now();
// Target location = dataset directory + the file's path relative to the source root.
Path targetPath = Paths.get(dataset.getPath(), relativePath.toString());
DatasetFile datasetFile = DatasetFile.builder()
.id(UUID.randomUUID().toString())
.datasetId(datasetId)
.fileName(fileName)
.fileType(AnalyzerUtils.getExtension(fileName))
.fileSize(sourceFile.length())
.filePath(targetPath.toString())
.uploadTime(currentTime)
.lastAccessTime(currentTime)
.build();
// NOTE(review): setDatasetFileId is defined elsewhere; presumably it reuses the id of an
// existing record for the same file so saveOrUpdateBatch updates instead of inserting — TODO confirm.
setDatasetFileId(datasetFile, dataset);
dataset.addFile(datasetFile);
copiedFiles.add(datasetFile);
copyTargets.put(sourceFilePath, datasetFile);
}
// Nothing qualified: skip persistence and the async copy entirely.
if (copiedFiles.isEmpty()) {
return copiedFiles;
}
// Persist the records in batches of 100, then update the dataset entity itself.
datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100);
dataset.active();
datasetRepository.updateById(dataset);
// NOTE(review): the copy is submitted from inside the @Transactional method, so it can start
// before the transaction commits and is not undone on rollback; no executor is passed, so the
// default async pool is used and the returned future is not observed — confirm this is intended.
CompletableFuture.runAsync(() -> copyFilesToDatasetDirWithRelativePath(copyTargets, dataset, normalizedRoot));
return copiedFiles;
}
private void copyFilesToDatasetDir(List<String> sourcePaths, Dataset dataset) {
for (String sourcePath : sourcePaths) {
Path sourceFilePath = Paths.get(sourcePath);
@@ -757,6 +822,35 @@ public class DatasetFileApplicationService {
}
}
/**
 * Copies each registered source file into the dataset directory, recreating the file's
 * path relative to the source root underneath the dataset root.
 *
 * <p>A source that resolves outside {@code sourceRoot}, or a destination that resolves
 * outside the dataset directory, is skipped with a warning. I/O failures for one file are
 * logged and do not stop the remaining files. {@code Files.copy} is called without
 * {@code REPLACE_EXISTING}, so an already existing destination is reported as a failed copy.
 *
 * @param copyTargets source path string mapped to the dataset file record created for it
 * @param dataset     dataset whose {@code path} is the copy destination root
 * @param sourceRoot  root directory the source paths are relativized against
 */
private void copyFilesToDatasetDirWithRelativePath(
        Map<String, DatasetFile> copyTargets,
        Dataset dataset,
        Path sourceRoot
) {
    final Path datasetDir = Paths.get(dataset.getPath()).toAbsolutePath().normalize();
    final Path rootDir = sourceRoot.toAbsolutePath().normalize();

    copyTargets.forEach((rawSource, datasetFile) -> {
        Path origin = Paths.get(rawSource).toAbsolutePath().normalize();
        if (!origin.startsWith(rootDir)) {
            log.warn("Source file path is out of root: {}", origin);
            return;
        }

        // Rebuild the same relative location below the dataset directory and re-check containment.
        Path destination = datasetDir.resolve(rootDir.relativize(origin)).normalize();
        if (!destination.startsWith(datasetDir)) {
            log.warn("Target file path is out of dataset path: {}", destination);
            return;
        }

        try {
            Files.createDirectories(destination.getParent());
            Files.copy(origin, destination);
            // Text extraction is only triggered once the copy has succeeded.
            triggerPdfTextExtraction(dataset, datasetFile);
        } catch (IOException e) {
            log.error("Failed to copy file from {} to {}", origin, destination, e);
        }
    });
}
/**
* 添加文件到数据集(仅创建数据库记录,不执行文件系统操作)
*

View File

@@ -114,9 +114,9 @@ public class Dataset extends BaseEntity<String> {
this.updatedAt = LocalDateTime.now();
}
public void initCreateParam(String datasetBasePath, String parentPath) {
public void initCreateParam(String datasetBasePath) {
this.id = UUID.randomUUID().toString();
String basePath = normalizeBasePath(parentPath != null && !parentPath.isBlank() ? parentPath : datasetBasePath);
String basePath = normalizeBasePath(datasetBasePath);
this.path = basePath + File.separator + this.id;
if (this.status == null) {
this.status = DatasetStatusType.DRAFT;

View File

@@ -42,9 +42,9 @@ public enum DataManagementErrorCode implements ErrorCode {
*/
DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"),
/**
* 存在数据集
* 存在关联数据集
*/
DATASET_HAS_CHILDREN("data_management.0008", "存在数据集,禁止删除或移动"),
DATASET_HAS_CHILDREN("data_management.0008", "存在关联数据集,禁止删除或移动"),
/**
* 数据集文件不存在
*/

View File

@@ -36,7 +36,9 @@ import org.springframework.util.StringUtils;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* 知识库服务类
@@ -47,6 +49,7 @@ import java.util.Optional;
@Service
@RequiredArgsConstructor
public class KnowledgeBaseService {
private static final String PATH_SEPARATOR = "/";
private final KnowledgeBaseRepository knowledgeBaseRepository;
private final RagFileRepository ragFileRepository;
private final ApplicationEventPublisher eventPublisher;
@@ -146,6 +149,7 @@ public class KnowledgeBaseService {
ragFile.setKnowledgeBaseId(knowledgeBase.getId());
ragFile.setFileId(fileInfo.id());
ragFile.setFileName(fileInfo.fileName());
ragFile.setRelativePath(normalizeRelativePath(fileInfo.relativePath()));
ragFile.setStatus(FileStatus.UNPROCESSED);
return ragFile;
}).toList();
@@ -153,6 +157,17 @@ public class KnowledgeBaseService {
eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase, request));
}
/**
 * Normalizes a client-supplied relative path for storage: backslashes become the
 * path separator, surrounding whitespace is trimmed and leading separators are dropped.
 *
 * @param relativePath raw relative path, may be null or blank
 * @return the normalized path, or an empty string when the input has no text
 */
private String normalizeRelativePath(String relativePath) {
    if (!StringUtils.hasText(relativePath)) {
        return "";
    }
    String unified = relativePath.replace("\\", PATH_SEPARATOR).trim();
    // Advance past every leading separator, then cut once instead of re-slicing per separator.
    int firstKept = 0;
    while (unified.startsWith(PATH_SEPARATOR, firstKept)) {
        firstKept++;
    }
    return unified.substring(firstKept);
}
public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
request.setKnowledgeBaseId(knowledgeBaseId);
@@ -160,6 +175,41 @@ public class KnowledgeBaseService {
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
}
/**
 * Searches files across knowledge bases and enriches every hit with the name of the
 * knowledge base it belongs to.
 *
 * @param request paging parameters plus the optional file name, relative path and
 *                knowledge base id filters
 * @return one page of search results; {@code knowledgeBaseName} is an empty string when the
 *         owning knowledge base cannot be resolved or has no name
 */
public PagedResponse<KnowledgeBaseFileSearchResp> searchFiles(KnowledgeBaseFileSearchReq request) {
    IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
    page = ragFileRepository.searchPage(page, request);
    List<RagFile> records = page.getRecords();
    if (records.isEmpty()) {
        return PagedResponse.of(Collections.emptyList(), page.getCurrent(), page.getTotal(), page.getPages());
    }

    List<String> knowledgeBaseIds = records.stream()
            .map(RagFile::getKnowledgeBaseId)
            .filter(StringUtils::hasText)
            .distinct()
            .toList();

    // Skip the lookup when no record carries a knowledge base id: depending on the
    // MyBatis-Plus version an empty id collection can be rendered as an invalid "IN ()" clause.
    // Collectors.toMap rejects null values, so a knowledge base without a name maps to "".
    Map<String, String> knowledgeBaseNameMap = knowledgeBaseIds.isEmpty()
            ? Collections.<String, String>emptyMap()
            : knowledgeBaseRepository.listByIds(knowledgeBaseIds).stream()
                    .collect(Collectors.toMap(
                            KnowledgeBase::getId,
                            knowledgeBase -> knowledgeBase.getName() == null ? "" : knowledgeBase.getName()));

    List<KnowledgeBaseFileSearchResp> responses = records.stream()
            .map(file -> {
                KnowledgeBaseFileSearchResp resp = new KnowledgeBaseFileSearchResp();
                resp.setId(file.getId());
                resp.setKnowledgeBaseId(file.getKnowledgeBaseId());
                resp.setKnowledgeBaseName(knowledgeBaseNameMap.getOrDefault(file.getKnowledgeBaseId(), ""));
                resp.setFileName(file.getFileName());
                resp.setRelativePath(file.getRelativePath());
                resp.setChunkCount(file.getChunkCount());
                resp.setStatus(file.getStatus());
                resp.setCreatedAt(file.getCreatedAt());
                resp.setUpdatedAt(file.getUpdatedAt());
                return resp;
            })
            .toList();
    return PagedResponse.of(responses, page.getCurrent(), page.getTotal(), page.getPages());
}
@Transactional(rollbackFor = Exception.class)
public void deleteFiles(String knowledgeBaseId, DeleteFilesReq request) {
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))

View File

@@ -28,6 +28,10 @@ public class RagFile extends BaseEntity<String> {
* 文件名
*/
private String fileName;
/**
* 相对路径
*/
private String relativePath;
/**
* 文件ID
*/

View File

@@ -3,6 +3,7 @@ package com.datamate.rag.indexer.domain.repository;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseFileSearchReq;
import com.datamate.rag.indexer.interfaces.dto.RagFileReq;
import java.util.List;
@@ -21,4 +22,6 @@ public interface RagFileRepository extends IRepository<RagFile> {
List<RagFile> findAllByKnowledgeBaseId(String knowledgeBaseId);
IPage<RagFile> page(IPage<RagFile> page, RagFileReq request);
IPage<RagFile> searchPage(IPage<RagFile> page, KnowledgeBaseFileSearchReq request);
}

View File

@@ -6,6 +6,7 @@ import com.datamate.rag.indexer.domain.model.FileStatus;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseFileSearchReq;
import com.datamate.rag.indexer.interfaces.dto.RagFileReq;
import org.springframework.stereotype.Repository;
import org.springframework.util.StringUtils;
@@ -20,6 +21,7 @@ import java.util.List;
*/
@Repository
public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile> implements RagFileRepository {
private static final String PATH_SEPARATOR = "/";
@Override
public void removeByKnowledgeBaseId(String knowledgeBaseId) {
lambdaUpdate().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId).remove();
@@ -45,6 +47,27 @@ public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile
return lambdaQuery()
.eq(RagFile::getKnowledgeBaseId, request.getKnowledgeBaseId())
.like(StringUtils.hasText(request.getFileName()), RagFile::getFileName, request.getFileName())
.likeRight(StringUtils.hasText(request.getRelativePath()), RagFile::getRelativePath, normalizeRelativePath(request.getRelativePath()))
.page(page);
}
/**
 * Pages through RAG files using the optional filters carried by the request.
 *
 * <p>Each filter is applied only when its request value has text: exact match on the
 * knowledge base id, contains-match on the file name, and prefix-match on the normalized
 * relative path.
 *
 * @param page    paging descriptor to fill
 * @param request search filters
 * @return the page populated by the query
 */
@Override
public IPage<RagFile> searchPage(IPage<RagFile> page, KnowledgeBaseFileSearchReq request) {
    String knowledgeBaseId = request.getKnowledgeBaseId();
    String fileNameKeyword = request.getFileName();
    String rawRelativePath = request.getRelativePath();
    String pathPrefix = normalizeRelativePath(rawRelativePath);

    return lambdaQuery()
            .eq(StringUtils.hasText(knowledgeBaseId), RagFile::getKnowledgeBaseId, knowledgeBaseId)
            .like(StringUtils.hasText(fileNameKeyword), RagFile::getFileName, fileNameKeyword)
            .likeRight(StringUtils.hasText(rawRelativePath), RagFile::getRelativePath, pathPrefix)
            .page(page);
}
/**
 * Brings a relative-path filter into the stored form: backslashes are replaced by the
 * path separator, surrounding whitespace is trimmed and leading separators are removed.
 *
 * @param relativePath raw filter value, may be null or blank
 * @return the normalized value, or an empty string when the input has no text
 */
private String normalizeRelativePath(String relativePath) {
    if (!StringUtils.hasText(relativePath)) {
        return "";
    }
    String candidate = relativePath.replace("\\", PATH_SEPARATOR).trim();
    // Count the leading separators first so the string is sliced only once.
    int offset = 0;
    while (candidate.startsWith(PATH_SEPARATOR, offset)) {
        offset++;
    }
    return candidate.substring(offset);
}
}

View File

@@ -105,6 +105,17 @@ public class KnowledgeBaseController {
return knowledgeBaseService.listFiles(knowledgeBaseId, request);
}
/**
 * Searches knowledge base files across all knowledge bases.
 *
 * @param request search request carrying the paging parameters and optional filters
 * @return one page of matching files
 */
@GetMapping("/files/search")
public PagedResponse<KnowledgeBaseFileSearchResp> searchFiles(KnowledgeBaseFileSearchReq request) {
    PagedResponse<KnowledgeBaseFileSearchResp> matches = knowledgeBaseService.searchFiles(request);
    return matches;
}
/**
* 删除知识库文件
*

View File

@@ -21,6 +21,6 @@ public class AddFilesReq {
private String delimiter;
private List<FileInfo> files;
public record FileInfo(String id, String fileName) {
public record FileInfo(String id, String fileName, String relativePath) {
}
}

View File

@@ -0,0 +1,19 @@
package com.datamate.rag.indexer.interfaces.dto;
import com.datamate.common.interfaces.PagingQuery;
import lombok.Getter;
import lombok.Setter;
/**
* Request for searching knowledge base files across all knowledge bases.
* Paging parameters are inherited from {@link PagingQuery}; every filter below is optional
* and is only applied by the repository query when it contains text.
*
* @author dallas
* @since 2026-01-30
*/
@Getter
@Setter
public class KnowledgeBaseFileSearchReq extends PagingQuery {
// File name keyword; the repository applies it as a LIKE filter on the file name.
private String fileName;
// Relative path; normalized by the repository and applied as a right-LIKE (prefix) filter.
private String relativePath;
// Restricts the search to one knowledge base (equality filter) when present.
private String knowledgeBaseId;
}

View File

@@ -0,0 +1,27 @@
package com.datamate.rag.indexer.interfaces.dto;
import com.datamate.rag.indexer.domain.model.FileStatus;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
/**
* Response item for the cross-knowledge-base file search.
* Mirrors the matched RAG file and adds the name of the knowledge base that owns it.
*
* @author dallas
* @since 2026-01-30
*/
@Getter
@Setter
public class KnowledgeBaseFileSearchResp {
// Id of the matched RAG file record.
private String id;
// Id of the knowledge base the file belongs to.
private String knowledgeBaseId;
// Name looked up from the knowledge base repository; the service falls back to "" when no match is found.
private String knowledgeBaseName;
// File name as stored on the RAG file record.
private String fileName;
// Relative path as stored on the RAG file record.
private String relativePath;
// Chunk count copied from the RAG file record.
private Integer chunkCount;
// Processing status copied from the RAG file record.
private FileStatus status;
// Creation timestamp copied from the RAG file record.
private LocalDateTime createdAt;
// Last update timestamp copied from the RAG file record.
private LocalDateTime updatedAt;
}

View File

@@ -14,5 +14,6 @@ import lombok.Setter;
@Getter
public class RagFileReq extends PagingQuery {
private String fileName;
private String relativePath;
private String knowledgeBaseId;
}

View File

@@ -6,6 +6,12 @@ import TextArea from "antd/es/input/TextArea";
import { useEffect, useMemo, useState } from "react";
import type { ReactNode } from "react";
import { Eye } from "lucide-react";
import {
PREVIEW_TEXT_MAX_LENGTH,
resolvePreviewFileType,
truncatePreviewText,
type PreviewFileType,
} from "@/utils/filePreview";
import {
createAnnotationTaskUsingPost,
getAnnotationTaskByIdUsingGet,
@@ -53,6 +59,7 @@ const isRecord = (value: unknown): value is Record<string, unknown> =>
!!value && typeof value === "object" && !Array.isArray(value);
const DEFAULT_SEGMENTATION_ENABLED = true;
const FILE_PREVIEW_MAX_HEIGHT = 500;
const SEGMENTATION_OPTIONS = [
{ label: "需要切片段", value: true },
{ label: "不需要切片段", value: false },
@@ -116,7 +123,7 @@ export default function CreateAnnotationTask({
const [fileContent, setFileContent] = useState("");
const [fileContentLoading, setFileContentLoading] = useState(false);
const [previewFileName, setPreviewFileName] = useState("");
const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text");
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
// 任务详情加载状态(编辑模式)
@@ -297,57 +304,32 @@ export default function CreateAnnotationTask({
// 预览文件内容
const handlePreviewFileContent = async (file: DatasetPreviewFile) => {
const fileName = file.fileName?.toLowerCase() || '';
// 文件类型扩展名映射
const textExtensions = ['.json', '.jsonl', '.txt', '.csv', '.tsv', '.xml', '.md', '.yaml', '.yml'];
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'];
const videoExtensions = ['.mp4', '.webm', '.ogg', '.mov', '.avi'];
const audioExtensions = ['.mp3', '.wav', '.ogg', '.aac', '.flac', '.m4a'];
const isTextFile = textExtensions.some(ext => fileName.endsWith(ext));
const isImageFile = imageExtensions.some(ext => fileName.endsWith(ext));
const isVideoFile = videoExtensions.some(ext => fileName.endsWith(ext));
const isAudioFile = audioExtensions.some(ext => fileName.endsWith(ext));
if (!isTextFile && !isImageFile && !isVideoFile && !isAudioFile) {
const fileType = resolvePreviewFileType(file.fileName);
if (!fileType) {
message.warning("不支持预览该文件类型");
return;
}
setFileContentLoading(true);
setPreviewFileName(file.fileName);
setPreviewFileType(fileType);
setFileContent("");
setPreviewMediaUrl("");
const fileUrl = `/api/data-management/datasets/${selectedDatasetId}/files/${file.id}/download`;
const previewUrl = `/api/data-management/datasets/${selectedDatasetId}/files/${file.id}/preview`;
try {
if (isTextFile) {
if (fileType === "text") {
// 文本文件:获取内容
const response = await fetch(fileUrl);
const response = await fetch(previewUrl);
if (!response.ok) {
throw new Error('下载失败');
}
const text = await response.text();
// 限制预览内容长度
const maxLength = 50000;
if (text.length > maxLength) {
setFileContent(text.substring(0, maxLength) + '\n\n... (内容过长,仅显示前 50000 字符)');
} else {
setFileContent(text);
}
setPreviewFileType("text");
} else if (isImageFile) {
// 图片文件:直接使用 URL
setPreviewMediaUrl(fileUrl);
setPreviewFileType("image");
} else if (isVideoFile) {
// 视频文件:使用 URL
setPreviewMediaUrl(fileUrl);
setPreviewFileType("video");
} else if (isAudioFile) {
// 音频文件:使用 URL
setPreviewMediaUrl(fileUrl);
setPreviewFileType("audio");
setFileContent(truncatePreviewText(text, PREVIEW_TEXT_MAX_LENGTH));
} else {
// 媒体/PDF 文件:直接使用预览地址
setPreviewMediaUrl(previewUrl);
}
setFileContentVisible(true);
} catch (error) {
@@ -878,7 +860,7 @@ export default function CreateAnnotationTask({
</Button>
]}
>
<div className="mb-2 text-xs text-gray-500"></div>
<div className="mb-2 text-xs text-gray-500">PDF</div>
<Table
dataSource={datasetPreviewData}
columns={[
@@ -942,7 +924,7 @@ export default function CreateAnnotationTask({
{previewFileType === "text" && (
<pre
style={{
maxHeight: '500px',
maxHeight: `${FILE_PREVIEW_MAX_HEIGHT}px`,
overflow: 'auto',
backgroundColor: '#f5f5f5',
padding: '12px',
@@ -960,16 +942,23 @@ export default function CreateAnnotationTask({
<img
src={previewMediaUrl}
alt={previewFileName}
style={{ maxWidth: '100%', maxHeight: '500px', objectFit: 'contain' }}
style={{ maxWidth: '100%', maxHeight: `${FILE_PREVIEW_MAX_HEIGHT}px`, objectFit: 'contain' }}
/>
</div>
)}
{previewFileType === "pdf" && (
<iframe
src={previewMediaUrl}
title={previewFileName || "PDF 预览"}
style={{ width: '100%', height: `${FILE_PREVIEW_MAX_HEIGHT}px`, border: 'none' }}
/>
)}
{previewFileType === "video" && (
<div style={{ textAlign: 'center' }}>
<video
src={previewMediaUrl}
controls
style={{ maxWidth: '100%', maxHeight: '500px' }}
style={{ maxWidth: '100%', maxHeight: `${FILE_PREVIEW_MAX_HEIGHT}px` }}
>
</video>

View File

@@ -106,13 +106,6 @@ export default function ExportAnnotationDialog({
const values = await form.validateFields();
setExporting(true);
const blob = await downloadAnnotationsUsingGet(
projectId,
values.format,
values.onlyAnnotated,
values.includeData
);
// 获取文件名
const formatExt: Record<ExportFormat, string> = {
json: "json",
@@ -124,15 +117,14 @@ export default function ExportAnnotationDialog({
const ext = formatExt[values.format as ExportFormat] || "json";
const filename = `${projectName}_annotations.${ext}`;
// 下载文件
const url = window.URL.createObjectURL(blob as Blob);
const a = document.createElement("a");
a.href = url;
a.download = filename;
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
// 下载文件(download函数内部已处理下载逻辑)
await downloadAnnotationsUsingGet(
projectId,
values.format,
values.onlyAnnotated,
values.includeData,
filename
);
message.success("导出成功");
onClose();

View File

@@ -109,12 +109,13 @@ export function downloadAnnotationsUsingGet(
projectId: string,
format: ExportFormat = "json",
onlyAnnotated: boolean = true,
includeData: boolean = false
includeData: boolean = false,
filename?: string
) {
const params = new URLSearchParams({
format,
only_annotated: String(onlyAnnotated),
include_data: String(includeData),
});
return download(`/api/annotation/export/projects/${projectId}/download?${params.toString()}`);
return download(`/api/annotation/export/projects/${projectId}/download?${params.toString()}`, null, filename);
}

View File

@@ -96,7 +96,7 @@ export default function EditDataset({
<BasicInformation
data={newDataset}
setData={setNewDataset}
hidden={["datasetType"]}
hidden={["datasetType", "dataSource"]}
/>
</Form>
</Modal>

View File

@@ -74,7 +74,7 @@ export default function BasicInformation({
value: dataset.id,
}));
setParentDatasetOptions([
{ label: "数据集", value: "" },
{ label: "无关联数据集", value: "" },
...options,
]);
} catch (error) {
@@ -102,11 +102,11 @@ export default function BasicInformation({
</Form.Item>
)}
{!hidden.includes("parentDatasetId") && (
<Form.Item name="parentDatasetId" label="数据集">
<Form.Item name="parentDatasetId" label="关联数据集">
<Select
className="w-full"
options={parentDatasetOptions}
placeholder="选择数据集(仅支持一层)"
placeholder="选择关联数据集(仅支持一层)"
/>
</Form.Item>
)}

View File

@@ -127,7 +127,7 @@ export default function DatasetDetail() {
if (!dataset?.parentDatasetId) {
items.push({
key: "children",
label: "数据集",
label: "关联数据集",
});
}
return items;
@@ -266,7 +266,7 @@ export default function DatasetDetail() {
? [
{
key: "create-child",
label: "创建数据集",
label: "创建关联数据集",
icon: <PlusOutlined />,
onClick: handleCreateChildDataset,
},
@@ -415,7 +415,7 @@ export default function DatasetDetail() {
{activeTab === "children" && (
<div className="pt-4">
<div className="flex items-center justify-between mb-3">
<h2 className="text-base font-semibold"></h2>
<h2 className="text-base font-semibold"></h2>
<span className="text-xs text-gray-500">
{childDatasets.length}
</span>
@@ -426,7 +426,7 @@ export default function DatasetDetail() {
dataSource={childDatasets}
loading={childDatasetsLoading}
pagination={false}
locale={{ emptyText: "暂无数据集" }}
locale={{ emptyText: "暂无关联数据集" }}
/>
</div>
)}

View File

@@ -14,8 +14,8 @@ import Dragger from "antd/es/upload/Dragger";
* @returns 分割后的文件列表,每行一个文件
*/
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
const originFile = (file as any).originFileObj || file;
if (!originFile || typeof originFile.text !== "function") {
const originFile = file.originFileObj ?? file;
if (!(originFile instanceof File) || typeof originFile.text !== "function") {
return [file];
}
@@ -41,11 +41,31 @@ async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
name: newFileName,
size: newFile.size,
type: "text/plain",
originFileObj: newFile as any,
originFileObj: newFile as UploadFile["originFileObj"],
} as UploadFile;
});
}
type SelectOption = {
label: string;
value: string;
};
type CollectionTask = {
id: string;
name: string;
};
type ImportConfig = {
source: DataSource;
hasArchive: boolean;
splitByLine: boolean;
files?: UploadFile[];
dataSource?: string;
target?: DataSource;
[key: string]: unknown;
};
export default function ImportConfiguration({
data,
open,
@@ -60,8 +80,11 @@ export default function ImportConfiguration({
prefix?: string;
}) {
const [form] = Form.useForm();
const [collectionOptions, setCollectionOptions] = useState([]);
const [importConfig, setImportConfig] = useState<any>({
const [collectionOptions, setCollectionOptions] = useState<SelectOption[]>([]);
const availableSourceOptions = dataSourceOptions.filter(
(option) => option.value !== DataSource.COLLECTION
);
const [importConfig, setImportConfig] = useState<ImportConfig>({
source: DataSource.UPLOAD,
hasArchive: true,
splitByLine: false,
@@ -71,7 +94,8 @@ export default function ImportConfiguration({
// 本地上传文件相关逻辑
const handleUpload = async (dataset: Dataset) => {
let filesToUpload = form.getFieldValue("files") || [];
let filesToUpload =
(form.getFieldValue("files") as UploadFile[] | undefined) || [];
// 如果启用分行分割,处理文件
if (importConfig.splitByLine) {
@@ -83,7 +107,7 @@ export default function ImportConfiguration({
// 计算分片列表
const sliceList = filesToUpload.map((file) => {
const originFile = (file as any).originFileObj || file;
const originFile = (file.originFileObj ?? file) as Blob;
const slices = sliceFile(originFile);
return {
originFile: originFile, // 传入真正的 File/Blob 对象
@@ -111,7 +135,10 @@ export default function ImportConfiguration({
if (importConfig.source !== DataSource.COLLECTION) return;
try {
const res = await queryTasksUsingGet({ page: 0, size: 100 });
const options = res.data.content.map((task: any) => ({
const tasks = Array.isArray(res?.data?.content)
? (res.data.content as CollectionTask[])
: [];
const options = tasks.map((task) => ({
label: task.name,
value: task.id,
}));
@@ -126,7 +153,7 @@ export default function ImportConfiguration({
form.resetFields();
form.setFieldsValue({ files: null });
setImportConfig({
source: importConfig.source ? importConfig.source : DataSource.UPLOAD,
source: DataSource.UPLOAD,
hasArchive: true,
splitByLine: false,
});
@@ -196,12 +223,12 @@ export default function ImportConfiguration({
name="source"
rules={[{ required: true, message: "请选择数据源" }]}
>
<Radio.Group
buttonStyle="solid"
options={dataSourceOptions}
optionType="button"
/>
</Form.Item>
<Radio.Group
buttonStyle="solid"
options={availableSourceOptions}
optionType="button"
/>
</Form.Item>
{importConfig?.source === DataSource.COLLECTION && (
<Form.Item name="dataSource" label="归集任务" required>
<Select placeholder="请选择归集任务" options={collectionOptions} />
@@ -277,11 +304,13 @@ export default function ImportConfiguration({
label="上传文件"
name="files"
valuePropName="fileList"
getValueFromEvent={(e: any) => {
if (Array.isArray(e)) {
return e;
getValueFromEvent={(
event: { fileList?: UploadFile[] } | UploadFile[]
) => {
if (Array.isArray(event)) {
return event;
}
return e && e.fileList;
return event?.fileList;
}}
rules={[
{

View File

@@ -332,6 +332,14 @@ export default function Overview({
return (
<div className="flex">
<Button
size="small"
type="link"
loading={previewLoading && previewFileName === record.fileName}
onClick={() => handlePreviewFile(record)}
>
</Button>
<Button
size="small"
type="link"
@@ -549,6 +557,13 @@ export default function Overview({
/>
</div>
)}
{previewFileType === "pdf" && (
<iframe
src={previewMediaUrl}
title={previewFileName || "PDF 预览"}
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }}
/>
)}
{previewFileType === "video" && (
<div style={{ textAlign: "center" }}>
<video

View File

@@ -4,7 +4,12 @@ import type {
} from "@/pages/DataManagement/dataset.model";
import { App } from "antd";
import { useState } from "react";
import { PREVIEW_TEXT_MAX_LENGTH, resolvePreviewFileType, truncatePreviewText } from "@/utils/filePreview";
import {
PREVIEW_TEXT_MAX_LENGTH,
resolvePreviewFileType,
truncatePreviewText,
type PreviewFileType,
} from "@/utils/filePreview";
import {
deleteDatasetFileUsingDelete,
downloadFileByIdUsingGet,
@@ -35,7 +40,7 @@ export function useFilesOperation(dataset: Dataset) {
const [previewVisible, setPreviewVisible] = useState(false);
const [previewContent, setPreviewContent] = useState("");
const [previewFileName, setPreviewFileName] = useState("");
const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text");
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
const [previewLoading, setPreviewLoading] = useState(false);
@@ -111,7 +116,7 @@ export function useFilesOperation(dataset: Dataset) {
return;
}
const fileUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/download`;
const previewUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/preview`;
setPreviewFileName(file.fileName);
setPreviewFileType(fileType);
setPreviewContent("");
@@ -120,7 +125,7 @@ export function useFilesOperation(dataset: Dataset) {
if (fileType === "text") {
setPreviewLoading(true);
try {
const response = await fetch(fileUrl);
const response = await fetch(previewUrl);
if (!response.ok) {
throw new Error("下载失败");
}
@@ -136,7 +141,7 @@ export function useFilesOperation(dataset: Dataset) {
return;
}
setPreviewMediaUrl(fileUrl);
setPreviewMediaUrl(previewUrl);
setPreviewVisible(true);
};

View File

@@ -34,10 +34,12 @@ export enum DataSource {
// A single file entry of a dataset as used by the front end.
export interface DatasetFile {
id: string;
// Optional owning dataset id; AddDataDialog looks for "/<datasetId>/" inside
// the storage path to derive a dataset-relative path.
datasetId?: string;
// File name; may itself contain path separators (consumers normalize it).
fileName: string;
size: string;
uploadDate: string;
path: string;
// Optional alternative to `path`; consumers read `path || filePath`.
// NOTE(review): presumably the same storage path under a different key — confirm against the API.
filePath?: string;
}
export interface Dataset {

View File

@@ -1,12 +1,23 @@
import type React from "react";
import { useEffect, useState } from "react";
import { Table, Badge, Button, Breadcrumb, Tooltip, App, Card, Input, Empty, Spin } from "antd";
import { useCallback, useEffect, useMemo, useState } from "react";
import {
Table,
Badge,
Button,
Breadcrumb,
Tooltip,
App,
Card,
Input,
Empty,
Spin,
} from "antd";
import {
DeleteOutlined,
EditOutlined,
ReloadOutlined,
} from "@ant-design/icons";
import { useNavigate, useParams } from "react-router";
import { useNavigate, useParams, useSearchParams } from "react-router";
import DetailHeader from "@/components/DetailHeader";
import { SearchControls } from "@/components/SearchControls";
import { KBFile, KnowledgeBaseItem } from "../knowledge-base.model";
@@ -18,9 +29,9 @@ import {
queryKnowledgeBaseFilesUsingGet,
retrieveKnowledgeBaseContent,
} from "../knowledge-base.api";
import useFetchData from "@/hooks/useFetchData";
import AddDataDialog from "../components/AddDataDialog";
import CreateKnowledgeBase from "../components/CreateKnowledgeBase";
import { File, Folder } from "lucide-react";
interface StatisticItem {
icon?: React.ReactNode;
@@ -39,44 +50,127 @@ interface RecallResult {
primaryKey?: string;
}
// Row model of the file table: either a real KBFile or a synthetic folder row
// built on the client from the files' relative paths.
type KBFileRow = KBFile & {
// Set to true on synthesized folder rows only.
isDirectory?: boolean;
// Label rendered in the name column (leaf file name or folder name).
displayName?: string;
// Normalized relative path of the file; for folder rows the current prefix
// plus the folder name and a trailing "/".
fullPath?: string;
// Folder rows only: number of loaded files located under the folder.
fileCount?: number;
};
/** Separator used for every relative path handled on this page. */
const PATH_SEPARATOR = "/";

/**
 * Replaces backslashes with forward slashes.
 * A missing value is treated as an empty string.
 */
const normalizePath = (value?: string) =>
  (value ?? "").replace(/\\/g, PATH_SEPARATOR);

/**
 * Normalizes a directory prefix: forward slashes only, no surrounding
 * whitespace, no leading separator, exactly one guaranteed trailing separator.
 *
 * @param value raw prefix (for example from component state or the URL)
 * @returns the normalized prefix, or "" when nothing meaningful is left
 */
const normalizePrefix = (value?: string) => {
  // Trim BEFORE stripping leading separators so that " /docs" yields "docs/"
  // instead of "/docs/"; the second trim keeps inputs such as "/ docs" working.
  const trimmed = normalizePath(value).trim().replace(/^\/+/, "").trim();
  if (!trimmed) {
    return "";
  }
  return trimmed.endsWith(PATH_SEPARATOR)
    ? trimmed
    : `${trimmed}${PATH_SEPARATOR}`;
};

/**
 * Splits the part of `fullPath` that follows `prefix` into its non-empty
 * path segments.
 *
 * @param fullPath normalized relative path of a file
 * @param prefix normalized directory prefix ("" means the root)
 * @returns the remaining segments, or [] when the path is not under `prefix`
 */
const splitRelativePath = (fullPath: string, prefix: string) => {
  if (prefix && !fullPath.startsWith(prefix)) {
    return [];
  }
  const remainder = fullPath.slice(prefix.length);
  return remainder.split(PATH_SEPARATOR).filter(Boolean);
};
/**
 * Returns the relative path of a knowledge-base file with forward slashes
 * and without leading separators. Falls back from `relativePath` to
 * `fileName`, then `name`, then an empty string.
 */
const resolveFileRelativePath = (file: KBFile) => {
  const candidate = file.relativePath || file.fileName || file.name || "";
  const withForwardSlashes = normalizePath(candidate);
  return withForwardSlashes.replace(/^\/+/, "");
};
const KnowledgeBaseDetailPage: React.FC = () => {
const navigate = useNavigate();
const [searchParams] = useSearchParams();
const { message } = App.useApp();
const { id } = useParams<{ id: string }>();
const [knowledgeBase, setKnowledgeBase] = useState<KnowledgeBaseItem | undefined>(undefined);
const [showEdit, setShowEdit] = useState(false);
const [activeTab, setActiveTab] = useState<'fileList' | 'recallTest'>('fileList');
const [filePrefix, setFilePrefix] = useState("");
const [fileKeyword, setFileKeyword] = useState("");
const [filesLoading, setFilesLoading] = useState(false);
const [allFiles, setAllFiles] = useState<KBFile[]>([]);
const [filePagination, setFilePagination] = useState({
current: 1,
pageSize: 10,
});
const [recallLoading, setRecallLoading] = useState(false);
const [recallResults, setRecallResults] = useState<RecallResult[]>([]);
const [recallQuery, setRecallQuery] = useState("");
const fetchKnowledgeBaseDetails = async (id: string) => {
const { data } = await queryKnowledgeBaseByIdUsingGet(id);
const fetchKnowledgeBaseDetails = useCallback(async (baseId: string) => {
const { data } = await queryKnowledgeBaseByIdUsingGet(baseId);
setKnowledgeBase(mapKnowledgeBase(data));
};
}, []);
/**
 * Loads every file of the current knowledge base that matches the active
 * directory prefix and keyword, walking the paged endpoint until the last
 * page, and stores the mapped result in `allFiles`.
 */
const fetchFiles = useCallback(async () => {
  if (!id) {
    setAllFiles([]);
    return;
  }
  setFilesLoading(true);
  try {
    const PAGE_SIZE = 200;
    const prefixFilter = normalizePrefix(filePrefix);
    const nameFilter = fileKeyword.trim();
    const collected: KBFile[] = [];
    for (let pageIndex = 0; ; pageIndex += 1) {
      const { data } = await queryKnowledgeBaseFilesUsingGet(id, {
        page: pageIndex,
        size: PAGE_SIZE,
        // Optional filters are only sent when they carry a value.
        ...(prefixFilter ? { relativePath: prefixFilter } : {}),
        ...(nameFilter ? { fileName: nameFilter } : {}),
      });
      const batch = Array.isArray(data?.content) ? data.content : [];
      collected.push(...batch.map(mapFileData));
      // Stop on a short page, or once the reported total has been reached.
      const isShortPage = batch.length < PAGE_SIZE;
      const reachedTotal =
        typeof data?.totalElements === "number" &&
        collected.length >= data.totalElements;
      if (isShortPage || reachedTotal) {
        break;
      }
    }
    setAllFiles(collected);
  } catch (error) {
    console.error("Failed to fetch knowledge base files:", error);
    message.error("文件列表加载失败");
  } finally {
    setFilesLoading(false);
  }
}, [id, filePrefix, fileKeyword, message]);
useEffect(() => {
if (id) {
fetchKnowledgeBaseDetails(id);
}
}, [id]);
}, [id, fetchKnowledgeBaseDetails]);
const {
loading,
tableData: files,
searchParams,
pagination,
fetchData: fetchFiles,
setSearchParams,
handleFiltersChange,
handleKeywordChange,
} = useFetchData<KBFile>(
(params) => id ? queryKnowledgeBaseFilesUsingGet(id, params) : Promise.resolve({ data: [] }),
mapFileData
);
useEffect(() => {
if (!id) {
return;
}
const prefixParam = searchParams.get("prefix");
const fileNameParam = searchParams.get("fileName");
setFilePrefix(prefixParam ? normalizePrefix(prefixParam) : "");
setFileKeyword(fileNameParam ? fileNameParam : "");
}, [id, searchParams]);
useEffect(() => {
if (id) {
fetchFiles();
}
}, [id, fetchFiles]);
// File table logic
const handleDeleteFile = async (file: KBFile) => {
const handleDeleteFile = async (file: KBFileRow) => {
try {
await deleteKnowledgeBaseFileByIdUsingDelete(knowledgeBase!.id, {
ids: [file.id]
@@ -119,6 +213,152 @@ const KnowledgeBaseDetailPage: React.FC = () => {
setRecallLoading(false);
};
/** Enters the given sub-directory of the directory currently shown. */
const handleOpenDirectory = (directoryName: string) => {
  const parentPrefix = normalizePrefix(filePrefix);
  setFilePrefix(normalizePrefix(`${parentPrefix}${directoryName}`));
};
/** Moves one level up; does nothing when the root is already shown. */
const handleBackToParent = () => {
  const activePrefix = normalizePrefix(filePrefix);
  if (activePrefix) {
    // Drop the trailing separator, then the last path segment.
    const ancestors = activePrefix
      .replace(/\/$/, "")
      .split(PATH_SEPARATOR)
      .filter(Boolean)
      .slice(0, -1);
    setFilePrefix(
      ancestors.length > 0
        ? ancestors.join(PATH_SEPARATOR) + PATH_SEPARATOR
        : ""
    );
  }
};
/**
 * Deletes every loaded file whose relative path lies under the given
 * sub-directory of the directory currently shown, then reloads the list.
 *
 * NOTE(review): the candidates come from `allFiles`, which only holds what the
 * last fetch returned. When a keyword filter is active, files in the folder
 * that do not match the keyword are not deleted — confirm this is intended.
 */
const handleDeleteDirectory = async (directoryName: string) => {
  const baseId = knowledgeBase?.id;
  if (!baseId) {
    return;
  }
  const directoryPrefix = normalizePrefix(
    `${normalizePrefix(filePrefix)}${directoryName}`
  );
  const targetIds: string[] = [];
  for (const file of allFiles) {
    if (resolveFileRelativePath(file).startsWith(directoryPrefix)) {
      targetIds.push(file.id);
    }
  }
  if (targetIds.length === 0) {
    message.info("该文件夹为空");
    return;
  }
  try {
    await deleteKnowledgeBaseFileByIdUsingDelete(baseId, { ids: targetIds });
    message.success(`已删除 ${targetIds.length} 个文件`);
    fetchFiles();
  } catch {
    message.error("文件夹删除失败");
  }
};
/** Stores the keyword typed into the search box; the file list reloads from it. */
const handleKeywordChange = (keyword: string): void => {
  setFileKeyword(keyword);
};
useEffect(() => {
setFilePagination((prev) => ({ ...prev, current: 1 }));
}, [filePrefix, fileKeyword]);
const normalizedPrefix = useMemo(() => normalizePrefix(filePrefix), [filePrefix]);
const { rows: fileRows, total: fileTotal } = useMemo(() => {
const folderMap = new Map<string, { name: string; fileCount: number }>();
const fileItems: KBFileRow[] = [];
allFiles.forEach((file) => {
const fullPath = resolveFileRelativePath(file);
if (!fullPath) {
return;
}
const segments = splitRelativePath(fullPath, normalizedPrefix);
if (segments.length === 0) {
return;
}
const leafName = segments[0];
if (segments.length > 1) {
const folderName = leafName;
const entry = folderMap.get(folderName) || {
name: folderName,
fileCount: 0,
};
entry.fileCount += 1;
folderMap.set(folderName, entry);
return;
}
const normalizedFileName = normalizePath(file.fileName);
const displayName = normalizedFileName.includes(PATH_SEPARATOR)
? leafName
: file.fileName || leafName;
fileItems.push({
...file,
name: displayName,
displayName,
fullPath,
});
});
const folderItems: KBFileRow[] = Array.from(folderMap.values()).map(
(entry) =>
({
id: `directory-${normalizedPrefix}${entry.name}`,
fileName: entry.name,
name: entry.name,
status: null,
chunkCount: 0,
createdAt: "",
updatedAt: "",
metadata: {},
knowledgeBaseId: knowledgeBase?.id || "",
fileId: "",
updatedBy: "",
createdBy: "",
isDirectory: true,
displayName: entry.name,
fullPath: `${normalizedPrefix}${entry.name}/`,
fileCount: entry.fileCount,
}) as KBFileRow
);
const sortByName = (a: KBFileRow, b: KBFileRow) =>
(a.displayName || a.name || "").localeCompare(
b.displayName || b.name || "",
"zh-Hans-CN"
);
folderItems.sort(sortByName);
fileItems.sort(sortByName);
const combined = [...folderItems, ...fileItems];
return { rows: combined, total: combined.length };
}, [allFiles, knowledgeBase?.id, normalizedPrefix]);
const filePageCurrent = filePagination.current;
const filePageSize = filePagination.pageSize;
const pagedFileRows = useMemo(() => {
const startIndex = (filePageCurrent - 1) * filePageSize;
const endIndex = startIndex + filePageSize;
return fileRows.slice(startIndex, endIndex);
}, [filePageCurrent, filePageSize, fileRows]);
useEffect(() => {
const maxPage = Math.max(1, Math.ceil(fileTotal / filePageSize));
if (filePageCurrent > maxPage) {
setFilePagination((prev) => ({ ...prev, current: maxPage }));
}
}, [filePageCurrent, filePageSize, fileTotal]);
const operations = [
{
key: "edit",
@@ -170,14 +410,38 @@ const KnowledgeBaseDetailPage: React.FC = () => {
width: 200,
ellipsis: true,
fixed: "left" as const,
render: (name: string, record: KBFileRow) => {
const displayName = record.displayName || name;
if (record.isDirectory) {
return (
<Button
type="link"
onClick={() => handleOpenDirectory(displayName)}
className="flex items-center gap-2 p-0"
>
<Folder className="w-4 h-4 text-blue-500" />
<span className="truncate">{displayName}</span>
</Button>
);
}
return (
<div className="flex items-center gap-2">
<File className="w-4 h-4 text-gray-800" />
<span className="truncate">{displayName}</span>
</div>
);
},
},
{
title: "状态",
dataIndex: "status",
key: "vectorizationStatus",
width: 120,
render: (status: unknown) => {
if (typeof status === 'object' && status !== null) {
render: (status: unknown, record: KBFileRow) => {
if (record.isDirectory) {
return <Badge color="default" text="文件夹" />;
}
if (typeof status === "object" && status !== null) {
const s = status as { color?: string; label?: string };
return <Badge color={s.color} text={s.label} />;
}
@@ -190,6 +454,8 @@ const KnowledgeBaseDetailPage: React.FC = () => {
key: "chunkCount",
width: 100,
ellipsis: true,
render: (value: number, record: KBFileRow) =>
record.isDirectory ? "-" : value ?? 0,
},
{
title: "创建时间",
@@ -197,6 +463,8 @@ const KnowledgeBaseDetailPage: React.FC = () => {
key: "createdAt",
ellipsis: true,
width: 180,
render: (value: string, record: KBFileRow) =>
record.isDirectory ? "-" : value || "-",
},
{
title: "更新时间",
@@ -204,26 +472,51 @@ const KnowledgeBaseDetailPage: React.FC = () => {
key: "updatedAt",
ellipsis: true,
width: 180,
render: (value: string, record: KBFileRow) =>
record.isDirectory ? "-" : value || "-",
},
{
title: "操作",
key: "actions",
align: "right" as const,
width: 100,
render: (_: unknown, file: KBFile) => (
<div>
{fileOps.map((op) => (
<Tooltip key={op.key} title={op.label}>
render: (_: unknown, file: KBFileRow) => {
if (file.isDirectory) {
return (
<Tooltip title="删除文件夹">
<Button
type="text"
icon={op.icon}
danger={op?.danger}
onClick={() => op.onClick(file)}
icon={<DeleteOutlined className="w-4 h-4" />}
danger
onClick={() => {
modal.confirm({
title: "确认删除该文件夹吗?",
content: `删除后将移除文件夹 “${file.displayName || file.name}” 下的全部文件,且无法恢复。`,
okText: "删除",
okType: "danger",
cancelText: "取消",
onOk: () => handleDeleteDirectory(file.displayName || file.name),
});
}}
/>
</Tooltip>
))}
</div>
),
);
}
return (
<div>
{fileOps.map((op) => (
<Tooltip key={op.key} title={op.label}>
<Button
type="text"
icon={op.icon}
danger={op?.danger}
onClick={() => op.onClick(file)}
/>
</Tooltip>
))}
</div>
);
},
},
];
@@ -265,12 +558,12 @@ const KnowledgeBaseDetailPage: React.FC = () => {
<>
<div className="flex-1">
<SearchControls
searchTerm={searchParams.keyword}
searchTerm={fileKeyword}
onSearchChange={handleKeywordChange}
searchPlaceholder="搜索文件名..."
filters={[]}
onFiltersChange={handleFiltersChange}
onClearFilters={() => setSearchParams({ ...searchParams, filter: { type: [], status: [], tags: [] } })}
onFiltersChange={() => {}}
onClearFilters={() => setFileKeyword("")}
showViewToggle={false}
showReload={false}
/>
@@ -281,14 +574,54 @@ const KnowledgeBaseDetailPage: React.FC = () => {
</div>
{activeTab === 'fileList' ? (
<Table
loading={loading}
columns={fileColumns}
dataSource={files}
rowKey="id"
pagination={pagination}
scroll={{ y: "calc(100vh - 30rem)" }}
/>
<>
<div className="mb-2">
{normalizedPrefix && (
<Button type="link" onClick={handleBackToParent} className="p-0">
<span className="flex items-center text-blue-500">
<svg
className="w-4 h-4 mr-1"
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
xmlns="http://www.w3.org/2000/svg"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth={2}
d="M10 19l-7-7m0 0l7-7m-7 7h18"
/>
</svg>
</span>
</Button>
)}
{normalizedPrefix && (
<span className="ml-2 text-gray-600">
: {normalizedPrefix}
</span>
)}
</div>
<Table
loading={filesLoading}
columns={fileColumns}
dataSource={pagedFileRows}
rowKey="id"
pagination={{
current: filePagination.current,
pageSize: filePagination.pageSize,
total: fileTotal,
showTotal: (total) => `${total}`,
onChange: (page, pageSize) =>
setFilePagination({
current: page,
pageSize: pageSize || filePagination.pageSize,
}),
}}
scroll={{ y: "calc(100vh - 30rem)" }}
/>
</>
) : (
<div className="p-2">
<div style={{ fontSize: 14, fontWeight: 300, marginBottom: 8 }}></div>

View File

@@ -37,7 +37,7 @@ export default function KnowledgeBasePage() {
await deleteKnowledgeBaseByIdUsingDelete(kb.id);
message.success("知识库删除成功");
fetchData();
} catch (error) {
} catch {
message.error("知识库删除失败");
}
};
@@ -47,7 +47,7 @@ export default function KnowledgeBasePage() {
key: "edit",
label: "编辑",
icon: <EditOutlined />,
onClick: (item) => {
onClick: (item: KnowledgeBaseItem) => {
setIsEdit(true);
setCurrentKB(item);
},
@@ -64,7 +64,7 @@ export default function KnowledgeBasePage() {
okType: "danger",
cancelText: "取消",
},
onClick: (item) => handleDeleteKB(item),
onClick: (item: KnowledgeBaseItem) => handleDeleteKB(item),
},
];
@@ -76,7 +76,7 @@ export default function KnowledgeBasePage() {
fixed: "left" as const,
width: 200,
ellipsis: true,
render: (_: any, kb: KnowledgeBaseItem) => (
render: (_: unknown, kb: KnowledgeBaseItem) => (
<Button
type="link"
onClick={() => navigate(`/data/knowledge-base/detail/${kb.id}`)}
@@ -111,7 +111,7 @@ export default function KnowledgeBasePage() {
key: "actions",
fixed: "right" as const,
width: 150,
render: (_: any, kb: KnowledgeBaseItem) => (
render: (_: unknown, kb: KnowledgeBaseItem) => (
<div className="flex items-center gap-2">
{operations.map((op) => (
<Tooltip key={op.key} title={op.label}>
@@ -132,17 +132,22 @@ export default function KnowledgeBasePage() {
<div className="h-full flex flex-col gap-4">
<div className="flex items-center justify-between">
<h1 className="text-xl font-bold"></h1>
<CreateKnowledgeBase
isEdit={isEdit}
data={currentKB}
onUpdate={() => {
fetchData();
}}
onClose={() => {
setIsEdit(false);
setCurrentKB(null);
}}
/>
<div className="flex items-center gap-2">
<Button onClick={() => navigate("/data/knowledge-base/search")}>
</Button>
<CreateKnowledgeBase
isEdit={isEdit}
data={currentKB}
onUpdate={() => {
fetchData();
}}
onClose={() => {
setIsEdit(false);
setCurrentKB(null);
}}
/>
</div>
</div>
<SearchControls
@@ -161,7 +166,9 @@ export default function KnowledgeBasePage() {
<CardView
data={tableData}
operations={operations}
onView={(item) => navigate(`/data/knowledge-base/detail/${item.id}`)}
onView={(item: KnowledgeBaseItem) =>
navigate(`/data/knowledge-base/detail/${item.id}`)
}
pagination={pagination}
/>
) : (

View File

@@ -0,0 +1,217 @@
import { useCallback, useMemo, useState } from "react";
import { App, Badge, Breadcrumb, Button, Input, Table } from "antd";
import { useNavigate } from "react-router";
import {
KBFileStatus,
KnowledgeBaseFileSearchResult,
} from "../knowledge-base.model";
import { KBFileStatusMap } from "../knowledge-base.const";
import { queryKnowledgeBaseFilesSearchUsingGet } from "../knowledge-base.api";
import { formatDateTime } from "@/utils/unit";
/** Separator used for relative paths on this page. */
const PATH_SEPARATOR = "/";

/** Replaces backslashes with forward slashes; a missing value becomes "". */
const normalizePath = (value?: string) =>
  (value ?? "").replace(/\\/g, PATH_SEPARATOR);

/**
 * Returns the directory part of a relative file path, with a trailing
 * separator, or "" when the file has no parent directory.
 */
const resolvePrefix = (relativePath?: string) => {
  const segments = normalizePath(relativePath)
    .split(PATH_SEPARATOR)
    .filter((segment) => segment.length > 0);
  // Everything except the last segment (the file name) is the directory.
  const directories = segments.slice(0, -1);
  if (directories.length === 0) {
    return "";
  }
  return directories.map((segment) => segment + PATH_SEPARATOR).join("");
};
/**
 * File search page spanning all knowledge bases.
 *
 * The user enters a file name, results are fetched page by page from the
 * search endpoint, and clicking a result (row or action button) opens the
 * owning knowledge base's detail page at the file's directory.
 */
export default function KnowledgeBaseSearch() {
  const navigate = useNavigate();
  const { message } = App.useApp();
  // Text currently in the input box.
  const [searchTerm, setSearchTerm] = useState("");
  // Keyword of the last submitted search; used when paging through results.
  const [activeKeyword, setActiveKeyword] = useState("");
  const [loading, setLoading] = useState(false);
  // Whether a search has completed; selects the empty-state text.
  const [searched, setSearched] = useState(false);
  const [results, setResults] = useState<KnowledgeBaseFileSearchResult[]>([]);
  const [pagination, setPagination] = useState({
    current: 1,
    pageSize: 10,
    total: 0,
  });

  /**
   * Fetches one page of results for `keyword`.
   * `page` is 1-based (table convention) and converted to the 0-based index
   * sent to the endpoint. An empty keyword clears the result list.
   */
  const fetchResults = useCallback(
    async (keyword: string, page?: number, pageSize?: number) => {
      const resolvedPage = page ?? pagination.current;
      const resolvedPageSize = pageSize ?? pagination.pageSize;
      if (!keyword) {
        setResults([]);
        setPagination((prev) => ({ ...prev, total: 0, current: resolvedPage }));
        setSearched(false);
        return;
      }
      setLoading(true);
      try {
        const { data } = await queryKnowledgeBaseFilesSearchUsingGet({
          fileName: keyword,
          page: Math.max(resolvedPage - 1, 0),
          size: resolvedPageSize,
        });
        const content = Array.isArray(data?.content) ? data.content : [];
        setResults(content);
        setPagination({
          current: resolvedPage,
          pageSize: resolvedPageSize,
          total: data?.totalElements ?? 0,
        });
        setSearched(true);
      } catch (error) {
        console.error("Failed to search knowledge base files:", error);
        message.error("检索失败,请稍后重试");
      } finally {
        setLoading(false);
      }
    },
    [message, pagination]
  );

  /** Submits a new search, always starting from the first page. */
  const handleSearch = (value?: string) => {
    const keyword = (value ?? searchTerm).trim();
    if (!keyword) {
      message.warning("请输入文件名");
      return;
    }
    setActiveKeyword(keyword);
    fetchResults(keyword, 1, pagination.pageSize);
  };

  /**
   * Opens the detail page of the knowledge base owning `record`, positioned
   * at the file's directory. The query string is only appended when the file
   * actually lives in a sub-directory, so the URL never ends in a bare "?".
   */
  const openFileLocation = useCallback(
    (record: KnowledgeBaseFileSearchResult) => {
      const prefix = resolvePrefix(record.relativePath);
      const query = new URLSearchParams();
      if (prefix) {
        query.set("prefix", prefix);
      }
      const queryString = query.toString();
      navigate(
        `/data/knowledge-base/detail/${record.knowledgeBaseId}${
          queryString ? `?${queryString}` : ""
        }`
      );
    },
    [navigate]
  );

  const columns = useMemo(
    () => [
      {
        title: "知识库",
        dataIndex: "knowledgeBaseName",
        key: "knowledgeBaseName",
        width: 220,
        ellipsis: true,
        render: (text: string) => text || "-",
      },
      {
        title: "文件名",
        dataIndex: "fileName",
        key: "fileName",
        width: 220,
        ellipsis: true,
      },
      {
        title: "相对路径",
        dataIndex: "relativePath",
        key: "relativePath",
        ellipsis: true,
        render: (value: string) => value || "-",
      },
      {
        title: "状态",
        dataIndex: "status",
        key: "status",
        width: 120,
        render: (status?: KBFileStatus) => {
          const config = status ? KBFileStatusMap[status] : undefined;
          if (!config) {
            return <Badge color="default" text={status || "-"} />;
          }
          return <Badge color={config.color} text={config.label} />;
        },
      },
      {
        title: "更新时间",
        dataIndex: "updatedAt",
        key: "updatedAt",
        width: 180,
        ellipsis: true,
        render: (value: string) => formatDateTime(value) || "-",
      },
      {
        title: "操作",
        key: "action",
        width: 120,
        align: "right" as const,
        render: (_: unknown, record: KnowledgeBaseFileSearchResult) => (
          <Button
            type="link"
            onClick={(event) => {
              // The row has its own click handler; without this the click
              // bubbles up and triggers a second navigation.
              event.stopPropagation();
              openFileLocation(record);
            }}
          >
          </Button>
        ),
      },
    ],
    [openFileLocation]
  );

  return (
    <div className="h-full flex flex-col gap-4">
      <Breadcrumb>
        <Breadcrumb.Item>
          <a onClick={() => navigate("/data/knowledge-base")}></a>
        </Breadcrumb.Item>
        <Breadcrumb.Item></Breadcrumb.Item>
      </Breadcrumb>
      <div className="flex items-center justify-between">
        <h1 className="text-xl font-bold"></h1>
      </div>
      <div className="flex items-center gap-3">
        <Input.Search
          allowClear
          value={searchTerm}
          onChange={(event) => setSearchTerm(event.target.value)}
          onSearch={handleSearch}
          placeholder="输入文件名,回车或点击搜索"
          enterButton="搜索"
          loading={loading}
        />
      </div>
      <Table
        rowKey="id"
        loading={loading}
        columns={columns}
        dataSource={results}
        pagination={{
          current: pagination.current,
          pageSize: pagination.pageSize,
          total: pagination.total,
          showTotal: (total) => `${total}`,
          onChange: (page, pageSize) => {
            // Paging always uses the submitted keyword, not the text that
            // may have been typed into the box since.
            const nextKeyword = activeKeyword.trim();
            if (!nextKeyword) {
              message.warning("请输入文件名");
              return;
            }
            fetchResults(nextKeyword, page, pageSize || pagination.pageSize);
          },
        }}
        locale={{
          emptyText: searched ? "暂无匹配文件" : "请输入文件名开始检索",
        }}
        onRow={(record) => ({
          onClick: () => openFileLocation(record),
        })}
      />
    </div>
  );
}

View File

@@ -15,6 +15,7 @@ import { addKnowledgeBaseFilesUsingPost } from "../knowledge-base.api";
import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
import { DescriptionsItemType } from "antd/es/descriptions";
import { DatasetFileCols } from "../knowledge-base.const";
import type { DatasetFile } from "@/pages/DataManagement/dataset.model";
export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
const [open, setOpen] = useState(false);
@@ -25,6 +26,35 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
const [selectedFilesMap, setSelectedFilesMap] = useState({});
/** Separator used for relative paths sent to the knowledge base. */
const PATH_SEPARATOR = "/";

/** Replaces backslashes with forward slashes; a missing value becomes "". */
const normalizePath = (value?: string) =>
  (value ?? "").replace(/\\/g, PATH_SEPARATOR);

/**
 * Derives the dataset-relative path of a selected file.
 *
 * Order of preference:
 * 1. the file name itself when it already contains a separator;
 * 2. the part of the storage path after the last "/<datasetId>/";
 * 3. the last segment of the storage path, or the plain file name.
 */
const resolveRelativePath = (file: DatasetFile) => {
  const stripLeadingSeparators = (path: string) => path.replace(/^\/+/, "");

  const nameAsPath = normalizePath(file.fileName);
  if (nameAsPath.indexOf(PATH_SEPARATOR) !== -1) {
    return stripLeadingSeparators(nameAsPath);
  }

  const storagePath = normalizePath(file.path || file.filePath);
  const datasetId = String(file.datasetId || "");
  const datasetMarker = PATH_SEPARATOR + datasetId + PATH_SEPARATOR;
  // Only look for the marker when both the path and the dataset id are known.
  const markerAt =
    storagePath && datasetId ? storagePath.lastIndexOf(datasetMarker) : -1;
  if (markerAt >= 0) {
    const insideDataset = stripLeadingSeparators(
      storagePath.slice(markerAt + datasetMarker.length)
    );
    if (insideDataset) {
      return insideDataset;
    }
  }

  const segments = storagePath.split(PATH_SEPARATOR);
  return segments[segments.length - 1] || file.fileName;
};
// 定义分块选项
const sliceOptions = [
{ label: "默认分块", value: "DEFAULT_CHUNK" },
@@ -129,7 +159,8 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
const requestData = {
files: Object.values(selectedFilesMap).map((file) => ({
id: String(file.id),
fileName: file.fileName,
fileName: (file as DatasetFile).fileName,
relativePath: resolveRelativePath(file as DatasetFile),
})),
processType: newKB.processType,
chunkSize: Number(newKB.chunkSize), // 确保是数字类型

View File

@@ -1,12 +1,15 @@
import { get, post, put, del } from "@/utils/request";
type RequestPayload = Record<string, unknown>;
type RequestParams = Record<string, unknown>;
// 获取知识库列表
export function queryKnowledgeBasesUsingPost(params: any) {
export function queryKnowledgeBasesUsingPost(params: RequestPayload) {
return post("/api/knowledge-base/list", params);
}
// 创建知识库
export function createKnowledgeBaseUsingPost(data: any) {
export function createKnowledgeBaseUsingPost(data: RequestPayload) {
return post("/api/knowledge-base/create", data);
}
@@ -16,7 +19,7 @@ export function queryKnowledgeBaseByIdUsingGet(baseId: string) {
}
// 更新知识库
export function updateKnowledgeBaseByIdUsingPut(baseId: string, data: any) {
export function updateKnowledgeBaseByIdUsingPut(baseId: string, data: RequestPayload) {
return put(`/api/knowledge-base/${baseId}`, data);
}
@@ -26,17 +29,22 @@ export function deleteKnowledgeBaseByIdUsingDelete(baseId: string) {
}
// 获取知识生成文件列表
export function queryKnowledgeBaseFilesUsingGet(baseId: string, data) {
export function queryKnowledgeBaseFilesUsingGet(baseId: string, data: RequestParams) {
return get(`/api/knowledge-base/${baseId}/files`, data);
}
/**
 * Searches knowledge-base files across all knowledge bases.
 *
 * @param params query parameters passed through to the search endpoint
 * @returns whatever the shared `get` helper returns for the request
 */
export function queryKnowledgeBaseFilesSearchUsingGet(params: RequestParams) {
  const endpoint = "/api/knowledge-base/files/search";
  return get(endpoint, params);
}
// 添加文件到知识库
export function addKnowledgeBaseFilesUsingPost(baseId: string, data: any) {
export function addKnowledgeBaseFilesUsingPost(baseId: string, data: RequestPayload) {
return post(`/api/knowledge-base/${baseId}/files`, data);
}
// 删除知识生成文件
export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: any) {
export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: RequestPayload) {
return del(`/api/knowledge-base/${baseId}/files`, data);
}

View File

@@ -29,50 +29,26 @@ export interface KBFile {
id: string;
fileName: string;
name?: string;
relativePath?: string;
createdAt: string;
updatedAt: string;
status: KBFileStatus;
chunkCount: number;
metadata: Record<string, any>;
metadata: Record<string, unknown>;
knowledgeBaseId: string;
fileId: string;
updatedBy: string;
createdBy: string;
}
interface Chunk {
id: number;
content: string;
position: number;
tokens: number;
embedding?: number[];
similarity?: string;
export interface KnowledgeBaseFileSearchResult {
id: string;
knowledgeBaseId: string;
knowledgeBaseName: string;
fileName: string;
relativePath?: string;
status?: KBFileStatus;
chunkCount?: number;
createdAt?: string;
updatedAt?: string;
vectorId?: string;
sliceOperator?: string;
parentChunkId?: number;
metadata?: {
source: string;
page?: number;
section?: string;
};
}
interface VectorizationRecord {
id: number;
timestamp: string;
operation: "create" | "update" | "delete" | "reprocess";
fileId: number;
fileName: string;
chunksProcessed: number;
vectorsGenerated: number;
status: "success" | "failed" | "partial";
duration: string;
config: {
embeddingModel: string;
chunkSize: number;
sliceMethod: string;
};
error?: string;
}

View File

@@ -41,7 +41,12 @@ import CreateKnowledgeSet from "../components/CreateKnowledgeSet";
import KnowledgeItemEditor from "../components/KnowledgeItemEditor";
import ImportKnowledgeItemsDialog from "../components/ImportKnowledgeItemsDialog";
import { formatDate } from "@/utils/unit";
import { PREVIEW_TEXT_MAX_LENGTH, resolvePreviewFileType, truncatePreviewText } from "@/utils/filePreview";
import {
PREVIEW_TEXT_MAX_LENGTH,
resolvePreviewFileType,
truncatePreviewText,
type PreviewFileType,
} from "@/utils/filePreview";
const PREVIEW_MAX_HEIGHT = 500;
const PREVIEW_MODAL_WIDTH = {
@@ -67,7 +72,7 @@ const KnowledgeSetDetail = () => {
const [previewVisible, setPreviewVisible] = useState(false);
const [previewContent, setPreviewContent] = useState("");
const [previewFileName, setPreviewFileName] = useState("");
const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text");
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
const [previewLoadingItemId, setPreviewLoadingItemId] = useState<string | null>(null);
@@ -560,6 +565,13 @@ const KnowledgeSetDetail = () => {
/>
</div>
)}
{previewFileType === "pdf" && (
<iframe
src={previewMediaUrl}
title={previewFileName || "PDF 预览"}
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }}
/>
)}
{previewFileType === "video" && (
<div style={{ textAlign: "center" }}>
<video

View File

@@ -30,6 +30,7 @@ import ManualEvaluatePage from "@/pages/DataEvaluation/Evaluate/ManualEvaluate";
import KnowledgeBasePage from "@/pages/KnowledgeBase/Home/KnowledgeBasePage";
import KnowledgeBaseDetailPage from "@/pages/KnowledgeBase/Detail/KnowledgeBaseDetail";
import KnowledgeBaseFileDetailPage from "@/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail";
import KnowledgeBaseSearch from "@/pages/KnowledgeBase/Search/KnowledgeBaseSearch";
import OperatorMarketPage from "@/pages/OperatorMarket/Home/OperatorMarket";
import OperatorPluginCreate from "@/pages/OperatorMarket/Create/OperatorPluginCreate";
@@ -246,6 +247,10 @@ const router = createBrowserRouter([
index: true,
Component: KnowledgeBasePage,
},
{
path: "search",
Component: KnowledgeBaseSearch,
},
{
path: "detail/:id",
Component: KnowledgeBaseDetailPage,

View File

@@ -22,8 +22,9 @@ const IMAGE_FILE_EXTENSIONS = [
];
const VIDEO_FILE_EXTENSIONS = [".mp4", ".webm", ".ogg", ".mov", ".avi"];
const AUDIO_FILE_EXTENSIONS = [".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a"];
const PDF_FILE_EXTENSIONS = [".pdf"];
export type PreviewFileType = "text" | "image" | "video" | "audio";
export type PreviewFileType = "text" | "image" | "video" | "audio" | "pdf";
export const resolvePreviewFileType = (fileName?: string): PreviewFileType | null => {
const lowerName = (fileName || "").toLowerCase();
@@ -39,6 +40,9 @@ export const resolvePreviewFileType = (fileName?: string): PreviewFileType | nul
if (AUDIO_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
return "audio";
}
if (PDF_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
return "pdf";
}
return null;
};

View File

@@ -14,7 +14,8 @@ from .user_management import (
from .annotation_management import (
AnnotationTemplate,
LabelingProject,
AnnotationResult
AnnotationResult,
LabelingProjectFile
)
from .data_evaluation import (
@@ -32,6 +33,7 @@ __all__ = [
"AnnotationTemplate",
"LabelingProject",
"AnnotationResult",
"LabelingProjectFile",
"EvaluationTask",
"EvaluationItem",
]

View File

@@ -1,7 +1,7 @@
"""Tables of Annotation Management Module"""
import uuid
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey, UniqueConstraint, Index
from sqlalchemy.sql import func
from app.db.session import Base
@@ -59,6 +59,26 @@ class LabelingProject(Base):
return self.deleted_at is not None
class LabelingProjectFile(Base):
    """Labeling project file snapshot model.

    Each row links one labeling project to one file; the pair
    (project_id, file_id) is unique.
    """

    __tablename__ = "t_dm_labeling_project_files"
    __table_args__ = (
        UniqueConstraint("project_id", "file_id", name="uk_project_file"),
        Index("idx_project_id", "project_id"),
        Index("idx_file_id", "file_id"),
    )

    # Primary key, generated client-side as a UUID4 string.
    id = Column(
        String(36),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
        comment="UUID",
    )
    # Stored as plain ids; no ForeignKey is declared on either column.
    project_id = Column(
        String(36),
        nullable=False,
        comment="标注项目ID",
    )
    file_id = Column(
        String(36),
        nullable=False,
        comment="文件ID",
    )
    # Filled in by the database at insert time.
    created_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        comment="创建时间",
    )

    def __repr__(self):
        return f"<LabelingProjectFile(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
class AnnotationResult(Base):
"""标注结果模型(单人单份最终标签,Label Studio annotation 原始 JSON)"""

View File

@@ -3,10 +3,11 @@ import math
import uuid
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.db.models import LabelingProject
from app.db.models import LabelingProject, DatasetFiles
from app.module.shared.schema import StandardResponse, PaginatedData
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
@@ -116,8 +117,35 @@ async def create_mapping(
configuration=project_configuration or None,
)
# 创建映射关系,包含项目名称(先持久化映射以获得 mapping.id)
mapping = await mapping_service.create_mapping(labeling_project)
file_result = await db.execute(
select(DatasetFiles).where(DatasetFiles.dataset_id == request.dataset_id)
)
file_records = file_result.scalars().all()
snapshot_file_ids: list[str] = []
if dataset_type == TEXT_DATASET_TYPE:
derived_source_ids = set()
for file_record in file_records:
metadata = getattr(file_record, "dataset_filemetadata", None)
if isinstance(metadata, dict):
source_id = metadata.get("derived_from_file_id")
if source_id:
derived_source_ids.add(str(source_id))
snapshot_file_ids = [
str(file_record.id)
for file_record in file_records
if file_record.id and str(file_record.id) not in derived_source_ids
]
else:
snapshot_file_ids = [
str(file_record.id)
for file_record in file_records
if file_record.id
]
# 创建映射关系并写入快照
mapping = await mapping_service.create_mapping_with_snapshot(
labeling_project, snapshot_file_ids
)
response_data = DatasetMappingCreateResponse(
id=mapping.id,

View File

@@ -23,7 +23,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.logging import get_logger
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
from app.module.annotation.config import LabelStudioTagConfig
from app.module.annotation.schema.editor import (
EditorProjectInfo,
@@ -429,21 +429,16 @@ class AnnotationEditorService:
exclude_source_documents: Optional[bool] = None,
) -> EditorTaskListResponse:
project = await self._get_project_or_404(project_id)
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
should_exclude_source_documents = False
if dataset_type == DATASET_TYPE_TEXT:
should_exclude_source_documents = (
exclude_source_documents if exclude_source_documents is not None else True
)
base_conditions = [DatasetFiles.dataset_id == project.dataset_id]
if should_exclude_source_documents:
base_conditions.append(~self._build_source_document_filter())
base_conditions = [
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == project.dataset_id,
]
count_result = await self.db.execute(
select(func.count()).select_from(DatasetFiles).where(
*base_conditions
)
select(func.count())
.select_from(LabelingProjectFile)
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
.where(*base_conditions)
)
total = int(count_result.scalar() or 0)
@@ -453,6 +448,7 @@ class AnnotationEditorService:
)
files_result = await self.db.execute(
select(DatasetFiles, AnnotationResult.id, AnnotationResult.updated_at)
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
.outerjoin(
AnnotationResult,
(AnnotationResult.file_id == DatasetFiles.id)
@@ -827,7 +823,10 @@ class AnnotationEditorService:
# 校验文件归属
file_result = await self.db.execute(
select(DatasetFiles).where(
select(DatasetFiles)
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
.where(
LabelingProjectFile.project_id == project.id,
DatasetFiles.id == file_id,
DatasetFiles.dataset_id == project.dataset_id,
)

View File

@@ -25,7 +25,33 @@ from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
    """Read the content of a text file without blocking the event loop.

    Args:
        file_path: Path of the file to read.
        max_size: Maximum file size in bytes that will be read (default 10 MB).

    Returns:
        The decoded file content (UTF-8, undecodable bytes replaced), the
        placeholder string ``"[File too large: <size> bytes]"`` when the file
        exceeds ``max_size``, or ``None`` when the path is empty, is not a
        regular file, or cannot be read.
    """
    # Local import keeps this helper self-contained; asyncio.to_thread needs Python 3.9+.
    import asyncio

    def _read_sync() -> Optional[str]:
        # Empty/None paths and anything that is not a regular file
        # (missing path, directory, ...) yield no content.
        if not file_path or not os.path.isfile(file_path):
            return None

        file_size = os.path.getsize(file_path)
        if file_size > max_size:
            return f"[File too large: {file_size} bytes]"

        # errors='replace' keeps the read from failing on non-UTF-8 bytes.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()

    try:
        # File system calls are blocking; run them in a worker thread so other
        # coroutines keep running while the file is read.
        return await asyncio.to_thread(_read_sync)
    except Exception:
        # Deliberate best-effort: any read failure is reported as "no content".
        return None
from ..schema.export import (
AnnotationExportItem,
@@ -49,15 +75,18 @@ class AnnotationExportService:
project = await self._get_project_or_404(project_id)
logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}")
# 获取总文件数(只统计 ACTIVE 状态的文件)
# 获取总文件数(标注项目快照内的文件)
total_result = await self.db.execute(
select(func.count()).select_from(DatasetFiles).where(
select(func.count())
.select_from(LabelingProjectFile)
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
.where(
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == project.dataset_id,
DatasetFiles.status == "ACTIVE",
)
)
total_files = int(total_result.scalar() or 0)
logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}")
logger.info(f"Total files (snapshot): {total_files} for project_id={project_id}")
# 获取已标注文件数(统计不同的 file_id 数量)
annotated_result = await self.db.execute(
@@ -139,30 +168,43 @@ class AnnotationExportService:
# 只获取已标注的数据
result = await self.db.execute(
select(AnnotationResult, DatasetFiles)
.join(LabelingProjectFile, LabelingProjectFile.file_id == AnnotationResult.file_id)
.join(DatasetFiles, AnnotationResult.file_id == DatasetFiles.id)
.where(AnnotationResult.project_id == project_id)
.where(
AnnotationResult.project_id == project_id,
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == dataset_id,
)
.order_by(AnnotationResult.updated_at.desc())
)
rows = result.all()
for ann, file in rows:
annotation_data = ann.annotation or {}
# 获取文件内容(如果是文本文件且用户要求包含数据)
file_content = None
if include_data:
file_path = getattr(file, "file_path", "")
file_content = await _read_file_content(file_path)
items.append(
AnnotationExportItem(
file_id=str(file.id),
file_name=str(getattr(file, "file_name", "")),
data={"text": ""} if include_data else None, # TEXT 类型数据需要单独获取
data={"text": file_content} if include_data else None,
annotations=[annotation_data] if annotation_data else [],
created_at=ann.created_at,
updated_at=ann.updated_at,
)
)
else:
# 获取所有文件,包括未标注的(只获取 ACTIVE 状态的文件
# 获取所有文件(基于标注项目快照
files_result = await self.db.execute(
select(DatasetFiles).where(
select(DatasetFiles)
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
.where(
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == dataset_id,
DatasetFiles.status == "ACTIVE",
)
)
files = files_result.scalars().all()
@@ -178,11 +220,17 @@ class AnnotationExportService:
ann = annotations.get(file_id)
annotation_data = ann.annotation if ann else {}
# 获取文件内容(如果是文本文件且用户要求包含数据)
file_content = None
if include_data:
file_path = getattr(file, "file_path", "")
file_content = await _read_file_content(file_path)
items.append(
AnnotationExportItem(
file_id=file_id,
file_name=str(getattr(file, "file_name", "")),
data={"text": ""} if include_data else None,
data={"text": file_content} if include_data else None,
annotations=[annotation_data] if annotation_data else [],
created_at=ann.created_at if ann else None,
updated_at=ann.updated_at if ann else None,
@@ -256,12 +304,14 @@ class AnnotationExportService:
writer.writeheader()
for item in items:
# 提取标签信息
# 提取标签信息(支持多种标注类型)
labels = []
for ann in item.annotations:
results = ann.get("result", [])
for r in results:
value = r.get("value", {})
label_type = r.get("type", "")
# 提取不同类型的标签值
if "choices" in value:
labels.extend(value["choices"])
@@ -269,6 +319,18 @@ class AnnotationExportService:
labels.append(value["text"])
elif "labels" in value:
labels.extend(value["labels"])
elif "rectanglelabels" in value:
labels.extend(value["rectanglelabels"])
elif "polygonlabels" in value:
labels.extend(value["polygonlabels"])
elif "brushlabels" in value:
labels.extend(value["brushlabels"])
elif "hypertextlabels" in value:
labels.extend(value["hypertextlabels"])
elif "timeserieslabels" in value:
labels.extend(value["timeserieslabels"])
elif "transcription" in value:
labels.append(value["transcription"])
writer.writerow({
"file_id": item.file_id,
@@ -286,7 +348,11 @@ class AnnotationExportService:
def _export_coco(
self, items: List[AnnotationExportItem], project_name: str
) -> Tuple[bytes, str, str]:
"""导出为 COCO 格式(适用于目标检测标注)"""
"""导出为 COCO 格式(适用于目标检测标注)
注意:当前实现中图片宽高被设置为0,因为需要读取实际图片文件获取尺寸。
bbox 坐标使用 Label Studio 的百分比值(0-100),使用时需要转换为像素坐标。
"""
coco_format = COCOExportFormat(
info={
"description": f"Exported from DataMate project: {project_name}",

View File

@@ -1,13 +1,13 @@
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy import update, func
from sqlalchemy import update, func, insert
from sqlalchemy.orm import aliased
from typing import Optional, List, Tuple
from datetime import datetime
import uuid
from app.core.logging import get_logger
from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult
from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult, LabelingProjectFile
from app.db.models.dataset_management import Dataset, DatasetFiles
from app.module.annotation.schema import (
DatasetMappingCreateRequest,
@@ -24,6 +24,8 @@ class DatasetMappingService:
def __init__(self, db: AsyncSession):
self.db = db
SNAPSHOT_INSERT_BATCH_SIZE = 500
def _build_query_with_dataset_name(self):
"""Build base query with dataset name joined"""
return select(
@@ -49,11 +51,14 @@ class DatasetMappingService:
Returns:
(total_count, annotated_count) 元组
"""
# 获取数据集总数据量(统计 ACTIVE 和 COMPLETED 状态的文件)
# 获取标注项目快照数据量(统计快照内的文件)
total_result = await self.db.execute(
select(func.count()).select_from(DatasetFiles).where(
select(func.count())
.select_from(LabelingProjectFile)
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
.where(
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == dataset_id,
DatasetFiles.status.in_(["ACTIVE", "COMPLETED"]),
)
)
total_count = int(total_result.scalar() or 0)
@@ -214,6 +219,48 @@ class DatasetMappingService:
logger.debug(f"Mapping created: {labeling_project.id}")
return await self._to_response(labeling_project)
async def create_mapping_with_snapshot(
    self,
    labeling_project: LabelingProject,
    file_ids: List[str],
) -> DatasetMappingResponse:
    """Persist a labeling project together with its file snapshot.

    The project row and the snapshot rows are written in the same
    transaction: the project is added and flushed, the snapshot rows are
    inserted, and a single commit finalizes both.

    Args:
        labeling_project: The project entity to persist.
        file_ids: IDs of the files to record in the project's snapshot.
            An empty list creates the project without snapshot rows.

    Returns:
        The response built by ``_to_response`` for the persisted project.

    Raises:
        RuntimeError: If the project has no ``id`` after the flush.
        Exception: Any error raised while flushing, inserting or committing
            is re-raised after the session has been rolled back.
    """
    logger.debug(
        "Create dataset mapping with snapshot: %s -> %s, files=%d",
        labeling_project.dataset_id,
        labeling_project.labeling_project_id,
        len(file_ids),
    )

    try:
        self.db.add(labeling_project)
        # Flush before inserting snapshot rows: they reference the project's id.
        await self.db.flush()
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if not labeling_project.id:
            raise RuntimeError("labeling_project.id must be set before snapshot insert")
        if file_ids:
            await self._insert_snapshot_records(labeling_project.id, file_ids)
        await self.db.commit()
    except Exception:
        # Leave the session usable and make sure no partial write survives.
        await self.db.rollback()
        raise

    await self.db.refresh(labeling_project)
    logger.debug("Mapping created with snapshot: %s", labeling_project.id)
    return await self._to_response(labeling_project)
async def _insert_snapshot_records(self, project_id: str, file_ids: List[str]) -> None:
    """Insert snapshot rows linking ``project_id`` to each of ``file_ids``.

    Rows are written in chunks of ``SNAPSHOT_INSERT_BATCH_SIZE`` so a large
    dataset does not produce one oversized INSERT statement. The statements
    are executed on the current session; committing is left to the caller.

    Args:
        project_id: ID of the labeling project that owns the snapshot.
        file_ids: File IDs to record. Repeated IDs are written only once,
            because (project_id, file_id) is covered by the
            ``uk_project_file`` unique constraint and a duplicate would
            make the whole insert fail.
    """
    # dict.fromkeys drops repeats while keeping first-seen order.
    unique_ids = list(dict.fromkeys(file_ids))
    # Guard against a non-positive batch size, which would break range().
    step = max(1, self.SNAPSHOT_INSERT_BATCH_SIZE)

    for start in range(0, len(unique_ids), step):
        rows = [
            {
                "id": str(uuid.uuid4()),
                "project_id": project_id,
                "file_id": file_id,
            }
            for file_id in unique_ids[start:start + step]
        ]
        await self.db.execute(insert(LabelingProjectFile).values(rows))
async def get_mapping_by_source_uuid(
self,
dataset_id: str

View File

@@ -48,6 +48,17 @@ CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';
-- Labeling project file snapshot table.
-- One row per (project_id, file_id) pair; uk_project_file enforces that a
-- file is recorded at most once per project.
-- NOTE(review): uk_project_file has project_id as its leading column, so
-- idx_project_id may be redundant in MySQL; the ORM model declares the same
-- index, so confirm both places before dropping it.
CREATE TABLE IF NOT EXISTS t_dm_labeling_project_files (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID',
file_id VARCHAR(36) NOT NULL COMMENT '文件ID',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
UNIQUE KEY uk_project_file (project_id, file_id),
INDEX idx_project_id (project_id),
INDEX idx_file_id (file_id)
) COMMENT='标注项目文件快照表';
-- 标注结果表
CREATE TABLE IF NOT EXISTS t_dm_annotation_results (
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',

View File

@@ -18,6 +18,7 @@ create table if not exists t_rag_file
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
knowledge_base_id VARCHAR(36) NOT NULL COMMENT '知识库ID',
file_name VARCHAR(255) NOT NULL COMMENT '文件名',
relative_path VARCHAR(512) NULL COMMENT '相对路径',
file_id VARCHAR(255) NOT NULL COMMENT '文件ID',
chunk_count INT COMMENT '切片数',
metadata JSON COMMENT '元数据',
@@ -28,3 +29,6 @@ create table if not exists t_rag_file
created_by VARCHAR(255) COMMENT '创建者',
updated_by VARCHAR(255) COMMENT '更新者'
) comment '知识库切片表';
-- Composite indexes on t_rag_file: (knowledge_base_id, file_name) and
-- (knowledge_base_id, relative_path).
-- NOTE(review): unlike the "create table if not exists" for t_rag_file, these
-- statements are not guarded; re-running the script presumably fails once
-- the indexes exist. Confirm how this script is executed.
create index idx_rag_file_kb_name on t_rag_file (knowledge_base_id, file_name);
create index idx_rag_file_kb_path on t_rag_file (knowledge_base_id, relative_path);