You've already forked DataMate
Compare commits
10 Commits
accaa47a83
...
76f70a6847
| Author | SHA1 | Date | |
|---|---|---|---|
| 76f70a6847 | |||
| cbad129ce4 | |||
| ca7ff56610 | |||
| a00a6ed3c3 | |||
| 9a205919d7 | |||
| 8b2a19f09a | |||
| 3c3ca130b3 | |||
| a4cdaecf8a | |||
| 6dfed934a5 | |||
| bd37858ccc |
@@ -73,7 +73,7 @@ public class DatasetApplicationService {
|
||||
Dataset dataset = DatasetConverter.INSTANCE.convertToDataset(createDatasetRequest);
|
||||
Dataset parentDataset = resolveParentDataset(createDatasetRequest.getParentDatasetId(), dataset.getId());
|
||||
dataset.setParentDatasetId(parentDataset == null ? null : parentDataset.getId());
|
||||
dataset.initCreateParam(datasetBasePath, parentDataset == null ? null : parentDataset.getPath());
|
||||
dataset.initCreateParam(datasetBasePath);
|
||||
// 处理标签
|
||||
Set<Tag> processedTags = Optional.ofNullable(createDatasetRequest.getTags())
|
||||
.filter(CollectionUtils::isNotEmpty)
|
||||
@@ -291,7 +291,9 @@ public class DatasetApplicationService {
|
||||
|
||||
private void handleParentChange(Dataset dataset, String parentDatasetId) {
|
||||
String normalized = normalizeParentId(parentDatasetId);
|
||||
if (Objects.equals(dataset.getParentDatasetId(), normalized)) {
|
||||
String expectedPath = buildDatasetPath(datasetBasePath, dataset.getId());
|
||||
if (Objects.equals(dataset.getParentDatasetId(), normalized)
|
||||
&& Objects.equals(dataset.getPath(), expectedPath)) {
|
||||
return;
|
||||
}
|
||||
long childCount = datasetRepository.countByParentId(dataset.getId());
|
||||
@@ -299,8 +301,7 @@ public class DatasetApplicationService {
|
||||
throw BusinessException.of(DataManagementErrorCode.DATASET_HAS_CHILDREN);
|
||||
}
|
||||
Dataset parent = normalized == null ? null : resolveParentDataset(normalized, dataset.getId());
|
||||
String newPath = buildDatasetPath(parent == null ? datasetBasePath : parent.getPath(), dataset.getId());
|
||||
moveDatasetPath(dataset, newPath);
|
||||
moveDatasetPath(dataset, expectedPath);
|
||||
dataset.setParentDatasetId(parent == null ? null : parent.getId());
|
||||
}
|
||||
|
||||
@@ -413,33 +414,32 @@ public class DatasetApplicationService {
|
||||
public void processDataSourceAsync(String datasetId, String dataSourceId) {
|
||||
try {
|
||||
log.info("Initiating data source file scanning, dataset ID: {}, collection task ID: {}", datasetId, dataSourceId);
|
||||
List<String> filePaths = getFilePaths(dataSourceId);
|
||||
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
|
||||
if (taskDetail == null) {
|
||||
log.warn("Fail to get collection task detail, task ID: {}", dataSourceId);
|
||||
return;
|
||||
}
|
||||
Path targetPath = Paths.get(taskDetail.getTargetPath());
|
||||
if (!Files.exists(targetPath) || !Files.isDirectory(targetPath)) {
|
||||
log.warn("Target path not exists or is not a directory: {}", taskDetail.getTargetPath());
|
||||
return;
|
||||
}
|
||||
List<String> filePaths = scanFilePaths(targetPath);
|
||||
if (CollectionUtils.isEmpty(filePaths)) {
|
||||
return;
|
||||
}
|
||||
datasetFileApplicationService.copyFilesToDatasetDir(datasetId, new CopyFilesRequest(filePaths));
|
||||
datasetFileApplicationService.copyFilesToDatasetDirWithSourceRoot(datasetId, targetPath, filePaths);
|
||||
log.info("Success file scan, total files: {}", filePaths.size());
|
||||
} catch (Exception e) {
|
||||
log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e);
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getFilePaths(String dataSourceId) {
|
||||
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
|
||||
if (taskDetail == null) {
|
||||
log.warn("Fail to get collection task detail, task ID: {}", dataSourceId);
|
||||
return Collections.emptyList();
|
||||
}
|
||||
Path targetPath = Paths.get(taskDetail.getTargetPath());
|
||||
if (!Files.exists(targetPath) || !Files.isDirectory(targetPath)) {
|
||||
log.warn("Target path not exists or is not a directory: {}", taskDetail.getTargetPath());
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
try (Stream<Path> paths = Files.walk(targetPath, 1)) {
|
||||
private List<String> scanFilePaths(Path targetPath) {
|
||||
try (Stream<Path> paths = Files.walk(targetPath)) {
|
||||
return paths
|
||||
.filter(Files::isRegularFile) // 只保留文件,排除目录
|
||||
.map(Path::toString) // 转换为字符串路径
|
||||
.filter(Files::isRegularFile)
|
||||
.map(Path::toString)
|
||||
.collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
log.error("Fail to scan directory: {}", targetPath, e);
|
||||
|
||||
@@ -695,17 +695,17 @@ public class DatasetFileApplicationService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 复制文件到数据集目录
|
||||
*
|
||||
* @param datasetId 数据集id
|
||||
* @param req 复制文件请求
|
||||
* @return 复制的文件列表
|
||||
*/
|
||||
@Transactional
|
||||
public List<DatasetFile> copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
|
||||
/**
|
||||
* 复制文件到数据集目录
|
||||
*
|
||||
* @param datasetId 数据集id
|
||||
* @param req 复制文件请求
|
||||
* @return 复制的文件列表
|
||||
*/
|
||||
@Transactional
|
||||
public List<DatasetFile> copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
|
||||
List<DatasetFile> copiedFiles = new ArrayList<>();
|
||||
List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
dataset.setFiles(existDatasetFiles);
|
||||
@@ -735,15 +735,80 @@ public class DatasetFileApplicationService {
|
||||
datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100);
|
||||
dataset.active();
|
||||
datasetRepository.updateById(dataset);
|
||||
CompletableFuture.runAsync(() -> copyFilesToDatasetDir(req.sourcePaths(), dataset));
|
||||
return copiedFiles;
|
||||
}
|
||||
|
||||
private void copyFilesToDatasetDir(List<String> sourcePaths, Dataset dataset) {
|
||||
for (String sourcePath : sourcePaths) {
|
||||
Path sourceFilePath = Paths.get(sourcePath);
|
||||
Path targetFilePath = Paths.get(dataset.getPath(), sourceFilePath.getFileName().toString());
|
||||
try {
|
||||
CompletableFuture.runAsync(() -> copyFilesToDatasetDir(req.sourcePaths(), dataset));
|
||||
return copiedFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* 复制文件到数据集目录(保留相对路径,适用于数据源导入)
|
||||
*
|
||||
* @param datasetId 数据集id
|
||||
* @param sourceRoot 数据源根目录
|
||||
* @param sourcePaths 源文件路径列表
|
||||
* @return 复制的文件列表
|
||||
*/
|
||||
@Transactional
|
||||
public List<DatasetFile> copyFilesToDatasetDirWithSourceRoot(String datasetId, Path sourceRoot, List<String> sourcePaths) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
|
||||
|
||||
Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
|
||||
List<DatasetFile> copiedFiles = new ArrayList<>();
|
||||
List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
dataset.setFiles(existDatasetFiles);
|
||||
Map<String, DatasetFile> copyTargets = new LinkedHashMap<>();
|
||||
|
||||
for (String sourceFilePath : sourcePaths) {
|
||||
if (sourceFilePath == null || sourceFilePath.isBlank()) {
|
||||
continue;
|
||||
}
|
||||
Path sourcePath = Paths.get(sourceFilePath).toAbsolutePath().normalize();
|
||||
if (!sourcePath.startsWith(normalizedRoot)) {
|
||||
log.warn("Source file path is out of root: {}", sourceFilePath);
|
||||
continue;
|
||||
}
|
||||
if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
|
||||
log.warn("Source file does not exist or is not a regular file: {}", sourceFilePath);
|
||||
continue;
|
||||
}
|
||||
|
||||
Path relativePath = normalizedRoot.relativize(sourcePath);
|
||||
String fileName = sourcePath.getFileName().toString();
|
||||
File sourceFile = sourcePath.toFile();
|
||||
LocalDateTime currentTime = LocalDateTime.now();
|
||||
Path targetPath = Paths.get(dataset.getPath(), relativePath.toString());
|
||||
|
||||
DatasetFile datasetFile = DatasetFile.builder()
|
||||
.id(UUID.randomUUID().toString())
|
||||
.datasetId(datasetId)
|
||||
.fileName(fileName)
|
||||
.fileType(AnalyzerUtils.getExtension(fileName))
|
||||
.fileSize(sourceFile.length())
|
||||
.filePath(targetPath.toString())
|
||||
.uploadTime(currentTime)
|
||||
.lastAccessTime(currentTime)
|
||||
.build();
|
||||
setDatasetFileId(datasetFile, dataset);
|
||||
dataset.addFile(datasetFile);
|
||||
copiedFiles.add(datasetFile);
|
||||
copyTargets.put(sourceFilePath, datasetFile);
|
||||
}
|
||||
|
||||
if (copiedFiles.isEmpty()) {
|
||||
return copiedFiles;
|
||||
}
|
||||
datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100);
|
||||
dataset.active();
|
||||
datasetRepository.updateById(dataset);
|
||||
CompletableFuture.runAsync(() -> copyFilesToDatasetDirWithRelativePath(copyTargets, dataset, normalizedRoot));
|
||||
return copiedFiles;
|
||||
}
|
||||
|
||||
private void copyFilesToDatasetDir(List<String> sourcePaths, Dataset dataset) {
|
||||
for (String sourcePath : sourcePaths) {
|
||||
Path sourceFilePath = Paths.get(sourcePath);
|
||||
Path targetFilePath = Paths.get(dataset.getPath(), sourceFilePath.getFileName().toString());
|
||||
try {
|
||||
Files.createDirectories(Path.of(dataset.getPath()));
|
||||
Files.copy(sourceFilePath, targetFilePath);
|
||||
DatasetFile datasetFile = datasetFileRepository.findByDatasetIdAndFileName(
|
||||
@@ -753,10 +818,39 @@ public class DatasetFileApplicationService {
|
||||
triggerPdfTextExtraction(dataset, datasetFile);
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void copyFilesToDatasetDirWithRelativePath(
|
||||
Map<String, DatasetFile> copyTargets,
|
||||
Dataset dataset,
|
||||
Path sourceRoot
|
||||
) {
|
||||
Path datasetRoot = Paths.get(dataset.getPath()).toAbsolutePath().normalize();
|
||||
Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
|
||||
for (Map.Entry<String, DatasetFile> entry : copyTargets.entrySet()) {
|
||||
Path sourcePath = Paths.get(entry.getKey()).toAbsolutePath().normalize();
|
||||
if (!sourcePath.startsWith(normalizedRoot)) {
|
||||
log.warn("Source file path is out of root: {}", sourcePath);
|
||||
continue;
|
||||
}
|
||||
Path relativePath = normalizedRoot.relativize(sourcePath);
|
||||
Path targetFilePath = datasetRoot.resolve(relativePath).normalize();
|
||||
if (!targetFilePath.startsWith(datasetRoot)) {
|
||||
log.warn("Target file path is out of dataset path: {}", targetFilePath);
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
Files.createDirectories(targetFilePath.getParent());
|
||||
Files.copy(sourcePath, targetFilePath);
|
||||
triggerPdfTextExtraction(dataset, entry.getValue());
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 添加文件到数据集(仅创建数据库记录,不执行文件系统操作)
|
||||
*
|
||||
|
||||
@@ -114,9 +114,9 @@ public class Dataset extends BaseEntity<String> {
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
|
||||
public void initCreateParam(String datasetBasePath, String parentPath) {
|
||||
public void initCreateParam(String datasetBasePath) {
|
||||
this.id = UUID.randomUUID().toString();
|
||||
String basePath = normalizeBasePath(parentPath != null && !parentPath.isBlank() ? parentPath : datasetBasePath);
|
||||
String basePath = normalizeBasePath(datasetBasePath);
|
||||
this.path = basePath + File.separator + this.id;
|
||||
if (this.status == null) {
|
||||
this.status = DatasetStatusType.DRAFT;
|
||||
|
||||
@@ -42,9 +42,9 @@ public enum DataManagementErrorCode implements ErrorCode {
|
||||
*/
|
||||
DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"),
|
||||
/**
|
||||
* 存在子数据集
|
||||
* 存在关联数据集
|
||||
*/
|
||||
DATASET_HAS_CHILDREN("data_management.0008", "存在子数据集,禁止删除或移动"),
|
||||
DATASET_HAS_CHILDREN("data_management.0008", "存在关联数据集,禁止删除或移动"),
|
||||
/**
|
||||
* 数据集文件不存在
|
||||
*/
|
||||
|
||||
@@ -36,7 +36,9 @@ import org.springframework.util.StringUtils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 知识库服务类
|
||||
@@ -47,6 +49,7 @@ import java.util.Optional;
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class KnowledgeBaseService {
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private final KnowledgeBaseRepository knowledgeBaseRepository;
|
||||
private final RagFileRepository ragFileRepository;
|
||||
private final ApplicationEventPublisher eventPublisher;
|
||||
@@ -146,6 +149,7 @@ public class KnowledgeBaseService {
|
||||
ragFile.setKnowledgeBaseId(knowledgeBase.getId());
|
||||
ragFile.setFileId(fileInfo.id());
|
||||
ragFile.setFileName(fileInfo.fileName());
|
||||
ragFile.setRelativePath(normalizeRelativePath(fileInfo.relativePath()));
|
||||
ragFile.setStatus(FileStatus.UNPROCESSED);
|
||||
return ragFile;
|
||||
}).toList();
|
||||
@@ -153,6 +157,17 @@ public class KnowledgeBaseService {
|
||||
eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase, request));
|
||||
}
|
||||
|
||||
private String normalizeRelativePath(String relativePath) {
|
||||
if (!StringUtils.hasText(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
|
||||
IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
|
||||
request.setKnowledgeBaseId(knowledgeBaseId);
|
||||
@@ -160,6 +175,41 @@ public class KnowledgeBaseService {
|
||||
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
|
||||
}
|
||||
|
||||
public PagedResponse<KnowledgeBaseFileSearchResp> searchFiles(KnowledgeBaseFileSearchReq request) {
|
||||
IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
|
||||
page = ragFileRepository.searchPage(page, request);
|
||||
List<RagFile> records = page.getRecords();
|
||||
if (records.isEmpty()) {
|
||||
return PagedResponse.of(Collections.emptyList(), page.getCurrent(), page.getTotal(), page.getPages());
|
||||
}
|
||||
|
||||
List<String> knowledgeBaseIds = records.stream()
|
||||
.map(RagFile::getKnowledgeBaseId)
|
||||
.filter(StringUtils::hasText)
|
||||
.distinct()
|
||||
.toList();
|
||||
Map<String, String> knowledgeBaseNameMap = knowledgeBaseRepository.listByIds(knowledgeBaseIds).stream()
|
||||
.collect(Collectors.toMap(KnowledgeBase::getId, KnowledgeBase::getName));
|
||||
|
||||
List<KnowledgeBaseFileSearchResp> responses = records.stream()
|
||||
.map(file -> {
|
||||
KnowledgeBaseFileSearchResp resp = new KnowledgeBaseFileSearchResp();
|
||||
resp.setId(file.getId());
|
||||
resp.setKnowledgeBaseId(file.getKnowledgeBaseId());
|
||||
resp.setKnowledgeBaseName(knowledgeBaseNameMap.getOrDefault(file.getKnowledgeBaseId(), ""));
|
||||
resp.setFileName(file.getFileName());
|
||||
resp.setRelativePath(file.getRelativePath());
|
||||
resp.setChunkCount(file.getChunkCount());
|
||||
resp.setStatus(file.getStatus());
|
||||
resp.setCreatedAt(file.getCreatedAt());
|
||||
resp.setUpdatedAt(file.getUpdatedAt());
|
||||
return resp;
|
||||
})
|
||||
.toList();
|
||||
|
||||
return PagedResponse.of(responses, page.getCurrent(), page.getTotal(), page.getPages());
|
||||
}
|
||||
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public void deleteFiles(String knowledgeBaseId, DeleteFilesReq request) {
|
||||
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
|
||||
@@ -222,4 +272,4 @@ public class KnowledgeBaseService {
|
||||
});
|
||||
return searchResults;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,10 @@ public class RagFile extends BaseEntity<String> {
|
||||
* 文件名
|
||||
*/
|
||||
private String fileName;
|
||||
/**
|
||||
* 相对路径
|
||||
*/
|
||||
private String relativePath;
|
||||
/**
|
||||
* 文件ID
|
||||
*/
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.datamate.rag.indexer.domain.repository;
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.repository.IRepository;
|
||||
import com.datamate.rag.indexer.domain.model.RagFile;
|
||||
import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseFileSearchReq;
|
||||
import com.datamate.rag.indexer.interfaces.dto.RagFileReq;
|
||||
|
||||
import java.util.List;
|
||||
@@ -21,4 +22,6 @@ public interface RagFileRepository extends IRepository<RagFile> {
|
||||
List<RagFile> findAllByKnowledgeBaseId(String knowledgeBaseId);
|
||||
|
||||
IPage<RagFile> page(IPage<RagFile> page, RagFileReq request);
|
||||
|
||||
IPage<RagFile> searchPage(IPage<RagFile> page, KnowledgeBaseFileSearchReq request);
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import com.datamate.rag.indexer.domain.model.FileStatus;
|
||||
import com.datamate.rag.indexer.domain.model.RagFile;
|
||||
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
|
||||
import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
|
||||
import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseFileSearchReq;
|
||||
import com.datamate.rag.indexer.interfaces.dto.RagFileReq;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import org.springframework.util.StringUtils;
|
||||
@@ -20,6 +21,7 @@ import java.util.List;
|
||||
*/
|
||||
@Repository
|
||||
public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile> implements RagFileRepository {
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
@Override
|
||||
public void removeByKnowledgeBaseId(String knowledgeBaseId) {
|
||||
lambdaUpdate().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId).remove();
|
||||
@@ -45,6 +47,27 @@ public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile
|
||||
return lambdaQuery()
|
||||
.eq(RagFile::getKnowledgeBaseId, request.getKnowledgeBaseId())
|
||||
.like(StringUtils.hasText(request.getFileName()), RagFile::getFileName, request.getFileName())
|
||||
.likeRight(StringUtils.hasText(request.getRelativePath()), RagFile::getRelativePath, normalizeRelativePath(request.getRelativePath()))
|
||||
.page(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IPage<RagFile> searchPage(IPage<RagFile> page, KnowledgeBaseFileSearchReq request) {
|
||||
return lambdaQuery()
|
||||
.eq(StringUtils.hasText(request.getKnowledgeBaseId()), RagFile::getKnowledgeBaseId, request.getKnowledgeBaseId())
|
||||
.like(StringUtils.hasText(request.getFileName()), RagFile::getFileName, request.getFileName())
|
||||
.likeRight(StringUtils.hasText(request.getRelativePath()), RagFile::getRelativePath, normalizeRelativePath(request.getRelativePath()))
|
||||
.page(page);
|
||||
}
|
||||
|
||||
private String normalizeRelativePath(String relativePath) {
|
||||
if (!StringUtils.hasText(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,6 +105,17 @@ public class KnowledgeBaseController {
|
||||
return knowledgeBaseService.listFiles(knowledgeBaseId, request);
|
||||
}
|
||||
|
||||
/**
|
||||
* 全库检索知识库文件(跨知识库)
|
||||
*
|
||||
* @param request 检索请求
|
||||
* @return 文件列表
|
||||
*/
|
||||
@GetMapping("/files/search")
|
||||
public PagedResponse<KnowledgeBaseFileSearchResp> searchFiles(KnowledgeBaseFileSearchReq request) {
|
||||
return knowledgeBaseService.searchFiles(request);
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除知识库文件
|
||||
*
|
||||
@@ -141,4 +152,4 @@ public class KnowledgeBaseController {
|
||||
public List<SearchResp.SearchResult> retrieve(@RequestBody @Valid RetrieveReq request) {
|
||||
return knowledgeBaseService.retrieve(request);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,6 @@ public class AddFilesReq {
|
||||
private String delimiter;
|
||||
private List<FileInfo> files;
|
||||
|
||||
public record FileInfo(String id, String fileName) {
|
||||
public record FileInfo(String id, String fileName, String relativePath) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.datamate.rag.indexer.interfaces.dto;
|
||||
|
||||
import com.datamate.common.interfaces.PagingQuery;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 知识库文件全库检索请求
|
||||
*
|
||||
* @author dallas
|
||||
* @since 2026-01-30
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class KnowledgeBaseFileSearchReq extends PagingQuery {
|
||||
private String fileName;
|
||||
private String relativePath;
|
||||
private String knowledgeBaseId;
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.datamate.rag.indexer.interfaces.dto;
|
||||
|
||||
import com.datamate.rag.indexer.domain.model.FileStatus;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* 知识库文件全库检索响应
|
||||
*
|
||||
* @author dallas
|
||||
* @since 2026-01-30
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class KnowledgeBaseFileSearchResp {
|
||||
private String id;
|
||||
private String knowledgeBaseId;
|
||||
private String knowledgeBaseName;
|
||||
private String fileName;
|
||||
private String relativePath;
|
||||
private Integer chunkCount;
|
||||
private FileStatus status;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
}
|
||||
@@ -14,5 +14,6 @@ import lombok.Setter;
|
||||
@Getter
|
||||
public class RagFileReq extends PagingQuery {
|
||||
private String fileName;
|
||||
private String relativePath;
|
||||
private String knowledgeBaseId;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,12 @@ import TextArea from "antd/es/input/TextArea";
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
import type { ReactNode } from "react";
|
||||
import { Eye } from "lucide-react";
|
||||
import {
|
||||
PREVIEW_TEXT_MAX_LENGTH,
|
||||
resolvePreviewFileType,
|
||||
truncatePreviewText,
|
||||
type PreviewFileType,
|
||||
} from "@/utils/filePreview";
|
||||
import {
|
||||
createAnnotationTaskUsingPost,
|
||||
getAnnotationTaskByIdUsingGet,
|
||||
@@ -53,6 +59,7 @@ const isRecord = (value: unknown): value is Record<string, unknown> =>
|
||||
!!value && typeof value === "object" && !Array.isArray(value);
|
||||
|
||||
const DEFAULT_SEGMENTATION_ENABLED = true;
|
||||
const FILE_PREVIEW_MAX_HEIGHT = 500;
|
||||
const SEGMENTATION_OPTIONS = [
|
||||
{ label: "需要切片段", value: true },
|
||||
{ label: "不需要切片段", value: false },
|
||||
@@ -116,7 +123,7 @@ export default function CreateAnnotationTask({
|
||||
const [fileContent, setFileContent] = useState("");
|
||||
const [fileContentLoading, setFileContentLoading] = useState(false);
|
||||
const [previewFileName, setPreviewFileName] = useState("");
|
||||
const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text");
|
||||
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
|
||||
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
|
||||
|
||||
// 任务详情加载状态(编辑模式)
|
||||
@@ -297,57 +304,32 @@ export default function CreateAnnotationTask({
|
||||
|
||||
// 预览文件内容
|
||||
const handlePreviewFileContent = async (file: DatasetPreviewFile) => {
|
||||
const fileName = file.fileName?.toLowerCase() || '';
|
||||
|
||||
// 文件类型扩展名映射
|
||||
const textExtensions = ['.json', '.jsonl', '.txt', '.csv', '.tsv', '.xml', '.md', '.yaml', '.yml'];
|
||||
const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'];
|
||||
const videoExtensions = ['.mp4', '.webm', '.ogg', '.mov', '.avi'];
|
||||
const audioExtensions = ['.mp3', '.wav', '.ogg', '.aac', '.flac', '.m4a'];
|
||||
|
||||
const isTextFile = textExtensions.some(ext => fileName.endsWith(ext));
|
||||
const isImageFile = imageExtensions.some(ext => fileName.endsWith(ext));
|
||||
const isVideoFile = videoExtensions.some(ext => fileName.endsWith(ext));
|
||||
const isAudioFile = audioExtensions.some(ext => fileName.endsWith(ext));
|
||||
|
||||
if (!isTextFile && !isImageFile && !isVideoFile && !isAudioFile) {
|
||||
const fileType = resolvePreviewFileType(file.fileName);
|
||||
if (!fileType) {
|
||||
message.warning("不支持预览该文件类型");
|
||||
return;
|
||||
}
|
||||
|
||||
setFileContentLoading(true);
|
||||
setPreviewFileName(file.fileName);
|
||||
setPreviewFileType(fileType);
|
||||
setFileContent("");
|
||||
setPreviewMediaUrl("");
|
||||
|
||||
const fileUrl = `/api/data-management/datasets/${selectedDatasetId}/files/${file.id}/download`;
|
||||
const previewUrl = `/api/data-management/datasets/${selectedDatasetId}/files/${file.id}/preview`;
|
||||
|
||||
try {
|
||||
if (isTextFile) {
|
||||
if (fileType === "text") {
|
||||
// 文本文件:获取内容
|
||||
const response = await fetch(fileUrl);
|
||||
const response = await fetch(previewUrl);
|
||||
if (!response.ok) {
|
||||
throw new Error('下载失败');
|
||||
}
|
||||
const text = await response.text();
|
||||
// 限制预览内容长度
|
||||
const maxLength = 50000;
|
||||
if (text.length > maxLength) {
|
||||
setFileContent(text.substring(0, maxLength) + '\n\n... (内容过长,仅显示前 50000 字符)');
|
||||
} else {
|
||||
setFileContent(text);
|
||||
}
|
||||
setPreviewFileType("text");
|
||||
} else if (isImageFile) {
|
||||
// 图片文件:直接使用 URL
|
||||
setPreviewMediaUrl(fileUrl);
|
||||
setPreviewFileType("image");
|
||||
} else if (isVideoFile) {
|
||||
// 视频文件:使用 URL
|
||||
setPreviewMediaUrl(fileUrl);
|
||||
setPreviewFileType("video");
|
||||
} else if (isAudioFile) {
|
||||
// 音频文件:使用 URL
|
||||
setPreviewMediaUrl(fileUrl);
|
||||
setPreviewFileType("audio");
|
||||
setFileContent(truncatePreviewText(text, PREVIEW_TEXT_MAX_LENGTH));
|
||||
} else {
|
||||
// 媒体/PDF 文件:直接使用预览地址
|
||||
setPreviewMediaUrl(previewUrl);
|
||||
}
|
||||
setFileContentVisible(true);
|
||||
} catch (error) {
|
||||
@@ -878,7 +860,7 @@ export default function CreateAnnotationTask({
|
||||
</Button>
|
||||
]}
|
||||
>
|
||||
<div className="mb-2 text-xs text-gray-500">点击文件名可预览文件内容(支持文本、图片、音频、视频)</div>
|
||||
<div className="mb-2 text-xs text-gray-500">点击文件名可预览文件内容(支持文本、图片、音频、视频、PDF)</div>
|
||||
<Table
|
||||
dataSource={datasetPreviewData}
|
||||
columns={[
|
||||
@@ -942,7 +924,7 @@ export default function CreateAnnotationTask({
|
||||
{previewFileType === "text" && (
|
||||
<pre
|
||||
style={{
|
||||
maxHeight: '500px',
|
||||
maxHeight: `${FILE_PREVIEW_MAX_HEIGHT}px`,
|
||||
overflow: 'auto',
|
||||
backgroundColor: '#f5f5f5',
|
||||
padding: '12px',
|
||||
@@ -960,16 +942,23 @@ export default function CreateAnnotationTask({
|
||||
<img
|
||||
src={previewMediaUrl}
|
||||
alt={previewFileName}
|
||||
style={{ maxWidth: '100%', maxHeight: '500px', objectFit: 'contain' }}
|
||||
style={{ maxWidth: '100%', maxHeight: `${FILE_PREVIEW_MAX_HEIGHT}px`, objectFit: 'contain' }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
{previewFileType === "pdf" && (
|
||||
<iframe
|
||||
src={previewMediaUrl}
|
||||
title={previewFileName || "PDF 预览"}
|
||||
style={{ width: '100%', height: `${FILE_PREVIEW_MAX_HEIGHT}px`, border: 'none' }}
|
||||
/>
|
||||
)}
|
||||
{previewFileType === "video" && (
|
||||
<div style={{ textAlign: 'center' }}>
|
||||
<video
|
||||
src={previewMediaUrl}
|
||||
controls
|
||||
style={{ maxWidth: '100%', maxHeight: '500px' }}
|
||||
style={{ maxWidth: '100%', maxHeight: `${FILE_PREVIEW_MAX_HEIGHT}px` }}
|
||||
>
|
||||
您的浏览器不支持视频播放
|
||||
</video>
|
||||
|
||||
@@ -106,13 +106,6 @@ export default function ExportAnnotationDialog({
|
||||
const values = await form.validateFields();
|
||||
setExporting(true);
|
||||
|
||||
const blob = await downloadAnnotationsUsingGet(
|
||||
projectId,
|
||||
values.format,
|
||||
values.onlyAnnotated,
|
||||
values.includeData
|
||||
);
|
||||
|
||||
// 获取文件名
|
||||
const formatExt: Record<ExportFormat, string> = {
|
||||
json: "json",
|
||||
@@ -124,15 +117,14 @@ export default function ExportAnnotationDialog({
|
||||
const ext = formatExt[values.format as ExportFormat] || "json";
|
||||
const filename = `${projectName}_annotations.${ext}`;
|
||||
|
||||
// 下载文件
|
||||
const url = window.URL.createObjectURL(blob as Blob);
|
||||
const a = document.createElement("a");
|
||||
a.href = url;
|
||||
a.download = filename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
window.URL.revokeObjectURL(url);
|
||||
document.body.removeChild(a);
|
||||
// 下载文件(download函数内部已处理下载逻辑)
|
||||
await downloadAnnotationsUsingGet(
|
||||
projectId,
|
||||
values.format,
|
||||
values.onlyAnnotated,
|
||||
values.includeData,
|
||||
filename
|
||||
);
|
||||
|
||||
message.success("导出成功");
|
||||
onClose();
|
||||
|
||||
@@ -109,12 +109,13 @@ export function downloadAnnotationsUsingGet(
|
||||
projectId: string,
|
||||
format: ExportFormat = "json",
|
||||
onlyAnnotated: boolean = true,
|
||||
includeData: boolean = false
|
||||
includeData: boolean = false,
|
||||
filename?: string
|
||||
) {
|
||||
const params = new URLSearchParams({
|
||||
format,
|
||||
only_annotated: String(onlyAnnotated),
|
||||
include_data: String(includeData),
|
||||
});
|
||||
return download(`/api/annotation/export/projects/${projectId}/download?${params.toString()}`);
|
||||
return download(`/api/annotation/export/projects/${projectId}/download?${params.toString()}`, null, filename);
|
||||
}
|
||||
|
||||
@@ -96,7 +96,7 @@ export default function EditDataset({
|
||||
<BasicInformation
|
||||
data={newDataset}
|
||||
setData={setNewDataset}
|
||||
hidden={["datasetType"]}
|
||||
hidden={["datasetType", "dataSource"]}
|
||||
/>
|
||||
</Form>
|
||||
</Modal>
|
||||
|
||||
@@ -74,7 +74,7 @@ export default function BasicInformation({
|
||||
value: dataset.id,
|
||||
}));
|
||||
setParentDatasetOptions([
|
||||
{ label: "根数据集", value: "" },
|
||||
{ label: "无关联数据集", value: "" },
|
||||
...options,
|
||||
]);
|
||||
} catch (error) {
|
||||
@@ -102,11 +102,11 @@ export default function BasicInformation({
|
||||
</Form.Item>
|
||||
)}
|
||||
{!hidden.includes("parentDatasetId") && (
|
||||
<Form.Item name="parentDatasetId" label="父数据集">
|
||||
<Form.Item name="parentDatasetId" label="关联数据集">
|
||||
<Select
|
||||
className="w-full"
|
||||
options={parentDatasetOptions}
|
||||
placeholder="选择父数据集(仅支持一层)"
|
||||
placeholder="选择关联数据集(仅支持一层)"
|
||||
/>
|
||||
</Form.Item>
|
||||
)}
|
||||
|
||||
@@ -127,7 +127,7 @@ export default function DatasetDetail() {
|
||||
if (!dataset?.parentDatasetId) {
|
||||
items.push({
|
||||
key: "children",
|
||||
label: "子数据集",
|
||||
label: "关联数据集",
|
||||
});
|
||||
}
|
||||
return items;
|
||||
@@ -266,7 +266,7 @@ export default function DatasetDetail() {
|
||||
? [
|
||||
{
|
||||
key: "create-child",
|
||||
label: "创建子数据集",
|
||||
label: "创建关联数据集",
|
||||
icon: <PlusOutlined />,
|
||||
onClick: handleCreateChildDataset,
|
||||
},
|
||||
@@ -415,7 +415,7 @@ export default function DatasetDetail() {
|
||||
{activeTab === "children" && (
|
||||
<div className="pt-4">
|
||||
<div className="flex items-center justify-between mb-3">
|
||||
<h2 className="text-base font-semibold">子数据集</h2>
|
||||
<h2 className="text-base font-semibold">关联数据集</h2>
|
||||
<span className="text-xs text-gray-500">
|
||||
共 {childDatasets.length} 个
|
||||
</span>
|
||||
@@ -426,7 +426,7 @@ export default function DatasetDetail() {
|
||||
dataSource={childDatasets}
|
||||
loading={childDatasetsLoading}
|
||||
pagination={false}
|
||||
locale={{ emptyText: "暂无子数据集" }}
|
||||
locale={{ emptyText: "暂无关联数据集" }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -13,11 +13,11 @@ import Dragger from "antd/es/upload/Dragger";
|
||||
* @param file 原始文件
|
||||
* @returns 分割后的文件列表,每行一个文件
|
||||
*/
|
||||
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
|
||||
const originFile = (file as any).originFileObj || file;
|
||||
if (!originFile || typeof originFile.text !== "function") {
|
||||
return [file];
|
||||
}
|
||||
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
|
||||
const originFile = file.originFileObj ?? file;
|
||||
if (!(originFile instanceof File) || typeof originFile.text !== "function") {
|
||||
return [file];
|
||||
}
|
||||
|
||||
const text = await originFile.text();
|
||||
if (!text) return [file];
|
||||
@@ -36,17 +36,37 @@ async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
|
||||
const newFileName = `${baseName}_${String(index + 1).padStart(padLength, "0")}${ext}`;
|
||||
const blob = new Blob([line], { type: "text/plain" });
|
||||
const newFile = new File([blob], newFileName, { type: "text/plain" });
|
||||
return {
|
||||
uid: `${file.uid}-${index}`,
|
||||
name: newFileName,
|
||||
size: newFile.size,
|
||||
type: "text/plain",
|
||||
originFileObj: newFile as any,
|
||||
} as UploadFile;
|
||||
});
|
||||
}
|
||||
|
||||
export default function ImportConfiguration({
|
||||
return {
|
||||
uid: `${file.uid}-${index}`,
|
||||
name: newFileName,
|
||||
size: newFile.size,
|
||||
type: "text/plain",
|
||||
originFileObj: newFile as UploadFile["originFileObj"],
|
||||
} as UploadFile;
|
||||
});
|
||||
}
|
||||
|
||||
type SelectOption = {
|
||||
label: string;
|
||||
value: string;
|
||||
};
|
||||
|
||||
type CollectionTask = {
|
||||
id: string;
|
||||
name: string;
|
||||
};
|
||||
|
||||
type ImportConfig = {
|
||||
source: DataSource;
|
||||
hasArchive: boolean;
|
||||
splitByLine: boolean;
|
||||
files?: UploadFile[];
|
||||
dataSource?: string;
|
||||
target?: DataSource;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
export default function ImportConfiguration({
|
||||
data,
|
||||
open,
|
||||
onClose,
|
||||
@@ -59,19 +79,23 @@ export default function ImportConfiguration({
|
||||
updateEvent?: string;
|
||||
prefix?: string;
|
||||
}) {
|
||||
const [form] = Form.useForm();
|
||||
const [collectionOptions, setCollectionOptions] = useState([]);
|
||||
const [importConfig, setImportConfig] = useState<any>({
|
||||
source: DataSource.UPLOAD,
|
||||
hasArchive: true,
|
||||
splitByLine: false,
|
||||
});
|
||||
const [form] = Form.useForm();
|
||||
const [collectionOptions, setCollectionOptions] = useState<SelectOption[]>([]);
|
||||
const availableSourceOptions = dataSourceOptions.filter(
|
||||
(option) => option.value !== DataSource.COLLECTION
|
||||
);
|
||||
const [importConfig, setImportConfig] = useState<ImportConfig>({
|
||||
source: DataSource.UPLOAD,
|
||||
hasArchive: true,
|
||||
splitByLine: false,
|
||||
});
|
||||
const [currentPrefix, setCurrentPrefix] = useState<string>("");
|
||||
|
||||
// 本地上传文件相关逻辑
|
||||
|
||||
const handleUpload = async (dataset: Dataset) => {
|
||||
let filesToUpload = form.getFieldValue("files") || [];
|
||||
let filesToUpload =
|
||||
(form.getFieldValue("files") as UploadFile[] | undefined) || [];
|
||||
|
||||
// 如果启用分行分割,处理文件
|
||||
if (importConfig.splitByLine) {
|
||||
@@ -83,14 +107,14 @@ export default function ImportConfiguration({
|
||||
|
||||
// 计算分片列表
|
||||
const sliceList = filesToUpload.map((file) => {
|
||||
const originFile = (file as any).originFileObj || file;
|
||||
const slices = sliceFile(originFile);
|
||||
return {
|
||||
originFile: originFile, // 传入真正的 File/Blob 对象
|
||||
slices,
|
||||
name: file.name,
|
||||
size: originFile.size || 0,
|
||||
};
|
||||
const originFile = (file.originFileObj ?? file) as Blob;
|
||||
const slices = sliceFile(originFile);
|
||||
return {
|
||||
originFile: originFile, // 传入真正的 File/Blob 对象
|
||||
slices,
|
||||
name: file.name,
|
||||
size: originFile.size || 0,
|
||||
};
|
||||
});
|
||||
|
||||
console.log("[ImportConfiguration] Uploading with currentPrefix:", currentPrefix);
|
||||
@@ -111,10 +135,13 @@ export default function ImportConfiguration({
|
||||
if (importConfig.source !== DataSource.COLLECTION) return;
|
||||
try {
|
||||
const res = await queryTasksUsingGet({ page: 0, size: 100 });
|
||||
const options = res.data.content.map((task: any) => ({
|
||||
label: task.name,
|
||||
value: task.id,
|
||||
}));
|
||||
const tasks = Array.isArray(res?.data?.content)
|
||||
? (res.data.content as CollectionTask[])
|
||||
: [];
|
||||
const options = tasks.map((task) => ({
|
||||
label: task.name,
|
||||
value: task.id,
|
||||
}));
|
||||
setCollectionOptions(options);
|
||||
} catch (error) {
|
||||
console.error("Error fetching collection tasks:", error);
|
||||
@@ -123,13 +150,13 @@ export default function ImportConfiguration({
|
||||
|
||||
const resetState = () => {
|
||||
console.log('[ImportConfiguration] resetState called, preserving currentPrefix:', currentPrefix);
|
||||
form.resetFields();
|
||||
form.setFieldsValue({ files: null });
|
||||
setImportConfig({
|
||||
source: importConfig.source ? importConfig.source : DataSource.UPLOAD,
|
||||
hasArchive: true,
|
||||
splitByLine: false,
|
||||
});
|
||||
form.resetFields();
|
||||
form.setFieldsValue({ files: null });
|
||||
setImportConfig({
|
||||
source: DataSource.UPLOAD,
|
||||
hasArchive: true,
|
||||
splitByLine: false,
|
||||
});
|
||||
console.log('[ImportConfiguration] resetState done, currentPrefix still:', currentPrefix);
|
||||
};
|
||||
|
||||
@@ -196,12 +223,12 @@ export default function ImportConfiguration({
|
||||
name="source"
|
||||
rules={[{ required: true, message: "请选择数据源" }]}
|
||||
>
|
||||
<Radio.Group
|
||||
buttonStyle="solid"
|
||||
options={dataSourceOptions}
|
||||
optionType="button"
|
||||
/>
|
||||
</Form.Item>
|
||||
<Radio.Group
|
||||
buttonStyle="solid"
|
||||
options={availableSourceOptions}
|
||||
optionType="button"
|
||||
/>
|
||||
</Form.Item>
|
||||
{importConfig?.source === DataSource.COLLECTION && (
|
||||
<Form.Item name="dataSource" label="归集任务" required>
|
||||
<Select placeholder="请选择归集任务" options={collectionOptions} />
|
||||
@@ -277,12 +304,14 @@ export default function ImportConfiguration({
|
||||
label="上传文件"
|
||||
name="files"
|
||||
valuePropName="fileList"
|
||||
getValueFromEvent={(e: any) => {
|
||||
if (Array.isArray(e)) {
|
||||
return e;
|
||||
}
|
||||
return e && e.fileList;
|
||||
}}
|
||||
getValueFromEvent={(
|
||||
event: { fileList?: UploadFile[] } | UploadFile[]
|
||||
) => {
|
||||
if (Array.isArray(event)) {
|
||||
return event;
|
||||
}
|
||||
return event?.fileList;
|
||||
}}
|
||||
rules={[
|
||||
{
|
||||
required: true,
|
||||
|
||||
@@ -330,13 +330,21 @@ export default function Overview({
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex">
|
||||
<Button
|
||||
size="small"
|
||||
type="link"
|
||||
onClick={() => handleDownloadFile(record)}
|
||||
>
|
||||
return (
|
||||
<div className="flex">
|
||||
<Button
|
||||
size="small"
|
||||
type="link"
|
||||
loading={previewLoading && previewFileName === record.fileName}
|
||||
onClick={() => handlePreviewFile(record)}
|
||||
>
|
||||
预览
|
||||
</Button>
|
||||
<Button
|
||||
size="small"
|
||||
type="link"
|
||||
onClick={() => handleDownloadFile(record)}
|
||||
>
|
||||
下载
|
||||
</Button>
|
||||
<Button
|
||||
@@ -549,6 +557,13 @@ export default function Overview({
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
{previewFileType === "pdf" && (
|
||||
<iframe
|
||||
src={previewMediaUrl}
|
||||
title={previewFileName || "PDF 预览"}
|
||||
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }}
|
||||
/>
|
||||
)}
|
||||
{previewFileType === "video" && (
|
||||
<div style={{ textAlign: "center" }}>
|
||||
<video
|
||||
|
||||
@@ -4,7 +4,12 @@ import type {
|
||||
} from "@/pages/DataManagement/dataset.model";
|
||||
import { App } from "antd";
|
||||
import { useState } from "react";
|
||||
import { PREVIEW_TEXT_MAX_LENGTH, resolvePreviewFileType, truncatePreviewText } from "@/utils/filePreview";
|
||||
import {
|
||||
PREVIEW_TEXT_MAX_LENGTH,
|
||||
resolvePreviewFileType,
|
||||
truncatePreviewText,
|
||||
type PreviewFileType,
|
||||
} from "@/utils/filePreview";
|
||||
import {
|
||||
deleteDatasetFileUsingDelete,
|
||||
downloadFileByIdUsingGet,
|
||||
@@ -35,7 +40,7 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
const [previewVisible, setPreviewVisible] = useState(false);
|
||||
const [previewContent, setPreviewContent] = useState("");
|
||||
const [previewFileName, setPreviewFileName] = useState("");
|
||||
const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text");
|
||||
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
|
||||
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
|
||||
const [previewLoading, setPreviewLoading] = useState(false);
|
||||
|
||||
@@ -111,7 +116,7 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
return;
|
||||
}
|
||||
|
||||
const fileUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/download`;
|
||||
const previewUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/preview`;
|
||||
setPreviewFileName(file.fileName);
|
||||
setPreviewFileType(fileType);
|
||||
setPreviewContent("");
|
||||
@@ -120,7 +125,7 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
if (fileType === "text") {
|
||||
setPreviewLoading(true);
|
||||
try {
|
||||
const response = await fetch(fileUrl);
|
||||
const response = await fetch(previewUrl);
|
||||
if (!response.ok) {
|
||||
throw new Error("下载失败");
|
||||
}
|
||||
@@ -136,7 +141,7 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
return;
|
||||
}
|
||||
|
||||
setPreviewMediaUrl(fileUrl);
|
||||
setPreviewMediaUrl(previewUrl);
|
||||
setPreviewVisible(true);
|
||||
};
|
||||
|
||||
|
||||
@@ -34,10 +34,12 @@ export enum DataSource {
|
||||
|
||||
export interface DatasetFile {
|
||||
id: string;
|
||||
datasetId?: string;
|
||||
fileName: string;
|
||||
size: string;
|
||||
uploadDate: string;
|
||||
path: string;
|
||||
filePath?: string;
|
||||
}
|
||||
|
||||
export interface Dataset {
|
||||
|
||||
@@ -1,12 +1,23 @@
|
||||
import type React from "react";
|
||||
import { useEffect, useState } from "react";
|
||||
import { Table, Badge, Button, Breadcrumb, Tooltip, App, Card, Input, Empty, Spin } from "antd";
|
||||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
import {
|
||||
Table,
|
||||
Badge,
|
||||
Button,
|
||||
Breadcrumb,
|
||||
Tooltip,
|
||||
App,
|
||||
Card,
|
||||
Input,
|
||||
Empty,
|
||||
Spin,
|
||||
} from "antd";
|
||||
import {
|
||||
DeleteOutlined,
|
||||
EditOutlined,
|
||||
ReloadOutlined,
|
||||
} from "@ant-design/icons";
|
||||
import { useNavigate, useParams } from "react-router";
|
||||
import { useNavigate, useParams, useSearchParams } from "react-router";
|
||||
import DetailHeader from "@/components/DetailHeader";
|
||||
import { SearchControls } from "@/components/SearchControls";
|
||||
import { KBFile, KnowledgeBaseItem } from "../knowledge-base.model";
|
||||
@@ -18,9 +29,9 @@ import {
|
||||
queryKnowledgeBaseFilesUsingGet,
|
||||
retrieveKnowledgeBaseContent,
|
||||
} from "../knowledge-base.api";
|
||||
import useFetchData from "@/hooks/useFetchData";
|
||||
import AddDataDialog from "../components/AddDataDialog";
|
||||
import CreateKnowledgeBase from "../components/CreateKnowledgeBase";
|
||||
import { File, Folder } from "lucide-react";
|
||||
|
||||
interface StatisticItem {
|
||||
icon?: React.ReactNode;
|
||||
@@ -39,44 +50,127 @@ interface RecallResult {
|
||||
primaryKey?: string;
|
||||
}
|
||||
|
||||
type KBFileRow = KBFile & {
|
||||
isDirectory?: boolean;
|
||||
displayName?: string;
|
||||
fullPath?: string;
|
||||
fileCount?: number;
|
||||
};
|
||||
|
||||
const PATH_SEPARATOR = "/";
|
||||
const normalizePath = (value?: string) =>
|
||||
(value ?? "").replace(/\\/g, PATH_SEPARATOR);
|
||||
|
||||
const normalizePrefix = (value?: string) => {
|
||||
const trimmed = normalizePath(value).replace(/^\/+/, "").trim();
|
||||
if (!trimmed) {
|
||||
return "";
|
||||
}
|
||||
return trimmed.endsWith(PATH_SEPARATOR)
|
||||
? trimmed
|
||||
: `${trimmed}${PATH_SEPARATOR}`;
|
||||
};
|
||||
|
||||
const splitRelativePath = (fullPath: string, prefix: string) => {
|
||||
if (prefix && !fullPath.startsWith(prefix)) {
|
||||
return [];
|
||||
}
|
||||
const remainder = fullPath.slice(prefix.length);
|
||||
return remainder.split(PATH_SEPARATOR).filter(Boolean);
|
||||
};
|
||||
|
||||
const resolveFileRelativePath = (file: KBFile) => {
|
||||
const rawPath = file.relativePath || file.fileName || file.name || "";
|
||||
return normalizePath(rawPath).replace(/^\/+/, "");
|
||||
};
|
||||
|
||||
const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
const navigate = useNavigate();
|
||||
const [searchParams] = useSearchParams();
|
||||
const { message } = App.useApp();
|
||||
const { id } = useParams<{ id: string }>();
|
||||
const [knowledgeBase, setKnowledgeBase] = useState<KnowledgeBaseItem | undefined>(undefined);
|
||||
const [showEdit, setShowEdit] = useState(false);
|
||||
const [activeTab, setActiveTab] = useState<'fileList' | 'recallTest'>('fileList');
|
||||
const [filePrefix, setFilePrefix] = useState("");
|
||||
const [fileKeyword, setFileKeyword] = useState("");
|
||||
const [filesLoading, setFilesLoading] = useState(false);
|
||||
const [allFiles, setAllFiles] = useState<KBFile[]>([]);
|
||||
const [filePagination, setFilePagination] = useState({
|
||||
current: 1,
|
||||
pageSize: 10,
|
||||
});
|
||||
const [recallLoading, setRecallLoading] = useState(false);
|
||||
const [recallResults, setRecallResults] = useState<RecallResult[]>([]);
|
||||
const [recallQuery, setRecallQuery] = useState("");
|
||||
|
||||
const fetchKnowledgeBaseDetails = async (id: string) => {
|
||||
const { data } = await queryKnowledgeBaseByIdUsingGet(id);
|
||||
const fetchKnowledgeBaseDetails = useCallback(async (baseId: string) => {
|
||||
const { data } = await queryKnowledgeBaseByIdUsingGet(baseId);
|
||||
setKnowledgeBase(mapKnowledgeBase(data));
|
||||
};
|
||||
}, []);
|
||||
|
||||
const fetchFiles = useCallback(async () => {
|
||||
if (!id) {
|
||||
setAllFiles([]);
|
||||
return;
|
||||
}
|
||||
setFilesLoading(true);
|
||||
try {
|
||||
const pageSize = 200;
|
||||
let page = 0;
|
||||
let combined: KBFile[] = [];
|
||||
const currentPrefix = normalizePrefix(filePrefix);
|
||||
const keyword = fileKeyword.trim();
|
||||
while (true) {
|
||||
const { data } = await queryKnowledgeBaseFilesUsingGet(id, {
|
||||
page,
|
||||
size: pageSize,
|
||||
...(currentPrefix ? { relativePath: currentPrefix } : {}),
|
||||
...(keyword ? { fileName: keyword } : {}),
|
||||
});
|
||||
const content = Array.isArray(data?.content) ? data.content : [];
|
||||
combined = combined.concat(content.map(mapFileData));
|
||||
if (content.length < pageSize) {
|
||||
break;
|
||||
}
|
||||
if (typeof data?.totalElements === "number" && combined.length >= data.totalElements) {
|
||||
break;
|
||||
}
|
||||
page += 1;
|
||||
}
|
||||
setAllFiles(combined);
|
||||
} catch (error) {
|
||||
console.error("Failed to fetch knowledge base files:", error);
|
||||
message.error("文件列表加载失败");
|
||||
} finally {
|
||||
setFilesLoading(false);
|
||||
}
|
||||
}, [id, filePrefix, fileKeyword, message]);
|
||||
|
||||
useEffect(() => {
|
||||
if (id) {
|
||||
fetchKnowledgeBaseDetails(id);
|
||||
}
|
||||
}, [id]);
|
||||
}, [id, fetchKnowledgeBaseDetails]);
|
||||
|
||||
const {
|
||||
loading,
|
||||
tableData: files,
|
||||
searchParams,
|
||||
pagination,
|
||||
fetchData: fetchFiles,
|
||||
setSearchParams,
|
||||
handleFiltersChange,
|
||||
handleKeywordChange,
|
||||
} = useFetchData<KBFile>(
|
||||
(params) => id ? queryKnowledgeBaseFilesUsingGet(id, params) : Promise.resolve({ data: [] }),
|
||||
mapFileData
|
||||
);
|
||||
useEffect(() => {
|
||||
if (!id) {
|
||||
return;
|
||||
}
|
||||
const prefixParam = searchParams.get("prefix");
|
||||
const fileNameParam = searchParams.get("fileName");
|
||||
setFilePrefix(prefixParam ? normalizePrefix(prefixParam) : "");
|
||||
setFileKeyword(fileNameParam ? fileNameParam : "");
|
||||
}, [id, searchParams]);
|
||||
|
||||
useEffect(() => {
|
||||
if (id) {
|
||||
fetchFiles();
|
||||
}
|
||||
}, [id, fetchFiles]);
|
||||
|
||||
// File table logic
|
||||
const handleDeleteFile = async (file: KBFile) => {
|
||||
const handleDeleteFile = async (file: KBFileRow) => {
|
||||
try {
|
||||
await deleteKnowledgeBaseFileByIdUsingDelete(knowledgeBase!.id, {
|
||||
ids: [file.id]
|
||||
@@ -119,6 +213,152 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
setRecallLoading(false);
|
||||
};
|
||||
|
||||
const handleOpenDirectory = (directoryName: string) => {
|
||||
const currentPrefix = normalizePrefix(filePrefix);
|
||||
const nextPrefix = normalizePrefix(`${currentPrefix}${directoryName}`);
|
||||
setFilePrefix(nextPrefix);
|
||||
};
|
||||
|
||||
const handleBackToParent = () => {
|
||||
const currentPrefix = normalizePrefix(filePrefix);
|
||||
if (!currentPrefix) {
|
||||
return;
|
||||
}
|
||||
const trimmed = currentPrefix.replace(/\/$/, "");
|
||||
const parts = trimmed.split(PATH_SEPARATOR).filter(Boolean);
|
||||
parts.pop();
|
||||
const parentPrefix = parts.length
|
||||
? `${parts.join(PATH_SEPARATOR)}${PATH_SEPARATOR}`
|
||||
: "";
|
||||
setFilePrefix(parentPrefix);
|
||||
};
|
||||
|
||||
const handleDeleteDirectory = async (directoryName: string) => {
|
||||
if (!knowledgeBase?.id) {
|
||||
return;
|
||||
}
|
||||
const currentPrefix = normalizePrefix(filePrefix);
|
||||
const directoryPrefix = normalizePrefix(`${currentPrefix}${directoryName}`);
|
||||
const targetIds = allFiles
|
||||
.filter((file) => {
|
||||
const fullPath = resolveFileRelativePath(file);
|
||||
return fullPath.startsWith(directoryPrefix);
|
||||
})
|
||||
.map((file) => file.id);
|
||||
if (targetIds.length === 0) {
|
||||
message.info("该文件夹为空");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await deleteKnowledgeBaseFileByIdUsingDelete(knowledgeBase.id, {
|
||||
ids: targetIds,
|
||||
});
|
||||
message.success(`已删除 ${targetIds.length} 个文件`);
|
||||
fetchFiles();
|
||||
} catch {
|
||||
message.error("文件夹删除失败");
|
||||
}
|
||||
};
|
||||
|
||||
const handleKeywordChange = (keyword: string) => {
|
||||
setFileKeyword(keyword);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
setFilePagination((prev) => ({ ...prev, current: 1 }));
|
||||
}, [filePrefix, fileKeyword]);
|
||||
|
||||
const normalizedPrefix = useMemo(() => normalizePrefix(filePrefix), [filePrefix]);
|
||||
|
||||
const { rows: fileRows, total: fileTotal } = useMemo(() => {
|
||||
const folderMap = new Map<string, { name: string; fileCount: number }>();
|
||||
const fileItems: KBFileRow[] = [];
|
||||
|
||||
allFiles.forEach((file) => {
|
||||
const fullPath = resolveFileRelativePath(file);
|
||||
if (!fullPath) {
|
||||
return;
|
||||
}
|
||||
const segments = splitRelativePath(fullPath, normalizedPrefix);
|
||||
if (segments.length === 0) {
|
||||
return;
|
||||
}
|
||||
const leafName = segments[0];
|
||||
|
||||
if (segments.length > 1) {
|
||||
const folderName = leafName;
|
||||
const entry = folderMap.get(folderName) || {
|
||||
name: folderName,
|
||||
fileCount: 0,
|
||||
};
|
||||
entry.fileCount += 1;
|
||||
folderMap.set(folderName, entry);
|
||||
return;
|
||||
}
|
||||
|
||||
const normalizedFileName = normalizePath(file.fileName);
|
||||
const displayName = normalizedFileName.includes(PATH_SEPARATOR)
|
||||
? leafName
|
||||
: file.fileName || leafName;
|
||||
fileItems.push({
|
||||
...file,
|
||||
name: displayName,
|
||||
displayName,
|
||||
fullPath,
|
||||
});
|
||||
});
|
||||
|
||||
const folderItems: KBFileRow[] = Array.from(folderMap.values()).map(
|
||||
(entry) =>
|
||||
({
|
||||
id: `directory-${normalizedPrefix}${entry.name}`,
|
||||
fileName: entry.name,
|
||||
name: entry.name,
|
||||
status: null,
|
||||
chunkCount: 0,
|
||||
createdAt: "",
|
||||
updatedAt: "",
|
||||
metadata: {},
|
||||
knowledgeBaseId: knowledgeBase?.id || "",
|
||||
fileId: "",
|
||||
updatedBy: "",
|
||||
createdBy: "",
|
||||
isDirectory: true,
|
||||
displayName: entry.name,
|
||||
fullPath: `${normalizedPrefix}${entry.name}/`,
|
||||
fileCount: entry.fileCount,
|
||||
}) as KBFileRow
|
||||
);
|
||||
|
||||
const sortByName = (a: KBFileRow, b: KBFileRow) =>
|
||||
(a.displayName || a.name || "").localeCompare(
|
||||
b.displayName || b.name || "",
|
||||
"zh-Hans-CN"
|
||||
);
|
||||
|
||||
folderItems.sort(sortByName);
|
||||
fileItems.sort(sortByName);
|
||||
|
||||
const combined = [...folderItems, ...fileItems];
|
||||
return { rows: combined, total: combined.length };
|
||||
}, [allFiles, knowledgeBase?.id, normalizedPrefix]);
|
||||
|
||||
const filePageCurrent = filePagination.current;
|
||||
const filePageSize = filePagination.pageSize;
|
||||
|
||||
const pagedFileRows = useMemo(() => {
|
||||
const startIndex = (filePageCurrent - 1) * filePageSize;
|
||||
const endIndex = startIndex + filePageSize;
|
||||
return fileRows.slice(startIndex, endIndex);
|
||||
}, [filePageCurrent, filePageSize, fileRows]);
|
||||
|
||||
useEffect(() => {
|
||||
const maxPage = Math.max(1, Math.ceil(fileTotal / filePageSize));
|
||||
if (filePageCurrent > maxPage) {
|
||||
setFilePagination((prev) => ({ ...prev, current: maxPage }));
|
||||
}
|
||||
}, [filePageCurrent, filePageSize, fileTotal]);
|
||||
|
||||
const operations = [
|
||||
{
|
||||
key: "edit",
|
||||
@@ -170,14 +410,38 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
width: 200,
|
||||
ellipsis: true,
|
||||
fixed: "left" as const,
|
||||
render: (name: string, record: KBFileRow) => {
|
||||
const displayName = record.displayName || name;
|
||||
if (record.isDirectory) {
|
||||
return (
|
||||
<Button
|
||||
type="link"
|
||||
onClick={() => handleOpenDirectory(displayName)}
|
||||
className="flex items-center gap-2 p-0"
|
||||
>
|
||||
<Folder className="w-4 h-4 text-blue-500" />
|
||||
<span className="truncate">{displayName}</span>
|
||||
</Button>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div className="flex items-center gap-2">
|
||||
<File className="w-4 h-4 text-gray-800" />
|
||||
<span className="truncate">{displayName}</span>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "status",
|
||||
key: "vectorizationStatus",
|
||||
width: 120,
|
||||
render: (status: unknown) => {
|
||||
if (typeof status === 'object' && status !== null) {
|
||||
render: (status: unknown, record: KBFileRow) => {
|
||||
if (record.isDirectory) {
|
||||
return <Badge color="default" text="文件夹" />;
|
||||
}
|
||||
if (typeof status === "object" && status !== null) {
|
||||
const s = status as { color?: string; label?: string };
|
||||
return <Badge color={s.color} text={s.label} />;
|
||||
}
|
||||
@@ -190,6 +454,8 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
key: "chunkCount",
|
||||
width: 100,
|
||||
ellipsis: true,
|
||||
render: (value: number, record: KBFileRow) =>
|
||||
record.isDirectory ? "-" : value ?? 0,
|
||||
},
|
||||
{
|
||||
title: "创建时间",
|
||||
@@ -197,6 +463,8 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
key: "createdAt",
|
||||
ellipsis: true,
|
||||
width: 180,
|
||||
render: (value: string, record: KBFileRow) =>
|
||||
record.isDirectory ? "-" : value || "-",
|
||||
},
|
||||
{
|
||||
title: "更新时间",
|
||||
@@ -204,26 +472,51 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
key: "updatedAt",
|
||||
ellipsis: true,
|
||||
width: 180,
|
||||
render: (value: string, record: KBFileRow) =>
|
||||
record.isDirectory ? "-" : value || "-",
|
||||
},
|
||||
{
|
||||
title: "操作",
|
||||
key: "actions",
|
||||
align: "right" as const,
|
||||
width: 100,
|
||||
render: (_: unknown, file: KBFile) => (
|
||||
<div>
|
||||
{fileOps.map((op) => (
|
||||
<Tooltip key={op.key} title={op.label}>
|
||||
render: (_: unknown, file: KBFileRow) => {
|
||||
if (file.isDirectory) {
|
||||
return (
|
||||
<Tooltip title="删除文件夹">
|
||||
<Button
|
||||
type="text"
|
||||
icon={op.icon}
|
||||
danger={op?.danger}
|
||||
onClick={() => op.onClick(file)}
|
||||
icon={<DeleteOutlined className="w-4 h-4" />}
|
||||
danger
|
||||
onClick={() => {
|
||||
modal.confirm({
|
||||
title: "确认删除该文件夹吗?",
|
||||
content: `删除后将移除文件夹 “${file.displayName || file.name}” 下的全部文件,且无法恢复。`,
|
||||
okText: "删除",
|
||||
okType: "danger",
|
||||
cancelText: "取消",
|
||||
onOk: () => handleDeleteDirectory(file.displayName || file.name),
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Tooltip>
|
||||
))}
|
||||
</div>
|
||||
),
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div>
|
||||
{fileOps.map((op) => (
|
||||
<Tooltip key={op.key} title={op.label}>
|
||||
<Button
|
||||
type="text"
|
||||
icon={op.icon}
|
||||
danger={op?.danger}
|
||||
onClick={() => op.onClick(file)}
|
||||
/>
|
||||
</Tooltip>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
@@ -265,12 +558,12 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
<>
|
||||
<div className="flex-1">
|
||||
<SearchControls
|
||||
searchTerm={searchParams.keyword}
|
||||
searchTerm={fileKeyword}
|
||||
onSearchChange={handleKeywordChange}
|
||||
searchPlaceholder="搜索文件名..."
|
||||
filters={[]}
|
||||
onFiltersChange={handleFiltersChange}
|
||||
onClearFilters={() => setSearchParams({ ...searchParams, filter: { type: [], status: [], tags: [] } })}
|
||||
onFiltersChange={() => {}}
|
||||
onClearFilters={() => setFileKeyword("")}
|
||||
showViewToggle={false}
|
||||
showReload={false}
|
||||
/>
|
||||
@@ -281,14 +574,54 @@ const KnowledgeBaseDetailPage: React.FC = () => {
|
||||
</div>
|
||||
|
||||
{activeTab === 'fileList' ? (
|
||||
<Table
|
||||
loading={loading}
|
||||
columns={fileColumns}
|
||||
dataSource={files}
|
||||
rowKey="id"
|
||||
pagination={pagination}
|
||||
scroll={{ y: "calc(100vh - 30rem)" }}
|
||||
/>
|
||||
<>
|
||||
<div className="mb-2">
|
||||
{normalizedPrefix && (
|
||||
<Button type="link" onClick={handleBackToParent} className="p-0">
|
||||
<span className="flex items-center text-blue-500">
|
||||
<svg
|
||||
className="w-4 h-4 mr-1"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
>
|
||||
<path
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
strokeWidth={2}
|
||||
d="M10 19l-7-7m0 0l7-7m-7 7h18"
|
||||
/>
|
||||
</svg>
|
||||
返回上一级
|
||||
</span>
|
||||
</Button>
|
||||
)}
|
||||
{normalizedPrefix && (
|
||||
<span className="ml-2 text-gray-600">
|
||||
当前路径: {normalizedPrefix}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<Table
|
||||
loading={filesLoading}
|
||||
columns={fileColumns}
|
||||
dataSource={pagedFileRows}
|
||||
rowKey="id"
|
||||
pagination={{
|
||||
current: filePagination.current,
|
||||
pageSize: filePagination.pageSize,
|
||||
total: fileTotal,
|
||||
showTotal: (total) => `共 ${total} 条`,
|
||||
onChange: (page, pageSize) =>
|
||||
setFilePagination({
|
||||
current: page,
|
||||
pageSize: pageSize || filePagination.pageSize,
|
||||
}),
|
||||
}}
|
||||
scroll={{ y: "calc(100vh - 30rem)" }}
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
<div className="p-2">
|
||||
<div style={{ fontSize: 14, fontWeight: 300, marginBottom: 8 }}>基于语义文本检索和全文检索后的加权平均结果</div>
|
||||
|
||||
@@ -37,7 +37,7 @@ export default function KnowledgeBasePage() {
|
||||
await deleteKnowledgeBaseByIdUsingDelete(kb.id);
|
||||
message.success("知识库删除成功");
|
||||
fetchData();
|
||||
} catch (error) {
|
||||
} catch {
|
||||
message.error("知识库删除失败");
|
||||
}
|
||||
};
|
||||
@@ -47,7 +47,7 @@ export default function KnowledgeBasePage() {
|
||||
key: "edit",
|
||||
label: "编辑",
|
||||
icon: <EditOutlined />,
|
||||
onClick: (item) => {
|
||||
onClick: (item: KnowledgeBaseItem) => {
|
||||
setIsEdit(true);
|
||||
setCurrentKB(item);
|
||||
},
|
||||
@@ -64,7 +64,7 @@ export default function KnowledgeBasePage() {
|
||||
okType: "danger",
|
||||
cancelText: "取消",
|
||||
},
|
||||
onClick: (item) => handleDeleteKB(item),
|
||||
onClick: (item: KnowledgeBaseItem) => handleDeleteKB(item),
|
||||
},
|
||||
];
|
||||
|
||||
@@ -76,7 +76,7 @@ export default function KnowledgeBasePage() {
|
||||
fixed: "left" as const,
|
||||
width: 200,
|
||||
ellipsis: true,
|
||||
render: (_: any, kb: KnowledgeBaseItem) => (
|
||||
render: (_: unknown, kb: KnowledgeBaseItem) => (
|
||||
<Button
|
||||
type="link"
|
||||
onClick={() => navigate(`/data/knowledge-base/detail/${kb.id}`)}
|
||||
@@ -111,7 +111,7 @@ export default function KnowledgeBasePage() {
|
||||
key: "actions",
|
||||
fixed: "right" as const,
|
||||
width: 150,
|
||||
render: (_: any, kb: KnowledgeBaseItem) => (
|
||||
render: (_: unknown, kb: KnowledgeBaseItem) => (
|
||||
<div className="flex items-center gap-2">
|
||||
{operations.map((op) => (
|
||||
<Tooltip key={op.key} title={op.label}>
|
||||
@@ -132,17 +132,22 @@ export default function KnowledgeBasePage() {
|
||||
<div className="h-full flex flex-col gap-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-bold">知识生成</h1>
|
||||
<CreateKnowledgeBase
|
||||
isEdit={isEdit}
|
||||
data={currentKB}
|
||||
onUpdate={() => {
|
||||
fetchData();
|
||||
}}
|
||||
onClose={() => {
|
||||
setIsEdit(false);
|
||||
setCurrentKB(null);
|
||||
}}
|
||||
/>
|
||||
<div className="flex items-center gap-2">
|
||||
<Button onClick={() => navigate("/data/knowledge-base/search")}>
|
||||
全库搜索
|
||||
</Button>
|
||||
<CreateKnowledgeBase
|
||||
isEdit={isEdit}
|
||||
data={currentKB}
|
||||
onUpdate={() => {
|
||||
fetchData();
|
||||
}}
|
||||
onClose={() => {
|
||||
setIsEdit(false);
|
||||
setCurrentKB(null);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<SearchControls
|
||||
@@ -161,7 +166,9 @@ export default function KnowledgeBasePage() {
|
||||
<CardView
|
||||
data={tableData}
|
||||
operations={operations}
|
||||
onView={(item) => navigate(`/data/knowledge-base/detail/${item.id}`)}
|
||||
onView={(item: KnowledgeBaseItem) =>
|
||||
navigate(`/data/knowledge-base/detail/${item.id}`)
|
||||
}
|
||||
pagination={pagination}
|
||||
/>
|
||||
) : (
|
||||
@@ -177,4 +184,4 @@ export default function KnowledgeBasePage() {
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
217
frontend/src/pages/KnowledgeBase/Search/KnowledgeBaseSearch.tsx
Normal file
217
frontend/src/pages/KnowledgeBase/Search/KnowledgeBaseSearch.tsx
Normal file
@@ -0,0 +1,217 @@
|
||||
import { useCallback, useMemo, useState } from "react";
|
||||
import { App, Badge, Breadcrumb, Button, Input, Table } from "antd";
|
||||
import { useNavigate } from "react-router";
|
||||
import {
|
||||
KBFileStatus,
|
||||
KnowledgeBaseFileSearchResult,
|
||||
} from "../knowledge-base.model";
|
||||
import { KBFileStatusMap } from "../knowledge-base.const";
|
||||
import { queryKnowledgeBaseFilesSearchUsingGet } from "../knowledge-base.api";
|
||||
import { formatDateTime } from "@/utils/unit";
|
||||
|
||||
const PATH_SEPARATOR = "/";
|
||||
|
||||
const normalizePath = (value?: string) =>
|
||||
(value ?? "").replace(/\\/g, PATH_SEPARATOR);
|
||||
|
||||
const resolvePrefix = (relativePath?: string) => {
|
||||
const normalized = normalizePath(relativePath);
|
||||
const parts = normalized.split(PATH_SEPARATOR).filter(Boolean);
|
||||
if (parts.length <= 1) {
|
||||
return "";
|
||||
}
|
||||
parts.pop();
|
||||
return `${parts.join(PATH_SEPARATOR)}${PATH_SEPARATOR}`;
|
||||
};
|
||||
|
||||
export default function KnowledgeBaseSearch() {
|
||||
const navigate = useNavigate();
|
||||
const { message } = App.useApp();
|
||||
const [searchTerm, setSearchTerm] = useState("");
|
||||
const [activeKeyword, setActiveKeyword] = useState("");
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [searched, setSearched] = useState(false);
|
||||
const [results, setResults] = useState<KnowledgeBaseFileSearchResult[]>([]);
|
||||
const [pagination, setPagination] = useState({
|
||||
current: 1,
|
||||
pageSize: 10,
|
||||
total: 0,
|
||||
});
|
||||
|
||||
const fetchResults = useCallback(
|
||||
async (keyword: string, page?: number, pageSize?: number) => {
|
||||
const resolvedPage = page ?? pagination.current;
|
||||
const resolvedPageSize = pageSize ?? pagination.pageSize;
|
||||
if (!keyword) {
|
||||
setResults([]);
|
||||
setPagination((prev) => ({ ...prev, total: 0, current: resolvedPage }));
|
||||
setSearched(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
try {
|
||||
const { data } = await queryKnowledgeBaseFilesSearchUsingGet({
|
||||
fileName: keyword,
|
||||
page: Math.max(resolvedPage - 1, 0),
|
||||
size: resolvedPageSize,
|
||||
});
|
||||
const content = Array.isArray(data?.content) ? data.content : [];
|
||||
setResults(content);
|
||||
setPagination({
|
||||
current: resolvedPage,
|
||||
pageSize: resolvedPageSize,
|
||||
total: data?.totalElements ?? 0,
|
||||
});
|
||||
setSearched(true);
|
||||
} catch (error) {
|
||||
console.error("Failed to search knowledge base files:", error);
|
||||
message.error("检索失败,请稍后重试");
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
},
|
||||
[message, pagination]
|
||||
);
|
||||
|
||||
const handleSearch = (value?: string) => {
|
||||
const keyword = (value ?? searchTerm).trim();
|
||||
if (!keyword) {
|
||||
message.warning("请输入文件名");
|
||||
return;
|
||||
}
|
||||
setActiveKeyword(keyword);
|
||||
fetchResults(keyword, 1, pagination.pageSize);
|
||||
};
|
||||
|
||||
const columns = useMemo(
|
||||
() => [
|
||||
{
|
||||
title: "知识库",
|
||||
dataIndex: "knowledgeBaseName",
|
||||
key: "knowledgeBaseName",
|
||||
width: 220,
|
||||
ellipsis: true,
|
||||
render: (text: string) => text || "-",
|
||||
},
|
||||
{
|
||||
title: "文件名",
|
||||
dataIndex: "fileName",
|
||||
key: "fileName",
|
||||
width: 220,
|
||||
ellipsis: true,
|
||||
},
|
||||
{
|
||||
title: "相对路径",
|
||||
dataIndex: "relativePath",
|
||||
key: "relativePath",
|
||||
ellipsis: true,
|
||||
render: (value: string) => value || "-",
|
||||
},
|
||||
{
|
||||
title: "状态",
|
||||
dataIndex: "status",
|
||||
key: "status",
|
||||
width: 120,
|
||||
render: (status?: KBFileStatus) => {
|
||||
const config = status ? KBFileStatusMap[status] : undefined;
|
||||
if (!config) {
|
||||
return <Badge color="default" text={status || "-"} />;
|
||||
}
|
||||
return <Badge color={config.color} text={config.label} />;
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "更新时间",
|
||||
dataIndex: "updatedAt",
|
||||
key: "updatedAt",
|
||||
width: 180,
|
||||
ellipsis: true,
|
||||
render: (value: string) => formatDateTime(value) || "-",
|
||||
},
|
||||
{
|
||||
title: "操作",
|
||||
key: "action",
|
||||
width: 120,
|
||||
align: "right" as const,
|
||||
render: (_: unknown, record: KnowledgeBaseFileSearchResult) => (
|
||||
<Button
|
||||
type="link"
|
||||
onClick={() => {
|
||||
const prefix = resolvePrefix(record.relativePath);
|
||||
const searchParams = new URLSearchParams();
|
||||
if (prefix) {
|
||||
searchParams.set("prefix", prefix);
|
||||
}
|
||||
navigate(
|
||||
`/data/knowledge-base/detail/${record.knowledgeBaseId}?${searchParams.toString()}`
|
||||
);
|
||||
}}
|
||||
>
|
||||
定位
|
||||
</Button>
|
||||
),
|
||||
},
|
||||
],
|
||||
[navigate]
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="h-full flex flex-col gap-4">
|
||||
<Breadcrumb>
|
||||
<Breadcrumb.Item>
|
||||
<a onClick={() => navigate("/data/knowledge-base")}>知识库</a>
|
||||
</Breadcrumb.Item>
|
||||
<Breadcrumb.Item>全库搜索</Breadcrumb.Item>
|
||||
</Breadcrumb>
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-bold">知识库全库检索</h1>
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<Input.Search
|
||||
allowClear
|
||||
value={searchTerm}
|
||||
onChange={(event) => setSearchTerm(event.target.value)}
|
||||
onSearch={handleSearch}
|
||||
placeholder="输入文件名,回车或点击搜索"
|
||||
enterButton="搜索"
|
||||
loading={loading}
|
||||
/>
|
||||
</div>
|
||||
<Table
|
||||
rowKey="id"
|
||||
loading={loading}
|
||||
columns={columns}
|
||||
dataSource={results}
|
||||
pagination={{
|
||||
current: pagination.current,
|
||||
pageSize: pagination.pageSize,
|
||||
total: pagination.total,
|
||||
showTotal: (total) => `共 ${total} 条`,
|
||||
onChange: (page, pageSize) => {
|
||||
const nextKeyword = activeKeyword.trim();
|
||||
if (!nextKeyword) {
|
||||
message.warning("请输入文件名");
|
||||
return;
|
||||
}
|
||||
fetchResults(nextKeyword, page, pageSize || pagination.pageSize);
|
||||
},
|
||||
}}
|
||||
locale={{
|
||||
emptyText: searched ? "暂无匹配文件" : "请输入文件名开始检索",
|
||||
}}
|
||||
onRow={(record) => ({
|
||||
onClick: () => {
|
||||
const prefix = resolvePrefix(record.relativePath);
|
||||
const searchParams = new URLSearchParams();
|
||||
if (prefix) {
|
||||
searchParams.set("prefix", prefix);
|
||||
}
|
||||
navigate(
|
||||
`/data/knowledge-base/detail/${record.knowledgeBaseId}?${searchParams.toString()}`
|
||||
);
|
||||
},
|
||||
})}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -15,6 +15,7 @@ import { addKnowledgeBaseFilesUsingPost } from "../knowledge-base.api";
|
||||
import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
|
||||
import { DescriptionsItemType } from "antd/es/descriptions";
|
||||
import { DatasetFileCols } from "../knowledge-base.const";
|
||||
import type { DatasetFile } from "@/pages/DataManagement/dataset.model";
|
||||
|
||||
export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
|
||||
const [open, setOpen] = useState(false);
|
||||
@@ -25,6 +26,35 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
|
||||
|
||||
const [selectedFilesMap, setSelectedFilesMap] = useState({});
|
||||
|
||||
const PATH_SEPARATOR = "/";
|
||||
const normalizePath = (value?: string) =>
|
||||
(value ?? "").replace(/\\/g, PATH_SEPARATOR);
|
||||
|
||||
const resolveRelativePath = (file: DatasetFile) => {
|
||||
const normalizedName = normalizePath(file.fileName);
|
||||
if (normalizedName.includes(PATH_SEPARATOR)) {
|
||||
return normalizedName.replace(/^\/+/, "");
|
||||
}
|
||||
|
||||
const rawPath = normalizePath(file.path || file.filePath);
|
||||
const datasetId = String(file.datasetId || "");
|
||||
if (rawPath && datasetId) {
|
||||
const marker = `${PATH_SEPARATOR}${datasetId}${PATH_SEPARATOR}`;
|
||||
const index = rawPath.lastIndexOf(marker);
|
||||
if (index >= 0) {
|
||||
const relative = rawPath
|
||||
.slice(index + marker.length)
|
||||
.replace(/^\/+/, "");
|
||||
if (relative) {
|
||||
return relative;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fallbackName = rawPath.split(PATH_SEPARATOR).pop();
|
||||
return fallbackName || file.fileName;
|
||||
};
|
||||
|
||||
// 定义分块选项
|
||||
const sliceOptions = [
|
||||
{ label: "默认分块", value: "DEFAULT_CHUNK" },
|
||||
@@ -129,7 +159,8 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
|
||||
const requestData = {
|
||||
files: Object.values(selectedFilesMap).map((file) => ({
|
||||
id: String(file.id),
|
||||
fileName: file.fileName,
|
||||
fileName: (file as DatasetFile).fileName,
|
||||
relativePath: resolveRelativePath(file as DatasetFile),
|
||||
})),
|
||||
processType: newKB.processType,
|
||||
chunkSize: Number(newKB.chunkSize), // 确保是数字类型
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import { get, post, put, del } from "@/utils/request";
|
||||
|
||||
type RequestPayload = Record<string, unknown>;
|
||||
type RequestParams = Record<string, unknown>;
|
||||
|
||||
// 获取知识库列表
|
||||
export function queryKnowledgeBasesUsingPost(params: any) {
|
||||
export function queryKnowledgeBasesUsingPost(params: RequestPayload) {
|
||||
return post("/api/knowledge-base/list", params);
|
||||
}
|
||||
|
||||
// 创建知识库
|
||||
export function createKnowledgeBaseUsingPost(data: any) {
|
||||
export function createKnowledgeBaseUsingPost(data: RequestPayload) {
|
||||
return post("/api/knowledge-base/create", data);
|
||||
}
|
||||
|
||||
@@ -16,7 +19,7 @@ export function queryKnowledgeBaseByIdUsingGet(baseId: string) {
|
||||
}
|
||||
|
||||
// 更新知识库
|
||||
export function updateKnowledgeBaseByIdUsingPut(baseId: string, data: any) {
|
||||
export function updateKnowledgeBaseByIdUsingPut(baseId: string, data: RequestPayload) {
|
||||
return put(`/api/knowledge-base/${baseId}`, data);
|
||||
}
|
||||
|
||||
@@ -26,17 +29,22 @@ export function deleteKnowledgeBaseByIdUsingDelete(baseId: string) {
|
||||
}
|
||||
|
||||
// 获取知识生成文件列表
|
||||
export function queryKnowledgeBaseFilesUsingGet(baseId: string, data) {
|
||||
export function queryKnowledgeBaseFilesUsingGet(baseId: string, data: RequestParams) {
|
||||
return get(`/api/knowledge-base/${baseId}/files`, data);
|
||||
}
|
||||
|
||||
// 全库检索知识库文件
|
||||
export function queryKnowledgeBaseFilesSearchUsingGet(params: RequestParams) {
|
||||
return get("/api/knowledge-base/files/search", params);
|
||||
}
|
||||
|
||||
// 添加文件到知识库
|
||||
export function addKnowledgeBaseFilesUsingPost(baseId: string, data: any) {
|
||||
export function addKnowledgeBaseFilesUsingPost(baseId: string, data: RequestPayload) {
|
||||
return post(`/api/knowledge-base/${baseId}/files`, data);
|
||||
}
|
||||
|
||||
// 删除知识生成文件
|
||||
export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: any) {
|
||||
export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: RequestPayload) {
|
||||
return del(`/api/knowledge-base/${baseId}/files`, data);
|
||||
}
|
||||
|
||||
|
||||
@@ -29,50 +29,26 @@ export interface KBFile {
|
||||
id: string;
|
||||
fileName: string;
|
||||
name?: string;
|
||||
relativePath?: string;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
status: KBFileStatus;
|
||||
chunkCount: number;
|
||||
metadata: Record<string, any>;
|
||||
metadata: Record<string, unknown>;
|
||||
knowledgeBaseId: string;
|
||||
fileId: string;
|
||||
updatedBy: string;
|
||||
createdBy: string;
|
||||
}
|
||||
|
||||
interface Chunk {
|
||||
id: number;
|
||||
content: string;
|
||||
position: number;
|
||||
tokens: number;
|
||||
embedding?: number[];
|
||||
similarity?: string;
|
||||
export interface KnowledgeBaseFileSearchResult {
|
||||
id: string;
|
||||
knowledgeBaseId: string;
|
||||
knowledgeBaseName: string;
|
||||
fileName: string;
|
||||
relativePath?: string;
|
||||
status?: KBFileStatus;
|
||||
chunkCount?: number;
|
||||
createdAt?: string;
|
||||
updatedAt?: string;
|
||||
vectorId?: string;
|
||||
sliceOperator?: string;
|
||||
parentChunkId?: number;
|
||||
metadata?: {
|
||||
source: string;
|
||||
page?: number;
|
||||
section?: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface VectorizationRecord {
|
||||
id: number;
|
||||
timestamp: string;
|
||||
operation: "create" | "update" | "delete" | "reprocess";
|
||||
fileId: number;
|
||||
fileName: string;
|
||||
chunksProcessed: number;
|
||||
vectorsGenerated: number;
|
||||
status: "success" | "failed" | "partial";
|
||||
duration: string;
|
||||
config: {
|
||||
embeddingModel: string;
|
||||
chunkSize: number;
|
||||
sliceMethod: string;
|
||||
};
|
||||
error?: string;
|
||||
}
|
||||
|
||||
@@ -41,7 +41,12 @@ import CreateKnowledgeSet from "../components/CreateKnowledgeSet";
|
||||
import KnowledgeItemEditor from "../components/KnowledgeItemEditor";
|
||||
import ImportKnowledgeItemsDialog from "../components/ImportKnowledgeItemsDialog";
|
||||
import { formatDate } from "@/utils/unit";
|
||||
import { PREVIEW_TEXT_MAX_LENGTH, resolvePreviewFileType, truncatePreviewText } from "@/utils/filePreview";
|
||||
import {
|
||||
PREVIEW_TEXT_MAX_LENGTH,
|
||||
resolvePreviewFileType,
|
||||
truncatePreviewText,
|
||||
type PreviewFileType,
|
||||
} from "@/utils/filePreview";
|
||||
|
||||
const PREVIEW_MAX_HEIGHT = 500;
|
||||
const PREVIEW_MODAL_WIDTH = {
|
||||
@@ -67,7 +72,7 @@ const KnowledgeSetDetail = () => {
|
||||
const [previewVisible, setPreviewVisible] = useState(false);
|
||||
const [previewContent, setPreviewContent] = useState("");
|
||||
const [previewFileName, setPreviewFileName] = useState("");
|
||||
const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text");
|
||||
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
|
||||
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
|
||||
const [previewLoadingItemId, setPreviewLoadingItemId] = useState<string | null>(null);
|
||||
|
||||
@@ -560,6 +565,13 @@ const KnowledgeSetDetail = () => {
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
{previewFileType === "pdf" && (
|
||||
<iframe
|
||||
src={previewMediaUrl}
|
||||
title={previewFileName || "PDF 预览"}
|
||||
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }}
|
||||
/>
|
||||
)}
|
||||
{previewFileType === "video" && (
|
||||
<div style={{ textAlign: "center" }}>
|
||||
<video
|
||||
|
||||
@@ -30,6 +30,7 @@ import ManualEvaluatePage from "@/pages/DataEvaluation/Evaluate/ManualEvaluate";
|
||||
import KnowledgeBasePage from "@/pages/KnowledgeBase/Home/KnowledgeBasePage";
|
||||
import KnowledgeBaseDetailPage from "@/pages/KnowledgeBase/Detail/KnowledgeBaseDetail";
|
||||
import KnowledgeBaseFileDetailPage from "@/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail";
|
||||
import KnowledgeBaseSearch from "@/pages/KnowledgeBase/Search/KnowledgeBaseSearch";
|
||||
|
||||
import OperatorMarketPage from "@/pages/OperatorMarket/Home/OperatorMarket";
|
||||
import OperatorPluginCreate from "@/pages/OperatorMarket/Create/OperatorPluginCreate";
|
||||
@@ -246,6 +247,10 @@ const router = createBrowserRouter([
|
||||
index: true,
|
||||
Component: KnowledgeBasePage,
|
||||
},
|
||||
{
|
||||
path: "search",
|
||||
Component: KnowledgeBaseSearch,
|
||||
},
|
||||
{
|
||||
path: "detail/:id",
|
||||
Component: KnowledgeBaseDetailPage,
|
||||
|
||||
@@ -22,8 +22,9 @@ const IMAGE_FILE_EXTENSIONS = [
|
||||
];
|
||||
const VIDEO_FILE_EXTENSIONS = [".mp4", ".webm", ".ogg", ".mov", ".avi"];
|
||||
const AUDIO_FILE_EXTENSIONS = [".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a"];
|
||||
const PDF_FILE_EXTENSIONS = [".pdf"];
|
||||
|
||||
export type PreviewFileType = "text" | "image" | "video" | "audio";
|
||||
export type PreviewFileType = "text" | "image" | "video" | "audio" | "pdf";
|
||||
|
||||
export const resolvePreviewFileType = (fileName?: string): PreviewFileType | null => {
|
||||
const lowerName = (fileName || "").toLowerCase();
|
||||
@@ -39,6 +40,9 @@ export const resolvePreviewFileType = (fileName?: string): PreviewFileType | nul
|
||||
if (AUDIO_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
|
||||
return "audio";
|
||||
}
|
||||
if (PDF_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
|
||||
return "pdf";
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@ from .user_management import (
|
||||
from .annotation_management import (
|
||||
AnnotationTemplate,
|
||||
LabelingProject,
|
||||
AnnotationResult
|
||||
AnnotationResult,
|
||||
LabelingProjectFile
|
||||
)
|
||||
|
||||
from .data_evaluation import (
|
||||
@@ -32,6 +33,7 @@ __all__ = [
|
||||
"AnnotationTemplate",
|
||||
"LabelingProject",
|
||||
"AnnotationResult",
|
||||
"LabelingProjectFile",
|
||||
"EvaluationTask",
|
||||
"EvaluationItem",
|
||||
]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Tables of Annotation Management Module"""
|
||||
|
||||
import uuid
|
||||
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey
|
||||
import uuid
|
||||
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey, UniqueConstraint, Index
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.db.session import Base
|
||||
@@ -34,7 +34,7 @@ class AnnotationTemplate(Base):
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
|
||||
class LabelingProject(Base):
|
||||
class LabelingProject(Base):
|
||||
"""标注项目模型"""
|
||||
|
||||
__tablename__ = "t_dm_labeling_projects"
|
||||
@@ -50,13 +50,33 @@ class LabelingProject(Base):
|
||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
|
||||
def __repr__(self):
|
||||
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
|
||||
|
||||
@property
|
||||
def is_deleted(self) -> bool:
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
def is_deleted(self) -> bool:
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
|
||||
|
||||
class LabelingProjectFile(Base):
|
||||
"""标注项目文件快照模型"""
|
||||
|
||||
__tablename__ = "t_dm_labeling_project_files"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
project_id = Column(String(36), nullable=False, comment="标注项目ID")
|
||||
file_id = Column(String(36), nullable=False, comment="文件ID")
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("project_id", "file_id", name="uk_project_file"),
|
||||
Index("idx_project_id", "project_id"),
|
||||
Index("idx_file_id", "file_id"),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LabelingProjectFile(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
|
||||
|
||||
|
||||
class AnnotationResult(Base):
|
||||
|
||||
@@ -3,10 +3,11 @@ import math
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.db.models import LabelingProject
|
||||
from app.db.models import LabelingProject, DatasetFiles
|
||||
from app.module.shared.schema import StandardResponse, PaginatedData
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
@@ -116,8 +117,35 @@ async def create_mapping(
|
||||
configuration=project_configuration or None,
|
||||
)
|
||||
|
||||
# 创建映射关系,包含项目名称(先持久化映射以获得 mapping.id)
|
||||
mapping = await mapping_service.create_mapping(labeling_project)
|
||||
file_result = await db.execute(
|
||||
select(DatasetFiles).where(DatasetFiles.dataset_id == request.dataset_id)
|
||||
)
|
||||
file_records = file_result.scalars().all()
|
||||
snapshot_file_ids: list[str] = []
|
||||
if dataset_type == TEXT_DATASET_TYPE:
|
||||
derived_source_ids = set()
|
||||
for file_record in file_records:
|
||||
metadata = getattr(file_record, "dataset_filemetadata", None)
|
||||
if isinstance(metadata, dict):
|
||||
source_id = metadata.get("derived_from_file_id")
|
||||
if source_id:
|
||||
derived_source_ids.add(str(source_id))
|
||||
snapshot_file_ids = [
|
||||
str(file_record.id)
|
||||
for file_record in file_records
|
||||
if file_record.id and str(file_record.id) not in derived_source_ids
|
||||
]
|
||||
else:
|
||||
snapshot_file_ids = [
|
||||
str(file_record.id)
|
||||
for file_record in file_records
|
||||
if file_record.id
|
||||
]
|
||||
|
||||
# 创建映射关系并写入快照
|
||||
mapping = await mapping_service.create_mapping_with_snapshot(
|
||||
labeling_project, snapshot_file_ids
|
||||
)
|
||||
|
||||
response_data = DatasetMappingCreateResponse(
|
||||
id=mapping.id,
|
||||
|
||||
@@ -23,7 +23,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
|
||||
from app.module.annotation.config import LabelStudioTagConfig
|
||||
from app.module.annotation.schema.editor import (
|
||||
EditorProjectInfo,
|
||||
@@ -429,21 +429,16 @@ class AnnotationEditorService:
|
||||
exclude_source_documents: Optional[bool] = None,
|
||||
) -> EditorTaskListResponse:
|
||||
project = await self._get_project_or_404(project_id)
|
||||
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
|
||||
should_exclude_source_documents = False
|
||||
if dataset_type == DATASET_TYPE_TEXT:
|
||||
should_exclude_source_documents = (
|
||||
exclude_source_documents if exclude_source_documents is not None else True
|
||||
)
|
||||
|
||||
base_conditions = [DatasetFiles.dataset_id == project.dataset_id]
|
||||
if should_exclude_source_documents:
|
||||
base_conditions.append(~self._build_source_document_filter())
|
||||
base_conditions = [
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == project.dataset_id,
|
||||
]
|
||||
|
||||
count_result = await self.db.execute(
|
||||
select(func.count()).select_from(DatasetFiles).where(
|
||||
*base_conditions
|
||||
)
|
||||
select(func.count())
|
||||
.select_from(LabelingProjectFile)
|
||||
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(*base_conditions)
|
||||
)
|
||||
total = int(count_result.scalar() or 0)
|
||||
|
||||
@@ -453,6 +448,7 @@ class AnnotationEditorService:
|
||||
)
|
||||
files_result = await self.db.execute(
|
||||
select(DatasetFiles, AnnotationResult.id, AnnotationResult.updated_at)
|
||||
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.outerjoin(
|
||||
AnnotationResult,
|
||||
(AnnotationResult.file_id == DatasetFiles.id)
|
||||
@@ -827,7 +823,10 @@ class AnnotationEditorService:
|
||||
|
||||
# 校验文件归属
|
||||
file_result = await self.db.execute(
|
||||
select(DatasetFiles).where(
|
||||
select(DatasetFiles)
|
||||
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(
|
||||
LabelingProjectFile.project_id == project.id,
|
||||
DatasetFiles.id == file_id,
|
||||
DatasetFiles.dataset_id == project.dataset_id,
|
||||
)
|
||||
|
||||
@@ -25,7 +25,33 @@ from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
|
||||
|
||||
|
||||
async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
|
||||
"""读取文件内容,仅适用于文本文件
|
||||
|
||||
Args:
|
||||
file_path: 文件路径
|
||||
max_size: 最大读取字节数(默认10MB)
|
||||
|
||||
Returns:
|
||||
文件内容字符串,如果读取失败返回 None
|
||||
"""
|
||||
try:
|
||||
# 检查文件是否存在且大小在限制内
|
||||
if not os.path.exists(file_path):
|
||||
return None
|
||||
|
||||
file_size = os.path.getsize(file_path)
|
||||
if file_size > max_size:
|
||||
return f"[File too large: {file_size} bytes]"
|
||||
|
||||
# 尝试以文本方式读取
|
||||
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
|
||||
return f.read()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
from ..schema.export import (
|
||||
AnnotationExportItem,
|
||||
@@ -49,15 +75,18 @@ class AnnotationExportService:
|
||||
project = await self._get_project_or_404(project_id)
|
||||
logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}")
|
||||
|
||||
# 获取总文件数(只统计 ACTIVE 状态的文件)
|
||||
# 获取总文件数(标注项目快照内的文件)
|
||||
total_result = await self.db.execute(
|
||||
select(func.count()).select_from(DatasetFiles).where(
|
||||
select(func.count())
|
||||
.select_from(LabelingProjectFile)
|
||||
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == project.dataset_id,
|
||||
DatasetFiles.status == "ACTIVE",
|
||||
)
|
||||
)
|
||||
total_files = int(total_result.scalar() or 0)
|
||||
logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}")
|
||||
logger.info(f"Total files (snapshot): {total_files} for project_id={project_id}")
|
||||
|
||||
# 获取已标注文件数(统计不同的 file_id 数量)
|
||||
annotated_result = await self.db.execute(
|
||||
@@ -139,30 +168,43 @@ class AnnotationExportService:
|
||||
# 只获取已标注的数据
|
||||
result = await self.db.execute(
|
||||
select(AnnotationResult, DatasetFiles)
|
||||
.join(LabelingProjectFile, LabelingProjectFile.file_id == AnnotationResult.file_id)
|
||||
.join(DatasetFiles, AnnotationResult.file_id == DatasetFiles.id)
|
||||
.where(AnnotationResult.project_id == project_id)
|
||||
.where(
|
||||
AnnotationResult.project_id == project_id,
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == dataset_id,
|
||||
)
|
||||
.order_by(AnnotationResult.updated_at.desc())
|
||||
)
|
||||
rows = result.all()
|
||||
|
||||
for ann, file in rows:
|
||||
annotation_data = ann.annotation or {}
|
||||
# 获取文件内容(如果是文本文件且用户要求包含数据)
|
||||
file_content = None
|
||||
if include_data:
|
||||
file_path = getattr(file, "file_path", "")
|
||||
file_content = await _read_file_content(file_path)
|
||||
|
||||
items.append(
|
||||
AnnotationExportItem(
|
||||
file_id=str(file.id),
|
||||
file_name=str(getattr(file, "file_name", "")),
|
||||
data={"text": ""} if include_data else None, # TEXT 类型数据需要单独获取
|
||||
data={"text": file_content} if include_data else None,
|
||||
annotations=[annotation_data] if annotation_data else [],
|
||||
created_at=ann.created_at,
|
||||
updated_at=ann.updated_at,
|
||||
)
|
||||
)
|
||||
else:
|
||||
# 获取所有文件,包括未标注的(只获取 ACTIVE 状态的文件)
|
||||
# 获取所有文件(基于标注项目快照)
|
||||
files_result = await self.db.execute(
|
||||
select(DatasetFiles).where(
|
||||
select(DatasetFiles)
|
||||
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == dataset_id,
|
||||
DatasetFiles.status == "ACTIVE",
|
||||
)
|
||||
)
|
||||
files = files_result.scalars().all()
|
||||
@@ -177,12 +219,18 @@ class AnnotationExportService:
|
||||
file_id = str(file.id)
|
||||
ann = annotations.get(file_id)
|
||||
annotation_data = ann.annotation if ann else {}
|
||||
|
||||
# 获取文件内容(如果是文本文件且用户要求包含数据)
|
||||
file_content = None
|
||||
if include_data:
|
||||
file_path = getattr(file, "file_path", "")
|
||||
file_content = await _read_file_content(file_path)
|
||||
|
||||
items.append(
|
||||
AnnotationExportItem(
|
||||
file_id=file_id,
|
||||
file_name=str(getattr(file, "file_name", "")),
|
||||
data={"text": ""} if include_data else None,
|
||||
data={"text": file_content} if include_data else None,
|
||||
annotations=[annotation_data] if annotation_data else [],
|
||||
created_at=ann.created_at if ann else None,
|
||||
updated_at=ann.updated_at if ann else None,
|
||||
@@ -256,12 +304,14 @@ class AnnotationExportService:
|
||||
writer.writeheader()
|
||||
|
||||
for item in items:
|
||||
# 提取标签信息
|
||||
# 提取标签信息(支持多种标注类型)
|
||||
labels = []
|
||||
for ann in item.annotations:
|
||||
results = ann.get("result", [])
|
||||
for r in results:
|
||||
value = r.get("value", {})
|
||||
label_type = r.get("type", "")
|
||||
|
||||
# 提取不同类型的标签值
|
||||
if "choices" in value:
|
||||
labels.extend(value["choices"])
|
||||
@@ -269,6 +319,18 @@ class AnnotationExportService:
|
||||
labels.append(value["text"])
|
||||
elif "labels" in value:
|
||||
labels.extend(value["labels"])
|
||||
elif "rectanglelabels" in value:
|
||||
labels.extend(value["rectanglelabels"])
|
||||
elif "polygonlabels" in value:
|
||||
labels.extend(value["polygonlabels"])
|
||||
elif "brushlabels" in value:
|
||||
labels.extend(value["brushlabels"])
|
||||
elif "hypertextlabels" in value:
|
||||
labels.extend(value["hypertextlabels"])
|
||||
elif "timeserieslabels" in value:
|
||||
labels.extend(value["timeserieslabels"])
|
||||
elif "transcription" in value:
|
||||
labels.append(value["transcription"])
|
||||
|
||||
writer.writerow({
|
||||
"file_id": item.file_id,
|
||||
@@ -286,7 +348,11 @@ class AnnotationExportService:
|
||||
def _export_coco(
|
||||
self, items: List[AnnotationExportItem], project_name: str
|
||||
) -> Tuple[bytes, str, str]:
|
||||
"""导出为 COCO 格式(适用于目标检测标注)"""
|
||||
"""导出为 COCO 格式(适用于目标检测标注)
|
||||
|
||||
注意:当前实现中图片宽高被设置为0,因为需要读取实际图片文件获取尺寸。
|
||||
bbox 坐标使用 Label Studio 的百分比值(0-100),使用时需要转换为像素坐标。
|
||||
"""
|
||||
coco_format = COCOExportFormat(
|
||||
info={
|
||||
"description": f"Exported from DataMate project: {project_name}",
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy import update, func
|
||||
from sqlalchemy import update, func, insert
|
||||
from sqlalchemy.orm import aliased
|
||||
from typing import Optional, List, Tuple
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult
|
||||
from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult, LabelingProjectFile
|
||||
from app.db.models.dataset_management import Dataset, DatasetFiles
|
||||
from app.module.annotation.schema import (
|
||||
DatasetMappingCreateRequest,
|
||||
@@ -20,9 +20,11 @@ logger = get_logger(__name__)
|
||||
|
||||
class DatasetMappingService:
|
||||
"""数据集映射服务"""
|
||||
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
SNAPSHOT_INSERT_BATCH_SIZE = 500
|
||||
|
||||
def _build_query_with_dataset_name(self):
|
||||
"""Build base query with dataset name joined"""
|
||||
@@ -49,11 +51,14 @@ class DatasetMappingService:
|
||||
Returns:
|
||||
(total_count, annotated_count) 元组
|
||||
"""
|
||||
# 获取数据集总数据量(统计 ACTIVE 和 COMPLETED 状态的文件)
|
||||
# 获取标注项目快照数据量(只统计快照内的文件)
|
||||
total_result = await self.db.execute(
|
||||
select(func.count()).select_from(DatasetFiles).where(
|
||||
select(func.count())
|
||||
.select_from(LabelingProjectFile)
|
||||
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == dataset_id,
|
||||
DatasetFiles.status.in_(["ACTIVE", "COMPLETED"]),
|
||||
)
|
||||
)
|
||||
total_count = int(total_result.scalar() or 0)
|
||||
@@ -213,6 +218,48 @@ class DatasetMappingService:
|
||||
|
||||
logger.debug(f"Mapping created: {labeling_project.id}")
|
||||
return await self._to_response(labeling_project)
|
||||
|
||||
async def create_mapping_with_snapshot(
|
||||
self,
|
||||
labeling_project: LabelingProject,
|
||||
file_ids: List[str],
|
||||
) -> DatasetMappingResponse:
|
||||
"""创建数据集映射并写入快照文件"""
|
||||
logger.debug(
|
||||
"Create dataset mapping with snapshot: %s -> %s, files=%d",
|
||||
labeling_project.dataset_id,
|
||||
labeling_project.labeling_project_id,
|
||||
len(file_ids),
|
||||
)
|
||||
|
||||
self.db.add(labeling_project)
|
||||
await self.db.flush()
|
||||
assert labeling_project.id, "labeling_project.id must be set before snapshot insert"
|
||||
|
||||
if file_ids:
|
||||
await self._insert_snapshot_records(labeling_project.id, file_ids)
|
||||
|
||||
await self.db.commit()
|
||||
await self.db.refresh(labeling_project)
|
||||
|
||||
logger.debug("Mapping created with snapshot: %s", labeling_project.id)
|
||||
return await self._to_response(labeling_project)
|
||||
|
||||
async def _insert_snapshot_records(self, project_id: str, file_ids: List[str]) -> None:
    """Bulk-insert snapshot rows linking dataset files to a labeling project.

    Rows are written in chunks of ``SNAPSHOT_INSERT_BATCH_SIZE`` so that no
    single INSERT statement grows unbounded; the chunking produces the same
    statement groupings as flushing a running batch at the size threshold.

    Args:
        project_id: Labeling project the snapshot rows belong to.
        file_ids: Dataset file ids to record; each gets a fresh UUID row id.
    """
    chunk_size = self.SNAPSHOT_INSERT_BATCH_SIZE
    for start in range(0, len(file_ids), chunk_size):
        rows = [
            {
                "id": str(uuid.uuid4()),
                "project_id": project_id,
                "file_id": fid,
            }
            for fid in file_ids[start:start + chunk_size]
        ]
        await self.db.execute(insert(LabelingProjectFile).values(rows))
|
||||
|
||||
async def get_mapping_by_source_uuid(
|
||||
self,
|
||||
|
||||
@@ -48,6 +48,17 @@ CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
|
||||
INDEX idx_labeling_project_id (labeling_project_id)
|
||||
) COMMENT='标注项目表';
|
||||
|
||||
-- Labeling project file snapshot table (标注项目文件快照表):
-- records which dataset files belonged to a labeling project when the
-- snapshot was taken. uk_project_file prevents the same file from being
-- snapshotted twice for one project; the single-column indexes support
-- lookups by project and by file independently.
CREATE TABLE IF NOT EXISTS t_dm_labeling_project_files (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID',
    file_id VARCHAR(36) NOT NULL COMMENT '文件ID',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    UNIQUE KEY uk_project_file (project_id, file_id),
    INDEX idx_project_id (project_id),
    INDEX idx_file_id (file_id)
) COMMENT='标注项目文件快照表';
|
||||
|
||||
-- 标注结果表
|
||||
CREATE TABLE IF NOT EXISTS t_dm_annotation_results (
|
||||
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
|
||||
|
||||
@@ -18,6 +18,7 @@ create table if not exists t_rag_file
|
||||
id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
|
||||
knowledge_base_id VARCHAR(36) NOT NULL COMMENT '知识库ID',
|
||||
file_name VARCHAR(255) NOT NULL COMMENT '文件名',
|
||||
relative_path VARCHAR(512) NULL COMMENT '相对路径',
|
||||
file_id VARCHAR(255) NOT NULL COMMENT '文件ID',
|
||||
chunk_count INT COMMENT '切片数',
|
||||
metadata JSON COMMENT '元数据',
|
||||
@@ -28,3 +29,6 @@ create table if not exists t_rag_file
|
||||
created_by VARCHAR(255) COMMENT '创建者',
|
||||
updated_by VARCHAR(255) COMMENT '更新者'
|
||||
) comment '知识库切片表';
|
||||
|
||||
-- Composite indexes for t_rag_file: both lead with knowledge_base_id so
-- per-knowledge-base lookups by file name or by relative path avoid full scans.
create index idx_rag_file_kb_name on t_rag_file (knowledge_base_id, file_name);
create index idx_rag_file_kb_path on t_rag_file (knowledge_base_id, relative_path);
|
||||
|
||||
Reference in New Issue
Block a user