You've already forked DataMate
Compare commits
93 Commits
5318ee9641
...
lsf
| Author | SHA1 | Date | |
|---|---|---|---|
| 473f4e717f | |||
| 6b0042cb66 | |||
| fa9e9d9f68 | |||
| 707e65b017 | |||
| cda22a720c | |||
| 394e2bda18 | |||
| 4220284f5a | |||
| 8415166949 | |||
| 078f303f57 | |||
| 50f2da5503 | |||
| 3af1daf8b6 | |||
| 7c7729434b | |||
| 17a62cd3c2 | |||
| f381d641ab | |||
| c8611d29ff | |||
| 147beb1ec7 | |||
| 699031dae7 | |||
| 88b1383653 | |||
| cc6415c4d9 | |||
| 3d036c4cd6 | |||
| 2445235fd2 | |||
| 893e0a1580 | |||
| 05e6842fc8 | |||
| da5b18e423 | |||
| 31629ab50b | |||
| fb43052ddf | |||
| c44c75be25 | |||
| 05f3efc148 | |||
| 16eb5cacf9 | |||
| e71116d117 | |||
| cac53d7aac | |||
| 43b4a619bc | |||
| 9da187d2c6 | |||
| b36fdd2438 | |||
| daa63bdd13 | |||
| 85433ac071 | |||
| fc2e50b415 | |||
| 26e1ae69d7 | |||
| 7092c3f955 | |||
| b2bdf9e066 | |||
| a5261b33b2 | |||
|
|
52daf30869 | ||
| 07a901043a | |||
| 32e3fc97c6 | |||
| a73571bd73 | |||
| 00fa1b86eb | |||
| 626c0fcd9a | |||
| 2f2e0d6a8d | |||
| 10fad39e02 | |||
| 9014dca1ac | |||
| 0b8fe34586 | |||
| 27e27a09d4 | |||
| d24fea83d8 | |||
| 05088fef1a | |||
| a0239518fb | |||
| 9d185bb10c | |||
| 6c4f05c0b9 | |||
| 438acebb89 | |||
| f06d6e5a7e | |||
| fda283198d | |||
| d535d0ac1b | |||
| 4d2c9e546c | |||
| 02cd16523f | |||
| d4a44f3bf5 | |||
| 340a0ad364 | |||
| 00c41fbbd3 | |||
| 2430db290d | |||
| 40889baacc | |||
| 551248ec76 | |||
| 0bb9abb200 | |||
| d135a7f336 | |||
| 7043a26ab3 | |||
| 906bb39b83 | |||
| dbf8ec53dd | |||
| 5f89968974 | |||
| be313cf425 | |||
| db37de8aee | |||
| aeec19b99f | |||
| a4aefe66cd | |||
| 2f3a8b38d0 | |||
| 150af1a741 | |||
| e28f680abb | |||
| 4f99875670 | |||
| c23a9da8cb | |||
| 310bc356b1 | |||
| c1fb02b0f5 | |||
| 4a3e466210 | |||
| 5d8d25ca8c | |||
| f6788756d3 | |||
| 5a5279869e | |||
| e1c963928a | |||
| 33cf65c9f8 | |||
| 3e0a15ac8e |
304
Makefile.offline.mk
Normal file
304
Makefile.offline.mk
Normal file
@@ -0,0 +1,304 @@
|
||||
# ============================================================================
|
||||
# Makefile 离线构建扩展
|
||||
# 将此文件内容追加到主 Makefile 末尾,或单独包含使用
|
||||
# ============================================================================
|
||||
|
||||
# 离线构建配置
|
||||
CACHE_DIR ?= ./build-cache
|
||||
OFFLINE_VERSION ?= latest
|
||||
|
||||
# 创建 buildx 构建器(如果不存在)
|
||||
.PHONY: ensure-buildx
|
||||
ensure-buildx:
|
||||
@if ! docker buildx inspect offline-builder > /dev/null 2>&1; then \
|
||||
echo "创建 buildx 构建器..."; \
|
||||
docker buildx create --name offline-builder --driver docker-container --use 2>/dev/null || docker buildx use offline-builder; \
|
||||
else \
|
||||
docker buildx use offline-builder 2>/dev/null || true; \
|
||||
fi
|
||||
|
||||
# ========== 离线缓存导出(有网环境) ==========
|
||||
|
||||
.PHONY: offline-export
|
||||
offline-export: ensure-buildx
|
||||
@echo "======================================"
|
||||
@echo "导出离线构建缓存..."
|
||||
@echo "======================================"
|
||||
@mkdir -p $(CACHE_DIR)/buildkit $(CACHE_DIR)/images $(CACHE_DIR)/resources
|
||||
@$(MAKE) _offline-export-base-images
|
||||
@$(MAKE) _offline-export-cache
|
||||
@$(MAKE) _offline-export-resources
|
||||
@$(MAKE) _offline-package
|
||||
|
||||
.PHONY: _offline-export-base-images
|
||||
_offline-export-base-images:
|
||||
@echo ""
|
||||
@echo "1. 导出基础镜像..."
|
||||
@bash -c 'images=( \
|
||||
"maven:3-eclipse-temurin-21" \
|
||||
"maven:3-eclipse-temurin-8" \
|
||||
"eclipse-temurin:21-jdk" \
|
||||
"mysql:8" \
|
||||
"node:20-alpine" \
|
||||
"nginx:1.29" \
|
||||
"ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm" \
|
||||
"ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm" \
|
||||
"ghcr.nju.edu.cn/astral-sh/uv:latest" \
|
||||
"python:3.12-slim" \
|
||||
"python:3.11-slim" \
|
||||
"gcr.nju.edu.cn/distroless/nodejs20-debian12" \
|
||||
); for img in "$${images[@]}"; do echo " Pulling $$img..."; docker pull "$$img" 2>/dev/null || true; done'
|
||||
@echo " Saving base images..."
|
||||
@docker save -o $(CACHE_DIR)/images/base-images.tar \
|
||||
maven:3-eclipse-temurin-21 \
|
||||
maven:3-eclipse-temurin-8 \
|
||||
eclipse-temurin:21-jdk \
|
||||
mysql:8 \
|
||||
node:20-alpine \
|
||||
nginx:1.29 \
|
||||
ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm \
|
||||
ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm \
|
||||
ghcr.nju.edu.cn/astral-sh/uv:latest \
|
||||
python:3.12-slim \
|
||||
python:3.11-slim \
|
||||
gcr.nju.edu.cn/distroless/nodejs20-debian12 2>/dev/null || echo " Warning: Some images may not exist"
|
||||
|
||||
.PHONY: _offline-export-cache
|
||||
_offline-export-cache:
|
||||
@echo ""
|
||||
@echo "2. 导出 BuildKit 缓存..."
|
||||
@echo " backend..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/backend-cache,mode=max -f scripts/images/backend/Dockerfile -t datamate-backend:cache . 2>/dev/null || echo " Warning: backend cache export failed"
|
||||
@echo " backend-python..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/backend-python-cache,mode=max -f scripts/images/backend-python/Dockerfile -t datamate-backend-python:cache . 2>/dev/null || echo " Warning: backend-python cache export failed"
|
||||
@echo " database..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/database-cache,mode=max -f scripts/images/database/Dockerfile -t datamate-database:cache . 2>/dev/null || echo " Warning: database cache export failed"
|
||||
@echo " frontend..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/frontend-cache,mode=max -f scripts/images/frontend/Dockerfile -t datamate-frontend:cache . 2>/dev/null || echo " Warning: frontend cache export failed"
|
||||
@echo " gateway..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/gateway-cache,mode=max -f scripts/images/gateway/Dockerfile -t datamate-gateway:cache . 2>/dev/null || echo " Warning: gateway cache export failed"
|
||||
@echo " runtime..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/runtime-cache,mode=max -f scripts/images/runtime/Dockerfile -t datamate-runtime:cache . 2>/dev/null || echo " Warning: runtime cache export failed"
|
||||
@echo " deer-flow-backend..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/deer-flow-backend-cache,mode=max -f scripts/images/deer-flow-backend/Dockerfile -t deer-flow-backend:cache . 2>/dev/null || echo " Warning: deer-flow-backend cache export failed"
|
||||
@echo " deer-flow-frontend..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/deer-flow-frontend-cache,mode=max -f scripts/images/deer-flow-frontend/Dockerfile -t deer-flow-frontend:cache . 2>/dev/null || echo " Warning: deer-flow-frontend cache export failed"
|
||||
@echo " mineru..."
|
||||
@docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/mineru-cache,mode=max -f scripts/images/mineru/Dockerfile -t datamate-mineru:cache . 2>/dev/null || echo " Warning: mineru cache export failed"
|
||||
|
||||
.PHONY: _offline-export-resources
|
||||
_offline-export-resources:
|
||||
@echo ""
|
||||
@echo "3. 预下载外部资源..."
|
||||
@mkdir -p $(CACHE_DIR)/resources/models
|
||||
@echo " PaddleOCR model..."
|
||||
@wget -q -O $(CACHE_DIR)/resources/models/ch_ppocr_mobile_v2.0_cls_infer.tar \
|
||||
https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar 2>/dev/null || echo " Warning: PaddleOCR model download failed"
|
||||
@echo " spaCy model..."
|
||||
@wget -q -O $(CACHE_DIR)/resources/models/zh_core_web_sm-3.8.0-py3-none-any.whl \
|
||||
https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl 2>/dev/null || echo " Warning: spaCy model download failed"
|
||||
@echo " DataX source..."
|
||||
@if [ ! -d "$(CACHE_DIR)/resources/DataX" ]; then \
|
||||
git clone --depth 1 https://gitee.com/alibaba/DataX.git $(CACHE_DIR)/resources/DataX 2>/dev/null || echo " Warning: DataX clone failed"; \
|
||||
fi
|
||||
@echo " deer-flow source..."
|
||||
@if [ ! -d "$(CACHE_DIR)/resources/deer-flow" ]; then \
|
||||
git clone --depth 1 https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git $(CACHE_DIR)/resources/deer-flow 2>/dev/null || echo " Warning: deer-flow clone failed"; \
|
||||
fi
|
||||
|
||||
.PHONY: _offline-package
|
||||
_offline-package:
|
||||
@echo ""
|
||||
@echo "4. 打包缓存..."
|
||||
@cd $(CACHE_DIR) && tar -czf "build-cache-$$(date +%Y%m%d).tar.gz" buildkit images resources 2>/dev/null && cd - > /dev/null
|
||||
@echo ""
|
||||
@echo "======================================"
|
||||
@echo "✓ 缓存导出完成!"
|
||||
@echo "======================================"
|
||||
@echo "传输文件: $(CACHE_DIR)/build-cache-$$(date +%Y%m%d).tar.gz"
|
||||
|
||||
# ========== 离线构建(无网环境) ==========
|
||||
|
||||
.PHONY: offline-setup
|
||||
offline-setup:
|
||||
@echo "======================================"
|
||||
@echo "设置离线构建环境..."
|
||||
@echo "======================================"
|
||||
@if [ ! -d "$(CACHE_DIR)" ]; then \
|
||||
echo "查找并解压缓存包..."; \
|
||||
cache_file=$$(ls -t build-cache-*.tar.gz 2>/dev/null | head -1); \
|
||||
if [ -z "$$cache_file" ]; then \
|
||||
echo "错误: 未找到缓存压缩包 (build-cache-*.tar.gz)"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
echo "解压 $$cache_file..."; \
|
||||
tar -xzf "$$cache_file"; \
|
||||
else \
|
||||
echo "缓存目录已存在: $(CACHE_DIR)"; \
|
||||
fi
|
||||
@echo ""
|
||||
@echo "加载基础镜像..."
|
||||
@if [ -f "$(CACHE_DIR)/images/base-images.tar" ]; then \
|
||||
docker load -i $(CACHE_DIR)/images/base-images.tar; \
|
||||
else \
|
||||
echo "警告: 基础镜像文件不存在,假设已手动加载"; \
|
||||
fi
|
||||
@$(MAKE) ensure-buildx
|
||||
@echo ""
|
||||
@echo "✓ 离线环境准备完成"
|
||||
|
||||
.PHONY: offline-build
|
||||
offline-build: offline-setup
|
||||
@echo ""
|
||||
@echo "======================================"
|
||||
@echo "开始离线构建..."
|
||||
@echo "======================================"
|
||||
@$(MAKE) _offline-build-services
|
||||
|
||||
.PHONY: _offline-build-services
|
||||
_offline-build-services: ensure-buildx
|
||||
@echo ""
|
||||
@echo "构建 datamate-database..."
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/database-cache \
|
||||
--pull=false \
|
||||
-f scripts/images/database/Dockerfile \
|
||||
-t datamate-database:$(OFFLINE_VERSION) \
|
||||
--load . || echo " Failed"
|
||||
|
||||
@echo ""
|
||||
@echo "构建 datamate-gateway..."
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/gateway-cache \
|
||||
--pull=false \
|
||||
-f scripts/images/gateway/Dockerfile \
|
||||
-t datamate-gateway:$(OFFLINE_VERSION) \
|
||||
--load . || echo " Failed"
|
||||
|
||||
@echo ""
|
||||
@echo "构建 datamate-backend..."
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/backend-cache \
|
||||
--pull=false \
|
||||
-f scripts/images/backend/Dockerfile \
|
||||
-t datamate-backend:$(OFFLINE_VERSION) \
|
||||
--load . || echo " Failed"
|
||||
|
||||
@echo ""
|
||||
@echo "构建 datamate-frontend..."
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/frontend-cache \
|
||||
--pull=false \
|
||||
-f scripts/images/frontend/Dockerfile \
|
||||
-t datamate-frontend:$(OFFLINE_VERSION) \
|
||||
--load . || echo " Failed"
|
||||
|
||||
@echo ""
|
||||
@echo "构建 datamate-runtime..."
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/runtime-cache \
|
||||
--pull=false \
|
||||
--build-arg RESOURCES_DIR=$(CACHE_DIR)/resources \
|
||||
-f scripts/images/runtime/Dockerfile \
|
||||
-t datamate-runtime:$(OFFLINE_VERSION) \
|
||||
--load . || echo " Failed"
|
||||
|
||||
@echo ""
|
||||
@echo "构建 datamate-backend-python..."
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/backend-python-cache \
|
||||
--pull=false \
|
||||
--build-arg RESOURCES_DIR=$(CACHE_DIR)/resources \
|
||||
-f scripts/images/backend-python/Dockerfile \
|
||||
-t datamate-backend-python:$(OFFLINE_VERSION) \
|
||||
--load . || echo " Failed"
|
||||
|
||||
@echo ""
|
||||
@echo "======================================"
|
||||
@echo "✓ 离线构建完成"
|
||||
@echo "======================================"
|
||||
|
||||
# 单个服务离线构建 (BuildKit)
|
||||
.PHONY: %-offline-build
|
||||
%-offline-build: offline-setup ensure-buildx
|
||||
@echo "离线构建 $*..."
|
||||
@if [ ! -d "$(CACHE_DIR)/buildkit/$*-cache" ]; then \
|
||||
echo "错误: $* 的缓存不存在"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@$(eval IMAGE_NAME := $(if $(filter deer-flow%,$*),$*,datamate-$*))
|
||||
@docker buildx build \
|
||||
--cache-from type=local,src=$(CACHE_DIR)/buildkit/$*-cache \
|
||||
--pull=false \
|
||||
$(if $(filter runtime backend-python deer-flow%,$*),--build-arg RESOURCES_DIR=$(CACHE_DIR)/resources,) \
|
||||
-f scripts/images/$*/Dockerfile \
|
||||
-t $(IMAGE_NAME):$(OFFLINE_VERSION) \
|
||||
--load .
|
||||
|
||||
# 传统 Docker 构建(不使用 BuildKit,更稳定)
|
||||
.PHONY: offline-build-classic
|
||||
offline-build-classic: offline-setup
|
||||
@echo "使用传统 docker build 进行离线构建..."
|
||||
@bash scripts/offline/build-offline-classic.sh $(CACHE_DIR) $(OFFLINE_VERSION)
|
||||
|
||||
# 诊断离线环境
|
||||
.PHONY: offline-diagnose
|
||||
offline-diagnose:
|
||||
@bash scripts/offline/diagnose.sh $(CACHE_DIR)
|
||||
|
||||
# 构建 APT 预装基础镜像(有网环境)
|
||||
.PHONY: offline-build-base-images
|
||||
offline-build-base-images:
|
||||
@echo "构建 APT 预装基础镜像..."
|
||||
@bash scripts/offline/build-base-images.sh $(CACHE_DIR)
|
||||
|
||||
# 使用预装基础镜像进行离线构建(推荐)
|
||||
.PHONY: offline-build-final
|
||||
offline-build-final: offline-setup
|
||||
@echo "使用预装 APT 包的基础镜像进行离线构建..."
|
||||
@bash scripts/offline/build-offline-final.sh $(CACHE_DIR) $(OFFLINE_VERSION)
|
||||
|
||||
# 完整离线导出(包含 APT 预装基础镜像)
|
||||
.PHONY: offline-export-full
|
||||
offline-export-full:
|
||||
@echo "======================================"
|
||||
@echo "完整离线缓存导出(含 APT 预装基础镜像)"
|
||||
@echo "======================================"
|
||||
@$(MAKE) offline-build-base-images
|
||||
@$(MAKE) offline-export
|
||||
@echo ""
|
||||
@echo "导出完成!传输时请包含以下文件:"
|
||||
@echo " - build-cache/images/base-images-with-apt.tar"
|
||||
@echo " - build-cache-YYYYMMDD.tar.gz"
|
||||
|
||||
# ========== 帮助 ==========
|
||||
|
||||
.PHONY: help-offline
|
||||
help-offline:
|
||||
@echo "离线构建命令:"
|
||||
@echo ""
|
||||
@echo "【有网环境】"
|
||||
@echo " make offline-export [CACHE_DIR=./build-cache] - 导出构建缓存"
|
||||
@echo " make offline-export-full - 导出完整缓存(含 APT 预装基础镜像)"
|
||||
@echo " make offline-build-base-images - 构建 APT 预装基础镜像"
|
||||
@echo ""
|
||||
@echo "【无网环境】"
|
||||
@echo " make offline-setup [CACHE_DIR=./build-cache] - 解压并准备离线缓存"
|
||||
@echo " make offline-build-final - 使用预装基础镜像构建(推荐,解决 APT 问题)"
|
||||
@echo " make offline-build-classic - 使用传统 docker build"
|
||||
@echo " make offline-build - 使用 BuildKit 构建"
|
||||
@echo " make offline-diagnose - 诊断离线构建环境"
|
||||
@echo " make <service>-offline-build - 离线构建单个服务"
|
||||
@echo ""
|
||||
@echo "【完整工作流程(推荐)】"
|
||||
@echo " # 1. 有网环境导出完整缓存"
|
||||
@echo " make offline-export-full"
|
||||
@echo ""
|
||||
@echo " # 2. 传输到无网环境(需要传输两个文件)"
|
||||
@echo " scp build-cache/images/base-images-with-apt.tar user@offline-server:/path/"
|
||||
@echo " scp build-cache-*.tar.gz user@offline-server:/path/"
|
||||
@echo ""
|
||||
@echo " # 3. 无网环境构建"
|
||||
@echo " tar -xzf build-cache-*.tar.gz"
|
||||
@echo " docker load -i build-cache/images/base-images-with-apt.tar"
|
||||
@echo " make offline-build-final"
|
||||
@@ -470,6 +470,23 @@ paths:
|
||||
'200':
|
||||
description: 上传成功
|
||||
|
||||
/data-management/datasets/upload/cancel-upload/{reqId}:
|
||||
put:
|
||||
tags: [ DatasetFile ]
|
||||
operationId: cancelUpload
|
||||
summary: 取消上传
|
||||
description: 取消预上传请求并清理临时分片
|
||||
parameters:
|
||||
- name: reqId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 预上传请求ID
|
||||
responses:
|
||||
'200':
|
||||
description: 取消成功
|
||||
|
||||
/data-management/dataset-types:
|
||||
get:
|
||||
operationId: getDatasetTypes
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import com.datamate.common.domain.utils.ChunksSaver;
|
||||
@@ -19,8 +20,11 @@ import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorC
|
||||
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
|
||||
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.*;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
@@ -53,6 +57,7 @@ public class DatasetApplicationService {
|
||||
private static final int SIMILAR_DATASET_MAX_LIMIT = 50;
|
||||
private static final int SIMILAR_DATASET_CANDIDATE_FACTOR = 5;
|
||||
private static final int SIMILAR_DATASET_CANDIDATE_MAX = 100;
|
||||
private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
|
||||
private final DatasetRepository datasetRepository;
|
||||
private final TagMapper tagMapper;
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
@@ -97,6 +102,7 @@ public class DatasetApplicationService {
|
||||
public Dataset updateDataset(String datasetId, UpdateDatasetRequest updateDatasetRequest) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
|
||||
|
||||
if (StringUtils.hasText(updateDatasetRequest.getName())) {
|
||||
dataset.setName(updateDatasetRequest.getName());
|
||||
}
|
||||
@@ -109,13 +115,31 @@ public class DatasetApplicationService {
|
||||
if (Objects.nonNull(updateDatasetRequest.getStatus())) {
|
||||
dataset.setStatus(updateDatasetRequest.getStatus());
|
||||
}
|
||||
if (updateDatasetRequest.getParentDatasetId() != null) {
|
||||
if (updateDatasetRequest.isParentDatasetIdProvided()) {
|
||||
// 保存原始的 parentDatasetId 值,用于比较是否发生了变化
|
||||
String originalParentDatasetId = dataset.getParentDatasetId();
|
||||
|
||||
// 处理父数据集变更:仅当请求显式包含 parentDatasetId 时处理
|
||||
// handleParentChange 内部通过 normalizeParentId 方法将空字符串和 null 都转换为 null
|
||||
// 这样既支持设置新的父数据集,也支持清除关联
|
||||
handleParentChange(dataset, updateDatasetRequest.getParentDatasetId());
|
||||
|
||||
// 检查 parentDatasetId 是否发生了变化
|
||||
if (!Objects.equals(originalParentDatasetId, dataset.getParentDatasetId())) {
|
||||
// 使用 LambdaUpdateWrapper 显式地更新 parentDatasetId 字段
|
||||
// 这样即使值为 null 也能被正确更新到数据库
|
||||
datasetRepository.update(null, new LambdaUpdateWrapper<Dataset>()
|
||||
.eq(Dataset::getId, datasetId)
|
||||
.set(Dataset::getParentDatasetId, dataset.getParentDatasetId()));
|
||||
}
|
||||
}
|
||||
|
||||
if (StringUtils.hasText(updateDatasetRequest.getDataSource())) {
|
||||
// 数据源id不为空,使用异步线程进行文件扫盘落库
|
||||
processDataSourceAsync(dataset.getId(), updateDatasetRequest.getDataSource());
|
||||
}
|
||||
|
||||
// 更新其他字段(不包括 parentDatasetId,因为它已经在上面的代码中更新了)
|
||||
datasetRepository.updateById(dataset);
|
||||
return dataset;
|
||||
}
|
||||
@@ -142,6 +166,7 @@ public class DatasetApplicationService {
|
||||
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
|
||||
List<DatasetFile> datasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
dataset.setFiles(datasetFiles);
|
||||
applyVisibleFileCounts(Collections.singletonList(dataset));
|
||||
return dataset;
|
||||
}
|
||||
|
||||
@@ -153,6 +178,7 @@ public class DatasetApplicationService {
|
||||
IPage<Dataset> page = new Page<>(query.getPage(), query.getSize());
|
||||
page = datasetRepository.findByCriteria(page, query);
|
||||
String datasetPvcName = getDatasetPvcName();
|
||||
applyVisibleFileCounts(page.getRecords());
|
||||
List<DatasetResponse> datasetResponses = DatasetConverter.INSTANCE.convertToResponse(page.getRecords());
|
||||
datasetResponses.forEach(dataset -> dataset.setPvcName(datasetPvcName));
|
||||
return PagedResponse.of(datasetResponses, page.getCurrent(), page.getTotal(), page.getPages());
|
||||
@@ -200,6 +226,7 @@ public class DatasetApplicationService {
|
||||
})
|
||||
.limit(safeLimit)
|
||||
.toList();
|
||||
applyVisibleFileCounts(sorted);
|
||||
List<DatasetResponse> responses = DatasetConverter.INSTANCE.convertToResponse(sorted);
|
||||
responses.forEach(item -> item.setPvcName(datasetPvcName));
|
||||
return responses;
|
||||
@@ -345,6 +372,61 @@ public class DatasetApplicationService {
|
||||
dataset.setPath(newPath);
|
||||
}
|
||||
|
||||
private void applyVisibleFileCounts(List<Dataset> datasets) {
|
||||
if (CollectionUtils.isEmpty(datasets)) {
|
||||
return;
|
||||
}
|
||||
List<String> datasetIds = datasets.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(Dataset::getId)
|
||||
.filter(StringUtils::hasText)
|
||||
.toList();
|
||||
if (datasetIds.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
Map<String, Long> countMap = datasetFileRepository.countNonDerivedByDatasetIds(datasetIds).stream()
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toMap(
|
||||
DatasetFileCount::getDatasetId,
|
||||
count -> Optional.ofNullable(count.getFileCount()).orElse(0L),
|
||||
(left, right) -> left
|
||||
));
|
||||
for (Dataset dataset : datasets) {
|
||||
if (dataset == null || !StringUtils.hasText(dataset.getId())) {
|
||||
continue;
|
||||
}
|
||||
Long visibleCount = countMap.get(dataset.getId());
|
||||
dataset.setFileCount(visibleCount != null ? visibleCount : 0L);
|
||||
}
|
||||
}
|
||||
|
||||
private List<DatasetFile> filterVisibleFiles(List<DatasetFile> files) {
|
||||
if (CollectionUtils.isEmpty(files)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return files.stream()
|
||||
.filter(file -> !isDerivedFile(file))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private boolean isDerivedFile(DatasetFile datasetFile) {
|
||||
if (datasetFile == null) {
|
||||
return false;
|
||||
}
|
||||
String metadata = datasetFile.getMetadata();
|
||||
if (!StringUtils.hasText(metadata)) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
Map<String, Object> metadataMap = mapper.readValue(metadata, new TypeReference<Map<String, Object>>() {});
|
||||
return metadataMap.get(DERIVED_METADATA_KEY) != null;
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取数据集统计信息
|
||||
*/
|
||||
@@ -357,27 +439,29 @@ public class DatasetApplicationService {
|
||||
|
||||
Map<String, Object> statistics = new HashMap<>();
|
||||
|
||||
// 基础统计
|
||||
Long totalFiles = datasetFileRepository.countByDatasetId(datasetId);
|
||||
Long completedFiles = datasetFileRepository.countCompletedByDatasetId(datasetId);
|
||||
List<DatasetFile> allFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
List<DatasetFile> visibleFiles = filterVisibleFiles(allFiles);
|
||||
long totalFiles = visibleFiles.size();
|
||||
long completedFiles = visibleFiles.stream()
|
||||
.filter(file -> "COMPLETED".equalsIgnoreCase(file.getStatus()))
|
||||
.count();
|
||||
Long totalSize = datasetFileRepository.sumSizeByDatasetId(datasetId);
|
||||
|
||||
statistics.put("totalFiles", totalFiles != null ? totalFiles.intValue() : 0);
|
||||
statistics.put("completedFiles", completedFiles != null ? completedFiles.intValue() : 0);
|
||||
statistics.put("totalFiles", (int) totalFiles);
|
||||
statistics.put("completedFiles", (int) completedFiles);
|
||||
statistics.put("totalSize", totalSize != null ? totalSize : 0L);
|
||||
|
||||
// 完成率计算
|
||||
float completionRate = 0.0f;
|
||||
if (totalFiles != null && totalFiles > 0) {
|
||||
completionRate = (completedFiles != null ? completedFiles.floatValue() : 0.0f) / totalFiles.floatValue() * 100.0f;
|
||||
if (totalFiles > 0) {
|
||||
completionRate = ((float) completedFiles) / (float) totalFiles * 100.0f;
|
||||
}
|
||||
statistics.put("completionRate", completionRate);
|
||||
|
||||
// 文件类型分布统计
|
||||
Map<String, Integer> fileTypeDistribution = new HashMap<>();
|
||||
List<DatasetFile> allFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
if (allFiles != null) {
|
||||
for (DatasetFile file : allFiles) {
|
||||
if (!visibleFiles.isEmpty()) {
|
||||
for (DatasetFile file : visibleFiles) {
|
||||
String fileType = file.getFileType() != null ? file.getFileType() : "unknown";
|
||||
fileTypeDistribution.put(fileType, fileTypeDistribution.getOrDefault(fileType, 0) + 1);
|
||||
}
|
||||
@@ -386,8 +470,8 @@ public class DatasetApplicationService {
|
||||
|
||||
// 状态分布统计
|
||||
Map<String, Integer> statusDistribution = new HashMap<>();
|
||||
if (allFiles != null) {
|
||||
for (DatasetFile file : allFiles) {
|
||||
if (!visibleFiles.isEmpty()) {
|
||||
for (DatasetFile file : visibleFiles) {
|
||||
String status = file.getStatus() != null ? file.getStatus() : "unknown";
|
||||
statusDistribution.put(status, statusDistribution.getOrDefault(status, 0) + 1);
|
||||
}
|
||||
|
||||
@@ -88,6 +88,7 @@ public class DatasetFileApplicationService {
|
||||
private final DatasetRepository datasetRepository;
|
||||
private final FileService fileService;
|
||||
private final PdfTextExtractAsyncService pdfTextExtractAsyncService;
|
||||
private final DatasetFilePreviewService datasetFilePreviewService;
|
||||
|
||||
@Value("${datamate.data-management.base-path:/dataset}")
|
||||
private String datasetBasePath;
|
||||
@@ -99,11 +100,13 @@ public class DatasetFileApplicationService {
|
||||
public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
|
||||
DatasetRepository datasetRepository,
|
||||
FileService fileService,
|
||||
PdfTextExtractAsyncService pdfTextExtractAsyncService) {
|
||||
PdfTextExtractAsyncService pdfTextExtractAsyncService,
|
||||
DatasetFilePreviewService datasetFilePreviewService) {
|
||||
this.datasetFileRepository = datasetFileRepository;
|
||||
this.datasetRepository = datasetRepository;
|
||||
this.fileService = fileService;
|
||||
this.pdfTextExtractAsyncService = pdfTextExtractAsyncService;
|
||||
this.datasetFilePreviewService = datasetFilePreviewService;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -162,18 +165,31 @@ public class DatasetFileApplicationService {
|
||||
String datasetPath = dataset.getPath();
|
||||
Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
|
||||
Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
|
||||
.stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
|
||||
.stream()
|
||||
.filter(file -> file.getFilePath() != null)
|
||||
.collect(Collectors.toMap(
|
||||
file -> normalizeFilePath(file.getFilePath()),
|
||||
Function.identity(),
|
||||
(left, right) -> left
|
||||
));
|
||||
Set<String> derivedFilePaths = excludeDerivedFiles
|
||||
? datasetFilesMap.values().stream()
|
||||
.filter(this::isDerivedFile)
|
||||
.map(DatasetFile::getFilePath)
|
||||
.map(this::normalizeFilePath)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toSet())
|
||||
: Collections.emptySet();
|
||||
// 如果目录不存在,直接返回空结果(数据集刚创建时目录可能还未生成)
|
||||
if (!Files.exists(queryPath)) {
|
||||
return new PagedResponse<>(page, size, 0, 0, Collections.emptyList());
|
||||
}
|
||||
try (Stream<Path> pathStream = Files.list(queryPath)) {
|
||||
List<Path> allFiles = pathStream
|
||||
.filter(path -> path.toString().startsWith(datasetPath))
|
||||
.filter(path -> !excludeDerivedFiles || Files.isDirectory(path) || !derivedFilePaths.contains(path.toString()))
|
||||
.filter(path -> !excludeDerivedFiles
|
||||
|| Files.isDirectory(path)
|
||||
|| !derivedFilePaths.contains(normalizeFilePath(path.toString())))
|
||||
.sorted(Comparator
|
||||
.comparing((Path path) -> !Files.isDirectory(path))
|
||||
.thenComparing(path -> path.getFileName().toString()))
|
||||
@@ -192,7 +208,9 @@ public class DatasetFileApplicationService {
|
||||
if (fromIndex < total) {
|
||||
pageData = allFiles.subList(fromIndex, toIndex);
|
||||
}
|
||||
List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList();
|
||||
List<DatasetFile> datasetFiles = pageData.stream()
|
||||
.map(path -> getDatasetFile(path, datasetFilesMap, excludeDerivedFiles, derivedFilePaths))
|
||||
.toList();
|
||||
|
||||
return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
|
||||
} catch (IOException e) {
|
||||
@@ -201,7 +219,10 @@ public class DatasetFileApplicationService {
|
||||
}
|
||||
}
|
||||
|
||||
private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) {
|
||||
private DatasetFile getDatasetFile(Path path,
|
||||
Map<String, DatasetFile> datasetFilesMap,
|
||||
boolean excludeDerivedFiles,
|
||||
Set<String> derivedFilePaths) {
|
||||
DatasetFile datasetFile = new DatasetFile();
|
||||
LocalDateTime localDateTime = LocalDateTime.now();
|
||||
try {
|
||||
@@ -223,12 +244,21 @@ public class DatasetFileApplicationService {
|
||||
long totalSize;
|
||||
|
||||
try (Stream<Path> walk = Files.walk(path)) {
|
||||
fileCount = walk.filter(Files::isRegularFile).count();
|
||||
Stream<Path> fileStream = walk.filter(Files::isRegularFile);
|
||||
if (excludeDerivedFiles && !derivedFilePaths.isEmpty()) {
|
||||
fileStream = fileStream.filter(filePath ->
|
||||
!derivedFilePaths.contains(normalizeFilePath(filePath.toString())));
|
||||
}
|
||||
fileCount = fileStream.count();
|
||||
}
|
||||
|
||||
try (Stream<Path> walk = Files.walk(path)) {
|
||||
totalSize = walk
|
||||
.filter(Files::isRegularFile)
|
||||
Stream<Path> fileStream = walk.filter(Files::isRegularFile);
|
||||
if (excludeDerivedFiles && !derivedFilePaths.isEmpty()) {
|
||||
fileStream = fileStream.filter(filePath ->
|
||||
!derivedFilePaths.contains(normalizeFilePath(filePath.toString())));
|
||||
}
|
||||
totalSize = fileStream
|
||||
.mapToLong(p -> {
|
||||
try {
|
||||
return Files.size(p);
|
||||
@@ -246,7 +276,7 @@ public class DatasetFileApplicationService {
|
||||
log.error("stat directory info error", e);
|
||||
}
|
||||
} else {
|
||||
DatasetFile exist = datasetFilesMap.get(path.toString());
|
||||
DatasetFile exist = datasetFilesMap.get(normalizeFilePath(path.toString()));
|
||||
if (exist == null) {
|
||||
datasetFile.setId("file-" + datasetFile.getFileName());
|
||||
datasetFile.setFileSize(path.toFile().length());
|
||||
@@ -257,6 +287,17 @@ public class DatasetFileApplicationService {
|
||||
return datasetFile;
|
||||
}
|
||||
|
||||
private String normalizeFilePath(String filePath) {
|
||||
if (filePath == null || filePath.isBlank()) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return Paths.get(filePath).toAbsolutePath().normalize().toString();
|
||||
} catch (Exception e) {
|
||||
return filePath.replace("\\", "/");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isSourceDocument(DatasetFile datasetFile) {
|
||||
if (datasetFile == null) {
|
||||
return false;
|
||||
@@ -312,6 +353,7 @@ public class DatasetFileApplicationService {
|
||||
datasetFileRepository.removeById(fileId);
|
||||
dataset.removeFile(file);
|
||||
datasetRepository.updateById(dataset);
|
||||
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, fileId);
|
||||
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
|
||||
if (file.getFilePath().startsWith(dataset.getPath())) {
|
||||
try {
|
||||
@@ -463,6 +505,14 @@ public class DatasetFileApplicationService {
|
||||
saveFileInfoToDb(uploadResult, datasetId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 取消上传
|
||||
*/
|
||||
@Transactional
|
||||
public void cancelUpload(String reqId) {
|
||||
fileService.cancelUpload(reqId);
|
||||
}
|
||||
|
||||
private void saveFileInfoToDb(FileUploadResult fileUploadResult, String datasetId) {
|
||||
if (Objects.isNull(fileUploadResult.getSavedFile())) {
|
||||
// 文件切片上传没有完成
|
||||
@@ -684,6 +734,7 @@ public class DatasetFileApplicationService {
|
||||
|
||||
for (DatasetFile file : filesToDelete) {
|
||||
datasetFileRepository.removeById(file.getId());
|
||||
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, file.getId());
|
||||
}
|
||||
|
||||
// 删除文件系统中的目录
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* 数据集文件预览转换异步任务
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class DatasetFilePreviewAsyncService {
|
||||
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
|
||||
private static final String DATASET_PREVIEW_DIR = "dataset-previews";
|
||||
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private static final int MAX_ERROR_LENGTH = 500;
|
||||
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
|
||||
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
private final DataManagementProperties dataManagementProperties;
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
@Async
|
||||
public void convertPreviewAsync(String fileId) {
|
||||
if (StringUtils.isBlank(fileId)) {
|
||||
return;
|
||||
}
|
||||
DatasetFile file = datasetFileRepository.getById(fileId);
|
||||
if (file == null) {
|
||||
return;
|
||||
}
|
||||
String extension = resolveFileExtension(resolveOriginalName(file));
|
||||
if (!OFFICE_EXTENSIONS.contains(extension)) {
|
||||
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "仅支持 DOC/DOCX 转换");
|
||||
return;
|
||||
}
|
||||
if (StringUtils.isBlank(file.getFilePath())) {
|
||||
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "源文件路径为空");
|
||||
return;
|
||||
}
|
||||
Path sourcePath = Paths.get(file.getFilePath()).toAbsolutePath().normalize();
|
||||
if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
|
||||
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "源文件不存在");
|
||||
return;
|
||||
}
|
||||
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(file.getMetadata(), objectMapper);
|
||||
String previewRelativePath = StringUtils.defaultIfBlank(
|
||||
previewInfo.pdfPath(),
|
||||
resolvePreviewRelativePath(file.getDatasetId(), file.getId())
|
||||
);
|
||||
Path targetPath = resolvePreviewStoragePath(previewRelativePath);
|
||||
|
||||
try {
|
||||
ensureParentDirectory(targetPath);
|
||||
LibreOfficeConverter.convertToPdf(sourcePath, targetPath);
|
||||
updatePreviewStatus(file, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
|
||||
} catch (Exception e) {
|
||||
log.error("dataset preview convert failed, fileId: {}", file.getId(), e);
|
||||
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, previewRelativePath, trimError(e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
private void updatePreviewStatus(
|
||||
DatasetFile file,
|
||||
KnowledgeItemPreviewStatus status,
|
||||
String previewRelativePath,
|
||||
String error
|
||||
) {
|
||||
if (file == null) {
|
||||
return;
|
||||
}
|
||||
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
|
||||
file.getMetadata(),
|
||||
objectMapper,
|
||||
status,
|
||||
previewRelativePath,
|
||||
error,
|
||||
nowText()
|
||||
);
|
||||
file.setMetadata(updatedMetadata);
|
||||
datasetFileRepository.updateById(file);
|
||||
}
|
||||
|
||||
private String resolveOriginalName(DatasetFile file) {
|
||||
if (file == null) {
|
||||
return "";
|
||||
}
|
||||
if (StringUtils.isNotBlank(file.getFileName())) {
|
||||
return file.getFileName();
|
||||
}
|
||||
if (StringUtils.isNotBlank(file.getFilePath())) {
|
||||
return Paths.get(file.getFilePath()).getFileName().toString();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private String resolveFileExtension(String fileName) {
|
||||
if (StringUtils.isBlank(fileName)) {
|
||||
return "";
|
||||
}
|
||||
int dotIndex = fileName.lastIndexOf('.');
|
||||
if (dotIndex <= 0 || dotIndex >= fileName.length() - 1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(dotIndex + 1).toLowerCase();
|
||||
}
|
||||
|
||||
private String resolvePreviewRelativePath(String datasetId, String fileId) {
|
||||
String relativePath = Paths.get(DATASET_PREVIEW_DIR, datasetId, fileId + PREVIEW_FILE_SUFFIX)
|
||||
.toString();
|
||||
return relativePath.replace("\\", PATH_SEPARATOR);
|
||||
}
|
||||
|
||||
private Path resolvePreviewStoragePath(String relativePath) {
|
||||
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
|
||||
Path root = resolveUploadRootPath();
|
||||
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
|
||||
if (!target.startsWith(root)) {
|
||||
throw new IllegalArgumentException("invalid preview path");
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
private Path resolveUploadRootPath() {
|
||||
String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
|
||||
return Paths.get(uploadDir).toAbsolutePath().normalize();
|
||||
}
|
||||
|
||||
private void ensureParentDirectory(Path targetPath) {
|
||||
try {
|
||||
Path parent = targetPath.getParent();
|
||||
if (parent != null) {
|
||||
Files.createDirectories(parent);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new IllegalStateException("创建预览目录失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
private String trimError(String error) {
|
||||
if (StringUtils.isBlank(error)) {
|
||||
return "";
|
||||
}
|
||||
if (error.length() <= MAX_ERROR_LENGTH) {
|
||||
return error;
|
||||
}
|
||||
return error.substring(0, MAX_ERROR_LENGTH);
|
||||
}
|
||||
|
||||
private String nowText() {
|
||||
return LocalDateTime.now().format(PREVIEW_TIME_FORMATTER);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,233 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.BusinessAssert;
|
||||
import com.datamate.common.infrastructure.exception.CommonErrorCode;
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.datamate.datamanagement.interfaces.dto.DatasetFilePreviewStatusResponse;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* 数据集文件预览转换服务
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class DatasetFilePreviewService {
|
||||
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
|
||||
private static final String DATASET_PREVIEW_DIR = "dataset-previews";
|
||||
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
|
||||
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
private final DataManagementProperties dataManagementProperties;
|
||||
private final DatasetFilePreviewAsyncService datasetFilePreviewAsyncService;
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
public DatasetFilePreviewStatusResponse getPreviewStatus(String datasetId, String fileId) {
|
||||
DatasetFile file = requireDatasetFile(datasetId, fileId);
|
||||
assertOfficeDocument(file);
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(file.getMetadata(), objectMapper);
|
||||
|
||||
if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && !previewPdfExists(file, previewInfo)) {
|
||||
previewInfo = markPreviewFailed(file, previewInfo, "预览文件不存在");
|
||||
}
|
||||
|
||||
return buildResponse(previewInfo);
|
||||
}
|
||||
|
||||
public DatasetFilePreviewStatusResponse ensurePreview(String datasetId, String fileId) {
|
||||
DatasetFile file = requireDatasetFile(datasetId, fileId);
|
||||
assertOfficeDocument(file);
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(file.getMetadata(), objectMapper);
|
||||
|
||||
if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && previewPdfExists(file, previewInfo)) {
|
||||
return buildResponse(previewInfo);
|
||||
}
|
||||
if (previewInfo.status() == KnowledgeItemPreviewStatus.PROCESSING) {
|
||||
return buildResponse(previewInfo);
|
||||
}
|
||||
|
||||
String previewRelativePath = resolvePreviewRelativePath(file.getDatasetId(), file.getId());
|
||||
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
|
||||
file.getMetadata(),
|
||||
objectMapper,
|
||||
KnowledgeItemPreviewStatus.PROCESSING,
|
||||
previewRelativePath,
|
||||
null,
|
||||
nowText()
|
||||
);
|
||||
file.setMetadata(updatedMetadata);
|
||||
datasetFileRepository.updateById(file);
|
||||
datasetFilePreviewAsyncService.convertPreviewAsync(file.getId());
|
||||
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo refreshed = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(updatedMetadata, objectMapper);
|
||||
return buildResponse(refreshed);
|
||||
}
|
||||
|
||||
public boolean isOfficeDocument(String fileName) {
|
||||
String extension = resolveFileExtension(fileName);
|
||||
return StringUtils.isNotBlank(extension) && OFFICE_EXTENSIONS.contains(extension.toLowerCase());
|
||||
}
|
||||
|
||||
public PreviewFile resolveReadyPreviewFile(String datasetId, DatasetFile file) {
|
||||
if (file == null) {
|
||||
return null;
|
||||
}
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(file.getMetadata(), objectMapper);
|
||||
if (previewInfo.status() != KnowledgeItemPreviewStatus.READY) {
|
||||
return null;
|
||||
}
|
||||
String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(datasetId, file.getId()));
|
||||
Path filePath = resolvePreviewStoragePath(relativePath);
|
||||
if (!Files.exists(filePath) || !Files.isRegularFile(filePath)) {
|
||||
markPreviewFailed(file, previewInfo, "预览文件不存在");
|
||||
return null;
|
||||
}
|
||||
String previewName = resolvePreviewPdfName(file);
|
||||
return new PreviewFile(filePath, previewName);
|
||||
}
|
||||
|
||||
public void deletePreviewFileQuietly(String datasetId, String fileId) {
|
||||
String relativePath = resolvePreviewRelativePath(datasetId, fileId);
|
||||
Path filePath = resolvePreviewStoragePath(relativePath);
|
||||
try {
|
||||
Files.deleteIfExists(filePath);
|
||||
} catch (Exception e) {
|
||||
log.warn("delete dataset preview pdf error, fileId: {}", fileId, e);
|
||||
}
|
||||
}
|
||||
|
||||
private DatasetFilePreviewStatusResponse buildResponse(KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
|
||||
DatasetFilePreviewStatusResponse response = new DatasetFilePreviewStatusResponse();
|
||||
KnowledgeItemPreviewStatus status = previewInfo.status() == null
|
||||
? KnowledgeItemPreviewStatus.PENDING
|
||||
: previewInfo.status();
|
||||
response.setStatus(status);
|
||||
response.setPreviewError(previewInfo.error());
|
||||
response.setUpdatedAt(previewInfo.updatedAt());
|
||||
return response;
|
||||
}
|
||||
|
||||
private DatasetFile requireDatasetFile(String datasetId, String fileId) {
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(datasetId), CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(fileId), CommonErrorCode.PARAM_ERROR);
|
||||
DatasetFile datasetFile = datasetFileRepository.getById(fileId);
|
||||
BusinessAssert.notNull(datasetFile, CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(Objects.equals(datasetFile.getDatasetId(), datasetId), CommonErrorCode.PARAM_ERROR);
|
||||
return datasetFile;
|
||||
}
|
||||
|
||||
private void assertOfficeDocument(DatasetFile file) {
|
||||
BusinessAssert.notNull(file, CommonErrorCode.PARAM_ERROR);
|
||||
String extension = resolveFileExtension(resolveOriginalName(file));
|
||||
BusinessAssert.isTrue(OFFICE_EXTENSIONS.contains(extension), CommonErrorCode.PARAM_ERROR);
|
||||
}
|
||||
|
||||
private String resolveOriginalName(DatasetFile file) {
|
||||
if (file == null) {
|
||||
return "";
|
||||
}
|
||||
if (StringUtils.isNotBlank(file.getFileName())) {
|
||||
return file.getFileName();
|
||||
}
|
||||
if (StringUtils.isNotBlank(file.getFilePath())) {
|
||||
return Paths.get(file.getFilePath()).getFileName().toString();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private String resolveFileExtension(String fileName) {
|
||||
if (StringUtils.isBlank(fileName)) {
|
||||
return "";
|
||||
}
|
||||
int dotIndex = fileName.lastIndexOf('.');
|
||||
if (dotIndex <= 0 || dotIndex >= fileName.length() - 1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(dotIndex + 1).toLowerCase();
|
||||
}
|
||||
|
||||
private String resolvePreviewPdfName(DatasetFile file) {
|
||||
String originalName = resolveOriginalName(file);
|
||||
if (StringUtils.isBlank(originalName)) {
|
||||
return "预览.pdf";
|
||||
}
|
||||
int dotIndex = originalName.lastIndexOf('.');
|
||||
if (dotIndex <= 0) {
|
||||
return originalName + PREVIEW_FILE_SUFFIX;
|
||||
}
|
||||
return originalName.substring(0, dotIndex) + PREVIEW_FILE_SUFFIX;
|
||||
}
|
||||
|
||||
private boolean previewPdfExists(DatasetFile file, KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
|
||||
String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(file.getDatasetId(), file.getId()));
|
||||
Path filePath = resolvePreviewStoragePath(relativePath);
|
||||
return Files.exists(filePath) && Files.isRegularFile(filePath);
|
||||
}
|
||||
|
||||
private KnowledgeItemPreviewMetadataHelper.PreviewInfo markPreviewFailed(
|
||||
DatasetFile file,
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo,
|
||||
String error
|
||||
) {
|
||||
String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(file.getDatasetId(), file.getId()));
|
||||
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
|
||||
file.getMetadata(),
|
||||
objectMapper,
|
||||
KnowledgeItemPreviewStatus.FAILED,
|
||||
relativePath,
|
||||
error,
|
||||
nowText()
|
||||
);
|
||||
file.setMetadata(updatedMetadata);
|
||||
datasetFileRepository.updateById(file);
|
||||
return KnowledgeItemPreviewMetadataHelper.readPreviewInfo(updatedMetadata, objectMapper);
|
||||
}
|
||||
|
||||
private String resolvePreviewRelativePath(String datasetId, String fileId) {
|
||||
String relativePath = Paths.get(DATASET_PREVIEW_DIR, datasetId, fileId + PREVIEW_FILE_SUFFIX)
|
||||
.toString();
|
||||
return relativePath.replace("\\", PATH_SEPARATOR);
|
||||
}
|
||||
|
||||
Path resolvePreviewStoragePath(String relativePath) {
|
||||
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
|
||||
Path root = resolveUploadRootPath();
|
||||
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
|
||||
BusinessAssert.isTrue(target.startsWith(root), CommonErrorCode.PARAM_ERROR);
|
||||
return target;
|
||||
}
|
||||
|
||||
private Path resolveUploadRootPath() {
|
||||
String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(uploadDir), CommonErrorCode.PARAM_ERROR);
|
||||
return Paths.get(uploadDir).toAbsolutePath().normalize();
|
||||
}
|
||||
|
||||
private String nowText() {
|
||||
return LocalDateTime.now().format(PREVIEW_TIME_FORMATTER);
|
||||
}
|
||||
|
||||
/**
 * A resolved preview PDF.
 *
 * @param filePath location of the PDF on disk
 * @param fileName name to present for the PDF
 */
public record PreviewFile(Path filePath, String fileName) {}
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.BusinessAssert;
|
||||
import com.datamate.common.infrastructure.exception.CommonErrorCode;
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeStatusType;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItemDirectory;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeSet;
|
||||
import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeItemDirectoryRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeItemRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeSetRepository;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeDirectoryRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeDirectoryQuery;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* 知识条目目录应用服务
|
||||
*/
|
||||
@Service
|
||||
@Transactional
|
||||
@RequiredArgsConstructor
|
||||
public class KnowledgeDirectoryApplicationService {
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private static final String INVALID_PATH_SEGMENT = "..";
|
||||
|
||||
private final KnowledgeItemDirectoryRepository knowledgeItemDirectoryRepository;
|
||||
private final KnowledgeItemRepository knowledgeItemRepository;
|
||||
private final KnowledgeSetRepository knowledgeSetRepository;
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public List<KnowledgeItemDirectory> getKnowledgeDirectories(String setId, KnowledgeDirectoryQuery query) {
|
||||
BusinessAssert.notNull(query, CommonErrorCode.PARAM_ERROR);
|
||||
query.setSetId(setId);
|
||||
return knowledgeItemDirectoryRepository.findByCriteria(query);
|
||||
}
|
||||
|
||||
public KnowledgeItemDirectory createKnowledgeDirectory(String setId, CreateKnowledgeDirectoryRequest request) {
|
||||
BusinessAssert.notNull(request, CommonErrorCode.PARAM_ERROR);
|
||||
KnowledgeSet knowledgeSet = requireKnowledgeSet(setId);
|
||||
BusinessAssert.isTrue(!isReadOnlyStatus(knowledgeSet.getStatus()),
|
||||
DataManagementErrorCode.KNOWLEDGE_SET_STATUS_ERROR);
|
||||
|
||||
String directoryName = normalizeDirectoryName(request.getDirectoryName());
|
||||
validateDirectoryName(directoryName);
|
||||
|
||||
String parentPrefix = normalizeRelativePathPrefix(request.getParentPrefix());
|
||||
String relativePath = normalizeRelativePathValue(parentPrefix + directoryName);
|
||||
validateRelativePath(relativePath);
|
||||
|
||||
BusinessAssert.isTrue(!knowledgeItemRepository.existsBySetIdAndRelativePath(setId, relativePath),
|
||||
CommonErrorCode.PARAM_ERROR);
|
||||
|
||||
KnowledgeItemDirectory existing = knowledgeItemDirectoryRepository.findBySetIdAndPath(setId, relativePath);
|
||||
if (existing != null) {
|
||||
return existing;
|
||||
}
|
||||
|
||||
KnowledgeItemDirectory directory = new KnowledgeItemDirectory();
|
||||
directory.setId(UUID.randomUUID().toString());
|
||||
directory.setSetId(setId);
|
||||
directory.setName(directoryName);
|
||||
directory.setRelativePath(relativePath);
|
||||
knowledgeItemDirectoryRepository.save(directory);
|
||||
return directory;
|
||||
}
|
||||
|
||||
public void deleteKnowledgeDirectory(String setId, String relativePath) {
|
||||
KnowledgeSet knowledgeSet = requireKnowledgeSet(setId);
|
||||
BusinessAssert.isTrue(!isReadOnlyStatus(knowledgeSet.getStatus()),
|
||||
DataManagementErrorCode.KNOWLEDGE_SET_STATUS_ERROR);
|
||||
|
||||
String normalized = normalizeRelativePathValue(relativePath);
|
||||
validateRelativePath(normalized);
|
||||
|
||||
knowledgeItemRepository.removeByRelativePathPrefix(setId, normalized);
|
||||
knowledgeItemDirectoryRepository.removeByRelativePathPrefix(setId, normalized);
|
||||
}
|
||||
|
||||
private KnowledgeSet requireKnowledgeSet(String setId) {
|
||||
KnowledgeSet knowledgeSet = knowledgeSetRepository.getById(setId);
|
||||
BusinessAssert.notNull(knowledgeSet, DataManagementErrorCode.KNOWLEDGE_SET_NOT_FOUND);
|
||||
return knowledgeSet;
|
||||
}
|
||||
|
||||
private boolean isReadOnlyStatus(KnowledgeStatusType status) {
|
||||
return status == KnowledgeStatusType.ARCHIVED || status == KnowledgeStatusType.DEPRECATED;
|
||||
}
|
||||
|
||||
private String normalizeDirectoryName(String name) {
|
||||
return StringUtils.trimToEmpty(name);
|
||||
}
|
||||
|
||||
private void validateDirectoryName(String name) {
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(name), CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(!name.contains(PATH_SEPARATOR), CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(!name.contains("\\"), CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(!name.contains(INVALID_PATH_SEGMENT), CommonErrorCode.PARAM_ERROR);
|
||||
}
|
||||
|
||||
private void validateRelativePath(String relativePath) {
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(relativePath), CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(!relativePath.contains(INVALID_PATH_SEGMENT), CommonErrorCode.PARAM_ERROR);
|
||||
}
|
||||
|
||||
private String normalizeRelativePathPrefix(String prefix) {
|
||||
if (StringUtils.isBlank(prefix)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = prefix.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
if (StringUtils.isBlank(normalized)) {
|
||||
return "";
|
||||
}
|
||||
validateRelativePath(normalized);
|
||||
return normalized + PATH_SEPARATOR;
|
||||
}
|
||||
|
||||
private String normalizeRelativePathValue(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
}
|
||||
@@ -16,12 +16,14 @@ import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItem;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeSet;
|
||||
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
|
||||
import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeItemRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeSetRepository;
|
||||
import com.datamate.datamanagement.interfaces.converter.KnowledgeConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeItemRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.DeleteKnowledgeItemsRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.ImportKnowledgeItemsRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemPagingQuery;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemResponse;
|
||||
@@ -74,16 +76,20 @@ public class KnowledgeItemApplicationService {
|
||||
private static final String EXPORT_FILE_PREFIX = "knowledge_set_";
|
||||
private static final String EXPORT_FILE_SUFFIX = ".zip";
|
||||
private static final String EXPORT_CONTENT_TYPE = "application/zip";
|
||||
private static final String PREVIEW_PDF_CONTENT_TYPE = "application/pdf";
|
||||
private static final int MAX_FILE_BASE_LENGTH = 120;
|
||||
private static final int MAX_TITLE_LENGTH = 200;
|
||||
private static final String KNOWLEDGE_ITEM_UPLOAD_DIR = "knowledge-items";
|
||||
private static final String DEFAULT_FILE_EXTENSION = "bin";
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
|
||||
private final KnowledgeItemRepository knowledgeItemRepository;
|
||||
private final KnowledgeSetRepository knowledgeSetRepository;
|
||||
private final DatasetRepository datasetRepository;
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
private final DataManagementProperties dataManagementProperties;
|
||||
private final TagMapper tagMapper;
|
||||
private final KnowledgeItemPreviewService knowledgeItemPreviewService;
|
||||
|
||||
public KnowledgeItem createKnowledgeItem(String setId, CreateKnowledgeItemRequest request) {
|
||||
KnowledgeSet knowledgeSet = requireKnowledgeSet(setId);
|
||||
@@ -112,6 +118,7 @@ public class KnowledgeItemApplicationService {
|
||||
|
||||
List<MultipartFile> files = request.getFiles();
|
||||
BusinessAssert.isTrue(CollectionUtils.isNotEmpty(files), CommonErrorCode.PARAM_ERROR);
|
||||
String parentPrefix = normalizeRelativePathPrefix(request.getParentPrefix());
|
||||
|
||||
Path uploadRoot = resolveUploadRootPath();
|
||||
Path setDir = uploadRoot.resolve(KNOWLEDGE_ITEM_UPLOAD_DIR).resolve(setId).normalize();
|
||||
@@ -145,6 +152,7 @@ public class KnowledgeItemApplicationService {
|
||||
knowledgeItem.setContentType(KnowledgeContentType.FILE);
|
||||
knowledgeItem.setSourceType(KnowledgeSourceType.FILE_UPLOAD);
|
||||
knowledgeItem.setSourceFileId(trimToLength(safeOriginalName, MAX_TITLE_LENGTH));
|
||||
knowledgeItem.setRelativePath(buildRelativePath(parentPrefix, safeOriginalName));
|
||||
|
||||
items.add(knowledgeItem);
|
||||
}
|
||||
@@ -170,6 +178,9 @@ public class KnowledgeItemApplicationService {
|
||||
if (request.getContentType() != null) {
|
||||
knowledgeItem.setContentType(request.getContentType());
|
||||
}
|
||||
if (request.getMetadata() != null) {
|
||||
knowledgeItem.setMetadata(request.getMetadata());
|
||||
}
|
||||
|
||||
knowledgeItemRepository.updateById(knowledgeItem);
|
||||
return knowledgeItem;
|
||||
@@ -182,6 +193,22 @@ public class KnowledgeItemApplicationService {
|
||||
knowledgeItemRepository.removeById(itemId);
|
||||
}
|
||||
|
||||
public void deleteKnowledgeItems(String setId, DeleteKnowledgeItemsRequest request) {
|
||||
BusinessAssert.notNull(request, CommonErrorCode.PARAM_ERROR);
|
||||
List<String> ids = request.getIds();
|
||||
BusinessAssert.isTrue(CollectionUtils.isNotEmpty(ids), CommonErrorCode.PARAM_ERROR);
|
||||
|
||||
List<KnowledgeItem> items = knowledgeItemRepository.listByIds(ids);
|
||||
BusinessAssert.isTrue(CollectionUtils.isNotEmpty(items), DataManagementErrorCode.KNOWLEDGE_ITEM_NOT_FOUND);
|
||||
BusinessAssert.isTrue(items.size() == ids.size(), DataManagementErrorCode.KNOWLEDGE_ITEM_NOT_FOUND);
|
||||
|
||||
boolean allMatch = items.stream().allMatch(item -> Objects.equals(item.getSetId(), setId));
|
||||
BusinessAssert.isTrue(allMatch, CommonErrorCode.PARAM_ERROR);
|
||||
|
||||
List<String> deleteIds = items.stream().map(KnowledgeItem::getId).toList();
|
||||
knowledgeItemRepository.removeByIds(deleteIds);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public KnowledgeItem getKnowledgeItem(String setId, String itemId) {
|
||||
KnowledgeItem knowledgeItem = knowledgeItemRepository.getById(itemId);
|
||||
@@ -213,6 +240,7 @@ public class KnowledgeItemApplicationService {
|
||||
long datasetFileSize = safeLong(knowledgeItemRepository.sumDatasetFileSize());
|
||||
long uploadFileSize = calculateUploadFileTotalSize();
|
||||
response.setTotalSize(datasetFileSize + uploadFileSize);
|
||||
response.setTotalTags(safeLong(tagMapper.countKnowledgeSetTags()));
|
||||
|
||||
return response;
|
||||
}
|
||||
@@ -256,6 +284,7 @@ public class KnowledgeItemApplicationService {
|
||||
knowledgeItem.setSourceType(KnowledgeSourceType.DATASET_FILE);
|
||||
knowledgeItem.setSourceDatasetId(dataset.getId());
|
||||
knowledgeItem.setSourceFileId(datasetFile.getId());
|
||||
knowledgeItem.setRelativePath(resolveDatasetFileRelativePath(dataset, datasetFile));
|
||||
|
||||
items.add(knowledgeItem);
|
||||
}
|
||||
@@ -307,7 +336,7 @@ public class KnowledgeItemApplicationService {
|
||||
|
||||
String relativePath = knowledgeItem.getContent();
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(relativePath), CommonErrorCode.PARAM_ERROR);
|
||||
Path filePath = resolveKnowledgeItemStoragePath(relativePath);
|
||||
Path filePath = resolveKnowledgeItemStoragePathWithFallback(relativePath);
|
||||
BusinessAssert.isTrue(Files.exists(filePath) && Files.isRegularFile(filePath), CommonErrorCode.PARAM_ERROR);
|
||||
|
||||
String downloadName = StringUtils.isNotBlank(knowledgeItem.getSourceFileId())
|
||||
@@ -340,12 +369,32 @@ public class KnowledgeItemApplicationService {
|
||||
|
||||
String relativePath = knowledgeItem.getContent();
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(relativePath), CommonErrorCode.PARAM_ERROR);
|
||||
Path filePath = resolveKnowledgeItemStoragePath(relativePath);
|
||||
BusinessAssert.isTrue(Files.exists(filePath) && Files.isRegularFile(filePath), CommonErrorCode.PARAM_ERROR);
|
||||
|
||||
String previewName = StringUtils.isNotBlank(knowledgeItem.getSourceFileId())
|
||||
? knowledgeItem.getSourceFileId()
|
||||
: filePath.getFileName().toString();
|
||||
: Paths.get(relativePath).getFileName().toString();
|
||||
|
||||
if (knowledgeItemPreviewService.isOfficeDocument(previewName)) {
|
||||
KnowledgeItemPreviewService.PreviewFile previewFile = knowledgeItemPreviewService.resolveReadyPreviewFile(setId, knowledgeItem);
|
||||
if (previewFile == null) {
|
||||
response.setStatus(HttpServletResponse.SC_CONFLICT);
|
||||
return;
|
||||
}
|
||||
response.setContentType(PREVIEW_PDF_CONTENT_TYPE);
|
||||
response.setCharacterEncoding(StandardCharsets.UTF_8.name());
|
||||
response.setHeader(HttpHeaders.CONTENT_DISPOSITION,
|
||||
"inline; filename=\"" + URLEncoder.encode(previewFile.fileName(), StandardCharsets.UTF_8) + "\"");
|
||||
try (InputStream inputStream = Files.newInputStream(previewFile.filePath())) {
|
||||
inputStream.transferTo(response.getOutputStream());
|
||||
response.flushBuffer();
|
||||
} catch (IOException e) {
|
||||
log.error("preview knowledge item pdf error, itemId: {}", itemId, e);
|
||||
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Path filePath = resolveKnowledgeItemStoragePathWithFallback(relativePath);
|
||||
BusinessAssert.isTrue(Files.exists(filePath) && Files.isRegularFile(filePath), CommonErrorCode.PARAM_ERROR);
|
||||
|
||||
String contentType = null;
|
||||
try {
|
||||
@@ -418,7 +467,10 @@ public class KnowledgeItemApplicationService {
|
||||
knowledgeItem.setContentType(KnowledgeContentType.FILE);
|
||||
knowledgeItem.setSourceType(KnowledgeSourceType.FILE_UPLOAD);
|
||||
knowledgeItem.setSourceFileId(sourceFileId);
|
||||
knowledgeItem.setRelativePath(resolveReplacedRelativePath(knowledgeItem.getRelativePath(), sourceFileId));
|
||||
knowledgeItem.setMetadata(knowledgeItemPreviewService.clearPreviewMetadata(knowledgeItem.getMetadata()));
|
||||
knowledgeItemRepository.updateById(knowledgeItem);
|
||||
knowledgeItemPreviewService.deletePreviewFileQuietly(setId, knowledgeItem.getId());
|
||||
deleteFile(oldFilePath);
|
||||
} catch (Exception e) {
|
||||
deleteFileQuietly(targetPath);
|
||||
@@ -483,6 +535,86 @@ public class KnowledgeItemApplicationService {
|
||||
return target;
|
||||
}
|
||||
|
||||
private Path resolveKnowledgeItemStoragePathWithFallback(String relativePath) {
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(relativePath), CommonErrorCode.PARAM_ERROR);
|
||||
String normalizedInput = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
Path root = resolveUploadRootPath();
|
||||
java.util.LinkedHashSet<Path> candidates = new java.util.LinkedHashSet<>();
|
||||
|
||||
Path inputPath = Paths.get(normalizedInput.replace(PATH_SEPARATOR, File.separator));
|
||||
if (inputPath.isAbsolute()) {
|
||||
Path normalizedAbsolute = inputPath.toAbsolutePath().normalize();
|
||||
if (normalizedAbsolute.startsWith(root)) {
|
||||
candidates.add(normalizedAbsolute);
|
||||
}
|
||||
String segmentRelativePath = extractRelativePathFromSegment(normalizedInput, KNOWLEDGE_ITEM_UPLOAD_DIR);
|
||||
if (StringUtils.isNotBlank(segmentRelativePath)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, segmentRelativePath));
|
||||
}
|
||||
BusinessAssert.isTrue(!candidates.isEmpty(), CommonErrorCode.PARAM_ERROR);
|
||||
} else {
|
||||
String normalizedRelative = normalizeRelativePathValue(normalizedInput);
|
||||
if (StringUtils.isNotBlank(normalizedRelative)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, normalizedRelative));
|
||||
}
|
||||
String segmentRelativePath = extractRelativePathFromSegment(normalizedInput, KNOWLEDGE_ITEM_UPLOAD_DIR);
|
||||
if (StringUtils.isNotBlank(segmentRelativePath)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, segmentRelativePath));
|
||||
}
|
||||
if (StringUtils.isNotBlank(normalizedRelative)
|
||||
&& !normalizedRelative.startsWith(KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR)
|
||||
&& !normalizedRelative.equals(KNOWLEDGE_ITEM_UPLOAD_DIR)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR + normalizedRelative));
|
||||
}
|
||||
}
|
||||
|
||||
if (root.getFileName() != null && KNOWLEDGE_ITEM_UPLOAD_DIR.equals(root.getFileName().toString())) {
|
||||
String normalizedRelative = normalizeRelativePathValue(normalizedInput);
|
||||
if (StringUtils.isNotBlank(normalizedRelative)
|
||||
&& normalizedRelative.startsWith(KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR)) {
|
||||
String withoutPrefix = normalizedRelative.substring(KNOWLEDGE_ITEM_UPLOAD_DIR.length() + PATH_SEPARATOR.length());
|
||||
if (StringUtils.isNotBlank(withoutPrefix)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, withoutPrefix));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Path fallback = null;
|
||||
for (Path candidate : candidates) {
|
||||
if (fallback == null) {
|
||||
fallback = candidate;
|
||||
}
|
||||
if (Files.exists(candidate) && Files.isRegularFile(candidate)) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
BusinessAssert.notNull(fallback, CommonErrorCode.PARAM_ERROR);
|
||||
return fallback;
|
||||
}
|
||||
|
||||
private Path buildKnowledgeItemStoragePath(Path root, String relativePath) {
|
||||
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace(PATH_SEPARATOR, File.separator);
|
||||
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
|
||||
BusinessAssert.isTrue(target.startsWith(root), CommonErrorCode.PARAM_ERROR);
|
||||
return target;
|
||||
}
|
||||
|
||||
private String extractRelativePathFromSegment(String rawPath, String segment) {
|
||||
if (StringUtils.isBlank(rawPath) || StringUtils.isBlank(segment)) {
|
||||
return null;
|
||||
}
|
||||
String normalized = rawPath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
String segmentPrefix = segment + PATH_SEPARATOR;
|
||||
int index = normalized.indexOf(segmentPrefix);
|
||||
if (index < 0) {
|
||||
return segment.equals(normalized) ? segment : null;
|
||||
}
|
||||
return normalizeRelativePathValue(normalized.substring(index));
|
||||
}
|
||||
|
||||
private KnowledgeItemSearchResponse normalizeSearchResponse(KnowledgeItemSearchResponse item) {
|
||||
BusinessAssert.notNull(item, CommonErrorCode.PARAM_ERROR);
|
||||
if (item.getSourceType() == KnowledgeSourceType.FILE_UPLOAD) {
|
||||
@@ -540,6 +672,84 @@ public class KnowledgeItemApplicationService {
|
||||
return relativePath.replace(File.separatorChar, '/');
|
||||
}
|
||||
|
||||
private String buildRelativePath(String parentPrefix, String fileName) {
|
||||
String safeName = sanitizeFileName(fileName);
|
||||
if (StringUtils.isBlank(safeName)) {
|
||||
safeName = "file";
|
||||
}
|
||||
String normalizedPrefix = normalizeRelativePathPrefix(parentPrefix);
|
||||
return normalizedPrefix + safeName;
|
||||
}
|
||||
|
||||
private String normalizeRelativePathPrefix(String prefix) {
|
||||
if (StringUtils.isBlank(prefix)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = prefix.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
BusinessAssert.isTrue(!normalized.contains(".."), CommonErrorCode.PARAM_ERROR);
|
||||
if (StringUtils.isBlank(normalized)) {
|
||||
return "";
|
||||
}
|
||||
return normalized + PATH_SEPARATOR;
|
||||
}
|
||||
|
||||
private String normalizeRelativePathValue(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
private String resolveDatasetFileRelativePath(Dataset dataset, DatasetFile datasetFile) {
|
||||
if (datasetFile == null) {
|
||||
return "";
|
||||
}
|
||||
String fileName = StringUtils.defaultIfBlank(datasetFile.getFileName(), datasetFile.getId());
|
||||
String datasetPath = dataset == null ? null : dataset.getPath();
|
||||
String filePath = datasetFile.getFilePath();
|
||||
if (StringUtils.isBlank(datasetPath) || StringUtils.isBlank(filePath)) {
|
||||
return buildRelativePath("", fileName);
|
||||
}
|
||||
try {
|
||||
Path datasetRoot = Paths.get(datasetPath).toAbsolutePath().normalize();
|
||||
Path targetPath = Paths.get(filePath).toAbsolutePath().normalize();
|
||||
if (targetPath.startsWith(datasetRoot)) {
|
||||
Path relative = datasetRoot.relativize(targetPath);
|
||||
String relativeValue = relative.toString().replace(File.separatorChar, '/');
|
||||
String normalized = normalizeRelativePathValue(relativeValue);
|
||||
if (!normalized.contains("..") && StringUtils.isNotBlank(normalized)) {
|
||||
return normalized;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("resolve dataset file relative path failed, fileId: {}", datasetFile.getId(), e);
|
||||
}
|
||||
return buildRelativePath("", fileName);
|
||||
}
|
||||
|
||||
private String resolveReplacedRelativePath(String existingRelativePath, String newFileName) {
|
||||
String normalized = normalizeRelativePathValue(existingRelativePath);
|
||||
if (StringUtils.isBlank(normalized)) {
|
||||
return buildRelativePath("", newFileName);
|
||||
}
|
||||
int lastIndex = normalized.lastIndexOf(PATH_SEPARATOR);
|
||||
String parentPrefix = lastIndex >= 0 ? normalized.substring(0, lastIndex + 1) : "";
|
||||
return buildRelativePath(parentPrefix, newFileName);
|
||||
}
|
||||
|
||||
private void createDirectories(Path path) {
|
||||
try {
|
||||
Files.createDirectories(path);
|
||||
|
||||
@@ -0,0 +1,275 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItem;
|
||||
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeItemRepository;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* 知识条目预览转换异步任务
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class KnowledgeItemPreviewAsyncService {
|
||||
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
|
||||
private static final String KNOWLEDGE_ITEM_UPLOAD_DIR = "knowledge-items";
|
||||
private static final String PREVIEW_SUB_DIR = "preview";
|
||||
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private static final int MAX_ERROR_LENGTH = 500;
|
||||
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
|
||||
|
||||
private final KnowledgeItemRepository knowledgeItemRepository;
|
||||
private final DataManagementProperties dataManagementProperties;
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
@Async
|
||||
public void convertPreviewAsync(String itemId) {
|
||||
if (StringUtils.isBlank(itemId)) {
|
||||
return;
|
||||
}
|
||||
KnowledgeItem item = knowledgeItemRepository.getById(itemId);
|
||||
if (item == null) {
|
||||
return;
|
||||
}
|
||||
String extension = resolveFileExtension(resolveOriginalName(item));
|
||||
if (!OFFICE_EXTENSIONS.contains(extension)) {
|
||||
updatePreviewStatus(item, KnowledgeItemPreviewStatus.FAILED, null, "仅支持 DOC/DOCX 转换");
|
||||
return;
|
||||
}
|
||||
if (StringUtils.isBlank(item.getContent())) {
|
||||
updatePreviewStatus(item, KnowledgeItemPreviewStatus.FAILED, null, "源文件路径为空");
|
||||
return;
|
||||
}
|
||||
Path sourcePath = resolveKnowledgeItemStoragePath(item.getContent());
|
||||
if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
|
||||
updatePreviewStatus(item, KnowledgeItemPreviewStatus.FAILED, null, "源文件不存在");
|
||||
return;
|
||||
}
|
||||
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(item.getMetadata(), objectMapper);
|
||||
String previewRelativePath = StringUtils.defaultIfBlank(
|
||||
previewInfo.pdfPath(),
|
||||
resolvePreviewRelativePath(item.getSetId(), item.getId())
|
||||
);
|
||||
Path targetPath = resolvePreviewStoragePath(previewRelativePath);
|
||||
ensureParentDirectory(targetPath);
|
||||
|
||||
try {
|
||||
LibreOfficeConverter.convertToPdf(sourcePath, targetPath);
|
||||
updatePreviewStatus(item, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
|
||||
} catch (Exception e) {
|
||||
log.error("preview convert failed, itemId: {}", item.getId(), e);
|
||||
updatePreviewStatus(item, KnowledgeItemPreviewStatus.FAILED, previewRelativePath, trimError(e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
private void updatePreviewStatus(
|
||||
KnowledgeItem item,
|
||||
KnowledgeItemPreviewStatus status,
|
||||
String previewRelativePath,
|
||||
String error
|
||||
) {
|
||||
if (item == null) {
|
||||
return;
|
||||
}
|
||||
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
|
||||
item.getMetadata(),
|
||||
objectMapper,
|
||||
status,
|
||||
previewRelativePath,
|
||||
error,
|
||||
nowText()
|
||||
);
|
||||
item.setMetadata(updatedMetadata);
|
||||
knowledgeItemRepository.updateById(item);
|
||||
}
|
||||
|
||||
private String resolveOriginalName(KnowledgeItem item) {
|
||||
if (item == null) {
|
||||
return "";
|
||||
}
|
||||
if (StringUtils.isNotBlank(item.getSourceFileId())) {
|
||||
return item.getSourceFileId();
|
||||
}
|
||||
if (StringUtils.isNotBlank(item.getContent())) {
|
||||
return Paths.get(item.getContent()).getFileName().toString();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private String resolveFileExtension(String fileName) {
|
||||
if (StringUtils.isBlank(fileName)) {
|
||||
return "";
|
||||
}
|
||||
int dotIndex = fileName.lastIndexOf('.');
|
||||
if (dotIndex <= 0 || dotIndex >= fileName.length() - 1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(dotIndex + 1).toLowerCase();
|
||||
}
|
||||
|
||||
private String resolvePreviewRelativePath(String setId, String itemId) {
|
||||
String relativePath = Paths.get(KNOWLEDGE_ITEM_UPLOAD_DIR, setId, PREVIEW_SUB_DIR, itemId + PREVIEW_FILE_SUFFIX)
|
||||
.toString();
|
||||
return relativePath.replace("\\", PATH_SEPARATOR);
|
||||
}
|
||||
|
||||
private Path resolvePreviewStoragePath(String relativePath) {
|
||||
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
|
||||
Path root = resolveUploadRootPath();
|
||||
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
|
||||
if (!target.startsWith(root)) {
|
||||
throw new IllegalArgumentException("invalid preview path");
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
private Path resolveKnowledgeItemStoragePath(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
throw new IllegalArgumentException("invalid knowledge item path");
|
||||
}
|
||||
String normalizedInput = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
Path root = resolveUploadRootPath();
|
||||
java.util.LinkedHashSet<Path> candidates = new java.util.LinkedHashSet<>();
|
||||
|
||||
Path inputPath = Paths.get(normalizedInput.replace(PATH_SEPARATOR, java.io.File.separator));
|
||||
if (inputPath.isAbsolute()) {
|
||||
Path normalizedAbsolute = inputPath.toAbsolutePath().normalize();
|
||||
if (normalizedAbsolute.startsWith(root)) {
|
||||
candidates.add(normalizedAbsolute);
|
||||
}
|
||||
String segmentRelativePath = extractRelativePathFromSegment(normalizedInput, KNOWLEDGE_ITEM_UPLOAD_DIR);
|
||||
if (StringUtils.isNotBlank(segmentRelativePath)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, segmentRelativePath));
|
||||
}
|
||||
if (candidates.isEmpty()) {
|
||||
throw new IllegalArgumentException("invalid knowledge item path");
|
||||
}
|
||||
} else {
|
||||
String normalizedRelative = normalizeRelativePathValue(normalizedInput);
|
||||
if (StringUtils.isNotBlank(normalizedRelative)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, normalizedRelative));
|
||||
}
|
||||
String segmentRelativePath = extractRelativePathFromSegment(normalizedInput, KNOWLEDGE_ITEM_UPLOAD_DIR);
|
||||
if (StringUtils.isNotBlank(segmentRelativePath)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, segmentRelativePath));
|
||||
}
|
||||
if (StringUtils.isNotBlank(normalizedRelative)
|
||||
&& !normalizedRelative.startsWith(KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR)
|
||||
&& !normalizedRelative.equals(KNOWLEDGE_ITEM_UPLOAD_DIR)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR + normalizedRelative));
|
||||
}
|
||||
}
|
||||
|
||||
if (root.getFileName() != null && KNOWLEDGE_ITEM_UPLOAD_DIR.equals(root.getFileName().toString())) {
|
||||
String normalizedRelative = normalizeRelativePathValue(normalizedInput);
|
||||
if (StringUtils.isNotBlank(normalizedRelative)
|
||||
&& normalizedRelative.startsWith(KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR)) {
|
||||
String withoutPrefix = normalizedRelative.substring(KNOWLEDGE_ITEM_UPLOAD_DIR.length() + PATH_SEPARATOR.length());
|
||||
if (StringUtils.isNotBlank(withoutPrefix)) {
|
||||
candidates.add(buildKnowledgeItemStoragePath(root, withoutPrefix));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Path fallback = null;
|
||||
for (Path candidate : candidates) {
|
||||
if (fallback == null) {
|
||||
fallback = candidate;
|
||||
}
|
||||
if (Files.exists(candidate) && Files.isRegularFile(candidate)) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
if (fallback == null) {
|
||||
throw new IllegalArgumentException("invalid knowledge item path");
|
||||
}
|
||||
return fallback;
|
||||
}
|
||||
|
||||
private Path buildKnowledgeItemStoragePath(Path root, String relativePath) {
|
||||
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace(PATH_SEPARATOR, java.io.File.separator);
|
||||
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
|
||||
if (!target.startsWith(root)) {
|
||||
throw new IllegalArgumentException("invalid knowledge item path");
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
private String extractRelativePathFromSegment(String rawPath, String segment) {
|
||||
if (StringUtils.isBlank(rawPath) || StringUtils.isBlank(segment)) {
|
||||
return null;
|
||||
}
|
||||
String normalized = rawPath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
String segmentPrefix = segment + PATH_SEPARATOR;
|
||||
int index = normalized.indexOf(segmentPrefix);
|
||||
if (index < 0) {
|
||||
return segment.equals(normalized) ? segment : null;
|
||||
}
|
||||
return normalizeRelativePathValue(normalized.substring(index));
|
||||
}
|
||||
|
||||
private String normalizeRelativePathValue(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
private Path resolveUploadRootPath() {
|
||||
String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
|
||||
return Paths.get(uploadDir).toAbsolutePath().normalize();
|
||||
}
|
||||
|
||||
private void ensureParentDirectory(Path targetPath) {
|
||||
try {
|
||||
Path parent = targetPath.getParent();
|
||||
if (parent != null) {
|
||||
Files.createDirectories(parent);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException("创建预览目录失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
private String trimError(String error) {
|
||||
if (StringUtils.isBlank(error)) {
|
||||
return "";
|
||||
}
|
||||
if (error.length() <= MAX_ERROR_LENGTH) {
|
||||
return error;
|
||||
}
|
||||
return error.substring(0, MAX_ERROR_LENGTH);
|
||||
}
|
||||
|
||||
private String nowText() {
|
||||
return LocalDateTime.now().format(PREVIEW_TIME_FORMATTER);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,134 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* 知识条目预览元数据解析与写入辅助类
|
||||
*/
|
||||
public final class KnowledgeItemPreviewMetadataHelper {
|
||||
public static final String PREVIEW_STATUS_KEY = "previewStatus";
|
||||
public static final String PREVIEW_PDF_PATH_KEY = "previewPdfPath";
|
||||
public static final String PREVIEW_ERROR_KEY = "previewError";
|
||||
public static final String PREVIEW_UPDATED_AT_KEY = "previewUpdatedAt";
|
||||
|
||||
private KnowledgeItemPreviewMetadataHelper() {
|
||||
}
|
||||
|
||||
public static PreviewInfo readPreviewInfo(String metadata, ObjectMapper objectMapper) {
|
||||
if (StringUtils.isBlank(metadata) || objectMapper == null) {
|
||||
return PreviewInfo.empty();
|
||||
}
|
||||
try {
|
||||
JsonNode node = objectMapper.readTree(metadata);
|
||||
if (node == null || !node.isObject()) {
|
||||
return PreviewInfo.empty();
|
||||
}
|
||||
String statusText = textValue(node, PREVIEW_STATUS_KEY);
|
||||
KnowledgeItemPreviewStatus status = parseStatus(statusText);
|
||||
return new PreviewInfo(
|
||||
status,
|
||||
textValue(node, PREVIEW_PDF_PATH_KEY),
|
||||
textValue(node, PREVIEW_ERROR_KEY),
|
||||
textValue(node, PREVIEW_UPDATED_AT_KEY)
|
||||
);
|
||||
} catch (Exception ignore) {
|
||||
return PreviewInfo.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public static String applyPreviewInfo(
|
||||
String metadata,
|
||||
ObjectMapper objectMapper,
|
||||
KnowledgeItemPreviewStatus status,
|
||||
String pdfPath,
|
||||
String error,
|
||||
String updatedAt
|
||||
) {
|
||||
if (objectMapper == null) {
|
||||
return metadata;
|
||||
}
|
||||
ObjectNode root = parseRoot(metadata, objectMapper);
|
||||
if (status == null) {
|
||||
root.remove(PREVIEW_STATUS_KEY);
|
||||
} else {
|
||||
root.put(PREVIEW_STATUS_KEY, status.name());
|
||||
}
|
||||
if (StringUtils.isBlank(pdfPath)) {
|
||||
root.remove(PREVIEW_PDF_PATH_KEY);
|
||||
} else {
|
||||
root.put(PREVIEW_PDF_PATH_KEY, pdfPath);
|
||||
}
|
||||
if (StringUtils.isBlank(error)) {
|
||||
root.remove(PREVIEW_ERROR_KEY);
|
||||
} else {
|
||||
root.put(PREVIEW_ERROR_KEY, error);
|
||||
}
|
||||
if (StringUtils.isBlank(updatedAt)) {
|
||||
root.remove(PREVIEW_UPDATED_AT_KEY);
|
||||
} else {
|
||||
root.put(PREVIEW_UPDATED_AT_KEY, updatedAt);
|
||||
}
|
||||
return root.size() == 0 ? null : root.toString();
|
||||
}
|
||||
|
||||
public static String clearPreviewInfo(String metadata, ObjectMapper objectMapper) {
|
||||
if (objectMapper == null) {
|
||||
return metadata;
|
||||
}
|
||||
ObjectNode root = parseRoot(metadata, objectMapper);
|
||||
root.remove(PREVIEW_STATUS_KEY);
|
||||
root.remove(PREVIEW_PDF_PATH_KEY);
|
||||
root.remove(PREVIEW_ERROR_KEY);
|
||||
root.remove(PREVIEW_UPDATED_AT_KEY);
|
||||
return root.size() == 0 ? null : root.toString();
|
||||
}
|
||||
|
||||
private static ObjectNode parseRoot(String metadata, ObjectMapper objectMapper) {
|
||||
if (StringUtils.isBlank(metadata)) {
|
||||
return objectMapper.createObjectNode();
|
||||
}
|
||||
try {
|
||||
JsonNode node = objectMapper.readTree(metadata);
|
||||
if (node instanceof ObjectNode objectNode) {
|
||||
return objectNode;
|
||||
}
|
||||
} catch (Exception ignore) {
|
||||
return objectMapper.createObjectNode();
|
||||
}
|
||||
return objectMapper.createObjectNode();
|
||||
}
|
||||
|
||||
private static String textValue(JsonNode node, String key) {
|
||||
if (node == null || StringUtils.isBlank(key)) {
|
||||
return null;
|
||||
}
|
||||
JsonNode value = node.get(key);
|
||||
return value == null || value.isNull() ? null : value.asText();
|
||||
}
|
||||
|
||||
private static KnowledgeItemPreviewStatus parseStatus(String statusText) {
|
||||
if (StringUtils.isBlank(statusText)) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return KnowledgeItemPreviewStatus.valueOf(statusText);
|
||||
} catch (Exception ignore) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public record PreviewInfo(
|
||||
KnowledgeItemPreviewStatus status,
|
||||
String pdfPath,
|
||||
String error,
|
||||
String updatedAt
|
||||
) {
|
||||
public static PreviewInfo empty() {
|
||||
return new PreviewInfo(null, null, null, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,244 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.BusinessAssert;
|
||||
import com.datamate.common.infrastructure.exception.CommonErrorCode;
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeContentType;
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeSourceType;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItem;
|
||||
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeItemRepository;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemPreviewStatusResponse;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* 知识条目预览转换服务
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class KnowledgeItemPreviewService {
|
||||
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
|
||||
private static final String KNOWLEDGE_ITEM_UPLOAD_DIR = "knowledge-items";
|
||||
private static final String PREVIEW_SUB_DIR = "preview";
|
||||
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
|
||||
|
||||
private final KnowledgeItemRepository knowledgeItemRepository;
|
||||
private final DataManagementProperties dataManagementProperties;
|
||||
private final KnowledgeItemPreviewAsyncService knowledgeItemPreviewAsyncService;
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
public KnowledgeItemPreviewStatusResponse getPreviewStatus(String setId, String itemId) {
|
||||
KnowledgeItem item = requireKnowledgeItem(setId, itemId);
|
||||
assertOfficeDocument(item);
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(item.getMetadata(), objectMapper);
|
||||
|
||||
if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && !previewPdfExists(item, previewInfo)) {
|
||||
previewInfo = markPreviewFailed(item, previewInfo, "预览文件不存在");
|
||||
}
|
||||
|
||||
return buildResponse(previewInfo);
|
||||
}
|
||||
|
||||
public KnowledgeItemPreviewStatusResponse ensurePreview(String setId, String itemId) {
|
||||
KnowledgeItem item = requireKnowledgeItem(setId, itemId);
|
||||
assertOfficeDocument(item);
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(item.getMetadata(), objectMapper);
|
||||
|
||||
if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && previewPdfExists(item, previewInfo)) {
|
||||
return buildResponse(previewInfo);
|
||||
}
|
||||
if (previewInfo.status() == KnowledgeItemPreviewStatus.PROCESSING) {
|
||||
return buildResponse(previewInfo);
|
||||
}
|
||||
|
||||
String previewRelativePath = resolvePreviewRelativePath(item.getSetId(), item.getId());
|
||||
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
|
||||
item.getMetadata(),
|
||||
objectMapper,
|
||||
KnowledgeItemPreviewStatus.PROCESSING,
|
||||
previewRelativePath,
|
||||
null,
|
||||
nowText()
|
||||
);
|
||||
item.setMetadata(updatedMetadata);
|
||||
knowledgeItemRepository.updateById(item);
|
||||
knowledgeItemPreviewAsyncService.convertPreviewAsync(item.getId());
|
||||
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo refreshed = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(updatedMetadata, objectMapper);
|
||||
return buildResponse(refreshed);
|
||||
}
|
||||
|
||||
public boolean isOfficeDocument(String fileName) {
|
||||
String extension = resolveFileExtension(fileName);
|
||||
return StringUtils.isNotBlank(extension) && OFFICE_EXTENSIONS.contains(extension.toLowerCase());
|
||||
}
|
||||
|
||||
public PreviewFile resolveReadyPreviewFile(String setId, KnowledgeItem item) {
|
||||
if (item == null) {
|
||||
return null;
|
||||
}
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
|
||||
.readPreviewInfo(item.getMetadata(), objectMapper);
|
||||
if (previewInfo.status() != KnowledgeItemPreviewStatus.READY) {
|
||||
return null;
|
||||
}
|
||||
String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(setId, item.getId()));
|
||||
Path filePath = resolvePreviewStoragePath(relativePath);
|
||||
if (!Files.exists(filePath) || !Files.isRegularFile(filePath)) {
|
||||
markPreviewFailed(item, previewInfo, "预览文件不存在");
|
||||
return null;
|
||||
}
|
||||
String previewName = resolvePreviewPdfName(item);
|
||||
return new PreviewFile(filePath, previewName);
|
||||
}
|
||||
|
||||
public String clearPreviewMetadata(String metadata) {
|
||||
return KnowledgeItemPreviewMetadataHelper.clearPreviewInfo(metadata, objectMapper);
|
||||
}
|
||||
|
||||
public void deletePreviewFileQuietly(String setId, String itemId) {
|
||||
String relativePath = resolvePreviewRelativePath(setId, itemId);
|
||||
Path filePath = resolvePreviewStoragePath(relativePath);
|
||||
try {
|
||||
Files.deleteIfExists(filePath);
|
||||
} catch (Exception e) {
|
||||
log.warn("delete preview pdf error, itemId: {}", itemId, e);
|
||||
}
|
||||
}
|
||||
|
||||
private KnowledgeItemPreviewStatusResponse buildResponse(KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
|
||||
KnowledgeItemPreviewStatusResponse response = new KnowledgeItemPreviewStatusResponse();
|
||||
KnowledgeItemPreviewStatus status = previewInfo.status() == null
|
||||
? KnowledgeItemPreviewStatus.PENDING
|
||||
: previewInfo.status();
|
||||
response.setStatus(status);
|
||||
response.setPreviewError(previewInfo.error());
|
||||
response.setUpdatedAt(previewInfo.updatedAt());
|
||||
return response;
|
||||
}
|
||||
|
||||
private KnowledgeItem requireKnowledgeItem(String setId, String itemId) {
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(setId), CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(itemId), CommonErrorCode.PARAM_ERROR);
|
||||
KnowledgeItem knowledgeItem = knowledgeItemRepository.getById(itemId);
|
||||
BusinessAssert.notNull(knowledgeItem, CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(Objects.equals(knowledgeItem.getSetId(), setId), CommonErrorCode.PARAM_ERROR);
|
||||
return knowledgeItem;
|
||||
}
|
||||
|
||||
private void assertOfficeDocument(KnowledgeItem item) {
|
||||
BusinessAssert.notNull(item, CommonErrorCode.PARAM_ERROR);
|
||||
BusinessAssert.isTrue(
|
||||
item.getContentType() == KnowledgeContentType.FILE || item.getSourceType() == KnowledgeSourceType.FILE_UPLOAD,
|
||||
CommonErrorCode.PARAM_ERROR
|
||||
);
|
||||
String extension = resolveFileExtension(resolveOriginalName(item));
|
||||
BusinessAssert.isTrue(OFFICE_EXTENSIONS.contains(extension), CommonErrorCode.PARAM_ERROR);
|
||||
}
|
||||
|
||||
private String resolveOriginalName(KnowledgeItem item) {
|
||||
if (item == null) {
|
||||
return "";
|
||||
}
|
||||
if (StringUtils.isNotBlank(item.getSourceFileId())) {
|
||||
return item.getSourceFileId();
|
||||
}
|
||||
if (StringUtils.isNotBlank(item.getContent())) {
|
||||
return Paths.get(item.getContent()).getFileName().toString();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private String resolveFileExtension(String fileName) {
|
||||
if (StringUtils.isBlank(fileName)) {
|
||||
return "";
|
||||
}
|
||||
int dotIndex = fileName.lastIndexOf('.');
|
||||
if (dotIndex <= 0 || dotIndex >= fileName.length() - 1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(dotIndex + 1).toLowerCase();
|
||||
}
|
||||
|
||||
private String resolvePreviewPdfName(KnowledgeItem item) {
|
||||
String originalName = resolveOriginalName(item);
|
||||
if (StringUtils.isBlank(originalName)) {
|
||||
return "预览.pdf";
|
||||
}
|
||||
int dotIndex = originalName.lastIndexOf('.');
|
||||
if (dotIndex <= 0) {
|
||||
return originalName + PREVIEW_FILE_SUFFIX;
|
||||
}
|
||||
return originalName.substring(0, dotIndex) + PREVIEW_FILE_SUFFIX;
|
||||
}
|
||||
|
||||
private boolean previewPdfExists(KnowledgeItem item, KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
|
||||
String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(item.getSetId(), item.getId()));
|
||||
Path filePath = resolvePreviewStoragePath(relativePath);
|
||||
return Files.exists(filePath) && Files.isRegularFile(filePath);
|
||||
}
|
||||
|
||||
private KnowledgeItemPreviewMetadataHelper.PreviewInfo markPreviewFailed(
|
||||
KnowledgeItem item,
|
||||
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo,
|
||||
String error
|
||||
) {
|
||||
String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(item.getSetId(), item.getId()));
|
||||
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
|
||||
item.getMetadata(),
|
||||
objectMapper,
|
||||
KnowledgeItemPreviewStatus.FAILED,
|
||||
relativePath,
|
||||
error,
|
||||
nowText()
|
||||
);
|
||||
item.setMetadata(updatedMetadata);
|
||||
knowledgeItemRepository.updateById(item);
|
||||
return KnowledgeItemPreviewMetadataHelper.readPreviewInfo(updatedMetadata, objectMapper);
|
||||
}
|
||||
|
||||
private String resolvePreviewRelativePath(String setId, String itemId) {
|
||||
String relativePath = Paths.get(KNOWLEDGE_ITEM_UPLOAD_DIR, setId, PREVIEW_SUB_DIR, itemId + PREVIEW_FILE_SUFFIX)
|
||||
.toString();
|
||||
return relativePath.replace("\\", PATH_SEPARATOR);
|
||||
}
|
||||
|
||||
private Path resolvePreviewStoragePath(String relativePath) {
|
||||
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
|
||||
Path root = resolveUploadRootPath();
|
||||
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
|
||||
BusinessAssert.isTrue(target.startsWith(root), CommonErrorCode.PARAM_ERROR);
|
||||
return target;
|
||||
}
|
||||
|
||||
private Path resolveUploadRootPath() {
|
||||
String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
|
||||
BusinessAssert.isTrue(StringUtils.isNotBlank(uploadDir), CommonErrorCode.PARAM_ERROR);
|
||||
return Paths.get(uploadDir).toAbsolutePath().normalize();
|
||||
}
|
||||
|
||||
private String nowText() {
|
||||
return LocalDateTime.now().format(PREVIEW_TIME_FORMATTER);
|
||||
}
|
||||
|
||||
/**
 * A preview PDF on disk together with the file name to present for it.
 *
 * @param filePath location of the preview file
 * @param fileName display name of the preview file
 */
public record PreviewFile(Path filePath, String fileName) {}
|
||||
}
|
||||
@@ -0,0 +1,93 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.time.Duration;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * LibreOffice document conversion utility.
 *
 * <p>Runs the {@code soffice} command-line tool in headless mode to convert a document to PDF.
 */
public final class LibreOfficeConverter {
    private static final String LIBREOFFICE_COMMAND = "soffice";
    private static final Duration CONVERT_TIMEOUT = Duration.ofMinutes(5);
    /** Maximum number of bytes of process output included in a failure message. */
    private static final int MAX_OUTPUT_LENGTH = 500;

    private LibreOfficeConverter() {
    }

    /**
     * Converts {@code sourcePath} to a PDF stored at {@code targetPath}.
     *
     * <p>LibreOffice writes {@code <source base name>.pdf} into the target's parent directory;
     * the file is then moved onto {@code targetPath} when the names differ.
     *
     * <p>The process output is redirected to a temporary file rather than read from a pipe.
     * Reading the pipe before checking the timeout would block until the process exits, which
     * defeats the timeout for a hung process; leaving the pipe unread while waiting can stall a
     * process that writes more than the pipe buffer holds.
     *
     * @param sourcePath document to convert
     * @param targetPath desired location of the resulting PDF
     * @throws IllegalArgumentException when {@code targetPath} has no parent directory
     * @throws IllegalStateException    on timeout, non-zero exit code, or missing output file
     * @throws Exception                on I/O failure or when the waiting thread is interrupted
     */
    public static void convertToPdf(Path sourcePath, Path targetPath) throws Exception {
        Path target = targetPath.toAbsolutePath();
        Path outputDir = target.getParent();
        if (outputDir == null) {
            throw new IllegalArgumentException("targetPath has no parent directory: " + targetPath);
        }
        List<String> command = List.of(
                LIBREOFFICE_COMMAND,
                "--headless",
                "--nologo",
                "--nolockcheck",
                "--nodefault",
                "--nofirststartwizard",
                "--convert-to",
                "pdf",
                "--outdir",
                outputDir.toString(),
                sourcePath.toString()
        );

        Path logFile = Files.createTempFile("soffice-convert-", ".log");
        try {
            ProcessBuilder processBuilder = new ProcessBuilder(command);
            processBuilder.redirectErrorStream(true);
            processBuilder.redirectOutput(logFile.toFile());
            Process process = processBuilder.start();

            boolean finished;
            try {
                finished = process.waitFor(CONVERT_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
            } catch (InterruptedException e) {
                // Do not leave an orphaned soffice process behind when the caller is interrupted.
                process.destroyForcibly();
                throw e;
            }
            if (!finished) {
                process.destroyForcibly();
                throw new IllegalStateException("LibreOffice 转换超时");
            }
            if (process.exitValue() != 0) {
                String output;
                try (InputStream logStream = Files.newInputStream(logFile)) {
                    output = readProcessOutput(logStream);
                }
                throw new IllegalStateException("LibreOffice 转换失败: " + output);
            }
        } finally {
            try {
                Files.deleteIfExists(logFile);
            } catch (IOException ignored) {
                // Best effort: a leftover temporary log must not mask the conversion result.
            }
        }

        Path generated = outputDir.resolve(stripExtension(sourcePath.getFileName().toString()) + ".pdf");
        if (!Files.exists(generated)) {
            throw new IllegalStateException("LibreOffice 输出文件不存在");
        }
        if (!generated.equals(target)) {
            Files.move(generated, target, StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Reads at most MAX_OUTPUT_LENGTH bytes from the stream and decodes them as UTF-8.
     *
     * <p>The bytes are decoded in one step so that a multi-byte character is never split across
     * read chunks; only a character cut off at the very end of the limit can be garbled.
     *
     * @param inputStream stream to read; may be null
     * @return the decoded head of the stream, or an empty string for a null stream
     * @throws IOException when reading fails
     */
    private static String readProcessOutput(InputStream inputStream) throws IOException {
        if (inputStream == null) {
            return "";
        }
        byte[] head = inputStream.readNBytes(MAX_OUTPUT_LENGTH);
        return new String(head, StandardCharsets.UTF_8);
    }

    /**
     * Removes the last extension from a file name.
     *
     * @param fileName file name; may be null or blank
     * @return the name without its extension, the unchanged name when it has no extension or
     *         starts with its only dot, or "preview" when the name is null or blank
     */
    private static String stripExtension(String fileName) {
        if (fileName == null || fileName.isBlank()) {
            return "preview";
        }
        int dotIndex = fileName.lastIndexOf('.');
        return dotIndex <= 0 ? fileName : fileName.substring(0, dotIndex);
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.datamate.datamanagement.common.enums;
|
||||
|
||||
/**
 * Conversion status of a knowledge item preview.
 */
public enum KnowledgeItemPreviewStatus {
    PENDING,
    PROCESSING,
    READY,
    FAILED
}
|
||||
@@ -38,4 +38,12 @@ public class KnowledgeItem extends BaseEntity<String> {
|
||||
* 来源文件ID
|
||||
*/
|
||||
private String sourceFileId;
|
||||
/**
|
||||
* 相对路径(用于目录展示)
|
||||
*/
|
||||
private String relativePath;
|
||||
/**
|
||||
* 扩展元数据
|
||||
*/
|
||||
private String metadata;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.datamate.datamanagement.domain.model.knowledge;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.datamate.common.domain.model.base.BaseEntity;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 知识条目目录实体(与数据库表 t_dm_knowledge_item_directories 对齐)
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@TableName(value = "t_dm_knowledge_item_directories", autoResultMap = true)
|
||||
public class KnowledgeItemDirectory extends BaseEntity<String> {
|
||||
/**
|
||||
* 所属知识集ID
|
||||
*/
|
||||
private String setId;
|
||||
|
||||
/**
|
||||
* 目录名称
|
||||
*/
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* 目录相对路径
|
||||
*/
|
||||
private String relativePath;
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package com.datamate.datamanagement.infrastructure.persistence.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.apache.ibatis.session.RowBounds;
|
||||
@@ -17,6 +18,7 @@ public interface DatasetFileMapper extends BaseMapper<DatasetFile> {
|
||||
Long countByDatasetId(@Param("datasetId") String datasetId);
|
||||
Long countCompletedByDatasetId(@Param("datasetId") String datasetId);
|
||||
Long sumSizeByDatasetId(@Param("datasetId") String datasetId);
|
||||
Long countNonDerivedByDatasetId(@Param("datasetId") String datasetId);
|
||||
DatasetFile findByDatasetIdAndFileName(@Param("datasetId") String datasetId, @Param("fileName") String fileName);
|
||||
List<DatasetFile> findAllByDatasetId(@Param("datasetId") String datasetId);
|
||||
List<DatasetFile> findByCriteria(@Param("datasetId") String datasetId,
|
||||
@@ -38,4 +40,12 @@ public interface DatasetFileMapper extends BaseMapper<DatasetFile> {
|
||||
* @return 源文件ID列表
|
||||
*/
|
||||
List<String> findSourceFileIdsWithDerivedFiles(@Param("datasetId") String datasetId);
|
||||
|
||||
/**
|
||||
* 批量统计排除衍生文件后的文件数
|
||||
*
|
||||
* @param datasetIds 数据集ID列表
|
||||
* @return 文件数统计列表
|
||||
*/
|
||||
List<DatasetFileCount> countNonDerivedByDatasetIds(@Param("datasetIds") List<String> datasetIds);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.datamate.datamanagement.infrastructure.persistence.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItemDirectory;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
@Mapper
|
||||
public interface KnowledgeItemDirectoryMapper extends BaseMapper<KnowledgeItemDirectory> {
|
||||
}
|
||||
@@ -28,13 +28,16 @@ public interface KnowledgeItemMapper extends BaseMapper<KnowledgeItem> {
|
||||
WHEN ki.source_type = 'FILE_UPLOAD' THEN ki.content
|
||||
ELSE NULL
|
||||
END AS content,
|
||||
ki.relative_path AS relativePath,
|
||||
ki.created_at AS createdAt,
|
||||
ki.updated_at AS updatedAt
|
||||
FROM t_dm_knowledge_items ki
|
||||
LEFT JOIN t_dm_knowledge_sets ks ON ki.set_id = ks.id
|
||||
LEFT JOIN t_dm_dataset_files df ON ki.source_file_id = df.id AND ki.source_type = 'DATASET_FILE'
|
||||
WHERE (ki.source_type = 'FILE_UPLOAD' AND ki.source_file_id LIKE CONCAT('%', #{keyword}, '%'))
|
||||
OR (ki.source_type = 'DATASET_FILE' AND df.file_name LIKE CONCAT('%', #{keyword}, '%'))
|
||||
WHERE (ki.source_type = 'FILE_UPLOAD' AND (ki.source_file_id LIKE CONCAT('%', #{keyword}, '%')
|
||||
OR ki.relative_path LIKE CONCAT('%', #{keyword}, '%')))
|
||||
OR (ki.source_type = 'DATASET_FILE' AND (df.file_name LIKE CONCAT('%', #{keyword}, '%')
|
||||
OR ki.relative_path LIKE CONCAT('%', #{keyword}, '%')))
|
||||
ORDER BY ki.created_at DESC
|
||||
""")
|
||||
IPage<KnowledgeItemSearchResponse> searchFileItems(IPage<?> page, @Param("keyword") String keyword);
|
||||
|
||||
@@ -14,6 +14,7 @@ public interface TagMapper {
|
||||
List<Tag> findByIdIn(@Param("ids") List<String> ids);
|
||||
List<Tag> findByKeyword(@Param("keyword") String keyword);
|
||||
List<Tag> findAllByOrderByUsageCountDesc();
|
||||
Long countKnowledgeSetTags();
|
||||
|
||||
int insert(Tag tag);
|
||||
int update(Tag tag);
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.datamate.datamanagement.infrastructure.persistence.repository;
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.repository.IRepository;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -15,6 +16,8 @@ import java.util.List;
|
||||
public interface DatasetFileRepository extends IRepository<DatasetFile> {
|
||||
Long countByDatasetId(String datasetId);
|
||||
|
||||
Long countNonDerivedByDatasetId(String datasetId);
|
||||
|
||||
Long countCompletedByDatasetId(String datasetId);
|
||||
|
||||
Long sumSizeByDatasetId(String datasetId);
|
||||
@@ -36,4 +39,6 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
|
||||
* @return 源文件ID列表
|
||||
*/
|
||||
List<String> findSourceFileIdsWithDerivedFiles(String datasetId);
|
||||
|
||||
List<DatasetFileCount> countNonDerivedByDatasetIds(List<String> datasetIds);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.datamate.datamanagement.infrastructure.persistence.repository;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.repository.IRepository;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItemDirectory;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeDirectoryQuery;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 知识条目目录仓储接口
|
||||
*/
|
||||
public interface KnowledgeItemDirectoryRepository extends IRepository<KnowledgeItemDirectory> {
|
||||
List<KnowledgeItemDirectory> findByCriteria(KnowledgeDirectoryQuery query);
|
||||
|
||||
KnowledgeItemDirectory findBySetIdAndPath(String setId, String relativePath);
|
||||
|
||||
int removeByRelativePathPrefix(String setId, String relativePath);
|
||||
}
|
||||
@@ -26,4 +26,8 @@ public interface KnowledgeItemRepository extends IRepository<KnowledgeItem> {
|
||||
IPage<KnowledgeItemSearchResponse> searchFileItems(IPage<?> page, String keyword);
|
||||
|
||||
Long sumDatasetFileSize();
|
||||
|
||||
boolean existsBySetIdAndRelativePath(String setId, String relativePath);
|
||||
|
||||
int removeByRelativePathPrefix(String setId, String relativePath);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.datamate.datamanagement.infrastructure.persistence.repository.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 数据集文件数统计结果
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class DatasetFileCount {
|
||||
private String datasetId;
|
||||
private Long fileCount;
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import com.baomidou.mybatisplus.extension.repository.CrudRepository;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import org.springframework.util.StringUtils;
|
||||
@@ -30,6 +31,11 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
|
||||
return datasetFileMapper.selectCount(new LambdaQueryWrapper<DatasetFile>().eq(DatasetFile::getDatasetId, datasetId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long countNonDerivedByDatasetId(String datasetId) {
|
||||
return datasetFileMapper.countNonDerivedByDatasetId(datasetId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long countCompletedByDatasetId(String datasetId) {
|
||||
return datasetFileMapper.countCompletedByDatasetId(datasetId);
|
||||
@@ -71,4 +77,9 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
|
||||
// 使用 MyBatis 的 @Select 注解或直接调用 mapper 方法
|
||||
return datasetFileMapper.findSourceFileIdsWithDerivedFiles(datasetId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DatasetFileCount> countNonDerivedByDatasetIds(List<String> datasetIds) {
|
||||
return datasetFileMapper.countNonDerivedByDatasetIds(datasetIds);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
package com.datamate.datamanagement.infrastructure.persistence.repository.impl;
|
||||
|
||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItemDirectory;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.mapper.KnowledgeItemDirectoryMapper;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.KnowledgeItemDirectoryRepository;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeDirectoryQuery;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 知识条目目录仓储实现类
|
||||
*/
|
||||
@Repository
|
||||
@RequiredArgsConstructor
|
||||
public class KnowledgeItemDirectoryRepositoryImpl
|
||||
extends CrudRepository<KnowledgeItemDirectoryMapper, KnowledgeItemDirectory>
|
||||
implements KnowledgeItemDirectoryRepository {
|
||||
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private final KnowledgeItemDirectoryMapper knowledgeItemDirectoryMapper;
|
||||
|
||||
@Override
|
||||
public List<KnowledgeItemDirectory> findByCriteria(KnowledgeDirectoryQuery query) {
|
||||
String relativePath = normalizeRelativePathPrefix(query.getRelativePath());
|
||||
LambdaQueryWrapper<KnowledgeItemDirectory> wrapper = new LambdaQueryWrapper<KnowledgeItemDirectory>()
|
||||
.eq(StringUtils.isNotBlank(query.getSetId()), KnowledgeItemDirectory::getSetId, query.getSetId())
|
||||
.likeRight(StringUtils.isNotBlank(relativePath), KnowledgeItemDirectory::getRelativePath, relativePath);
|
||||
|
||||
if (StringUtils.isNotBlank(query.getKeyword())) {
|
||||
wrapper.and(w -> w.like(KnowledgeItemDirectory::getName, query.getKeyword())
|
||||
.or()
|
||||
.like(KnowledgeItemDirectory::getRelativePath, query.getKeyword()));
|
||||
}
|
||||
|
||||
wrapper.orderByAsc(KnowledgeItemDirectory::getRelativePath);
|
||||
return knowledgeItemDirectoryMapper.selectList(wrapper);
|
||||
}
|
||||
|
||||
@Override
|
||||
public KnowledgeItemDirectory findBySetIdAndPath(String setId, String relativePath) {
|
||||
return knowledgeItemDirectoryMapper.selectOne(new LambdaQueryWrapper<KnowledgeItemDirectory>()
|
||||
.eq(KnowledgeItemDirectory::getSetId, setId)
|
||||
.eq(KnowledgeItemDirectory::getRelativePath, relativePath));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int removeByRelativePathPrefix(String setId, String relativePath) {
|
||||
String normalized = normalizeRelativePathValue(relativePath);
|
||||
if (StringUtils.isBlank(normalized)) {
|
||||
return 0;
|
||||
}
|
||||
String prefix = normalizeRelativePathPrefix(normalized);
|
||||
LambdaQueryWrapper<KnowledgeItemDirectory> wrapper = new LambdaQueryWrapper<KnowledgeItemDirectory>()
|
||||
.eq(KnowledgeItemDirectory::getSetId, setId)
|
||||
.and(w -> w.eq(KnowledgeItemDirectory::getRelativePath, normalized)
|
||||
.or()
|
||||
.likeRight(KnowledgeItemDirectory::getRelativePath, prefix));
|
||||
return knowledgeItemDirectoryMapper.delete(wrapper);
|
||||
}
|
||||
|
||||
private String normalizeRelativePathPrefix(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
if (StringUtils.isBlank(normalized)) {
|
||||
return "";
|
||||
}
|
||||
if (!normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized + PATH_SEPARATOR;
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
private String normalizeRelativePathValue(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
}
|
||||
@@ -21,21 +21,26 @@ import java.util.List;
|
||||
@Repository
|
||||
@RequiredArgsConstructor
|
||||
public class KnowledgeItemRepositoryImpl extends CrudRepository<KnowledgeItemMapper, KnowledgeItem> implements KnowledgeItemRepository {
|
||||
private static final String PATH_SEPARATOR = "/";
|
||||
private final KnowledgeItemMapper knowledgeItemMapper;
|
||||
|
||||
@Override
|
||||
public IPage<KnowledgeItem> findByCriteria(IPage<KnowledgeItem> page, KnowledgeItemPagingQuery query) {
|
||||
String relativePath = normalizeRelativePathPrefix(query.getRelativePath());
|
||||
LambdaQueryWrapper<KnowledgeItem> wrapper = new LambdaQueryWrapper<KnowledgeItem>()
|
||||
.eq(StringUtils.isNotBlank(query.getSetId()), KnowledgeItem::getSetId, query.getSetId())
|
||||
.eq(query.getContentType() != null, KnowledgeItem::getContentType, query.getContentType())
|
||||
.eq(query.getSourceType() != null, KnowledgeItem::getSourceType, query.getSourceType())
|
||||
.eq(StringUtils.isNotBlank(query.getSourceDatasetId()), KnowledgeItem::getSourceDatasetId, query.getSourceDatasetId())
|
||||
.eq(StringUtils.isNotBlank(query.getSourceFileId()), KnowledgeItem::getSourceFileId, query.getSourceFileId());
|
||||
.eq(StringUtils.isNotBlank(query.getSourceFileId()), KnowledgeItem::getSourceFileId, query.getSourceFileId())
|
||||
.likeRight(StringUtils.isNotBlank(relativePath), KnowledgeItem::getRelativePath, relativePath);
|
||||
|
||||
if (StringUtils.isNotBlank(query.getKeyword())) {
|
||||
wrapper.and(w -> w.like(KnowledgeItem::getSourceFileId, query.getKeyword())
|
||||
.or()
|
||||
.like(KnowledgeItem::getContent, query.getKeyword()));
|
||||
.like(KnowledgeItem::getContent, query.getKeyword())
|
||||
.or()
|
||||
.like(KnowledgeItem::getRelativePath, query.getKeyword()));
|
||||
}
|
||||
|
||||
wrapper.orderByDesc(KnowledgeItem::getCreatedAt);
|
||||
@@ -77,4 +82,60 @@ public class KnowledgeItemRepositoryImpl extends CrudRepository<KnowledgeItemMap
|
||||
public Long sumDatasetFileSize() {
|
||||
return knowledgeItemMapper.sumDatasetFileSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean existsBySetIdAndRelativePath(String setId, String relativePath) {
|
||||
if (StringUtils.isBlank(setId) || StringUtils.isBlank(relativePath)) {
|
||||
return false;
|
||||
}
|
||||
return knowledgeItemMapper.selectCount(new LambdaQueryWrapper<KnowledgeItem>()
|
||||
.eq(KnowledgeItem::getSetId, setId)
|
||||
.eq(KnowledgeItem::getRelativePath, relativePath)) > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int removeByRelativePathPrefix(String setId, String relativePath) {
|
||||
String normalized = normalizeRelativePathValue(relativePath);
|
||||
if (StringUtils.isBlank(setId) || StringUtils.isBlank(normalized)) {
|
||||
return 0;
|
||||
}
|
||||
String prefix = normalizeRelativePathPrefix(normalized);
|
||||
LambdaQueryWrapper<KnowledgeItem> wrapper = new LambdaQueryWrapper<KnowledgeItem>()
|
||||
.eq(KnowledgeItem::getSetId, setId)
|
||||
.and(w -> w.eq(KnowledgeItem::getRelativePath, normalized)
|
||||
.or()
|
||||
.likeRight(KnowledgeItem::getRelativePath, prefix));
|
||||
return knowledgeItemMapper.delete(wrapper);
|
||||
}
|
||||
|
||||
private String normalizeRelativePathPrefix(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
if (StringUtils.isBlank(normalized)) {
|
||||
return "";
|
||||
}
|
||||
if (!normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized + PATH_SEPARATOR;
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
private String normalizeRelativePathValue(String relativePath) {
|
||||
if (StringUtils.isBlank(relativePath)) {
|
||||
return "";
|
||||
}
|
||||
String normalized = relativePath.replace("\\", PATH_SEPARATOR).trim();
|
||||
while (normalized.startsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(1);
|
||||
}
|
||||
while (normalized.endsWith(PATH_SEPARATOR)) {
|
||||
normalized = normalized.substring(0, normalized.length() - 1);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
package com.datamate.datamanagement.interfaces.converter;
|
||||
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItem;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItemDirectory;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeSet;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeItemRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeSetRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeDirectoryResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeSetResponse;
|
||||
import org.mapstruct.Mapper;
|
||||
@@ -31,4 +33,8 @@ public interface KnowledgeConverter {
|
||||
KnowledgeItemResponse convertToResponse(KnowledgeItem knowledgeItem);
|
||||
|
||||
List<KnowledgeItemResponse> convertItemResponses(List<KnowledgeItem> items);
|
||||
|
||||
KnowledgeDirectoryResponse convertToResponse(KnowledgeItemDirectory directory);
|
||||
|
||||
List<KnowledgeDirectoryResponse> convertDirectoryResponses(List<KnowledgeItemDirectory> directories);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 创建知识条目目录请求
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class CreateKnowledgeDirectoryRequest {
|
||||
|
||||
/** 父级前缀路径,例如 "docs/",为空表示知识集根目录 */
|
||||
private String parentPrefix;
|
||||
|
||||
/** 新建目录名称 */
|
||||
@NotBlank
|
||||
private String directoryName;
|
||||
}
|
||||
@@ -34,4 +34,8 @@ public class CreateKnowledgeItemRequest {
|
||||
* 来源文件ID(用于标注同步等场景)
|
||||
*/
|
||||
private String sourceFileId;
|
||||
/**
|
||||
* 扩展元数据
|
||||
*/
|
||||
private String metadata;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 数据集文件预览状态响应
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class DatasetFilePreviewStatusResponse {
|
||||
private KnowledgeItemPreviewStatus status;
|
||||
private String previewError;
|
||||
private String updatedAt;
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import jakarta.validation.constraints.NotEmpty;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 批量删除知识条目请求
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class DeleteKnowledgeItemsRequest {
|
||||
/**
|
||||
* 知识条目ID列表
|
||||
*/
|
||||
@NotEmpty(message = "知识条目ID不能为空")
|
||||
private List<String> ids;
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 知识条目目录查询参数
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class KnowledgeDirectoryQuery {
|
||||
/** 所属知识集ID */
|
||||
private String setId;
|
||||
|
||||
/** 目录相对路径前缀 */
|
||||
private String relativePath;
|
||||
|
||||
/** 搜索关键字 */
|
||||
private String keyword;
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* 知识条目目录响应
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class KnowledgeDirectoryResponse {
|
||||
private String id;
|
||||
private String setId;
|
||||
private String name;
|
||||
private String relativePath;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
}
|
||||
@@ -41,4 +41,8 @@ public class KnowledgeItemPagingQuery extends PagingQuery {
|
||||
* 来源文件ID
|
||||
*/
|
||||
private String sourceFileId;
|
||||
/**
|
||||
* 相对路径前缀
|
||||
*/
|
||||
private String relativePath;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 知识条目预览状态响应
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class KnowledgeItemPreviewStatusResponse {
|
||||
private KnowledgeItemPreviewStatus status;
|
||||
private String previewError;
|
||||
private String updatedAt;
|
||||
}
|
||||
@@ -20,6 +20,14 @@ public class KnowledgeItemResponse {
|
||||
private KnowledgeSourceType sourceType;
|
||||
private String sourceDatasetId;
|
||||
private String sourceFileId;
|
||||
/**
|
||||
* 相对路径(用于目录展示)
|
||||
*/
|
||||
private String relativePath;
|
||||
/**
|
||||
* 扩展元数据
|
||||
*/
|
||||
private String metadata;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
private String createdBy;
|
||||
|
||||
@@ -23,6 +23,10 @@ public class KnowledgeItemSearchResponse {
|
||||
private String sourceFileId;
|
||||
private String fileName;
|
||||
private Long fileSize;
|
||||
/**
|
||||
* 相对路径(用于目录展示)
|
||||
*/
|
||||
private String relativePath;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
|
||||
|
||||
@@ -12,4 +12,5 @@ public class KnowledgeManagementStatisticsResponse {
|
||||
private Long totalKnowledgeSets = 0L;
|
||||
private Long totalFiles = 0L;
|
||||
private Long totalSize = 0L;
|
||||
private Long totalTags = 0L;
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package com.datamate.datamanagement.interfaces.dto;
|
||||
|
||||
import com.datamate.datamanagement.common.enums.DatasetStatusType;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@@ -24,9 +26,18 @@ public class UpdateDatasetRequest {
|
||||
/** 归集任务id */
|
||||
private String dataSource;
|
||||
/** 父数据集ID */
|
||||
@Setter(AccessLevel.NONE)
|
||||
private String parentDatasetId;
|
||||
@JsonIgnore
|
||||
@Setter(AccessLevel.NONE)
|
||||
private boolean parentDatasetIdProvided;
|
||||
/** 标签列表 */
|
||||
private List<String> tags;
|
||||
/** 数据集状态 */
|
||||
private DatasetStatusType status;
|
||||
|
||||
/**
 * Sets the parent dataset ID and records that the setter was invoked.
 * <p>
 * Hand-written replacement for the Lombok setter (the field is annotated with
 * {@code @Setter(AccessLevel.NONE)}): every call flips {@code parentDatasetIdProvided}
 * to {@code true}, including a call with {@code null}. Presumably this lets the
 * update logic distinguish "field omitted" from "field explicitly set to null";
 * confirm against the consumer of {@code parentDatasetIdProvided}.
 *
 * @param parentDatasetId new parent dataset ID; may be {@code null}
 */
public void setParentDatasetId(String parentDatasetId) {
|
||||
this.parentDatasetIdProvided = true;
|
||||
this.parentDatasetId = parentDatasetId;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,4 +18,8 @@ public class UpdateKnowledgeItemRequest {
|
||||
* 内容类型
|
||||
*/
|
||||
private KnowledgeContentType contentType;
|
||||
/**
|
||||
* 扩展元数据
|
||||
*/
|
||||
private String metadata;
|
||||
}
|
||||
|
||||
@@ -17,4 +17,8 @@ public class UploadKnowledgeItemsRequest {
|
||||
*/
|
||||
@NotEmpty(message = "文件列表不能为空")
|
||||
private List<MultipartFile> files;
|
||||
/**
|
||||
* 目录前缀(用于目录上传)
|
||||
*/
|
||||
private String parentPrefix;
|
||||
}
|
||||
|
||||
@@ -6,11 +6,13 @@ import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.datamate.common.interfaces.PagedResponse;
|
||||
import com.datamate.common.interfaces.PagingQuery;
|
||||
import com.datamate.datamanagement.application.DatasetFileApplicationService;
|
||||
import com.datamate.datamanagement.application.DatasetFilePreviewService;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.DatasetFilePreviewStatusResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
|
||||
@@ -19,6 +21,7 @@ import jakarta.validation.Valid;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.UrlResource;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
@@ -37,10 +40,13 @@ import java.util.List;
|
||||
public class DatasetFileController {
|
||||
|
||||
private final DatasetFileApplicationService datasetFileApplicationService;
|
||||
private final DatasetFilePreviewService datasetFilePreviewService;
|
||||
|
||||
@Autowired
|
||||
public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) {
|
||||
public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService,
|
||||
DatasetFilePreviewService datasetFilePreviewService) {
|
||||
this.datasetFileApplicationService = datasetFileApplicationService;
|
||||
this.datasetFilePreviewService = datasetFilePreviewService;
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
@@ -120,6 +126,19 @@ public class DatasetFileController {
|
||||
@PathVariable("fileId") String fileId) {
|
||||
try {
|
||||
DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
|
||||
if (datasetFilePreviewService.isOfficeDocument(datasetFile.getFileName())) {
|
||||
DatasetFilePreviewService.PreviewFile previewFile = datasetFilePreviewService
|
||||
.resolveReadyPreviewFile(datasetId, datasetFile);
|
||||
if (previewFile == null) {
|
||||
return ResponseEntity.status(HttpStatus.CONFLICT).build();
|
||||
}
|
||||
Resource previewResource = new UrlResource(previewFile.filePath().toUri());
|
||||
return ResponseEntity.ok()
|
||||
.contentType(MediaType.APPLICATION_PDF)
|
||||
.header(HttpHeaders.CONTENT_DISPOSITION,
|
||||
"inline; filename=\"" + previewFile.fileName() + "\"")
|
||||
.body(previewResource);
|
||||
}
|
||||
Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
|
||||
MediaType mediaType = MediaTypeFactory.getMediaType(resource)
|
||||
.orElse(MediaType.APPLICATION_OCTET_STREAM);
|
||||
@@ -136,6 +155,18 @@ public class DatasetFileController {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * GET {@code /{fileId}/preview/status}: returns the preview status of a dataset file.
 * Pure delegation to {@code DatasetFilePreviewService#getPreviewStatus}.
 *
 * @param datasetId dataset ID taken from the request path
 * @param fileId    file ID taken from the request path
 * @return the status response produced by the preview service
 */
@GetMapping("/{fileId}/preview/status")
|
||||
public DatasetFilePreviewStatusResponse getDatasetFilePreviewStatus(@PathVariable("datasetId") String datasetId,
|
||||
@PathVariable("fileId") String fileId) {
|
||||
return datasetFilePreviewService.getPreviewStatus(datasetId, fileId);
|
||||
}
|
||||
|
||||
/**
 * POST {@code /{fileId}/preview/convert}: asks the preview service to ensure a preview
 * exists for a dataset file. Pure delegation to {@code DatasetFilePreviewService#ensurePreview};
 * whether the conversion runs synchronously is not visible here. TODO confirm.
 *
 * @param datasetId dataset ID taken from the request path
 * @param fileId    file ID taken from the request path
 * @return the status response produced by the preview service
 */
@PostMapping("/{fileId}/preview/convert")
|
||||
public DatasetFilePreviewStatusResponse convertDatasetFilePreview(@PathVariable("datasetId") String datasetId,
|
||||
@PathVariable("fileId") String fileId) {
|
||||
return datasetFilePreviewService.ensurePreview(datasetId, fileId);
|
||||
}
|
||||
|
||||
@IgnoreResponseWrap
|
||||
@GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)
|
||||
public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) {
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
package com.datamate.datamanagement.interfaces.rest;
|
||||
|
||||
import com.datamate.datamanagement.application.DatasetFileApplicationService;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.PutMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
/**
|
||||
* REST controller for dataset upload operations, mounted at
* {@code /data-management/datasets/upload}.
|
||||
*/
|
||||
@Slf4j
|
||||
@RestController
|
||||
@RequiredArgsConstructor
|
||||
@RequestMapping("/data-management/datasets/upload")
|
||||
public class DatasetUploadController {
|
||||
|
||||
// Injected through the constructor generated by @RequiredArgsConstructor.
private final DatasetFileApplicationService datasetFileApplicationService;
|
||||
|
||||
/**
|
||||
* Cancels an upload by delegating to {@code DatasetFileApplicationService#cancelUpload}.
|
||||
*
|
||||
* @param reqId pre-upload request ID taken from the request path
* @return an empty 200 OK response once the service call returns
|
||||
*/
|
||||
@PutMapping("/cancel-upload/{reqId}")
|
||||
public ResponseEntity<Void> cancelUpload(@PathVariable("reqId") String reqId) {
|
||||
datasetFileApplicationService.cancelUpload(reqId);
|
||||
return ResponseEntity.ok().build();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.datamate.datamanagement.interfaces.rest;
|
||||
|
||||
import com.datamate.datamanagement.application.KnowledgeDirectoryApplicationService;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItemDirectory;
|
||||
import com.datamate.datamanagement.interfaces.converter.KnowledgeConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeDirectoryRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeDirectoryQuery;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeDirectoryResponse;
|
||||
import jakarta.validation.Valid;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* REST controller for knowledge item directories, mounted at
* {@code /data-management/knowledge-sets/{setId}/directories}.
|
||||
*/
|
||||
@RestController
|
||||
@RequiredArgsConstructor
|
||||
@RequestMapping("/data-management/knowledge-sets/{setId}/directories")
|
||||
public class KnowledgeDirectoryController {
|
||||
// Injected through the constructor generated by @RequiredArgsConstructor.
private final KnowledgeDirectoryApplicationService knowledgeDirectoryApplicationService;
|
||||
|
||||
/**
 * GET: lists the directories of a knowledge set.
 *
 * @param setId knowledge set ID taken from the request path
 * @param query filter object bound from the request (no binding annotation is declared on it)
 * @return the directories returned by the application service, converted to response DTOs
 */
@GetMapping
|
||||
public List<KnowledgeDirectoryResponse> getKnowledgeDirectories(@PathVariable("setId") String setId,
|
||||
KnowledgeDirectoryQuery query) {
|
||||
List<KnowledgeItemDirectory> directories = knowledgeDirectoryApplicationService.getKnowledgeDirectories(setId, query);
|
||||
return KnowledgeConverter.INSTANCE.convertDirectoryResponses(directories);
|
||||
}
|
||||
|
||||
/**
 * POST: creates a directory in a knowledge set.
 *
 * @param setId   knowledge set ID taken from the request path
 * @param request request body, validated via {@code @Valid}
 * @return the created directory converted to a response DTO
 */
@PostMapping
|
||||
public KnowledgeDirectoryResponse createKnowledgeDirectory(@PathVariable("setId") String setId,
|
||||
@RequestBody @Valid CreateKnowledgeDirectoryRequest request) {
|
||||
KnowledgeItemDirectory directory = knowledgeDirectoryApplicationService.createKnowledgeDirectory(setId, request);
|
||||
return KnowledgeConverter.INSTANCE.convertToResponse(directory);
|
||||
}
|
||||
|
||||
/**
 * DELETE: removes the directory identified by its relative path.
 *
 * @param setId        knowledge set ID taken from the request path
 * @param relativePath value of the required {@code relativePath} request parameter
 */
@DeleteMapping
|
||||
public void deleteKnowledgeDirectory(@PathVariable("setId") String setId,
|
||||
@RequestParam("relativePath") String relativePath) {
|
||||
knowledgeDirectoryApplicationService.deleteKnowledgeDirectory(setId, relativePath);
|
||||
}
|
||||
}
|
||||
@@ -3,11 +3,14 @@ package com.datamate.datamanagement.interfaces.rest;
|
||||
import com.datamate.common.infrastructure.common.IgnoreResponseWrap;
|
||||
import com.datamate.common.interfaces.PagedResponse;
|
||||
import com.datamate.datamanagement.application.KnowledgeItemApplicationService;
|
||||
import com.datamate.datamanagement.application.KnowledgeItemPreviewService;
|
||||
import com.datamate.datamanagement.domain.model.knowledge.KnowledgeItem;
|
||||
import com.datamate.datamanagement.interfaces.converter.KnowledgeConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeItemRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.DeleteKnowledgeItemsRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.ImportKnowledgeItemsRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemPagingQuery;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemPreviewStatusResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.KnowledgeItemResponse;
|
||||
import com.datamate.datamanagement.interfaces.dto.ReplaceKnowledgeItemFileRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.UpdateKnowledgeItemRequest;
|
||||
@@ -30,6 +33,7 @@ import java.util.List;
|
||||
@RequestMapping("/data-management/knowledge-sets/{setId}/items")
|
||||
public class KnowledgeItemController {
|
||||
private final KnowledgeItemApplicationService knowledgeItemApplicationService;
|
||||
private final KnowledgeItemPreviewService knowledgeItemPreviewService;
|
||||
|
||||
@GetMapping
|
||||
public PagedResponse<KnowledgeItemResponse> getKnowledgeItems(@PathVariable("setId") String setId,
|
||||
@@ -80,6 +84,18 @@ public class KnowledgeItemController {
|
||||
knowledgeItemApplicationService.previewKnowledgeItemFile(setId, itemId, response);
|
||||
}
|
||||
|
||||
/**
 * GET {@code /{itemId}/preview/status}: returns the preview status of a knowledge item.
 * Pure delegation to {@code KnowledgeItemPreviewService#getPreviewStatus}.
 *
 * @param setId  knowledge set ID taken from the request path
 * @param itemId knowledge item ID taken from the request path
 * @return the status response produced by the preview service
 */
@GetMapping("/{itemId}/preview/status")
|
||||
public KnowledgeItemPreviewStatusResponse getKnowledgeItemPreviewStatus(@PathVariable("setId") String setId,
|
||||
@PathVariable("itemId") String itemId) {
|
||||
return knowledgeItemPreviewService.getPreviewStatus(setId, itemId);
|
||||
}
|
||||
|
||||
/**
 * POST {@code /{itemId}/preview/convert}: asks the preview service to ensure a preview
 * exists for a knowledge item. Pure delegation to {@code KnowledgeItemPreviewService#ensurePreview};
 * whether the conversion runs synchronously is not visible here. TODO confirm.
 *
 * @param setId  knowledge set ID taken from the request path
 * @param itemId knowledge item ID taken from the request path
 * @return the status response produced by the preview service
 */
@PostMapping("/{itemId}/preview/convert")
|
||||
public KnowledgeItemPreviewStatusResponse convertKnowledgeItemPreview(@PathVariable("setId") String setId,
|
||||
@PathVariable("itemId") String itemId) {
|
||||
return knowledgeItemPreviewService.ensurePreview(setId, itemId);
|
||||
}
|
||||
|
||||
@GetMapping("/{itemId}")
|
||||
public KnowledgeItemResponse getKnowledgeItemById(@PathVariable("setId") String setId,
|
||||
@PathVariable("itemId") String itemId) {
|
||||
@@ -108,4 +124,10 @@ public class KnowledgeItemController {
|
||||
@PathVariable("itemId") String itemId) {
|
||||
knowledgeItemApplicationService.deleteKnowledgeItem(setId, itemId);
|
||||
}
|
||||
|
||||
/**
 * POST {@code /batch-delete}: deletes several knowledge items of one knowledge set.
 * Pure delegation to {@code KnowledgeItemApplicationService#deleteKnowledgeItems}.
 *
 * @param setId   knowledge set ID taken from the request path
 * @param request request body describing the items to delete, validated via {@code @Valid}
 */
@PostMapping("/batch-delete")
|
||||
public void deleteKnowledgeItems(@PathVariable("setId") String setId,
|
||||
@RequestBody @Valid DeleteKnowledgeItemsRequest request) {
|
||||
knowledgeItemApplicationService.deleteKnowledgeItems(setId, request);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,13 @@
|
||||
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
|
||||
</select>
|
||||
|
||||
<!-- Counts the rows of t_dm_dataset_files for one dataset whose metadata is NULL or for which
     JSON_EXTRACT(metadata, '$.derived_from_file_id') yields NULL, i.e. files not marked as derived. -->
<select id="countNonDerivedByDatasetId" parameterType="string" resultType="long">
|
||||
SELECT COUNT(*)
|
||||
FROM t_dm_dataset_files
|
||||
WHERE dataset_id = #{datasetId}
|
||||
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
|
||||
</select>
|
||||
|
||||
<select id="countCompletedByDatasetId" parameterType="string" resultType="long">
|
||||
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId} AND status = 'COMPLETED'
|
||||
</select>
|
||||
@@ -110,4 +117,16 @@
|
||||
AND metadata IS NOT NULL
|
||||
AND JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NOT NULL
|
||||
</select>
|
||||
|
||||
<!-- Batch variant of the non-derived file count: returns one (datasetId, fileCount) row per dataset in
     datasetIds that has at least one matching file. Datasets with no matching rows produce no row at all.
     NOTE(review): an empty datasetIds collection does not render a valid IN list; callers presumably
     guard against that. Confirm. -->
<select id="countNonDerivedByDatasetIds" resultType="com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount">
|
||||
SELECT dataset_id AS datasetId,
|
||||
COUNT(*) AS fileCount
|
||||
FROM t_dm_dataset_files
|
||||
WHERE dataset_id IN
|
||||
<foreach collection="datasetIds" item="datasetId" open="(" separator="," close=")">
|
||||
#{datasetId}
|
||||
</foreach>
|
||||
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
|
||||
GROUP BY dataset_id
|
||||
</select>
|
||||
</mapper>
|
||||
|
||||
@@ -145,9 +145,10 @@
|
||||
|
||||
<select id="getAllDatasetStatistics" resultType="com.datamate.datamanagement.interfaces.dto.AllDatasetStatisticsResponse">
|
||||
SELECT
|
||||
COUNT(*) AS total_datasets,
|
||||
SUM(size_bytes) AS total_size,
|
||||
SUM(file_count) AS total_files
|
||||
FROM t_dm_datasets;
|
||||
(SELECT COUNT(*) FROM t_dm_datasets) AS total_datasets,
|
||||
(SELECT COALESCE(SUM(size_bytes), 0) FROM t_dm_datasets) AS total_size,
|
||||
(SELECT COUNT(*)
|
||||
FROM t_dm_dataset_files
|
||||
WHERE metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL) AS total_files
|
||||
</select>
|
||||
</mapper>
|
||||
|
||||
@@ -53,6 +53,19 @@
|
||||
ORDER BY usage_count DESC, name ASC
|
||||
</select>
|
||||
|
||||
<!-- Counts the distinct tags in t_dm_tags whose name is found under '$[*].name' in the tags JSON of at
     least one knowledge set. Knowledge sets whose tags column is NULL, not valid JSON, or empty are ignored.
     NOTE(review): assuming MySQL semantics, JSON_SEARCH treats '%' and '_' in the search string as
     LIKE-style wildcards, so a tag name containing them could match other names. Confirm whether
     such names can occur. -->
<select id="countKnowledgeSetTags" resultType="long">
|
||||
SELECT COUNT(DISTINCT t.id)
|
||||
FROM t_dm_tags t
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM t_dm_knowledge_sets ks
|
||||
WHERE ks.tags IS NOT NULL
|
||||
AND JSON_VALID(ks.tags) = 1
|
||||
AND JSON_LENGTH(ks.tags) > 0
|
||||
AND JSON_SEARCH(ks.tags, 'one', t.name, NULL, '$[*].name') IS NOT NULL
|
||||
)
|
||||
</select>
|
||||
|
||||
<insert id="insert" parameterType="com.datamate.datamanagement.domain.model.dataset.Tag">
|
||||
INSERT INTO t_dm_tags (id, name, description, category, color, usage_count)
|
||||
VALUES (#{id}, #{name}, #{description}, #{category}, #{color}, #{usageCount})
|
||||
|
||||
@@ -21,7 +21,7 @@ import java.util.UUID;
|
||||
*/
|
||||
@Component
|
||||
public class FileService {
|
||||
private static final int DEFAULT_TIMEOUT = 120;
|
||||
private static final int DEFAULT_TIMEOUT = 1800;
|
||||
|
||||
private final ChunkUploadRequestMapper chunkUploadRequestMapper;
|
||||
|
||||
@@ -74,6 +74,26 @@ public class FileService {
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancels a chunked upload identified by its pre-upload request ID.
* <p>
* Looks up the pre-upload record; when it exists, deletes the request's temporary
* folder under the recorded upload path (only if that path is non-blank) and then
* removes the record. An ID with no matching record is a silent no-op.
*
* @param reqId pre-upload request ID; must not be null or blank
* @throws BusinessException built from {@code CommonErrorCode.PARAM_ERROR} when {@code reqId} is null or blank
|
||||
*/
|
||||
@Transactional
|
||||
public void cancelUpload(String reqId) {
|
||||
if (reqId == null || reqId.isBlank()) {
|
||||
throw BusinessException.of(CommonErrorCode.PARAM_ERROR);
|
||||
}
|
||||
ChunkUploadPreRequest preRequest = chunkUploadRequestMapper.findById(reqId);
|
||||
if (preRequest == null) {
|
||||
// No record for this ID: nothing to clean up.
return;
|
||||
}
|
||||
String uploadPath = preRequest.getUploadPath();
|
||||
if (uploadPath != null && !uploadPath.isBlank()) {
|
||||
// The temp folder name is derived from the request ID via ChunksSaver.TEMP_DIR_NAME_FORMAT.
File tempDir = new File(uploadPath, String.format(ChunksSaver.TEMP_DIR_NAME_FORMAT, preRequest.getId()));
|
||||
// NOTE(review): this filesystem deletion is not undone if the surrounding transaction rolls back.
ChunksSaver.deleteFolder(tempDir.getPath());
|
||||
}
|
||||
chunkUploadRequestMapper.deleteById(reqId);
|
||||
}
|
||||
|
||||
private File uploadFile(ChunkUploadRequest fileUploadRequest, ChunkUploadPreRequest preRequest) {
|
||||
File savedFile = ChunksSaver.saveFile(fileUploadRequest, preRequest);
|
||||
preRequest.setTimeout(LocalDateTime.now().plusSeconds(DEFAULT_TIMEOUT));
|
||||
|
||||
@@ -5,7 +5,7 @@ server {
|
||||
access_log /var/log/datamate/frontend/access.log main;
|
||||
error_log /var/log/datamate/frontend/error.log notice;
|
||||
|
||||
client_max_body_size 1024M;
|
||||
client_max_body_size 0;
|
||||
|
||||
add_header Set-Cookie "NEXT_LOCALE=zh";
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ services:
|
||||
- log_volume:/var/log/datamate
|
||||
- operator-upload-volume:/operators/upload
|
||||
- operator-runtime-volume:/operators/extract
|
||||
- uploads_volume:/uploads
|
||||
networks: [ datamate ]
|
||||
depends_on:
|
||||
- datamate-database
|
||||
@@ -154,6 +155,8 @@ services:
|
||||
profiles: [ data-juicer ]
|
||||
|
||||
volumes:
|
||||
uploads_volume:
|
||||
name: datamate-uploads-volume
|
||||
dataset_volume:
|
||||
name: datamate-dataset-volume
|
||||
flow_volume:
|
||||
|
||||
@@ -169,6 +169,33 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tells whether a value looks like a serializable annotation.
 *
 * @param {*} value candidate value
 * @returns {boolean} true when value is a truthy object (typeof "object") that exposes a
 *   `serializeAnnotation` or a `serialize` function; false otherwise
 */
function isAnnotationObject(value) {
|
||||
if (!value || typeof value !== "object") return false;
|
||||
return typeof value.serializeAnnotation === "function" || typeof value.serialize === "function";
|
||||
}
|
||||
|
||||
/**
 * Picks the annotation object to export from an annotation store.
 *
 * Candidates are tried in this order, and the first one accepted by isAnnotationObject wins:
 *   1. store.selectedAnnotation
 *   2. store.selected (when it is itself an annotation object)
 *   3. the entry of store.annotations whose id equals store.selected (compared as strings)
 *   4. the first entry of store.annotations
 *
 * @param {object|null|undefined} store annotation store
 * @returns {object|null} the resolved annotation, or null when store is falsy or nothing qualifies
 */
function resolveSelectedAnnotation(store) {
|
||||
if (!store) return null;
|
||||
// Treat a missing or non-array `annotations` as an empty list.
const annotations = Array.isArray(store.annotations) ? store.annotations : [];
|
||||
if (isAnnotationObject(store.selectedAnnotation)) {
|
||||
return store.selectedAnnotation;
|
||||
}
|
||||
if (isAnnotationObject(store.selected)) {
|
||||
return store.selected;
|
||||
}
|
||||
// At this point store.selected is not an annotation object; try it as an id instead.
const selectedId = store.selected;
|
||||
if (selectedId !== undefined && selectedId !== null && annotations.length) {
|
||||
// String() on both sides so numeric and string ids compare equal.
const matched = annotations.find((ann) => ann && String(ann.id) === String(selectedId));
|
||||
if (isAnnotationObject(matched)) {
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
// Last resort: fall back to the first annotation in the list.
if (annotations.length && isAnnotationObject(annotations[0])) {
|
||||
return annotations[0];
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function exportSelectedAnnotation() {
|
||||
if (!lsInstance) {
|
||||
throw new Error("LabelStudio 未初始化");
|
||||
@@ -179,10 +206,10 @@
|
||||
throw new Error("无法访问 annotationStore");
|
||||
}
|
||||
|
||||
const selected =
|
||||
store.selected ||
|
||||
store.selectedAnnotation ||
|
||||
(Array.isArray(store.annotations) && store.annotations.length ? store.annotations[0] : null);
|
||||
const selected = resolveSelectedAnnotation(store);
|
||||
if (!selected) {
|
||||
throw new Error("未找到可导出的标注对象");
|
||||
}
|
||||
|
||||
let serialized = null;
|
||||
if (selected && typeof selected.serializeAnnotation === "function") {
|
||||
@@ -197,6 +224,10 @@
|
||||
? { id: selected?.id || serialized.id || "draft", ...serialized }
|
||||
: { id: selected?.id || "draft", result: (selected && selected.result) || [] };
|
||||
|
||||
if (!Array.isArray(annotationPayload.result) && Array.isArray(annotationPayload.results)) {
|
||||
annotationPayload.result = annotationPayload.results;
|
||||
}
|
||||
|
||||
// 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储)
|
||||
const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null;
|
||||
const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null;
|
||||
@@ -237,6 +268,17 @@
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a keyboard event is the plain save shortcut (Ctrl+S or Meta+S).
 *
 * @param {KeyboardEvent} event keyboard event to inspect
 * @returns {boolean} true only when the key is S (by `key` or by `code`), Ctrl or Meta is held,
 *   neither Shift nor Alt is held, and the event is neither default-prevented nor composing
 */
function isSaveShortcut(event) {
|
||||
// Skip events already handled elsewhere or fired during text composition (isComposing).
if (!event || event.defaultPrevented || event.isComposing) return false;
|
||||
const key = event.key;
|
||||
const code = event.code;
|
||||
// Accept either the produced character or the physical key code.
const isS = key === "s" || key === "S" || code === "KeyS";
|
||||
if (!isS) return false;
|
||||
if (!(event.ctrlKey || event.metaKey)) return false;
|
||||
// Shift/Alt combinations are rejected so they stay available for other shortcuts.
if (event.shiftKey || event.altKey) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
function handleSaveAndNextShortcut(event) {
|
||||
if (!isSaveAndNextShortcut(event) || event.repeat) return;
|
||||
event.preventDefault();
|
||||
@@ -249,6 +291,18 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Keydown handler for the save shortcut.
 *
 * Ignores events that are not the save shortcut or that are auto-repeats. Otherwise it
 * prevents the default action, stops propagation, exports the selected annotation and
 * sends it through postToParent as "LS_EXPORT_RESULT". Any exception raised while
 * exporting or posting is reported through postToParent as "LS_ERROR" with its message.
 *
 * @param {KeyboardEvent} event keydown event
 */
function handleSaveShortcut(event) {
|
||||
if (!isSaveShortcut(event) || event.repeat) return;
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
try {
|
||||
const raw = exportSelectedAnnotation();
|
||||
postToParent("LS_EXPORT_RESULT", raw);
|
||||
} catch (e) {
|
||||
// Fall back to String(e) when the thrown value has no message.
postToParent("LS_ERROR", { message: e?.message || String(e) });
|
||||
}
|
||||
}
|
||||
|
||||
function initLabelStudio(payload) {
|
||||
if (!window.LabelStudio) {
|
||||
throw new Error("LabelStudio 未加载(请检查静态资源/网络)");
|
||||
@@ -320,6 +374,7 @@
|
||||
}
|
||||
|
||||
window.addEventListener("keydown", handleSaveAndNextShortcut);
|
||||
window.addEventListener("keydown", handleSaveShortcut);
|
||||
|
||||
window.addEventListener("message", (event) => {
|
||||
if (event.origin !== ORIGIN) return;
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
import { Button, Input, Popover, theme, Tag, Empty } from "antd";
|
||||
import { PlusOutlined } from "@ant-design/icons";
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
|
||||
interface Tag {
|
||||
id: number;
|
||||
id?: string | number;
|
||||
name: string;
|
||||
color: string;
|
||||
color?: string;
|
||||
}
|
||||
|
||||
interface AddTagPopoverProps {
|
||||
tags: Tag[];
|
||||
onFetchTags?: () => Promise<Tag[]>;
|
||||
onAddTag?: (tag: Tag) => void;
|
||||
onAddTag?: (tagName: string) => void;
|
||||
onCreateAndTag?: (tagName: string) => void;
|
||||
}
|
||||
|
||||
@@ -27,20 +27,23 @@ export default function AddTagPopover({
|
||||
const [newTag, setNewTag] = useState("");
|
||||
const [allTags, setAllTags] = useState<Tag[]>([]);
|
||||
|
||||
const tagsSet = useMemo(() => new Set(tags.map((tag) => tag.id)), [tags]);
|
||||
const tagsSet = useMemo(
|
||||
() => new Set(tags.map((tag) => (tag.id ?? tag.name))),
|
||||
[tags]
|
||||
);
|
||||
|
||||
const fetchTags = async () => {
|
||||
const fetchTags = useCallback(async () => {
|
||||
if (onFetchTags && showPopover) {
|
||||
const data = await onFetchTags?.();
|
||||
setAllTags(data || []);
|
||||
}
|
||||
};
|
||||
}, [onFetchTags, showPopover]);
|
||||
useEffect(() => {
|
||||
fetchTags();
|
||||
}, [showPopover]);
|
||||
}, [fetchTags]);
|
||||
|
||||
const availableTags = useMemo(() => {
|
||||
return allTags.filter((tag) => !tagsSet.has(tag.id));
|
||||
return allTags.filter((tag) => !tagsSet.has(tag.id ?? tag.name));
|
||||
}, [allTags, tagsSet]);
|
||||
|
||||
const handleCreateAndAddTag = () => {
|
||||
|
||||
@@ -24,21 +24,28 @@ interface OperationItem {
|
||||
|
||||
interface TagConfig {
|
||||
showAdd: boolean;
|
||||
tags: { id: number; name: string; color: string }[];
|
||||
onFetchTags?: () => Promise<{
|
||||
data: { id: number; name: string; color: string }[];
|
||||
}>;
|
||||
onAddTag?: (tag: { id: number; name: string; color: string }) => void;
|
||||
tags: { id?: string | number; name: string; color?: string }[];
|
||||
onFetchTags?: () => Promise<{ id?: string | number; name: string; color?: string }[]>;
|
||||
onAddTag?: (tagName: string) => void;
|
||||
onCreateAndTag?: (tagName: string) => void;
|
||||
}
|
||||
interface DetailHeaderProps<T> {
|
||||
interface DetailHeaderData {
|
||||
name?: string;
|
||||
description?: string;
|
||||
status?: { color?: string; icon?: React.ReactNode; label?: string };
|
||||
tags?: { id?: string | number; name?: string }[];
|
||||
icon?: React.ReactNode;
|
||||
iconColor?: string;
|
||||
}
|
||||
|
||||
interface DetailHeaderProps<T extends DetailHeaderData> {
|
||||
data: T;
|
||||
statistics: StatisticItem[];
|
||||
operations: OperationItem[];
|
||||
tagConfig?: TagConfig;
|
||||
}
|
||||
|
||||
function DetailHeader<T>({
|
||||
function DetailHeader<T extends DetailHeaderData>({
|
||||
data = {} as T,
|
||||
statistics,
|
||||
operations,
|
||||
@@ -50,13 +57,13 @@ function DetailHeader<T>({
|
||||
<div className="flex items-start gap-4 flex-1">
|
||||
<div
|
||||
className={`w-16 h-16 text-white rounded-lg flex-center shadow-lg ${
|
||||
(data as any)?.iconColor
|
||||
data?.iconColor
|
||||
? ""
|
||||
: "bg-gradient-to-br from-sky-300 to-blue-500 text-white"
|
||||
}`}
|
||||
style={(data as any)?.iconColor ? { backgroundColor: (data as any).iconColor } : undefined}
|
||||
style={data?.iconColor ? { backgroundColor: data.iconColor } : undefined}
|
||||
>
|
||||
{<div className="w-[2.8rem] h-[2.8rem] text-gray-50">{(data as any)?.icon}</div> || (
|
||||
{<div className="w-[2.8rem] h-[2.8rem] text-gray-50">{data?.icon}</div> || (
|
||||
<Database className="w-8 h-8 text-white" />
|
||||
)}
|
||||
</div>
|
||||
|
||||
21
frontend/src/components/ProtectedRoute.tsx
Normal file
21
frontend/src/components/ProtectedRoute.tsx
Normal file
@@ -0,0 +1,21 @@
|
||||
import React from 'react';
|
||||
import { Navigate, useLocation, Outlet } from 'react-router';
|
||||
import { useAppSelector } from '@/store/hooks';
|
||||
|
||||
/** Props accepted by ProtectedRoute. */
interface ProtectedRouteProps {
|
||||
// Optional content to render when the user is authenticated.
children?: React.ReactNode;
|
||||
}
|
||||
|
||||
/**
 * Route guard component.
 *
 * Reads `isAuthenticated` from the `auth` slice of the store. When it is falsy, renders a
 * replacing redirect to "/login" and passes the current location in router state as `from`.
 * Otherwise renders `children` when provided, or the router `<Outlet />` when not.
 */
const ProtectedRoute: React.FC<ProtectedRouteProps> = ({ children }) => {
|
||||
const { isAuthenticated } = useAppSelector((state) => state.auth);
|
||||
const location = useLocation();
|
||||
|
||||
if (!isAuthenticated) {
|
||||
// Redirect to the login page, but save the current location they were trying to go to
|
||||
return <Navigate to="/login" state={{ from: location }} replace />;
|
||||
}
|
||||
|
||||
// Works both as a wrapper around children and as a layout route element.
return children ? <>{children}</> : <Outlet />;
|
||||
};
|
||||
|
||||
export default ProtectedRoute;
|
||||
@@ -1,5 +1,5 @@
|
||||
import { TaskItem } from "@/pages/DataManagement/dataset.model";
|
||||
import { calculateSHA256, checkIsFilesExist } from "@/utils/file.util";
|
||||
import { calculateSHA256, checkIsFilesExist, streamSplitAndUpload, StreamUploadResult } from "@/utils/file.util";
|
||||
import { App } from "antd";
|
||||
import { useRef, useState } from "react";
|
||||
|
||||
@@ -9,17 +9,18 @@ export function useFileSliceUpload(
|
||||
uploadChunk,
|
||||
cancelUpload,
|
||||
}: {
|
||||
preUpload: (id: string, params: any) => Promise<{ data: number }>;
|
||||
uploadChunk: (id: string, formData: FormData, config: any) => Promise<any>;
|
||||
cancelUpload: ((reqId: number) => Promise<any>) | null;
|
||||
preUpload: (id: string, params: Record<string, unknown>) => Promise<{ data: number }>;
|
||||
uploadChunk: (id: string, formData: FormData, config: Record<string, unknown>) => Promise<unknown>;
|
||||
cancelUpload: ((reqId: number) => Promise<unknown>) | null;
|
||||
},
|
||||
showTaskCenter = true // 上传时是否显示任务中心
|
||||
showTaskCenter = true, // 上传时是否显示任务中心
|
||||
enableStreamUpload = true // 是否启用流式分割上传
|
||||
) {
|
||||
const { message } = App.useApp();
|
||||
const [taskList, setTaskList] = useState<TaskItem[]>([]);
|
||||
const taskListRef = useRef<TaskItem[]>([]); // 用于固定任务顺序
|
||||
|
||||
const createTask = (detail: any = {}) => {
|
||||
const createTask = (detail: Record<string, unknown> = {}) => {
|
||||
const { dataset } = detail;
|
||||
const title = `上传数据集: ${dataset.name} `;
|
||||
const controller = new AbortController();
|
||||
@@ -37,6 +38,14 @@ export function useFileSliceUpload(
|
||||
taskListRef.current = [task, ...taskListRef.current];
|
||||
|
||||
setTaskList(taskListRef.current);
|
||||
|
||||
// 立即显示任务中心,让用户感知上传已开始
|
||||
if (showTaskCenter) {
|
||||
window.dispatchEvent(
|
||||
new CustomEvent("show:task-popover", { detail: { show: true } })
|
||||
);
|
||||
}
|
||||
|
||||
return task;
|
||||
};
|
||||
|
||||
@@ -60,7 +69,7 @@ export function useFileSliceUpload(
|
||||
// 携带前缀信息,便于刷新后仍停留在当前目录
|
||||
window.dispatchEvent(
|
||||
new CustomEvent(task.updateEvent, {
|
||||
detail: { prefix: (task as any).prefix },
|
||||
detail: { prefix: task.prefix },
|
||||
})
|
||||
);
|
||||
}
|
||||
@@ -71,7 +80,7 @@ export function useFileSliceUpload(
|
||||
}
|
||||
};
|
||||
|
||||
async function buildFormData({ file, reqId, i, j }) {
|
||||
async function buildFormData({ file, reqId, i, j }: { file: { slices: Blob[]; name: string; size: number }; reqId: number; i: number; j: number }) {
|
||||
const formData = new FormData();
|
||||
const { slices, name, size } = file;
|
||||
const checkSum = await calculateSHA256(slices[j]);
|
||||
@@ -86,12 +95,18 @@ export function useFileSliceUpload(
|
||||
return formData;
|
||||
}
|
||||
|
||||
async function uploadSlice(task: TaskItem, fileInfo) {
|
||||
async function uploadSlice(task: TaskItem, fileInfo: { loaded: number; i: number; j: number; files: { slices: Blob[]; name: string; size: number }[]; totalSize: number }) {
|
||||
if (!task) {
|
||||
return;
|
||||
}
|
||||
const { reqId, key } = task;
|
||||
const { reqId, key, controller } = task;
|
||||
const { loaded, i, j, files, totalSize } = fileInfo;
|
||||
|
||||
// 检查是否已取消
|
||||
if (controller.signal.aborted) {
|
||||
throw new Error("Upload cancelled");
|
||||
}
|
||||
|
||||
const formData = await buildFormData({
|
||||
file: files[i],
|
||||
i,
|
||||
@@ -101,6 +116,7 @@ export function useFileSliceUpload(
|
||||
|
||||
let newTask = { ...task };
|
||||
await uploadChunk(key, formData, {
|
||||
signal: controller.signal,
|
||||
onUploadProgress: (e) => {
|
||||
const loadedSize = loaded + e.loaded;
|
||||
const curPercent = Number((loadedSize / totalSize) * 100).toFixed(2);
|
||||
@@ -116,7 +132,7 @@ export function useFileSliceUpload(
|
||||
});
|
||||
}
|
||||
|
||||
async function uploadFile({ task, files, totalSize }) {
|
||||
async function uploadFile({ task, files, totalSize }: { task: TaskItem; files: { slices: Blob[]; name: string; size: number; originFile: Blob }[]; totalSize: number }) {
|
||||
console.log('[useSliceUpload] Calling preUpload with prefix:', task.prefix);
|
||||
const { data: reqId } = await preUpload(task.key, {
|
||||
totalFileNum: files.length,
|
||||
@@ -132,24 +148,29 @@ export function useFileSliceUpload(
|
||||
reqId,
|
||||
isCancel: false,
|
||||
cancelFn: () => {
|
||||
task.controller.abort();
|
||||
// 使用 newTask 的 controller 确保一致性
|
||||
newTask.controller.abort();
|
||||
cancelUpload?.(reqId);
|
||||
if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
|
||||
if (newTask.updateEvent) window.dispatchEvent(new Event(newTask.updateEvent));
|
||||
},
|
||||
};
|
||||
updateTaskList(newTask);
|
||||
if (showTaskCenter) {
|
||||
window.dispatchEvent(
|
||||
new CustomEvent("show:task-popover", { detail: { show: true } })
|
||||
);
|
||||
}
|
||||
// 注意:show:task-popover 事件已在 createTask 中触发,此处不再重复触发
|
||||
// // 更新数据状态
|
||||
if (task.updateEvent) window.dispatchEvent(new Event(task.updateEvent));
|
||||
|
||||
let loaded = 0;
|
||||
for (let i = 0; i < files.length; i++) {
|
||||
// 检查是否已取消
|
||||
if (newTask.controller.signal.aborted) {
|
||||
throw new Error("Upload cancelled");
|
||||
}
|
||||
const { slices } = files[i];
|
||||
for (let j = 0; j < slices.length; j++) {
|
||||
// 检查是否已取消
|
||||
if (newTask.controller.signal.aborted) {
|
||||
throw new Error("Upload cancelled");
|
||||
}
|
||||
await uploadSlice(newTask, {
|
||||
loaded,
|
||||
i,
|
||||
@@ -163,7 +184,7 @@ export function useFileSliceUpload(
|
||||
removeTask(newTask);
|
||||
}
|
||||
|
||||
const handleUpload = async ({ task, files }) => {
|
||||
const handleUpload = async ({ task, files }: { task: TaskItem; files: { slices: Blob[]; name: string; size: number; originFile: Blob }[] }) => {
|
||||
const isErrorFile = await checkIsFilesExist(files);
|
||||
if (isErrorFile) {
|
||||
message.error("文件被修改或删除,请重新选择文件上传");
|
||||
@@ -189,10 +210,174 @@ export function useFileSliceUpload(
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Stream-split upload handler.
 *
 * Used when large files are split line by line and each piece is uploaded
 * immediately. Files are processed one at a time; `preUpload` is called per
 * file through `resolveReqId`, and every returned reqId is remembered so that
 * cancelling the task can cancel all of them.
 *
 * @param task  Task entry already present in the task list; its `controller`
 *              is the abort source for the whole upload.
 * @param files Files to split and upload, in order.
 * @returns Resolves once the upload finished, failed or was cancelled. Errors
 *          are reported through `message` and never re-thrown.
 */
const handleStreamUpload = async ({ task, files }: { task: TaskItem; files: File[] }) => {
  try {
    console.log('[useSliceUpload] Starting stream upload for', files.length, 'files');

    const totalSize = files.reduce((acc, file) => acc + file.size, 0);

    // reqIds of every preUpload call, needed to cancel them all on abort.
    const reqIds: number[] = [];

    const newTask: TaskItem = {
      ...task,
      reqId: -1,
      isCancel: false,
      cancelFn: () => {
        // Abort through newTask's controller (same object as task.controller,
        // because the spread above is a shallow copy).
        newTask.controller.abort();
        // Cancel the pre-upload request of every file started so far.
        reqIds.forEach(id => cancelUpload?.(id));
        if (newTask.updateEvent) window.dispatchEvent(new Event(newTask.updateEvent));
      },
    };
    updateTaskList(newTask);

    let totalUploadedLines = 0;
    let totalProcessedBytes = 0;

    // Process files sequentially; each file gets its own preUpload call.
    for (let i = 0; i < files.length; i++) {
      // Stop before starting the next file if the task was cancelled.
      if (newTask.controller.signal.aborted) {
        throw new Error("Upload cancelled");
      }

      const file = files[i];
      console.log(`[useSliceUpload] Processing file ${i + 1}/${files.length}: ${file.name}`);

      const result = await streamSplitAndUpload(
        file,
        (formData, config) => uploadChunk(task.key, formData, {
          ...config,
          signal: newTask.controller.signal,
        }),
        (currentBytes, _totalBytes, uploadedLines) => {
          // Ignore progress callbacks that arrive after cancellation.
          if (newTask.controller.signal.aborted) {
            return;
          }

          const overallBytes = totalProcessedBytes + currentBytes;
          // Keep the percentage numeric: toFixed() returns a string, which
          // must not be compared with a number or stored next to 99.99.
          // Guard against an empty selection (totalSize === 0) producing NaN.
          const rawPercent = totalSize > 0 ? (overallBytes / totalSize) * 100 : 0;
          const roundedPercent = Number(rawPercent.toFixed(2));
          // Never show 100% before the task is actually removed as finished.
          const curPercent = roundedPercent >= 100 ? 99.99 : roundedPercent;

          const updatedTask: TaskItem = {
            ...newTask,
            ...taskListRef.current.find((item) => item.key === task.key),
            size: overallBytes,
            percent: curPercent,
            streamUploadInfo: {
              currentFile: file.name,
              fileIndex: i + 1,
              totalFiles: files.length,
              uploadedLines: totalUploadedLines + uploadedLines,
            },
          };
          updateTaskList(updatedTask);
        },
        1024 * 1024, // 1MB chunk size
        {
          resolveReqId: async ({ totalFileNum, totalSize }) => {
            const { data: reqId } = await preUpload(task.key, {
              totalFileNum,
              totalSize,
              datasetId: task.key,
              hasArchive: task.hasArchive,
              prefix: task.prefix,
            });
            console.log(`[useSliceUpload] File ${file.name} preUpload response reqId:`, reqId);
            reqIds.push(reqId);
            return reqId;
          },
          hasArchive: newTask.hasArchive,
          prefix: newTask.prefix,
          signal: newTask.controller.signal,
          maxConcurrency: 3,
        }
      );

      totalUploadedLines += result.uploadedCount;
      totalProcessedBytes += file.size;

      console.log(`[useSliceUpload] File ${file.name} processed, uploaded ${result.uploadedCount} lines`);
    }

    console.log('[useSliceUpload] Stream upload completed, total lines:', totalUploadedLines);
    removeTask(newTask);

    message.success(`成功上传 ${totalUploadedLines} 个文件(按行分割)`);
  } catch (err) {
    console.error('[useSliceUpload] Stream upload error:', err);
    // A cancelled upload can surface either as our own "Upload cancelled"
    // error or as whatever the aborted request rejects with, so the abort
    // signal is the reliable indicator. `err` is narrowed before reading
    // `.message` because catch variables are not guaranteed to be Errors.
    const wasCancelled =
      task.controller.signal.aborted ||
      (err instanceof Error && err.message === "Upload cancelled");
    if (wasCancelled) {
      message.info("上传已取消");
    } else {
      message.error("文件上传失败,请稍后重试");
    }
    // NOTE(review): the list entry is spread last, so its own `isCancel`
    // value (if present) overrides the `true` set here — confirm intended.
    removeTask({
      ...task,
      isCancel: true,
      ...taskListRef.current.find((item) => item.key === task.key),
    });
  }
};
|
||||
|
||||
/**
 * Subscribe to the "upload:dataset-stream" window event.
 *
 * Each event creates a task entry from the event detail, puts it at the front
 * of the task list, optionally opens the task popover, and then starts the
 * stream upload.
 *
 * @returns A function that removes the listener. When stream upload is
 *          disabled nothing is registered and a no-op function is returned.
 */
const registerStreamUploadListener = () => {
  if (!enableStreamUpload) {
    return () => {};
  }

  const STREAM_UPLOAD_EVENT = "upload:dataset-stream";

  const onStreamUploadRequested = async (evt: Event) => {
    const { dataset, files, updateEvent, hasArchive, prefix } = (evt as CustomEvent).detail;

    // Initial task entry; reqId -1 is a placeholder until preUpload runs.
    const pendingTask: TaskItem = {
      key: dataset.id,
      title: `上传数据集: ${dataset.name} (按行分割)`,
      percent: 0,
      reqId: -1,
      controller: new AbortController(),
      size: 0,
      updateEvent,
      hasArchive,
      prefix,
    };

    // Keep the ref and the React state pointing at the same new array.
    const nextTaskList = [pendingTask, ...taskListRef.current];
    taskListRef.current = nextTaskList;
    setTaskList(nextTaskList);

    // Open the task centre popover when the hook is configured to do so.
    if (showTaskCenter) {
      const showPopover = new CustomEvent("show:task-popover", { detail: { show: true } });
      window.dispatchEvent(showPopover);
    }

    await handleStreamUpload({ task: pendingTask, files });
  };

  window.addEventListener(STREAM_UPLOAD_EVENT, onStreamUploadRequested);

  return () => {
    window.removeEventListener(STREAM_UPLOAD_EVENT, onStreamUploadRequested);
  };
};
|
||||
|
||||
return {
|
||||
taskList,
|
||||
createTask,
|
||||
removeTask,
|
||||
handleUpload,
|
||||
handleStreamUpload,
|
||||
registerStreamUploadListener,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
* 通过 iframe 加载外部页面
|
||||
*/
|
||||
export default function ContentGenerationPage() {
|
||||
const iframeUrl = "http://192.168.0.8:3000";
|
||||
const iframeUrl = "/api#/meeting";
|
||||
|
||||
window.localStorage.setItem("geeker-user", '{"token":"123","userInfo":{"name":"xteam"},"loginFrom":null,"loginData":null}');
|
||||
|
||||
return (
|
||||
<div className="h-full w-full flex flex-col">
|
||||
@@ -16,6 +18,11 @@ export default function ContentGenerationPage() {
|
||||
className="w-full h-full border-0"
|
||||
title="内容生成"
|
||||
sandbox="allow-same-origin allow-scripts allow-popups allow-forms allow-downloads"
|
||||
style={{marginLeft: "-220px",
|
||||
marginTop: "-66px",
|
||||
width: "calc(100% + 233px)",
|
||||
height: "calc(100% + 108px)"
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { App, Button, Card, List, Spin, Typography, Tag, Switch, Tree, Empty } from "antd";
|
||||
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons";
|
||||
import { App, Button, Card, List, Spin, Typography, Tag, Empty } from "antd";
|
||||
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined } from "@ant-design/icons";
|
||||
import { useNavigate, useParams } from "react-router";
|
||||
|
||||
import {
|
||||
@@ -35,16 +35,6 @@ type LsfMessage = {
|
||||
payload?: unknown;
|
||||
};
|
||||
|
||||
type SegmentInfo = {
|
||||
idx: number;
|
||||
text: string;
|
||||
start: number;
|
||||
end: number;
|
||||
hasAnnotation: boolean;
|
||||
lineIndex: number;
|
||||
chunkIndex: number;
|
||||
};
|
||||
|
||||
type ApiResponse<T> = {
|
||||
code?: number;
|
||||
message?: string;
|
||||
@@ -60,10 +50,11 @@ type EditorTaskPayload = {
|
||||
type EditorTaskResponse = {
|
||||
task?: EditorTaskPayload;
|
||||
segmented?: boolean;
|
||||
segments?: SegmentInfo[];
|
||||
totalSegments?: number;
|
||||
currentSegmentIndex?: number;
|
||||
};
|
||||
|
||||
|
||||
type EditorTaskListResponse = {
|
||||
content?: EditorTaskListItem[];
|
||||
totalElements?: number;
|
||||
@@ -85,8 +76,6 @@ type ExportPayload = {
|
||||
requestId?: string | null;
|
||||
};
|
||||
|
||||
type SwitchDecision = "save" | "discard" | "cancel";
|
||||
|
||||
const LSF_IFRAME_SRC = "/lsf/lsf.html";
|
||||
const TASK_PAGE_START = 0;
|
||||
const TASK_PAGE_SIZE = 200;
|
||||
@@ -96,6 +85,7 @@ const NO_ANNOTATION_CONFIRM_TITLE = "没有标注任何内容";
|
||||
const NO_ANNOTATION_CONFIRM_OK_TEXT = "设为无标注并保存";
|
||||
const NOT_APPLICABLE_CONFIRM_TEXT = "设为不适用并保存";
|
||||
const NO_ANNOTATION_CONFIRM_CANCEL_TEXT = "继续标注";
|
||||
const SAVE_AND_NEXT_LABEL = "保存并跳转到下一段/下一条";
|
||||
|
||||
type NormalizedTaskList = {
|
||||
items: EditorTaskListItem[];
|
||||
@@ -111,6 +101,17 @@ const resolveSegmentIndex = (value: unknown) => {
|
||||
return Number.isFinite(parsed) ? parsed : undefined;
|
||||
};
|
||||
|
||||
/**
 * Tell whether a keyboard event is the plain save shortcut (Ctrl+S or Cmd+S).
 *
 * Returns false when the event was already handled (`defaultPrevented`), when
 * it belongs to an IME composition, when the key is not "s"/"S" (by `key`) or
 * the physical S key (by `code`), when neither Ctrl nor Meta is held, or when
 * Shift or Alt is also held.
 */
const isSaveShortcut = (event: KeyboardEvent) => {
  const { key, code, ctrlKey, metaKey, shiftKey, altKey } = event;

  const alreadyHandled = event.defaultPrevented || event.isComposing;
  const isSKey = code === "KeyS" || key === "s" || key === "S";
  const hasCommandModifier = ctrlKey || metaKey;
  const hasExtraModifier = shiftKey || altKey;

  return !alreadyHandled && isSKey && !!hasCommandModifier && !hasExtraModifier;
};
|
||||
|
||||
const normalizePayload = (payload: unknown): ExportPayload | undefined => {
|
||||
if (!payload || typeof payload !== "object") return undefined;
|
||||
return payload as ExportPayload;
|
||||
@@ -145,6 +146,9 @@ const resolveTaskStatusMeta = (item: EditorTaskListItem) => {
|
||||
if (item.annotationStatus === AnnotationResultStatus.NOT_APPLICABLE) {
|
||||
return { text: NOT_APPLICABLE_LABEL, type: "warning" as const };
|
||||
}
|
||||
if (item.annotationStatus === AnnotationResultStatus.IN_PROGRESS) {
|
||||
return { text: "标注中", type: "warning" as const };
|
||||
}
|
||||
return { text: "已标注", type: "success" as const };
|
||||
};
|
||||
|
||||
@@ -173,6 +177,7 @@ const stableStringify = (value: unknown) => {
|
||||
|
||||
const buildAnnotationSnapshot = (annotation?: Record<string, unknown>) => {
|
||||
if (!annotation) return "";
|
||||
if (isAnnotationResultEmpty(annotation)) return "";
|
||||
const cleaned: Record<string, unknown> = { ...annotation };
|
||||
delete cleaned.updated_at;
|
||||
delete cleaned.updatedAt;
|
||||
@@ -231,7 +236,6 @@ export default function LabelStudioTextEditor() {
|
||||
resolve: (payload?: ExportPayload) => void;
|
||||
timer?: number;
|
||||
} | null>(null);
|
||||
const exportCheckSeqRef = useRef(0);
|
||||
const savedSnapshotsRef = useRef<Record<string, string>>({});
|
||||
const pendingAutoAdvanceRef = useRef(false);
|
||||
|
||||
@@ -239,7 +243,6 @@ export default function LabelStudioTextEditor() {
|
||||
const [loadingTasks, setLoadingTasks] = useState(false);
|
||||
const [loadingTaskDetail, setLoadingTaskDetail] = useState(false);
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [segmentSwitching, setSegmentSwitching] = useState(false);
|
||||
|
||||
const [iframeReady, setIframeReady] = useState(false);
|
||||
const [lsReady, setLsReady] = useState(false);
|
||||
@@ -252,16 +255,19 @@ export default function LabelStudioTextEditor() {
|
||||
const [prefetching, setPrefetching] = useState(false);
|
||||
const [selectedFileId, setSelectedFileId] = useState<string>("");
|
||||
const [sidebarCollapsed, setSidebarCollapsed] = useState(false);
|
||||
const [autoSaveOnSwitch, setAutoSaveOnSwitch] = useState(false);
|
||||
|
||||
// 分段相关状态
|
||||
const [segmented, setSegmented] = useState(false);
|
||||
const [segments, setSegments] = useState<SegmentInfo[]>([]);
|
||||
const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0);
|
||||
const [segmentTotal, setSegmentTotal] = useState(0);
|
||||
const isTextProject = useMemo(
|
||||
() => (project?.datasetType || "").toUpperCase() === "TEXT",
|
||||
[project?.datasetType],
|
||||
);
|
||||
const segmentIndices = useMemo(() => {
|
||||
if (segmentTotal <= 0) return [] as number[];
|
||||
return Array.from({ length: segmentTotal }, (_, index) => index);
|
||||
}, [segmentTotal]);
|
||||
|
||||
const focusIframe = useCallback(() => {
|
||||
const iframe = iframeRef.current;
|
||||
@@ -327,8 +333,11 @@ export default function LabelStudioTextEditor() {
|
||||
}, [message, projectId]);
|
||||
|
||||
const updateTaskSelection = useCallback((items: EditorTaskListItem[]) => {
|
||||
const isCompleted = (item: EditorTaskListItem) => {
|
||||
return item.hasAnnotation;
|
||||
};
|
||||
const defaultFileId =
|
||||
items.find((item) => !item.hasAnnotation)?.fileId || items[0]?.fileId || "";
|
||||
items.find((item) => !isCompleted(item))?.fileId || items[0]?.fileId || "";
|
||||
setSelectedFileId((prev) => {
|
||||
if (prev && items.some((item) => item.fileId === prev)) return prev;
|
||||
return defaultFileId;
|
||||
@@ -465,17 +474,19 @@ export default function LabelStudioTextEditor() {
|
||||
if (seq !== initSeqRef.current) return;
|
||||
|
||||
// 更新分段状态
|
||||
const segmentIndex = data?.segmented
|
||||
const isSegmented = !!data?.segmented;
|
||||
const segmentIndex = isSegmented
|
||||
? resolveSegmentIndex(data.currentSegmentIndex) ?? 0
|
||||
: undefined;
|
||||
if (data?.segmented) {
|
||||
if (isSegmented) {
|
||||
setSegmented(true);
|
||||
setSegments(data.segments || []);
|
||||
setCurrentSegmentIndex(segmentIndex ?? 0);
|
||||
const totalSegments = Number(data?.totalSegments ?? 0);
|
||||
setSegmentTotal(Number.isFinite(totalSegments) && totalSegments > 0 ? totalSegments : 0);
|
||||
} else {
|
||||
setSegmented(false);
|
||||
setSegments([]);
|
||||
setCurrentSegmentIndex(0);
|
||||
setSegmentTotal(0);
|
||||
}
|
||||
|
||||
const taskData = {
|
||||
@@ -539,15 +550,10 @@ export default function LabelStudioTextEditor() {
|
||||
|
||||
const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
|
||||
if (!fileId) return;
|
||||
if (segmented && segments.length > 0) {
|
||||
const sortedSegmentIndices = segments
|
||||
.map((seg) => seg.idx)
|
||||
.sort((a, b) => a - b);
|
||||
const baseIndex = segmentIndex ?? currentSegmentIndex;
|
||||
const currentPos = sortedSegmentIndices.indexOf(baseIndex);
|
||||
const nextSegmentIndex =
|
||||
currentPos >= 0 ? sortedSegmentIndices[currentPos + 1] : sortedSegmentIndices[0];
|
||||
if (nextSegmentIndex !== undefined) {
|
||||
if (segmented && segmentTotal > 0) {
|
||||
const baseIndex = Math.max(segmentIndex ?? currentSegmentIndex, 0);
|
||||
const nextSegmentIndex = baseIndex + 1;
|
||||
if (nextSegmentIndex < segmentTotal) {
|
||||
await initEditorForFile(fileId, nextSegmentIndex);
|
||||
return;
|
||||
}
|
||||
@@ -569,7 +575,7 @@ export default function LabelStudioTextEditor() {
|
||||
initEditorForFile,
|
||||
message,
|
||||
segmented,
|
||||
segments,
|
||||
segmentTotal,
|
||||
tasks,
|
||||
]);
|
||||
|
||||
@@ -601,11 +607,13 @@ export default function LabelStudioTextEditor() {
|
||||
const annotationRecord = annotation as Record<string, unknown>;
|
||||
const currentTask = tasks.find((item) => item.fileId === String(fileId));
|
||||
const currentStatus = currentTask?.annotationStatus;
|
||||
const hasExistingAnnotation = !!currentTask?.hasAnnotation;
|
||||
let resolvedStatus: AnnotationResultStatus;
|
||||
if (isAnnotationResultEmpty(annotationRecord)) {
|
||||
if (currentStatus === AnnotationResultStatus.ANNOTATED || (hasExistingAnnotation && !currentStatus)) {
|
||||
resolvedStatus = AnnotationResultStatus.ANNOTATED;
|
||||
if (
|
||||
currentStatus === AnnotationResultStatus.NO_ANNOTATION ||
|
||||
currentStatus === AnnotationResultStatus.NOT_APPLICABLE
|
||||
) {
|
||||
resolvedStatus = currentStatus;
|
||||
} else {
|
||||
const selectedStatus = await confirmEmptyAnnotationStatus();
|
||||
if (!selectedStatus) return false;
|
||||
@@ -641,16 +649,6 @@ export default function LabelStudioTextEditor() {
|
||||
const snapshot = buildAnnotationSnapshot(isRecord(annotation) ? annotation : undefined);
|
||||
savedSnapshotsRef.current[snapshotKey] = snapshot;
|
||||
|
||||
// 分段模式下更新当前段落的标注状态
|
||||
if (segmented && segmentIndex !== undefined) {
|
||||
setSegments((prev) =>
|
||||
prev.map((seg) =>
|
||||
seg.idx === segmentIndex
|
||||
? { ...seg, hasAnnotation: true }
|
||||
: seg
|
||||
)
|
||||
);
|
||||
}
|
||||
if (options?.autoAdvance) {
|
||||
await advanceAfterSave(String(fileId), segmentIndex);
|
||||
}
|
||||
@@ -673,147 +671,27 @@ export default function LabelStudioTextEditor() {
|
||||
tasks,
|
||||
]);
|
||||
|
||||
const requestExportForCheck = useCallback(() => {
|
||||
if (!iframeReady || !lsReady) return Promise.resolve(undefined);
|
||||
if (exportCheckRef.current) {
|
||||
if (exportCheckRef.current.timer) {
|
||||
window.clearTimeout(exportCheckRef.current.timer);
|
||||
}
|
||||
exportCheckRef.current.resolve(undefined);
|
||||
exportCheckRef.current = null;
|
||||
}
|
||||
const requestId = `check_${Date.now()}_${++exportCheckSeqRef.current}`;
|
||||
return new Promise<ExportPayload | undefined>((resolve) => {
|
||||
const timer = window.setTimeout(() => {
|
||||
if (exportCheckRef.current?.requestId === requestId) {
|
||||
exportCheckRef.current = null;
|
||||
}
|
||||
resolve(undefined);
|
||||
}, 3000);
|
||||
exportCheckRef.current = {
|
||||
requestId,
|
||||
resolve,
|
||||
timer,
|
||||
};
|
||||
postToIframe("LS_EXPORT_CHECK", { requestId });
|
||||
});
|
||||
}, [iframeReady, lsReady, postToIframe]);
|
||||
|
||||
const confirmSaveBeforeSwitch = useCallback(() => {
|
||||
return new Promise<SwitchDecision>((resolve) => {
|
||||
let resolved = false;
|
||||
let modalInstance: { destroy: () => void } | null = null;
|
||||
const settle = (decision: SwitchDecision) => {
|
||||
if (resolved) return;
|
||||
resolved = true;
|
||||
resolve(decision);
|
||||
};
|
||||
const handleDiscard = () => {
|
||||
if (modalInstance) modalInstance.destroy();
|
||||
settle("discard");
|
||||
};
|
||||
modalInstance = modal.confirm({
|
||||
title: "当前段落有未保存标注",
|
||||
content: (
|
||||
<div className="flex flex-col gap-2">
|
||||
<Typography.Text>切换段落前请先保存当前标注。</Typography.Text>
|
||||
<Button type="link" danger style={{ padding: 0, height: "auto" }} onClick={handleDiscard}>
|
||||
放弃未保存并切换
|
||||
</Button>
|
||||
</div>
|
||||
),
|
||||
okText: "保存并切换",
|
||||
cancelText: "取消",
|
||||
onOk: () => settle("save"),
|
||||
onCancel: () => settle("cancel"),
|
||||
});
|
||||
});
|
||||
}, [modal]);
|
||||
|
||||
const requestExport = () => {
|
||||
const requestExport = useCallback((autoAdvance: boolean) => {
|
||||
if (!selectedFileId) {
|
||||
message.warning("请先选择文件");
|
||||
return;
|
||||
}
|
||||
pendingAutoAdvanceRef.current = true;
|
||||
pendingAutoAdvanceRef.current = autoAdvance;
|
||||
postToIframe("LS_EXPORT", {});
|
||||
}, [message, postToIframe, selectedFileId]);
|
||||
|
||||
useEffect(() => {
|
||||
const handleSaveShortcut = (event: KeyboardEvent) => {
|
||||
if (!isSaveShortcut(event) || event.repeat) return;
|
||||
if (saving || loadingTaskDetail) return;
|
||||
if (!iframeReady || !lsReady) return;
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
requestExport(false);
|
||||
};
|
||||
|
||||
// 段落切换处理
|
||||
const handleSegmentChange = useCallback(async (newIndex: number) => {
|
||||
if (newIndex === currentSegmentIndex) return;
|
||||
if (segmentSwitching || saving || loadingTaskDetail) return;
|
||||
if (!iframeReady || !lsReady) {
|
||||
message.warning("编辑器未就绪,无法切换段落");
|
||||
return;
|
||||
}
|
||||
|
||||
setSegmentSwitching(true);
|
||||
try {
|
||||
const payload = await requestExportForCheck();
|
||||
if (!payload) {
|
||||
message.warning("无法读取当前标注,已取消切换");
|
||||
return;
|
||||
}
|
||||
|
||||
const payloadTaskId = payload.taskId;
|
||||
if (expectedTaskIdRef.current && payloadTaskId) {
|
||||
if (Number(payloadTaskId) !== expectedTaskIdRef.current) {
|
||||
message.warning("已忽略过期的标注数据");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const payloadFileId = payload.fileId || selectedFileId;
|
||||
const payloadSegmentIndex = resolveSegmentIndex(payload.segmentIndex);
|
||||
const resolvedSegmentIndex =
|
||||
payloadSegmentIndex !== undefined
|
||||
? payloadSegmentIndex
|
||||
: segmented
|
||||
? currentSegmentIndex
|
||||
: undefined;
|
||||
const annotation = isRecord(payload.annotation) ? payload.annotation : undefined;
|
||||
const snapshotKey = payloadFileId
|
||||
? buildSnapshotKey(String(payloadFileId), resolvedSegmentIndex)
|
||||
: undefined;
|
||||
const latestSnapshot = buildAnnotationSnapshot(annotation);
|
||||
const lastSnapshot = snapshotKey ? savedSnapshotsRef.current[snapshotKey] : undefined;
|
||||
const hasUnsavedChange = snapshotKey !== undefined && lastSnapshot !== undefined && latestSnapshot !== lastSnapshot;
|
||||
|
||||
if (hasUnsavedChange) {
|
||||
if (autoSaveOnSwitch) {
|
||||
const saved = await saveFromExport(payload);
|
||||
if (!saved) return;
|
||||
} else {
|
||||
const decision = await confirmSaveBeforeSwitch();
|
||||
if (decision === "cancel") return;
|
||||
if (decision === "save") {
|
||||
const saved = await saveFromExport(payload);
|
||||
if (!saved) return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await initEditorForFile(selectedFileId, newIndex);
|
||||
} finally {
|
||||
setSegmentSwitching(false);
|
||||
}
|
||||
}, [
|
||||
autoSaveOnSwitch,
|
||||
confirmSaveBeforeSwitch,
|
||||
currentSegmentIndex,
|
||||
iframeReady,
|
||||
initEditorForFile,
|
||||
loadingTaskDetail,
|
||||
lsReady,
|
||||
message,
|
||||
requestExportForCheck,
|
||||
saveFromExport,
|
||||
segmented,
|
||||
selectedFileId,
|
||||
segmentSwitching,
|
||||
saving,
|
||||
]);
|
||||
window.addEventListener("keydown", handleSaveShortcut);
|
||||
return () => window.removeEventListener("keydown", handleSaveShortcut);
|
||||
}, [iframeReady, loadingTaskDetail, lsReady, requestExport, saving]);
|
||||
|
||||
useEffect(() => {
|
||||
setIframeReady(false);
|
||||
@@ -831,8 +709,8 @@ export default function LabelStudioTextEditor() {
|
||||
expectedTaskIdRef.current = null;
|
||||
// 重置分段状态
|
||||
setSegmented(false);
|
||||
setSegments([]);
|
||||
setCurrentSegmentIndex(0);
|
||||
setSegmentTotal(0);
|
||||
savedSnapshotsRef.current = {};
|
||||
if (exportCheckRef.current?.timer) {
|
||||
window.clearTimeout(exportCheckRef.current.timer);
|
||||
@@ -871,51 +749,6 @@ export default function LabelStudioTextEditor() {
|
||||
return () => window.removeEventListener("focus", handleWindowFocus);
|
||||
}, [focusIframe, lsReady]);
|
||||
|
||||
const segmentTreeData = useMemo(() => {
|
||||
if (!segmented || segments.length === 0) return [];
|
||||
const lineMap = new Map<number, SegmentInfo[]>();
|
||||
segments.forEach((seg) => {
|
||||
const list = lineMap.get(seg.lineIndex) || [];
|
||||
list.push(seg);
|
||||
lineMap.set(seg.lineIndex, list);
|
||||
});
|
||||
return Array.from(lineMap.entries())
|
||||
.sort((a, b) => a[0] - b[0])
|
||||
.map(([lineIndex, lineSegments]) => ({
|
||||
key: `line-${lineIndex}`,
|
||||
title: `第${lineIndex + 1}行`,
|
||||
selectable: false,
|
||||
children: lineSegments
|
||||
.sort((a, b) => a.chunkIndex - b.chunkIndex)
|
||||
.map((seg) => ({
|
||||
key: `seg-${seg.idx}`,
|
||||
title: (
|
||||
<span className="flex items-center gap-1">
|
||||
<span>{`片${seg.chunkIndex + 1}`}</span>
|
||||
{seg.hasAnnotation && (
|
||||
<CheckOutlined style={{ fontSize: 10, color: "#52c41a" }} />
|
||||
)}
|
||||
</span>
|
||||
),
|
||||
})),
|
||||
}));
|
||||
}, [segmented, segments]);
|
||||
|
||||
const segmentLineKeys = useMemo(
|
||||
() => segmentTreeData.map((item) => String(item.key)),
|
||||
[segmentTreeData]
|
||||
);
|
||||
|
||||
const handleSegmentSelect = useCallback((keys: Array<string | number>) => {
|
||||
const [first] = keys;
|
||||
if (first === undefined || first === null) return;
|
||||
const key = String(first);
|
||||
if (!key.startsWith("seg-")) return;
|
||||
const nextIndex = Number(key.replace("seg-", ""));
|
||||
if (!Number.isFinite(nextIndex)) return;
|
||||
handleSegmentChange(nextIndex);
|
||||
}, [handleSegmentChange]);
|
||||
|
||||
useEffect(() => {
|
||||
const handler = (event: MessageEvent<LsfMessage>) => {
|
||||
if (event.origin !== origin) return;
|
||||
@@ -983,6 +816,8 @@ export default function LabelStudioTextEditor() {
|
||||
}, [message, origin, saveFromExport]);
|
||||
|
||||
const canLoadMore = taskTotalPages > 0 && taskPage + 1 < taskTotalPages;
|
||||
const saveDisabled =
|
||||
!iframeReady || !selectedFileId || saving || loadingTaskDetail;
|
||||
const loadMoreNode = canLoadMore ? (
|
||||
<div className="p-2 text-center">
|
||||
<Button
|
||||
@@ -1046,7 +881,7 @@ export default function LabelStudioTextEditor() {
|
||||
return (
|
||||
<div className="h-full flex flex-col">
|
||||
{/* 顶部工具栏 */}
|
||||
<div className="flex items-center justify-between px-3 py-2 border-b border-gray-200 bg-white">
|
||||
<div className="grid grid-cols-[1fr_auto_1fr] items-center px-3 py-2 border-b border-gray-200 bg-white">
|
||||
<div className="flex items-center gap-2">
|
||||
<Button icon={<LeftOutlined />} onClick={() => navigate("/data/annotation")}>
|
||||
返回
|
||||
@@ -1060,7 +895,18 @@ export default function LabelStudioTextEditor() {
|
||||
标注编辑器
|
||||
</Typography.Title>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="flex items-center justify-center">
|
||||
<Button
|
||||
type="primary"
|
||||
icon={<SaveOutlined />}
|
||||
loading={saving}
|
||||
disabled={saveDisabled}
|
||||
onClick={() => requestExport(true)}
|
||||
>
|
||||
{SAVE_AND_NEXT_LABEL}
|
||||
</Button>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 justify-end">
|
||||
<Button
|
||||
icon={<ReloadOutlined />}
|
||||
loading={loadingTasks}
|
||||
@@ -1069,11 +915,10 @@ export default function LabelStudioTextEditor() {
|
||||
刷新
|
||||
</Button>
|
||||
<Button
|
||||
type="primary"
|
||||
icon={<SaveOutlined />}
|
||||
loading={saving}
|
||||
disabled={!iframeReady || !selectedFileId}
|
||||
onClick={requestExport}
|
||||
disabled={saveDisabled}
|
||||
onClick={() => requestExport(false)}
|
||||
>
|
||||
保存
|
||||
</Button>
|
||||
@@ -1087,8 +932,8 @@ export default function LabelStudioTextEditor() {
|
||||
className="border-r border-gray-200 bg-gray-50 flex flex-col transition-all duration-200 min-h-0"
|
||||
style={{ width: sidebarCollapsed ? 0 : 240, overflow: "hidden" }}
|
||||
>
|
||||
<div className="px-3 py-2 border-b border-gray-200 bg-white font-medium text-sm">
|
||||
文件列表
|
||||
<div className="px-3 py-2 border-b border-gray-200 bg-white font-medium text-sm flex items-center justify-between gap-2">
|
||||
<span>文件列表</span>
|
||||
</div>
|
||||
<div className="flex-1 min-h-0 overflow-auto">
|
||||
<List
|
||||
@@ -1114,9 +959,11 @@ export default function LabelStudioTextEditor() {
|
||||
{item.fileName}
|
||||
</Typography.Text>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-2">
|
||||
<Typography.Text type={statusMeta.type} style={{ fontSize: 11 }}>
|
||||
{statusMeta.text}
|
||||
</Typography.Text>
|
||||
</div>
|
||||
{item.annotationUpdatedAt && (
|
||||
<Typography.Text type="secondary" style={{ fontSize: 10 }}>
|
||||
{item.annotationUpdatedAt}
|
||||
@@ -1134,21 +981,28 @@ export default function LabelStudioTextEditor() {
|
||||
<div className="px-3 py-2 border-b border-gray-200 bg-gray-50 font-medium text-sm flex items-center justify-between">
|
||||
<span>段落/分段</span>
|
||||
<Tag color="blue" style={{ margin: 0 }}>
|
||||
{currentSegmentIndex + 1} / {segments.length}
|
||||
{segmentTotal > 0 ? currentSegmentIndex + 1 : 0} / {segmentTotal}
|
||||
</Tag>
|
||||
</div>
|
||||
<div className="flex-1 min-h-0 overflow-auto px-2 py-2">
|
||||
{segments.length > 0 ? (
|
||||
<Tree
|
||||
showLine
|
||||
blockNode
|
||||
selectedKeys={
|
||||
segmented ? [`seg-${currentSegmentIndex}`] : []
|
||||
{segmentTotal > 0 ? (
|
||||
<div className="grid grid-cols-[repeat(auto-fill,minmax(44px,1fr))] gap-1">
|
||||
{segmentIndices.map((segmentIndex) => {
|
||||
const isCurrent = segmentIndex === currentSegmentIndex;
|
||||
return (
|
||||
<div
|
||||
key={segmentIndex}
|
||||
className={
|
||||
isCurrent
|
||||
? "h-7 leading-7 rounded bg-blue-500 text-white text-center text-xs font-medium"
|
||||
: "h-7 leading-7 rounded bg-gray-100 text-gray-700 text-center text-xs"
|
||||
}
|
||||
expandedKeys={segmentLineKeys}
|
||||
onSelect={handleSegmentSelect}
|
||||
treeData={segmentTreeData}
|
||||
/>
|
||||
>
|
||||
{segmentIndex + 1}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
) : (
|
||||
<div className="py-6">
|
||||
<Empty
|
||||
@@ -1158,17 +1012,6 @@ export default function LabelStudioTextEditor() {
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="px-3 py-2 border-t border-gray-200 flex items-center justify-between">
|
||||
<Typography.Text style={{ fontSize: 12 }}>
|
||||
切段自动保存
|
||||
</Typography.Text>
|
||||
<Switch
|
||||
size="small"
|
||||
checked={autoSaveOnSwitch}
|
||||
onChange={(checked) => setAutoSaveOnSwitch(checked)}
|
||||
disabled={segmentSwitching || saving || loadingTaskDetail || !lsReady}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -19,7 +19,8 @@ import {
|
||||
queryAnnotationTemplatesUsingGet,
|
||||
} from "../../annotation.api";
|
||||
import { DatasetType, type Dataset } from "@/pages/DataManagement/dataset.model";
|
||||
import { DataType, type AnnotationTemplate, type AnnotationTask } from "../../annotation.model";
|
||||
import { DataType, type AnnotationTemplate } from "../../annotation.model";
|
||||
import type { AnnotationTaskListItem } from "../../annotation.const";
|
||||
import LabelStudioEmbed from "@/components/business/LabelStudioEmbed";
|
||||
import TemplateConfigurationTreeEditor from "../../components/TemplateConfigurationTreeEditor";
|
||||
import { useTagConfig } from "@/hooks/useTagConfig";
|
||||
@@ -29,7 +30,7 @@ interface AnnotationTaskDialogProps {
|
||||
onClose: () => void;
|
||||
onRefresh: () => void;
|
||||
/** 编辑模式:传入要编辑的任务数据 */
|
||||
editTask?: AnnotationTask | null;
|
||||
editTask?: AnnotationTaskListItem | null;
|
||||
}
|
||||
|
||||
type DatasetOption = Dataset & { icon?: ReactNode };
|
||||
@@ -60,6 +61,7 @@ const isRecord = (value: unknown): value is Record<string, unknown> =>
|
||||
|
||||
const DEFAULT_SEGMENTATION_ENABLED = true;
|
||||
const FILE_PREVIEW_MAX_HEIGHT = 500;
|
||||
const PREVIEW_MODAL_WIDTH = "80vw";
|
||||
const SEGMENTATION_OPTIONS = [
|
||||
{ label: "需要切片段", value: true },
|
||||
{ label: "不需要切片段", value: false },
|
||||
@@ -828,7 +830,7 @@ export default function CreateAnnotationTask({
|
||||
open={showPreview}
|
||||
onCancel={() => setShowPreview(false)}
|
||||
title="标注界面预览"
|
||||
width={1000}
|
||||
width={PREVIEW_MODAL_WIDTH}
|
||||
footer={[
|
||||
<Button key="close" onClick={() => setShowPreview(false)}>
|
||||
关闭
|
||||
@@ -853,7 +855,7 @@ export default function CreateAnnotationTask({
|
||||
open={datasetPreviewVisible}
|
||||
onCancel={() => setDatasetPreviewVisible(false)}
|
||||
title="数据集预览(前10条文件)"
|
||||
width={700}
|
||||
width={PREVIEW_MODAL_WIDTH}
|
||||
footer={[
|
||||
<Button key="close" onClick={() => setDatasetPreviewVisible(false)}>
|
||||
关闭
|
||||
@@ -910,7 +912,7 @@ export default function CreateAnnotationTask({
|
||||
setFileContent("");
|
||||
}}
|
||||
title={`文件预览:${previewFileName}`}
|
||||
width={previewFileType === "text" ? 800 : 700}
|
||||
width={PREVIEW_MODAL_WIDTH}
|
||||
footer={[
|
||||
<Button key="close" onClick={() => {
|
||||
setFileContentVisible(false);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useState } from "react";
|
||||
import { Card, Button, Table, message, Modal, Tabs } from "antd";
|
||||
import { Card, Button, Table, Tag, message, Modal, Tabs } from "antd";
|
||||
import {
|
||||
PlusOutlined,
|
||||
EditOutlined,
|
||||
@@ -10,27 +10,39 @@ import {
|
||||
import { useNavigate } from "react-router";
|
||||
import { SearchControls } from "@/components/SearchControls";
|
||||
import CardView from "@/components/CardView";
|
||||
import type { AnnotationTask } from "../annotation.model";
|
||||
import useFetchData from "@/hooks/useFetchData";
|
||||
import {
|
||||
deleteAnnotationTaskByIdUsingDelete,
|
||||
queryAnnotationTasksUsingGet,
|
||||
} from "../annotation.api";
|
||||
import { mapAnnotationTask } from "../annotation.const";
|
||||
import {
|
||||
AnnotationTypeMap,
|
||||
mapAnnotationTask,
|
||||
type AnnotationTaskListItem,
|
||||
} from "../annotation.const";
|
||||
import CreateAnnotationTask from "../Create/components/CreateAnnotationTaskDialog";
|
||||
import ExportAnnotationDialog from "./ExportAnnotationDialog";
|
||||
import { ColumnType } from "antd/es/table";
|
||||
import { TemplateList } from "../Template";
|
||||
// Note: DevelopmentInProgress intentionally not used here
|
||||
|
||||
type AnnotationTaskRowKey = string | number;
|
||||
type AnnotationTaskOperation = {
|
||||
key: string;
|
||||
label: string;
|
||||
icon: JSX.Element;
|
||||
danger?: boolean;
|
||||
onClick: (task: AnnotationTaskListItem) => void;
|
||||
};
|
||||
|
||||
export default function DataAnnotation() {
|
||||
// return <DevelopmentInProgress showTime="2025.10.30" />;
|
||||
const navigate = useNavigate();
|
||||
const [activeTab, setActiveTab] = useState("tasks");
|
||||
const [viewMode, setViewMode] = useState<"list" | "card">("list");
|
||||
const [showCreateDialog, setShowCreateDialog] = useState(false);
|
||||
const [exportTask, setExportTask] = useState<AnnotationTask | null>(null);
|
||||
const [editTask, setEditTask] = useState<AnnotationTask | null>(null);
|
||||
const [exportTask, setExportTask] = useState<AnnotationTaskListItem | null>(null);
|
||||
const [editTask, setEditTask] = useState<AnnotationTaskListItem | null>(null);
|
||||
|
||||
const {
|
||||
loading,
|
||||
@@ -40,13 +52,16 @@ export default function DataAnnotation() {
|
||||
fetchData,
|
||||
handleFiltersChange,
|
||||
handleKeywordChange,
|
||||
} = useFetchData(queryAnnotationTasksUsingGet, mapAnnotationTask, 30000, true, [], 0);
|
||||
} = useFetchData<AnnotationTaskListItem>(queryAnnotationTasksUsingGet, mapAnnotationTask, 30000, true, [], 0);
|
||||
|
||||
const [selectedRowKeys, setSelectedRowKeys] = useState<(string | number)[]>([]);
|
||||
const [selectedRows, setSelectedRows] = useState<any[]>([]);
|
||||
const [selectedRowKeys, setSelectedRowKeys] = useState<AnnotationTaskRowKey[]>([]);
|
||||
const [selectedRows, setSelectedRows] = useState<AnnotationTaskListItem[]>([]);
|
||||
|
||||
const handleAnnotate = (task: AnnotationTask) => {
|
||||
const projectId = (task as any)?.id;
|
||||
const toSafeCount = (value: unknown) =>
|
||||
typeof value === "number" && Number.isFinite(value) ? value : 0;
|
||||
|
||||
const handleAnnotate = (task: AnnotationTaskListItem) => {
|
||||
const projectId = task.id;
|
||||
if (!projectId) {
|
||||
message.error("无法进入标注:缺少标注项目ID");
|
||||
return;
|
||||
@@ -54,15 +69,15 @@ export default function DataAnnotation() {
|
||||
navigate(`/data/annotation/annotate/${projectId}`);
|
||||
};
|
||||
|
||||
const handleExport = (task: AnnotationTask) => {
|
||||
const handleExport = (task: AnnotationTaskListItem) => {
|
||||
setExportTask(task);
|
||||
};
|
||||
|
||||
const handleEdit = (task: AnnotationTask) => {
|
||||
const handleEdit = (task: AnnotationTaskListItem) => {
|
||||
setEditTask(task);
|
||||
};
|
||||
|
||||
const handleDelete = (task: AnnotationTask) => {
|
||||
const handleDelete = (task: AnnotationTaskListItem) => {
|
||||
Modal.confirm({
|
||||
title: `确认删除标注任务「${task.name}」吗?`,
|
||||
content: "删除标注任务不会删除对应数据集,但会删除该任务的所有标注结果。",
|
||||
@@ -110,7 +125,7 @@ export default function DataAnnotation() {
|
||||
});
|
||||
};
|
||||
|
||||
const operations = [
|
||||
const operations: AnnotationTaskOperation[] = [
|
||||
{
|
||||
key: "annotate",
|
||||
label: "标注",
|
||||
@@ -142,24 +157,45 @@ export default function DataAnnotation() {
|
||||
},
|
||||
];
|
||||
|
||||
const columns: ColumnType<any>[] = [
|
||||
const columns: ColumnType<AnnotationTaskListItem>[] = [
|
||||
{
|
||||
title: "序号",
|
||||
key: "index",
|
||||
width: 80,
|
||||
align: "center" as const,
|
||||
render: (_value: unknown, _record: AnnotationTaskListItem, index: number) => {
|
||||
const current = pagination.current ?? 1;
|
||||
const pageSize = pagination.pageSize ?? tableData.length ?? 0;
|
||||
return (current - 1) * pageSize + index + 1;
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "任务名称",
|
||||
dataIndex: "name",
|
||||
key: "name",
|
||||
fixed: "left" as const,
|
||||
},
|
||||
{
|
||||
title: "任务ID",
|
||||
dataIndex: "id",
|
||||
key: "id",
|
||||
},
|
||||
{
|
||||
title: "数据集",
|
||||
dataIndex: "datasetName",
|
||||
key: "datasetName",
|
||||
width: 180,
|
||||
},
|
||||
{
|
||||
title: "标注类型",
|
||||
dataIndex: "labelingType",
|
||||
key: "labelingType",
|
||||
width: 160,
|
||||
render: (value?: string) => {
|
||||
if (!value) {
|
||||
return "-";
|
||||
}
|
||||
const label =
|
||||
AnnotationTypeMap[value as keyof typeof AnnotationTypeMap]?.label ||
|
||||
value;
|
||||
return <Tag color="geekblue">{label}</Tag>;
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "数据量",
|
||||
dataIndex: "totalCount",
|
||||
@@ -173,9 +209,21 @@ export default function DataAnnotation() {
|
||||
key: "annotatedCount",
|
||||
width: 100,
|
||||
align: "center" as const,
|
||||
render: (value: number, record: any) => {
|
||||
const total = record.totalCount || 0;
|
||||
const annotated = value || 0;
|
||||
render: (value: number, record: AnnotationTaskListItem) => {
|
||||
const total = toSafeCount(record.totalCount ?? record.total_count);
|
||||
const annotatedRaw = toSafeCount(
|
||||
value ?? record.annotatedCount ?? record.annotated_count
|
||||
);
|
||||
const segmentationEnabled =
|
||||
record.segmentationEnabled ?? record.segmentation_enabled;
|
||||
const inProgressRaw = segmentationEnabled
|
||||
? toSafeCount(record.inProgressCount ?? record.in_progress_count)
|
||||
: 0;
|
||||
const shouldExcludeInProgress =
|
||||
total > 0 && annotatedRaw + inProgressRaw > total;
|
||||
const annotated = shouldExcludeInProgress
|
||||
? Math.max(annotatedRaw - inProgressRaw, 0)
|
||||
: annotatedRaw;
|
||||
const percent = total > 0 ? Math.round((annotated / total) * 100) : 0;
|
||||
return (
|
||||
<span title={`${annotated}/${total} (${percent}%)`}>
|
||||
@@ -184,6 +232,23 @@ export default function DataAnnotation() {
|
||||
);
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "标注中",
|
||||
dataIndex: "inProgressCount",
|
||||
key: "inProgressCount",
|
||||
width: 100,
|
||||
align: "center" as const,
|
||||
render: (value: number, record: AnnotationTaskListItem) => {
|
||||
const segmentationEnabled =
|
||||
record.segmentationEnabled ?? record.segmentation_enabled;
|
||||
if (!segmentationEnabled) return "-";
|
||||
const resolved =
|
||||
Number.isFinite(value)
|
||||
? value
|
||||
: record.inProgressCount ?? record.in_progress_count ?? 0;
|
||||
return resolved;
|
||||
},
|
||||
},
|
||||
{
|
||||
title: "创建时间",
|
||||
dataIndex: "createdAt",
|
||||
@@ -202,14 +267,14 @@ export default function DataAnnotation() {
|
||||
fixed: "right" as const,
|
||||
width: 150,
|
||||
dataIndex: "actions",
|
||||
render: (_: any, task: any) => (
|
||||
render: (_value: unknown, task: AnnotationTaskListItem) => (
|
||||
<div className="flex items-center justify-center space-x-1">
|
||||
{operations.map((operation) => (
|
||||
<Button
|
||||
key={operation.key}
|
||||
type="text"
|
||||
icon={operation.icon}
|
||||
onClick={() => (operation?.onClick as any)?.(task)}
|
||||
onClick={() => operation.onClick(task)}
|
||||
title={operation.label}
|
||||
/>
|
||||
))}
|
||||
@@ -282,9 +347,9 @@ export default function DataAnnotation() {
|
||||
pagination={pagination}
|
||||
rowSelection={{
|
||||
selectedRowKeys,
|
||||
onChange: (keys, rows) => {
|
||||
setSelectedRowKeys(keys as (string | number)[]);
|
||||
setSelectedRows(rows as any[]);
|
||||
onChange: (keys: AnnotationTaskRowKey[], rows: AnnotationTaskListItem[]) => {
|
||||
setSelectedRowKeys(keys);
|
||||
setSelectedRows(rows);
|
||||
},
|
||||
}}
|
||||
scroll={{ x: "max-content", y: "calc(100vh - 24rem)" }}
|
||||
@@ -293,7 +358,7 @@ export default function DataAnnotation() {
|
||||
) : (
|
||||
<CardView
|
||||
data={tableData}
|
||||
operations={operations as any}
|
||||
operations={operations}
|
||||
pagination={pagination}
|
||||
loading={loading}
|
||||
/>
|
||||
|
||||
@@ -178,14 +178,15 @@ export default function ExportAnnotationDialog({
|
||||
<Select
|
||||
options={FORMAT_OPTIONS.map((opt) => ({
|
||||
label: (
|
||||
<div>
|
||||
<div className="py-1">
|
||||
<div className="font-medium">{opt.label}</div>
|
||||
<div className="text-xs text-gray-400">{opt.description}</div>
|
||||
</div>
|
||||
),
|
||||
value: opt.value,
|
||||
simpleLabel: opt.label,
|
||||
}))}
|
||||
optionLabelProp="label"
|
||||
optionLabelProp="simpleLabel"
|
||||
/>
|
||||
</Form.Item>
|
||||
|
||||
|
||||
@@ -43,14 +43,6 @@ const TemplateDetail: React.FC<TemplateDetailProps> = ({
|
||||
<Descriptions.Item label="样式">
|
||||
{template.style}
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="类型">
|
||||
<Tag color={template.builtIn ? "gold" : "default"}>
|
||||
{template.builtIn ? "系统内置" : "自定义"}
|
||||
</Tag>
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="版本">
|
||||
{template.version}
|
||||
</Descriptions.Item>
|
||||
<Descriptions.Item label="创建时间" span={2}>
|
||||
{new Date(template.createdAt).toLocaleString()}
|
||||
</Descriptions.Item>
|
||||
|
||||
@@ -36,6 +36,7 @@ const TemplateForm: React.FC<TemplateFormProps> = ({
|
||||
const [form] = Form.useForm();
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [labelConfig, setLabelConfig] = useState("");
|
||||
const selectedDataType = Form.useWatch("dataType", form);
|
||||
|
||||
useEffect(() => {
|
||||
if (visible && template && mode === "edit") {
|
||||
@@ -96,8 +97,12 @@ const TemplateForm: React.FC<TemplateFormProps> = ({
|
||||
} else {
|
||||
message.error(response.message || `模板${mode === "create" ? "创建" : "更新"}失败`);
|
||||
}
|
||||
} catch (error: any) {
|
||||
if (error.errorFields) {
|
||||
} catch (error: unknown) {
|
||||
const hasErrorFields =
|
||||
typeof error === "object" &&
|
||||
error !== null &&
|
||||
"errorFields" in error;
|
||||
if (hasErrorFields) {
|
||||
message.error("请填写所有必填字段");
|
||||
} else {
|
||||
message.error(`模板${mode === "create" ? "创建" : "更新"}失败`);
|
||||
@@ -195,6 +200,7 @@ const TemplateForm: React.FC<TemplateFormProps> = ({
|
||||
value={labelConfig}
|
||||
onChange={setLabelConfig}
|
||||
height={420}
|
||||
dataType={selectedDataType}
|
||||
/>
|
||||
</div>
|
||||
</Form>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useState } from "react";
|
||||
import React, { useState, useEffect } from "react";
|
||||
import {
|
||||
Button,
|
||||
Table,
|
||||
@@ -32,7 +32,16 @@ import {
|
||||
TemplateTypeMap
|
||||
} from "@/pages/DataAnnotation/annotation.const.tsx";
|
||||
|
||||
const TEMPLATE_ADMIN_KEY = "datamate_template_admin";
|
||||
|
||||
const TemplateList: React.FC = () => {
|
||||
const [isAdmin, setIsAdmin] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
// 检查 localStorage 中是否存在特殊键
|
||||
const hasAdminKey = localStorage.getItem(TEMPLATE_ADMIN_KEY) !== null;
|
||||
setIsAdmin(hasAdminKey);
|
||||
}, []);
|
||||
const filterOptions = [
|
||||
{
|
||||
key: "category",
|
||||
@@ -225,23 +234,7 @@ const TemplateList: React.FC = () => {
|
||||
<Tag color={getCategoryColor(category)}>{ClassificationMap[category as keyof typeof ClassificationMap]?.label || category}</Tag>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "类型",
|
||||
dataIndex: "builtIn",
|
||||
key: "builtIn",
|
||||
width: 100,
|
||||
render: (builtIn: boolean) => (
|
||||
<Tag color={builtIn ? "gold" : "default"}>
|
||||
{builtIn ? "系统内置" : "自定义"}
|
||||
</Tag>
|
||||
),
|
||||
},
|
||||
{
|
||||
title: "版本",
|
||||
dataIndex: "version",
|
||||
key: "version",
|
||||
width: 80,
|
||||
},
|
||||
|
||||
{
|
||||
title: "创建时间",
|
||||
dataIndex: "createdAt",
|
||||
@@ -263,6 +256,7 @@ const TemplateList: React.FC = () => {
|
||||
onClick={() => handleView(record)}
|
||||
/>
|
||||
</Tooltip>
|
||||
{isAdmin && (
|
||||
<>
|
||||
<Tooltip title="编辑">
|
||||
<Button
|
||||
@@ -286,6 +280,7 @@ const TemplateList: React.FC = () => {
|
||||
</Tooltip>
|
||||
</Popconfirm>
|
||||
</>
|
||||
)}
|
||||
</Space>
|
||||
),
|
||||
},
|
||||
@@ -310,11 +305,13 @@ const TemplateList: React.FC = () => {
|
||||
</div>
|
||||
|
||||
{/* Right side: Create button */}
|
||||
{isAdmin && (
|
||||
<div className="flex items-center gap-2">
|
||||
<Button type="primary" icon={<PlusOutlined />} onClick={handleCreate}>
|
||||
创建模板
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Card>
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
import { TagBrowser } from "./components";
|
||||
|
||||
const { Paragraph } = Typography;
|
||||
const PREVIEW_DRAWER_WIDTH = "80vw";
|
||||
|
||||
interface VisualTemplateBuilderProps {
|
||||
onSave?: (templateCode: string) => void;
|
||||
@@ -129,7 +130,7 @@ const VisualTemplateBuilder: React.FC<VisualTemplateBuilderProps> = ({
|
||||
<Drawer
|
||||
title="模板代码预览"
|
||||
placement="right"
|
||||
width={600}
|
||||
width={PREVIEW_DRAWER_WIDTH}
|
||||
open={previewVisible}
|
||||
onClose={() => setPreviewVisible(false)}
|
||||
>
|
||||
|
||||
@@ -3,16 +3,19 @@ import { get, post, put, del, download } from "@/utils/request";
|
||||
// 导出格式类型
|
||||
export type ExportFormat = "json" | "jsonl" | "csv" | "coco" | "yolo";
|
||||
|
||||
type RequestParams = Record<string, unknown>;
|
||||
type RequestPayload = Record<string, unknown>;
|
||||
|
||||
// 标注任务管理相关接口
|
||||
export function queryAnnotationTasksUsingGet(params?: any) {
|
||||
export function queryAnnotationTasksUsingGet(params?: RequestParams) {
|
||||
return get("/api/annotation/project", params);
|
||||
}
|
||||
|
||||
export function createAnnotationTaskUsingPost(data: any) {
|
||||
export function createAnnotationTaskUsingPost(data: RequestPayload) {
|
||||
return post("/api/annotation/project", data);
|
||||
}
|
||||
|
||||
export function syncAnnotationTaskUsingPost(data: any) {
|
||||
export function syncAnnotationTaskUsingPost(data: RequestPayload) {
|
||||
return post(`/api/annotation/task/sync`, data);
|
||||
}
|
||||
|
||||
@@ -25,7 +28,7 @@ export function getAnnotationTaskByIdUsingGet(taskId: string) {
|
||||
return get(`/api/annotation/project/${taskId}`);
|
||||
}
|
||||
|
||||
export function updateAnnotationTaskByIdUsingPut(taskId: string, data: any) {
|
||||
export function updateAnnotationTaskByIdUsingPut(taskId: string, data: RequestPayload) {
|
||||
return put(`/api/annotation/project/${taskId}`, data);
|
||||
}
|
||||
|
||||
@@ -35,17 +38,17 @@ export function getTagConfigUsingGet() {
|
||||
}
|
||||
|
||||
// 标注模板管理
|
||||
export function queryAnnotationTemplatesUsingGet(params?: any) {
|
||||
export function queryAnnotationTemplatesUsingGet(params?: RequestParams) {
|
||||
return get("/api/annotation/template", params);
|
||||
}
|
||||
|
||||
export function createAnnotationTemplateUsingPost(data: any) {
|
||||
export function createAnnotationTemplateUsingPost(data: RequestPayload) {
|
||||
return post("/api/annotation/template", data);
|
||||
}
|
||||
|
||||
export function updateAnnotationTemplateByIdUsingPut(
|
||||
templateId: string | number,
|
||||
data: any
|
||||
data: RequestPayload
|
||||
) {
|
||||
return put(`/api/annotation/template/${templateId}`, data);
|
||||
}
|
||||
@@ -65,7 +68,7 @@ export function getEditorProjectInfoUsingGet(projectId: string) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}`);
|
||||
}
|
||||
|
||||
export function listEditorTasksUsingGet(projectId: string, params?: any) {
|
||||
export function listEditorTasksUsingGet(projectId: string, params?: RequestParams) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks`, params);
|
||||
}
|
||||
|
||||
@@ -77,11 +80,19 @@ export function getEditorTaskUsingGet(
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}`, params);
|
||||
}
|
||||
|
||||
export function getEditorTaskSegmentUsingGet(
|
||||
projectId: string,
|
||||
fileId: string,
|
||||
params: { segmentIndex: number }
|
||||
) {
|
||||
return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/segments`, params);
|
||||
}
|
||||
|
||||
export function upsertEditorAnnotationUsingPut(
|
||||
projectId: string,
|
||||
fileId: string,
|
||||
data: {
|
||||
annotation: any;
|
||||
annotation: Record<string, unknown>;
|
||||
expectedUpdatedAt?: string;
|
||||
segmentIndex?: number;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,71 @@ import {
|
||||
CloseCircleOutlined,
|
||||
} from "@ant-design/icons";
|
||||
|
||||
type AnnotationTaskStatistics = {
|
||||
accuracy?: number | string;
|
||||
averageTime?: number | string;
|
||||
reviewCount?: number | string;
|
||||
};
|
||||
|
||||
type AnnotationTaskPayload = {
|
||||
id?: string;
|
||||
labelingProjId?: string;
|
||||
labelingProjectId?: string;
|
||||
projId?: string;
|
||||
labeling_project_id?: string;
|
||||
name?: string;
|
||||
description?: string;
|
||||
datasetId?: string;
|
||||
datasetName?: string;
|
||||
dataset_name?: string;
|
||||
labelingType?: string;
|
||||
labeling_type?: string;
|
||||
template?: {
|
||||
labelingType?: string;
|
||||
labeling_type?: string;
|
||||
};
|
||||
totalCount?: number;
|
||||
total_count?: number;
|
||||
annotatedCount?: number;
|
||||
annotated_count?: number;
|
||||
inProgressCount?: number;
|
||||
in_progress_count?: number;
|
||||
segmentationEnabled?: boolean;
|
||||
segmentation_enabled?: boolean;
|
||||
createdAt?: string;
|
||||
created_at?: string;
|
||||
updatedAt?: string;
|
||||
updated_at?: string;
|
||||
status?: string;
|
||||
statistics?: AnnotationTaskStatistics;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
export type AnnotationTaskListItem = {
|
||||
id?: string;
|
||||
labelingProjId?: string;
|
||||
projId?: string;
|
||||
name?: string;
|
||||
description?: string;
|
||||
datasetId?: string;
|
||||
datasetName?: string;
|
||||
labelingType?: string;
|
||||
totalCount?: number;
|
||||
annotatedCount?: number;
|
||||
inProgressCount?: number;
|
||||
segmentationEnabled?: boolean;
|
||||
createdAt?: string;
|
||||
updatedAt?: string;
|
||||
icon?: JSX.Element;
|
||||
iconColor?: string;
|
||||
status?: {
|
||||
label: string;
|
||||
color: string;
|
||||
};
|
||||
statistics?: { label: string; value: string | number }[];
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
export const AnnotationTaskStatusMap = {
|
||||
[AnnotationTaskStatus.ACTIVE]: {
|
||||
label: "活跃",
|
||||
@@ -27,9 +92,16 @@ export const AnnotationTaskStatusMap = {
|
||||
},
|
||||
};
|
||||
|
||||
export function mapAnnotationTask(task: any) {
|
||||
export function mapAnnotationTask(task: AnnotationTaskPayload): AnnotationTaskListItem {
|
||||
// Normalize labeling project id from possible backend field names
|
||||
const labelingProjId = task?.labelingProjId || task?.labelingProjectId || task?.projId || task?.labeling_project_id || "";
|
||||
const segmentationEnabled = task?.segmentationEnabled ?? task?.segmentation_enabled ?? false;
|
||||
const inProgressCount = task?.inProgressCount ?? task?.in_progress_count ?? 0;
|
||||
const labelingType =
|
||||
task?.labelingType ||
|
||||
task?.labeling_type ||
|
||||
task?.template?.labelingType ||
|
||||
task?.template?.labeling_type;
|
||||
|
||||
const statsArray = task?.statistics
|
||||
? [
|
||||
@@ -45,6 +117,9 @@ export function mapAnnotationTask(task: any) {
|
||||
// provide consistent field for components
|
||||
labelingProjId,
|
||||
projId: labelingProjId,
|
||||
segmentationEnabled,
|
||||
inProgressCount,
|
||||
labelingType,
|
||||
name: task.name,
|
||||
description: task.description || "",
|
||||
datasetName: task.datasetName || task.dataset_name || "-",
|
||||
|
||||
@@ -10,6 +10,7 @@ export enum AnnotationTaskStatus {
|
||||
|
||||
export enum AnnotationResultStatus {
|
||||
ANNOTATED = "ANNOTATED",
|
||||
IN_PROGRESS = "IN_PROGRESS",
|
||||
NO_ANNOTATION = "NO_ANNOTATION",
|
||||
NOT_APPLICABLE = "NOT_APPLICABLE",
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ import {
|
||||
getObjectDisplayName,
|
||||
type LabelStudioTagConfig,
|
||||
} from "../annotation.tagconfig";
|
||||
import { DataType } from "../annotation.model";
|
||||
|
||||
const { Text, Title } = Typography;
|
||||
|
||||
@@ -44,10 +45,22 @@ interface TemplateConfigurationTreeEditorProps {
|
||||
readOnly?: boolean;
|
||||
readOnlyStructure?: boolean;
|
||||
height?: number | string;
|
||||
dataType?: DataType;
|
||||
}
|
||||
|
||||
const DEFAULT_ROOT_TAG = "View";
|
||||
const CHILD_TAGS = ["Label", "Choice", "Relation", "Item", "Path", "Channel"];
|
||||
const OBJECT_TAGS_BY_DATA_TYPE: Record<DataType, string[]> = {
|
||||
[DataType.TEXT]: ["Text", "Paragraphs", "Markdown"],
|
||||
[DataType.IMAGE]: ["Image", "Bitmask"],
|
||||
[DataType.AUDIO]: ["Audio", "AudioPlus"],
|
||||
[DataType.VIDEO]: ["Video"],
|
||||
[DataType.PDF]: ["PDF"],
|
||||
[DataType.TIMESERIES]: ["Timeseries", "TimeSeries", "Vector"],
|
||||
[DataType.CHAT]: ["Chat"],
|
||||
[DataType.HTML]: ["HyperText", "Markdown"],
|
||||
[DataType.TABLE]: ["Table", "Vector"],
|
||||
};
|
||||
|
||||
const createId = () =>
|
||||
`node_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
|
||||
@@ -247,19 +260,35 @@ const createNode = (
|
||||
attrs[attr] = "";
|
||||
});
|
||||
|
||||
if (objectConfig && attrs.name !== undefined) {
|
||||
if (objectConfig) {
|
||||
const name = getDefaultName(tag);
|
||||
if (!attrs.name) {
|
||||
attrs.name = name;
|
||||
if (attrs.value !== undefined) {
|
||||
attrs.value = `$${name}`;
|
||||
}
|
||||
if (!attrs.value) {
|
||||
attrs.value = `$${attrs.name}`;
|
||||
}
|
||||
}
|
||||
|
||||
if (controlConfig && attrs.name !== undefined) {
|
||||
if (controlConfig) {
|
||||
const isLabeling = controlConfig.category === "labeling";
|
||||
|
||||
if (isLabeling) {
|
||||
if (!attrs.name) {
|
||||
attrs.name = getDefaultName(tag);
|
||||
if (attrs.toName !== undefined) {
|
||||
}
|
||||
if (!attrs.toName) {
|
||||
attrs.toName = objectNames[0] || "";
|
||||
}
|
||||
} else {
|
||||
// For layout controls, only fill if required
|
||||
if (attrs.name !== undefined && !attrs.name) {
|
||||
attrs.name = getDefaultName(tag);
|
||||
}
|
||||
if (attrs.toName !== undefined && !attrs.toName) {
|
||||
attrs.toName = objectNames[0] || "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (CHILD_TAGS.includes(tag)) {
|
||||
@@ -420,14 +449,13 @@ const TemplateConfigurationTreeEditor = ({
|
||||
readOnly = false,
|
||||
readOnlyStructure = false,
|
||||
height = 420,
|
||||
dataType,
|
||||
}: TemplateConfigurationTreeEditorProps) => {
|
||||
const { config } = useTagConfig(false);
|
||||
const [tree, setTree] = useState<XmlNode>(() => createEmptyTree());
|
||||
const [selectedId, setSelectedId] = useState<string>(tree.id);
|
||||
const [parseError, setParseError] = useState<string | null>(null);
|
||||
const lastSerialized = useRef<string>("");
|
||||
const [addChildTag, setAddChildTag] = useState<string | undefined>();
|
||||
const [addSiblingTag, setAddSiblingTag] = useState<string | undefined>();
|
||||
|
||||
useEffect(() => {
|
||||
if (!value) {
|
||||
@@ -498,11 +526,17 @@ const TemplateConfigurationTreeEditor = ({
|
||||
|
||||
const objectOptions = useMemo(() => {
|
||||
if (!config?.objects) return [];
|
||||
return Object.keys(config.objects).map((tag) => ({
|
||||
const options = Object.keys(config.objects).map((tag) => ({
|
||||
value: tag,
|
||||
label: getObjectDisplayName(tag),
|
||||
}));
|
||||
}, [config]);
|
||||
if (!dataType) return options;
|
||||
const allowedTags = OBJECT_TAGS_BY_DATA_TYPE[dataType];
|
||||
if (!allowedTags) return options;
|
||||
const allowedSet = new Set(allowedTags);
|
||||
const filtered = options.filter((option) => allowedSet.has(option.value));
|
||||
return filtered.length > 0 ? filtered : options;
|
||||
}, [config, dataType]);
|
||||
|
||||
const tagOptions = useMemo(() => {
|
||||
const options = [] as {
|
||||
@@ -763,9 +797,8 @@ const TemplateConfigurationTreeEditor = ({
|
||||
<Select
|
||||
placeholder="添加子节点"
|
||||
options={tagOptions}
|
||||
value={addChildTag}
|
||||
value={null}
|
||||
onChange={(value) => {
|
||||
setAddChildTag(undefined);
|
||||
handleAddNode(value, "child");
|
||||
}}
|
||||
disabled={isStructureLocked}
|
||||
@@ -773,9 +806,8 @@ const TemplateConfigurationTreeEditor = ({
|
||||
<Select
|
||||
placeholder="添加同级节点"
|
||||
options={tagOptions}
|
||||
value={addSiblingTag}
|
||||
value={null}
|
||||
onChange={(value) => {
|
||||
setAddSiblingTag(undefined);
|
||||
handleAddNode(value, "sibling");
|
||||
}}
|
||||
disabled={isStructureLocked || selectedNode.id === tree.id}
|
||||
|
||||
@@ -7,6 +7,8 @@ interface PreviewPromptModalProps {
|
||||
evaluationPrompt: string;
|
||||
}
|
||||
|
||||
const PREVIEW_MODAL_WIDTH = "80vw";
|
||||
|
||||
const PreviewPromptModal: React.FC<PreviewPromptModalProps> = ({ previewVisible, onCancel, evaluationPrompt }) => {
|
||||
return (
|
||||
<Modal
|
||||
@@ -24,7 +26,7 @@ const PreviewPromptModal: React.FC<PreviewPromptModalProps> = ({ previewVisible,
|
||||
关闭
|
||||
</Button>
|
||||
]}
|
||||
width={800}
|
||||
width={PREVIEW_MODAL_WIDTH}
|
||||
>
|
||||
<div style={{
|
||||
background: '#f5f5f5',
|
||||
|
||||
@@ -11,10 +11,12 @@ export default function BasicInformation({
|
||||
data,
|
||||
setData,
|
||||
hidden = [],
|
||||
datasetTypeOptions = datasetTypes,
|
||||
}: {
|
||||
data: DatasetFormData;
|
||||
setData: Dispatch<SetStateAction<DatasetFormData>>;
|
||||
hidden?: string[];
|
||||
datasetTypeOptions?: DatasetTypeOption[];
|
||||
}) {
|
||||
const [tagOptions, setTagOptions] = useState<DatasetTagOption[]>([]);
|
||||
const [collectionOptions, setCollectionOptions] = useState<SelectOption[]>([]);
|
||||
@@ -119,7 +121,7 @@ export default function BasicInformation({
|
||||
rules={[{ required: true, message: "请选择数据集类型" }]}
|
||||
>
|
||||
<RadioCard
|
||||
options={datasetTypes}
|
||||
options={datasetTypeOptions}
|
||||
value={data.type}
|
||||
onChange={(datasetType) => setData({ ...data, datasetType })}
|
||||
/>
|
||||
@@ -149,6 +151,8 @@ type DatasetFormData = Partial<Dataset> & {
|
||||
parentDatasetId?: string;
|
||||
};
|
||||
|
||||
type DatasetTypeOption = (typeof datasetTypes)[number];
|
||||
|
||||
type DatasetTagOption = {
|
||||
label: string;
|
||||
value: string;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch, Tooltip } from "antd";
|
||||
import { InboxOutlined, QuestionCircleOutlined } from "@ant-design/icons";
|
||||
import { dataSourceOptions } from "../../dataset.const";
|
||||
import { Dataset, DataSource } from "../../dataset.model";
|
||||
import { Dataset, DatasetType, DataSource } from "../../dataset.model";
|
||||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
|
||||
import { updateDatasetByIdUsingPut } from "../../dataset.api";
|
||||
import { sliceFile } from "@/utils/file.util";
|
||||
import { sliceFile, shouldStreamUpload } from "@/utils/file.util";
|
||||
import Dragger from "antd/es/upload/Dragger";
|
||||
|
||||
const TEXT_FILE_MIME_PREFIX = "text/";
|
||||
@@ -90,14 +90,16 @@ async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
|
||||
const lines = text.split(/\r?\n/).filter((line: string) => line.trim() !== "");
|
||||
if (lines.length === 0) return [];
|
||||
|
||||
// 生成文件名:原文件名_序号.扩展名
|
||||
// 生成文件名:原文件名_序号(不保留后缀)
|
||||
const nameParts = file.name.split(".");
|
||||
const ext = nameParts.length > 1 ? "." + nameParts.pop() : "";
|
||||
if (nameParts.length > 1) {
|
||||
nameParts.pop();
|
||||
}
|
||||
const baseName = nameParts.join(".");
|
||||
const padLength = String(lines.length).length;
|
||||
|
||||
return lines.map((line: string, index: number) => {
|
||||
const newFileName = `${baseName}_${String(index + 1).padStart(padLength, "0")}${ext}`;
|
||||
const newFileName = `${baseName}_${String(index + 1).padStart(padLength, "0")}`;
|
||||
const blob = new Blob([line], { type: "text/plain" });
|
||||
const newFile = new File([blob], newFileName, { type: "text/plain" });
|
||||
return {
|
||||
@@ -159,21 +161,80 @@ export default function ImportConfiguration({
|
||||
if (files.length === 0) return false;
|
||||
return files.some((file) => !isTextUploadFile(file));
|
||||
}, [importConfig.files]);
|
||||
const isTextDataset = data?.datasetType === DatasetType.TEXT;
|
||||
|
||||
// 本地上传文件相关逻辑
|
||||
|
||||
const handleUpload = async (dataset: Dataset) => {
|
||||
let filesToUpload =
|
||||
const filesToUpload =
|
||||
(form.getFieldValue("files") as UploadFile[] | undefined) || [];
|
||||
|
||||
// 如果启用分行分割,处理文件
|
||||
// 如果启用分行分割,对大文件使用流式处理
|
||||
if (importConfig.splitByLine && !hasNonTextFile) {
|
||||
const splitResults = await Promise.all(
|
||||
filesToUpload.map((file) => splitFileByLines(file))
|
||||
);
|
||||
filesToUpload = splitResults.flat();
|
||||
// 检查是否有大文件需要流式分割上传
|
||||
const filesForStreamUpload: File[] = [];
|
||||
const filesForNormalUpload: UploadFile[] = [];
|
||||
|
||||
for (const file of filesToUpload) {
|
||||
const originFile = file.originFileObj ?? file;
|
||||
if (originFile instanceof File && shouldStreamUpload(originFile)) {
|
||||
filesForStreamUpload.push(originFile);
|
||||
} else {
|
||||
filesForNormalUpload.push(file);
|
||||
}
|
||||
}
|
||||
|
||||
// 大文件使用流式分割上传
|
||||
if (filesForStreamUpload.length > 0) {
|
||||
window.dispatchEvent(
|
||||
new CustomEvent("upload:dataset-stream", {
|
||||
detail: {
|
||||
dataset,
|
||||
files: filesForStreamUpload,
|
||||
updateEvent,
|
||||
hasArchive: importConfig.hasArchive,
|
||||
prefix: currentPrefix,
|
||||
},
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// 小文件使用传统分割方式
|
||||
if (filesForNormalUpload.length > 0) {
|
||||
const splitResults = await Promise.all(
|
||||
filesForNormalUpload.map((file) => splitFileByLines(file))
|
||||
);
|
||||
const smallFilesToUpload = splitResults.flat();
|
||||
|
||||
// 计算分片列表
|
||||
const sliceList = smallFilesToUpload.map((file) => {
|
||||
const originFile = (file.originFileObj ?? file) as Blob;
|
||||
const slices = sliceFile(originFile);
|
||||
return {
|
||||
originFile: originFile,
|
||||
slices,
|
||||
name: file.name,
|
||||
size: originFile.size || 0,
|
||||
};
|
||||
});
|
||||
|
||||
console.log("[ImportConfiguration] Uploading small files with currentPrefix:", currentPrefix);
|
||||
window.dispatchEvent(
|
||||
new CustomEvent("upload:dataset", {
|
||||
detail: {
|
||||
dataset,
|
||||
files: sliceList,
|
||||
updateEvent,
|
||||
hasArchive: importConfig.hasArchive,
|
||||
prefix: currentPrefix,
|
||||
},
|
||||
})
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// 未启用分行分割,使用普通上传
|
||||
// 计算分片列表
|
||||
const sliceList = filesToUpload.map((file) => {
|
||||
const originFile = (file.originFileObj ?? file) as Blob;
|
||||
@@ -233,6 +294,10 @@ export default function ImportConfiguration({
|
||||
if (!data) return;
|
||||
console.log('[ImportConfiguration] handleImportData called, currentPrefix:', currentPrefix);
|
||||
if (importConfig.source === DataSource.UPLOAD) {
|
||||
// 立即显示任务中心,让用户感知上传已开始(在文件分割等耗时操作之前)
|
||||
window.dispatchEvent(
|
||||
new CustomEvent("show:task-popover", { detail: { show: true } })
|
||||
);
|
||||
await handleUpload(data);
|
||||
} else if (importConfig.source === DataSource.COLLECTION) {
|
||||
await updateDatasetByIdUsingPut(data.id, {
|
||||
@@ -363,6 +428,7 @@ export default function ImportConfiguration({
|
||||
>
|
||||
<Switch />
|
||||
</Form.Item>
|
||||
{isTextDataset && (
|
||||
<Form.Item
|
||||
label={
|
||||
<span>
|
||||
@@ -383,6 +449,7 @@ export default function ImportConfiguration({
|
||||
>
|
||||
<Switch disabled={hasNonTextFile} />
|
||||
</Form.Item>
|
||||
)}
|
||||
<Form.Item
|
||||
label="上传文件"
|
||||
name="files"
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
Descriptions,
|
||||
DescriptionsProps,
|
||||
Modal,
|
||||
Spin,
|
||||
Table,
|
||||
Input,
|
||||
} from "antd";
|
||||
@@ -21,8 +22,8 @@ type DatasetFileRow = DatasetFile & {
|
||||
|
||||
const PREVIEW_MAX_HEIGHT = 500;
|
||||
const PREVIEW_MODAL_WIDTH = {
|
||||
text: 800,
|
||||
media: 700,
|
||||
text: "80vw",
|
||||
media: "80vw",
|
||||
};
|
||||
const PREVIEW_TEXT_FONT_SIZE = 12;
|
||||
const PREVIEW_TEXT_PADDING = 12;
|
||||
@@ -52,6 +53,8 @@ export default function Overview({
|
||||
previewFileType,
|
||||
previewMediaUrl,
|
||||
previewLoading,
|
||||
officePreviewStatus,
|
||||
officePreviewError,
|
||||
closePreview,
|
||||
handleDeleteFile,
|
||||
handleDownloadFile,
|
||||
@@ -447,11 +450,39 @@ export default function Overview({
|
||||
</div>
|
||||
)}
|
||||
{previewFileType === "pdf" && (
|
||||
<>
|
||||
{previewMediaUrl ? (
|
||||
<iframe
|
||||
src={previewMediaUrl}
|
||||
title={previewFileName || "PDF 预览"}
|
||||
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }}
|
||||
/>
|
||||
) : (
|
||||
<div
|
||||
style={{
|
||||
height: `${PREVIEW_MAX_HEIGHT}px`,
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
gap: 12,
|
||||
color: "#666",
|
||||
}}
|
||||
>
|
||||
{officePreviewStatus === "FAILED" ? (
|
||||
<>
|
||||
<div>转换失败</div>
|
||||
<div>{officePreviewError || "请稍后重试"}</div>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Spin />
|
||||
<div>正在转换,请稍候...</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{previewFileType === "video" && (
|
||||
<div style={{ textAlign: "center" }}>
|
||||
|
||||
@@ -2,9 +2,8 @@ import type {
|
||||
Dataset,
|
||||
DatasetFile,
|
||||
} from "@/pages/DataManagement/dataset.model";
|
||||
import { DatasetType } from "@/pages/DataManagement/dataset.model";
|
||||
import { App } from "antd";
|
||||
import { useState } from "react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import {
|
||||
PREVIEW_TEXT_MAX_LENGTH,
|
||||
resolvePreviewFileType,
|
||||
@@ -19,9 +18,33 @@ import {
|
||||
createDatasetDirectoryUsingPost,
|
||||
downloadDirectoryUsingGet,
|
||||
deleteDirectoryUsingDelete,
|
||||
queryDatasetFilePreviewStatusUsingGet,
|
||||
convertDatasetFilePreviewUsingPost,
|
||||
} from "../dataset.api";
|
||||
import { useParams } from "react-router";
|
||||
|
||||
const OFFICE_FILE_EXTENSIONS = [".doc", ".docx"];
|
||||
const OFFICE_PREVIEW_POLL_INTERVAL = 2000;
|
||||
const OFFICE_PREVIEW_POLL_MAX_TIMES = 60;
|
||||
|
||||
type OfficePreviewStatus = "UNSET" | "PENDING" | "PROCESSING" | "READY" | "FAILED";
|
||||
|
||||
/**
 * Report whether a file name ends with one of OFFICE_FILE_EXTENSIONS.
 * The comparison is case-insensitive; a missing name is treated as "".
 */
const isOfficeFileName = (fileName?: string) => {
  const normalizedName = fileName ? fileName.toLowerCase() : "";
  for (const extension of OFFICE_FILE_EXTENSIONS) {
    if (normalizedName.endsWith(extension)) {
      return true;
    }
  }
  return false;
};
|
||||
|
||||
/**
 * Map a raw status string onto the OfficePreviewStatus union.
 * Matching ignores case; a missing, empty or unrecognized value yields "UNSET".
 */
const normalizeOfficePreviewStatus = (status?: string): OfficePreviewStatus => {
  const candidate = status ? status.toUpperCase() : "";
  switch (candidate) {
    case "PENDING":
    case "PROCESSING":
    case "READY":
    case "FAILED":
      return candidate as OfficePreviewStatus;
    default:
      return "UNSET";
  }
};
|
||||
|
||||
|
||||
export function useFilesOperation(dataset: Dataset) {
|
||||
const { message } = App.useApp();
|
||||
@@ -44,6 +67,23 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
|
||||
const [previewMediaUrl, setPreviewMediaUrl] = useState("");
|
||||
const [previewLoading, setPreviewLoading] = useState(false);
|
||||
const [officePreviewStatus, setOfficePreviewStatus] = useState<OfficePreviewStatus | null>(null);
|
||||
const [officePreviewError, setOfficePreviewError] = useState("");
|
||||
const officePreviewPollingRef = useRef<number | null>(null);
|
||||
const officePreviewFileRef = useRef<string | null>(null);
|
||||
|
||||
// Cancel the pending office-preview poll timer, if one is stored in the ref.
const clearOfficePreviewPolling = useCallback(() => {
  const pendingTimer = officePreviewPollingRef.current;
  if (!pendingTimer) {
    return;
  }
  window.clearTimeout(pendingTimer);
  officePreviewPollingRef.current = null;
}, []);
|
||||
|
||||
// No setup work here: the effect only hands React a cleanup that stops polling.
useEffect(() => clearOfficePreviewPolling, [clearOfficePreviewPolling]);
|
||||
|
||||
const fetchFiles = async (
|
||||
prefix?: string,
|
||||
@@ -52,14 +92,13 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
) => {
|
||||
// 如果明确传了 prefix(包括空字符串),使用传入的值;否则使用当前 pagination.prefix
|
||||
const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
|
||||
const shouldExcludeDerivedFiles = dataset?.datasetType === DatasetType.TEXT;
|
||||
|
||||
const params: DatasetFilesQueryParams = {
|
||||
page: current !== undefined ? current : pagination.current,
|
||||
size: pageSize !== undefined ? pageSize : pagination.pageSize,
|
||||
isWithDirectory: true,
|
||||
prefix: targetPrefix,
|
||||
...(shouldExcludeDerivedFiles ? { excludeDerivedFiles: true } : {}),
|
||||
excludeDerivedFiles: true,
|
||||
};
|
||||
|
||||
const { data } = await queryDatasetFilesUsingGet(id!, params);
|
||||
@@ -113,17 +152,61 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
return;
|
||||
}
|
||||
|
||||
const previewUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/preview`;
|
||||
setPreviewFileName(file.fileName);
|
||||
setPreviewContent("");
|
||||
setPreviewMediaUrl("");
|
||||
|
||||
if (isOfficeFileName(file?.fileName)) {
|
||||
setPreviewFileType("pdf");
|
||||
setPreviewVisible(true);
|
||||
setPreviewLoading(true);
|
||||
setOfficePreviewStatus("PROCESSING");
|
||||
setOfficePreviewError("");
|
||||
officePreviewFileRef.current = file.id;
|
||||
try {
|
||||
const { data: statusData } = await queryDatasetFilePreviewStatusUsingGet(datasetId, file.id);
|
||||
const currentStatus = normalizeOfficePreviewStatus(statusData?.status);
|
||||
if (currentStatus === "READY") {
|
||||
setPreviewMediaUrl(previewUrl);
|
||||
setOfficePreviewStatus("READY");
|
||||
setPreviewLoading(false);
|
||||
return;
|
||||
}
|
||||
if (currentStatus === "PROCESSING") {
|
||||
pollOfficePreviewStatus(datasetId, file.id, 0);
|
||||
return;
|
||||
}
|
||||
const { data } = await convertDatasetFilePreviewUsingPost(datasetId, file.id);
|
||||
const status = normalizeOfficePreviewStatus(data?.status);
|
||||
if (status === "READY") {
|
||||
setPreviewMediaUrl(previewUrl);
|
||||
setOfficePreviewStatus("READY");
|
||||
} else if (status === "FAILED") {
|
||||
setOfficePreviewStatus("FAILED");
|
||||
setOfficePreviewError(data?.previewError || "转换失败,请稍后重试");
|
||||
} else {
|
||||
setOfficePreviewStatus("PROCESSING");
|
||||
pollOfficePreviewStatus(datasetId, file.id, 0);
|
||||
return;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("触发预览转换失败", error);
|
||||
message.error({ content: "触发预览转换失败" });
|
||||
setOfficePreviewStatus("FAILED");
|
||||
setOfficePreviewError("触发预览转换失败");
|
||||
} finally {
|
||||
setPreviewLoading(false);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const fileType = resolvePreviewFileType(file?.fileName);
|
||||
if (!fileType) {
|
||||
message.warning({ content: "不支持预览该文件类型" });
|
||||
return;
|
||||
}
|
||||
|
||||
const previewUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/preview`;
|
||||
setPreviewFileName(file.fileName);
|
||||
setPreviewFileType(fileType);
|
||||
setPreviewContent("");
|
||||
setPreviewMediaUrl("");
|
||||
|
||||
if (fileType === "text") {
|
||||
setPreviewLoading(true);
|
||||
@@ -149,13 +232,62 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
};
|
||||
|
||||
// Close the preview modal and reset every piece of preview state to its default.
const closePreview = () => {
  // Stop office-preview polling and forget which file it was tracking.
  clearOfficePreviewPolling();
  officePreviewFileRef.current = null;

  // Hide the modal, then reset the generic preview fields.
  setPreviewVisible(false);
  setPreviewFileName("");
  setPreviewFileType("text");
  setPreviewContent("");
  setPreviewMediaUrl("");

  // Reset the office-conversion specific fields.
  setOfficePreviewError("");
  setOfficePreviewStatus(null);
};
|
||||
|
||||
/**
 * Schedule one poll of the office-preview conversion status for `fileId`.
 *
 * After OFFICE_PREVIEW_POLL_INTERVAL ms the status endpoint is queried:
 *  - READY: expose the preview URL and stop;
 *  - FAILED: surface the error and stop;
 *  - anything else, or a request error: schedule the next attempt, until
 *    OFFICE_PREVIEW_POLL_MAX_TIMES attempts have been made, then report a timeout.
 *
 * @param datasetId dataset that owns the file
 * @param fileId    file whose preview is being converted
 * @param attempt   zero-based index of this polling attempt
 */
const pollOfficePreviewStatus = useCallback(
  async (datasetId: string, fileId: string, attempt: number) => {
    // Only one poll timer may be pending at a time.
    clearOfficePreviewPolling();
    officePreviewPollingRef.current = window.setTimeout(async () => {
      // True once the preview was closed or switched to another file.
      const isStale = () => officePreviewFileRef.current !== fileId;

      // Move the preview into the FAILED state with the given message.
      const failWith = (reason: string) => {
        setOfficePreviewStatus("FAILED");
        setOfficePreviewError(reason);
        setPreviewLoading(false);
      };

      // Schedule the next attempt, or give up once the attempt budget is spent.
      const retryOrTimeout = () => {
        if (attempt >= OFFICE_PREVIEW_POLL_MAX_TIMES - 1) {
          failWith("转换超时,请稍后重试");
          return;
        }
        pollOfficePreviewStatus(datasetId, fileId, attempt + 1);
      };

      if (isStale()) {
        return;
      }
      try {
        const { data } = await queryDatasetFilePreviewStatusUsingGet(datasetId, fileId);
        // The preview may have been closed or switched while the request was in
        // flight; a late response must not touch state or re-arm polling.
        if (isStale()) {
          return;
        }
        const status = normalizeOfficePreviewStatus(data?.status);
        if (status === "READY") {
          setPreviewMediaUrl(`/api/data-management/datasets/${datasetId}/files/${fileId}/preview`);
          setOfficePreviewStatus("READY");
          setOfficePreviewError("");
          setPreviewLoading(false);
          return;
        }
        if (status === "FAILED") {
          failWith(data?.previewError || "转换失败,请稍后重试");
          return;
        }
        retryOrTimeout();
      } catch (error) {
        console.error("轮询预览状态失败", error);
        if (isStale()) {
          return;
        }
        // A failed request counts as an attempt; keep polling until the budget runs out.
        retryOrTimeout();
      }
    }, OFFICE_PREVIEW_POLL_INTERVAL);
  },
  [clearOfficePreviewPolling]
);
|
||||
|
||||
const handleDeleteFile = async (file: DatasetFile) => {
|
||||
try {
|
||||
await deleteDatasetFileUsingDelete(dataset.id, file.id);
|
||||
@@ -198,6 +330,8 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
previewFileType,
|
||||
previewMediaUrl,
|
||||
previewLoading,
|
||||
officePreviewStatus,
|
||||
officePreviewError,
|
||||
closePreview,
|
||||
fetchFiles,
|
||||
setFileList,
|
||||
|
||||
@@ -329,7 +329,7 @@ export default function DatasetManagementPage() {
|
||||
<div className="gap-4 h-full flex flex-col">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-bold">数据管理</h1>
|
||||
<h1 className="text-xl font-bold">数据集统计</h1>
|
||||
<div className="flex gap-2 items-center">
|
||||
{/* tasks */}
|
||||
<TagManager
|
||||
|
||||
@@ -119,6 +119,22 @@ export function downloadFileByIdUsingGet(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Query the preview status of a dataset file.
 * Issues a GET to `.../datasets/{datasetId}/files/{fileId}/preview/status`.
 */
export function queryDatasetFilePreviewStatusUsingGet(
  datasetId: string | number,
  fileId: string | number
) {
  const statusUrl = `/api/data-management/datasets/${datasetId}/files/${fileId}/preview/status`;
  return get(statusUrl);
}
|
||||
|
||||
/**
 * Trigger preview conversion for a dataset file.
 * Issues a POST with an empty body to
 * `.../datasets/{datasetId}/files/{fileId}/preview/convert`.
 */
export function convertDatasetFilePreviewUsingPost(
  datasetId: string | number,
  fileId: string | number
) {
  const convertUrl = `/api/data-management/datasets/${datasetId}/files/${fileId}/preview/convert`;
  return post(convertUrl, {});
}
|
||||
|
||||
// 删除数据集文件
|
||||
export function deleteDatasetFileUsingDelete(
|
||||
datasetId: string | number,
|
||||
|
||||
@@ -102,6 +102,13 @@ export interface DatasetTask {
|
||||
executionHistory?: { time: string; status: string }[];
|
||||
}
|
||||
|
||||
/**
 * Progress details attached to a task via `TaskItem.streamUploadInfo`.
 */
export interface StreamUploadInfo {
  // Presumably the name of the file being uploaded right now - confirm with the producer.
  currentFile: string;
  // Position of the current file; the task list renders it as "fileIndex/totalFiles".
  fileIndex: number;
  // Number of files in the upload; the file counter is shown only when this is > 1.
  totalFiles: number;
  // Count of lines uploaded so far, as displayed in the task list.
  uploadedLines: number;
}
|
||||
|
||||
export interface TaskItem {
|
||||
key: string;
|
||||
title: string;
|
||||
@@ -113,4 +120,6 @@ export interface TaskItem {
|
||||
updateEvent?: string;
|
||||
size?: number;
|
||||
hasArchive?: boolean;
|
||||
prefix?: string;
|
||||
streamUploadInfo?: StreamUploadInfo;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -36,6 +36,10 @@ const DEFAULT_STATISTICS: StatisticsItem[] = [
|
||||
title: "知识集总数",
|
||||
value: 0,
|
||||
},
|
||||
{
|
||||
title: "知识类别",
|
||||
value: 0,
|
||||
},
|
||||
{
|
||||
title: "文件总数",
|
||||
value: 0,
|
||||
@@ -109,6 +113,10 @@ export default function KnowledgeManagementPage() {
|
||||
title: "知识集总数",
|
||||
value: stats?.totalKnowledgeSets ?? 0,
|
||||
},
|
||||
{
|
||||
title: "知识类别",
|
||||
value: stats?.totalTags ?? 0,
|
||||
},
|
||||
{
|
||||
title: "文件总数",
|
||||
value: stats?.totalFiles ?? 0,
|
||||
@@ -249,7 +257,7 @@ export default function KnowledgeManagementPage() {
|
||||
return (
|
||||
<div className="h-full flex flex-col gap-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-bold">知识管理</h1>
|
||||
<h1 className="text-xl font-bold">知识集</h1>
|
||||
<div className="flex gap-2 items-center">
|
||||
<Button onClick={() => navigate("/data/knowledge-management/search")}>
|
||||
全库搜索
|
||||
@@ -276,7 +284,7 @@ export default function KnowledgeManagementPage() {
|
||||
|
||||
<div className="grid grid-cols-1 gap-4">
|
||||
<Card>
|
||||
<div className="grid grid-cols-3">
|
||||
<div className="grid grid-cols-4">
|
||||
{statisticsData.map((item) => (
|
||||
<Statistic
|
||||
title={item.title}
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
import {
|
||||
knowledgeSourceTypeOptions,
|
||||
knowledgeStatusOptions,
|
||||
// sensitivityOptions,
|
||||
} from "../knowledge-management.const";
|
||||
import {
|
||||
KnowledgeSet,
|
||||
@@ -169,9 +170,9 @@ export default function CreateKnowledgeSet({
|
||||
<Form.Item label="负责人" name="owner">
|
||||
<Input placeholder="请输入负责人" />
|
||||
</Form.Item>
|
||||
<Form.Item label="敏感级别" name="sensitivity">
|
||||
<Input placeholder="请输入敏感级别" />
|
||||
</Form.Item>
|
||||
{/* <Form.Item label="敏感级别" name="sensitivity">
|
||||
<Select options={sensitivityOptions} placeholder="请选择敏感级别" />
|
||||
</Form.Item> */}
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-4">
|
||||
<Form.Item label="有效期开始" name="validFrom">
|
||||
@@ -191,9 +192,6 @@ export default function CreateKnowledgeSet({
|
||||
placeholder="请选择或输入标签"
|
||||
/>
|
||||
</Form.Item>
|
||||
<Form.Item label="扩展元数据" name="metadata">
|
||||
<Input.TextArea placeholder="请输入元数据(JSON)" rows={3} />
|
||||
</Form.Item>
|
||||
</Form>
|
||||
</Modal>
|
||||
</>
|
||||
|
||||
@@ -16,6 +16,7 @@ export default function KnowledgeItemEditor({
|
||||
open,
|
||||
setId,
|
||||
data,
|
||||
parentPrefix,
|
||||
onCancel,
|
||||
onSuccess,
|
||||
readOnly,
|
||||
@@ -23,12 +24,14 @@ export default function KnowledgeItemEditor({
|
||||
open: boolean;
|
||||
setId: string;
|
||||
data?: Partial<KnowledgeItem> | null;
|
||||
parentPrefix?: string;
|
||||
readOnly?: boolean;
|
||||
onCancel: () => void;
|
||||
onSuccess: () => void;
|
||||
}) {
|
||||
const [fileList, setFileList] = useState<UploadFile[]>([]);
|
||||
const [replaceFileList, setReplaceFileList] = useState<UploadFile[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const isFileItem =
|
||||
data?.contentType === KnowledgeContentType.FILE ||
|
||||
data?.sourceType === KnowledgeSourceType.FILE_UPLOAD;
|
||||
@@ -49,7 +52,6 @@ export default function KnowledgeItemEditor({
|
||||
originFileObj: file,
|
||||
},
|
||||
]);
|
||||
message.success("文件已就绪,可提交创建条目");
|
||||
return false;
|
||||
};
|
||||
|
||||
@@ -95,6 +97,7 @@ export default function KnowledgeItemEditor({
|
||||
message.warning("请先选择文件");
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
const formData = new FormData();
|
||||
fileList.forEach((file) => {
|
||||
const origin = file.originFileObj as File | undefined;
|
||||
@@ -102,6 +105,9 @@ export default function KnowledgeItemEditor({
|
||||
formData.append("files", origin);
|
||||
}
|
||||
});
|
||||
if (parentPrefix) {
|
||||
formData.append("parentPrefix", parentPrefix);
|
||||
}
|
||||
await uploadKnowledgeItemsUsingPost(setId, formData);
|
||||
message.success(`已创建 ${fileList.length} 个知识条目`);
|
||||
} else {
|
||||
@@ -121,6 +127,7 @@ export default function KnowledgeItemEditor({
|
||||
message.warning("请先选择要替换的文件");
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
const formData = new FormData();
|
||||
formData.append("file", replaceFile);
|
||||
await replaceKnowledgeItemFileUsingPut(setId, data.id, formData);
|
||||
@@ -132,6 +139,8 @@ export default function KnowledgeItemEditor({
|
||||
onSuccess();
|
||||
} catch {
|
||||
message.error("操作失败,请重试");
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -148,6 +157,7 @@ export default function KnowledgeItemEditor({
|
||||
width={860}
|
||||
maskClosable={false}
|
||||
okButtonProps={{ disabled: readOnly }}
|
||||
confirmLoading={loading}
|
||||
>
|
||||
<Form layout="vertical" disabled={readOnly}>
|
||||
{isCreateMode && (
|
||||
|
||||
@@ -35,6 +35,22 @@ export function queryKnowledgeItemsUsingGet(setId: string, params?: Record<strin
|
||||
return get(`/api/data-management/knowledge-sets/${setId}/items`, params);
|
||||
}
|
||||
|
||||
/**
 * List the directories of a knowledge set.
 * Issues a GET to `.../knowledge-sets/{setId}/directories`, forwarding `params`.
 */
export function queryKnowledgeDirectoriesUsingGet(setId: string, params?: Record<string, unknown>) {
  const directoriesUrl = `/api/data-management/knowledge-sets/${setId}/directories`;
  return get(directoriesUrl, params);
}
|
||||
|
||||
/**
 * Create a directory inside a knowledge set.
 * Issues a POST to `.../knowledge-sets/{setId}/directories` with `data` as the body.
 */
export function createKnowledgeDirectoryUsingPost(setId: string, data: Record<string, unknown>) {
  const directoriesUrl = `/api/data-management/knowledge-sets/${setId}/directories`;
  return post(directoriesUrl, data);
}
|
||||
|
||||
/**
 * Delete a directory of a knowledge set.
 * The directory is identified by a URL-encoded `relativePath` query parameter
 * on `.../knowledge-sets/{setId}/directories`.
 */
export function deleteKnowledgeDirectoryUsingDelete(setId: string, relativePath: string) {
  const searchParams = new URLSearchParams({ relativePath });
  const encodedQuery = searchParams.toString();
  return del(`/api/data-management/knowledge-sets/${setId}/directories?${encodedQuery}`);
}
|
||||
|
||||
// 知识条目文件搜索
|
||||
export function searchKnowledgeItemsUsingGet(params?: Record<string, unknown>) {
|
||||
return get("/api/data-management/knowledge-items/search", params);
|
||||
@@ -70,6 +86,11 @@ export function deleteKnowledgeItemByIdUsingDelete(setId: string, itemId: string
|
||||
return del(`/api/data-management/knowledge-sets/${setId}/items/${itemId}`);
|
||||
}
|
||||
|
||||
/**
 * Delete several knowledge items in one request.
 * Issues a POST to `.../knowledge-sets/{setId}/items/batch-delete` with `{ ids }` as the body.
 */
export function deleteKnowledgeItemsByIdsUsingPost(setId: string, data: { ids: string[] }) {
  const batchDeleteUrl = `/api/data-management/knowledge-sets/${setId}/items/batch-delete`;
  return post(batchDeleteUrl, data);
}
|
||||
|
||||
// 上传知识条目文件
|
||||
export function uploadKnowledgeItemsUsingPost(setId: string, data: FormData) {
|
||||
return post(`/api/data-management/knowledge-sets/${setId}/items/upload`, data);
|
||||
@@ -80,6 +101,16 @@ export function downloadKnowledgeItemFileUsingGet(setId: string, itemId: string,
|
||||
return download(`/api/data-management/knowledge-sets/${setId}/items/${itemId}/file`, null, fileName || "");
|
||||
}
|
||||
|
||||
/**
 * Query the preview status of a knowledge item.
 * Issues a GET to `.../knowledge-sets/{setId}/items/{itemId}/preview/status`.
 */
export function queryKnowledgeItemPreviewStatusUsingGet(setId: string, itemId: string) {
  const statusUrl = `/api/data-management/knowledge-sets/${setId}/items/${itemId}/preview/status`;
  return get(statusUrl);
}
|
||||
|
||||
/**
 * Trigger preview conversion for a knowledge item.
 * Issues a POST with an empty body to
 * `.../knowledge-sets/{setId}/items/{itemId}/preview/convert`.
 */
export function convertKnowledgeItemPreviewUsingPost(setId: string, itemId: string) {
  const convertUrl = `/api/data-management/knowledge-sets/${setId}/items/${itemId}/preview/convert`;
  return post(convertUrl, {});
}
|
||||
|
||||
// 导出知识条目
|
||||
export function exportKnowledgeItemsUsingGet(setId: string) {
|
||||
return download(`/api/data-management/knowledge-sets/${setId}/items/export`);
|
||||
|
||||
@@ -66,6 +66,11 @@ export const knowledgeSourceTypeOptions = [
|
||||
{ label: "文件上传", value: KnowledgeSourceType.FILE_UPLOAD },
|
||||
];
|
||||
|
||||
// export const sensitivityOptions = [
|
||||
// { label: "敏感", value: "敏感" },
|
||||
// { label: "不敏感", value: "不敏感" },
|
||||
// ];
|
||||
|
||||
export type KnowledgeSetView = {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -106,6 +111,7 @@ export type KnowledgeItemView = {
|
||||
sensitivity?: string;
|
||||
sourceDatasetId?: string;
|
||||
sourceFileId?: string;
|
||||
relativePath?: string;
|
||||
metadata?: string;
|
||||
createdAt?: string;
|
||||
updatedAt?: string;
|
||||
@@ -153,6 +159,7 @@ export function mapKnowledgeItem(data: KnowledgeItem): KnowledgeItemView {
|
||||
sensitivity: data.sensitivity,
|
||||
sourceDatasetId: data.sourceDatasetId,
|
||||
sourceFileId: data.sourceFileId,
|
||||
relativePath: data.relativePath,
|
||||
metadata: data.metadata,
|
||||
createdAt: data.createdAt ? formatDateTime(data.createdAt) : "",
|
||||
updatedAt: data.updatedAt ? formatDateTime(data.updatedAt) : "",
|
||||
|
||||
@@ -61,6 +61,7 @@ export interface KnowledgeItem {
|
||||
sensitivity?: string;
|
||||
sourceDatasetId?: string;
|
||||
sourceFileId?: string;
|
||||
relativePath?: string;
|
||||
metadata?: string;
|
||||
createdAt?: string;
|
||||
updatedAt?: string;
|
||||
@@ -68,10 +69,20 @@ export interface KnowledgeItem {
|
||||
updatedBy?: string;
|
||||
}
|
||||
|
||||
/**
 * A directory entry of a knowledge set.
 * NOTE(review): field meanings below are read from the names only - confirm
 * against the `/knowledge-sets/{setId}/directories` response.
 */
export interface KnowledgeDirectory {
  id: string;
  // Presumably the id of the owning knowledge set.
  setId: string;
  name: string;
  // Presumably the directory path relative to the knowledge-set root.
  relativePath: string;
  createdAt?: string;
  updatedAt?: string;
}
|
||||
|
||||
/**
 * Aggregate counters shown in the knowledge-management statistics card.
 */
export interface KnowledgeManagementStatistics {
  // Shown under the "知识集总数" statistic.
  totalKnowledgeSets: number;
  // Shown under the "文件总数" statistic.
  totalFiles: number;
  // NOTE(review): unit not visible here (presumably bytes) - confirm with the backend.
  totalSize: number;
  // Shown under the "知识类别" statistic.
  totalTags: number;
}
|
||||
|
||||
export interface KnowledgeItemSearchResult {
|
||||
@@ -84,6 +95,7 @@ export interface KnowledgeItemSearchResult {
|
||||
sourceFileId?: string;
|
||||
fileName?: string;
|
||||
fileSize?: number;
|
||||
relativePath?: string;
|
||||
createdAt?: string;
|
||||
updatedAt?: string;
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
CloseOutlined,
|
||||
MenuOutlined,
|
||||
SettingOutlined,
|
||||
LogoutOutlined,
|
||||
} from "@ant-design/icons";
|
||||
import { ClipboardList, X } from "lucide-react";
|
||||
import { menuItems } from "@/pages/Layout/menu";
|
||||
@@ -12,6 +13,7 @@ import TaskUpload from "./TaskUpload";
|
||||
import SettingsPage from "../SettingsPage/SettingsPage";
|
||||
import { useAppSelector, useAppDispatch } from "@/store/hooks";
|
||||
import { showSettings, hideSettings } from "@/store/slices/settingsSlice";
|
||||
import { logout } from "@/store/slices/authSlice";
|
||||
|
||||
const isPathMatch = (currentPath: string, targetPath: string) =>
|
||||
currentPath === targetPath || currentPath.startsWith(`${targetPath}/`);
|
||||
@@ -67,6 +69,11 @@ const AsiderAndHeaderLayout = () => {
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Dispatch the `logout` action, then send the user to the login screen.
const handleLogout = (): void => {
  dispatch(logout());
  navigate("/login");
};
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
@@ -148,6 +155,9 @@ const AsiderAndHeaderLayout = () => {
|
||||
>
|
||||
设置
|
||||
</Button>
|
||||
<Button block danger onClick={handleLogout}>
|
||||
退出登录
|
||||
</Button>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-2">
|
||||
@@ -175,6 +185,7 @@ const AsiderAndHeaderLayout = () => {
|
||||
>
|
||||
<SettingOutlined />
|
||||
</Button>
|
||||
<Button block danger onClick={handleLogout} icon={<LogoutOutlined />} />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -3,25 +3,28 @@ import {
|
||||
preUploadUsingPost,
|
||||
uploadFileChunkUsingPost,
|
||||
} from "@/pages/DataManagement/dataset.api";
|
||||
import { Button, Empty, Progress } from "antd";
|
||||
import { DeleteOutlined } from "@ant-design/icons";
|
||||
import { Button, Empty, Progress, Tag } from "antd";
|
||||
import { DeleteOutlined, FileTextOutlined } from "@ant-design/icons";
|
||||
import { useEffect } from "react";
|
||||
import { useFileSliceUpload } from "@/hooks/useSliceUpload";
|
||||
|
||||
export default function TaskUpload() {
|
||||
const { createTask, taskList, removeTask, handleUpload } = useFileSliceUpload(
|
||||
const { createTask, taskList, removeTask, handleUpload, registerStreamUploadListener } = useFileSliceUpload(
|
||||
{
|
||||
preUpload: preUploadUsingPost,
|
||||
uploadChunk: uploadFileChunkUsingPost,
|
||||
cancelUpload: cancelUploadUsingPut,
|
||||
}
|
||||
},
|
||||
true, // showTaskCenter
|
||||
true // enableStreamUpload
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
const uploadHandler = (e: any) => {
|
||||
console.log('[TaskUpload] Received upload event detail:', e.detail);
|
||||
const { files } = e.detail;
|
||||
const task = createTask(e.detail);
|
||||
const uploadHandler = (e: Event) => {
|
||||
const customEvent = e as CustomEvent;
|
||||
console.log('[TaskUpload] Received upload event detail:', customEvent.detail);
|
||||
const { files } = customEvent.detail;
|
||||
const task = createTask(customEvent.detail);
|
||||
console.log('[TaskUpload] Created task with prefix:', task.prefix);
|
||||
handleUpload({ task, files });
|
||||
};
|
||||
@@ -29,7 +32,13 @@ export default function TaskUpload() {
|
||||
return () => {
|
||||
window.removeEventListener("upload:dataset", uploadHandler);
|
||||
};
|
||||
}, []);
|
||||
}, [createTask, handleUpload]);
|
||||
|
||||
// Register the stream-upload listener; the value returned by the registration
// call is handed back to React as the effect cleanup.
useEffect(
  () => registerStreamUploadListener(),
  [registerStreamUploadListener]
);
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -55,7 +64,22 @@ export default function TaskUpload() {
|
||||
></Button>
|
||||
</div>
|
||||
|
||||
<Progress size="small" percent={task.percent} />
|
||||
<Progress size="small" percent={Number(task.percent)} />
|
||||
{task.streamUploadInfo && (
|
||||
<div className="flex items-center gap-2 text-xs text-gray-500 mt-1">
|
||||
<Tag icon={<FileTextOutlined />} size="small">
|
||||
按行分割
|
||||
</Tag>
|
||||
<span>
|
||||
已上传: {task.streamUploadInfo.uploadedLines} 行
|
||||
</span>
|
||||
{task.streamUploadInfo.totalFiles > 1 && (
|
||||
<span>
|
||||
({task.streamUploadInfo.fileIndex}/{task.streamUploadInfo.totalFiles} 文件)
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
{taskList.length === 0 && (
|
||||
|
||||
@@ -24,11 +24,25 @@ export const menuItems = [
|
||||
// },
|
||||
{
|
||||
id: "management",
|
||||
title: "数据管理",
|
||||
title: "数集管理",
|
||||
icon: FolderOpen,
|
||||
description: "创建、导入和管理数据集",
|
||||
color: "bg-blue-500",
|
||||
},
|
||||
{
|
||||
id: "annotation",
|
||||
title: "数据标注",
|
||||
icon: Tag,
|
||||
description: "对数据进行标注和标记",
|
||||
color: "bg-green-500",
|
||||
},
|
||||
{
|
||||
id: "content-generation",
|
||||
title: "内容生成",
|
||||
icon: Sparkles,
|
||||
description: "智能内容生成与创作",
|
||||
color: "bg-purple-500",
|
||||
},
|
||||
{
|
||||
id: "knowledge-management",
|
||||
title: "知识管理",
|
||||
@@ -43,20 +57,6 @@ export const menuItems = [
|
||||
// description: "数据清洗和预处理",
|
||||
// color: "bg-purple-500",
|
||||
// },
|
||||
{
|
||||
id: "annotation",
|
||||
title: "数据标注",
|
||||
icon: Tag,
|
||||
description: "对数据进行标注和标记",
|
||||
color: "bg-green-500",
|
||||
},
|
||||
{
|
||||
id: "content-generation",
|
||||
title: "内容生成",
|
||||
icon: Sparkles,
|
||||
description: "智能内容生成与创作",
|
||||
color: "bg-purple-500",
|
||||
},
|
||||
// {
|
||||
// id: "synthesis",
|
||||
// title: "数据合成",
|
||||
|
||||
114
frontend/src/pages/Login/LoginPage.tsx
Normal file
114
frontend/src/pages/Login/LoginPage.tsx
Normal file
@@ -0,0 +1,114 @@
|
||||
import React, { useState } from 'react';
|
||||
import { useNavigate, useLocation } from 'react-router';
|
||||
import { Form, Input, Button, Typography, message, Card } from 'antd';
|
||||
import { UserOutlined, LockOutlined } from '@ant-design/icons';
|
||||
import { useAppDispatch, useAppSelector } from '@/store/hooks';
|
||||
import { loginLocal } from '@/store/slices/authSlice';
|
||||
|
||||
const { Title, Text } = Typography;
|
||||
|
||||
/**
 * Login screen.
 *
 * Submitting the form dispatches `loginLocal` and, when the credentials match,
 * navigates to the route the user originally requested (`location.state.from`)
 * or to `/data` by default.
 *
 * NOTE(security): authentication is a client-side mock. The credential check
 * below repeats the hard-coded check in the `loginLocal` reducer, and the form
 * is pre-filled with the same values. Replace both with a real backend login
 * before production use.
 */
const LoginPage: React.FC = () => {
  const navigate = useNavigate();
  const location = useLocation();
  const dispatch = useAppDispatch();
  const { loading } = useAppSelector((state) => state.auth);
  const [messageApi, contextHolder] = message.useMessage();

  // Route to return to after a successful login.
  const from = location.state?.from?.pathname || '/data';

  const onFinish = (values: { username: string; password: string; remember?: boolean }) => {
    dispatch(loginLocal(values));
    // The reducer updates state synchronously, but the new state is not readable
    // from this closure, so the outcome is re-derived from the submitted values.
    if (values.username === 'admin' && values.password === '123456') {
      messageApi.success('登录成功');
      navigate(from, { replace: true });
    } else {
      messageApi.error('账号或密码错误');
    }
  };

  return (
    <div className="min-h-screen flex items-center justify-center bg-[#050b14] relative overflow-hidden">
      {contextHolder}

      {/* Background Effects */}
      <div className="absolute inset-0 z-0">
        <div className="absolute top-0 left-0 w-full h-full bg-[radial-gradient(ellipse_at_center,_var(--tw-gradient-stops))] from-blue-900/20 via-[#050b14] to-[#050b14]"></div>
        {/* Simple grid pattern if possible, or just gradient */}
      </div>

      <div className="absolute top-1/4 left-1/4 w-72 h-72 bg-blue-500/10 rounded-full blur-3xl animate-pulse"></div>
      <div className="absolute bottom-1/4 right-1/4 w-96 h-96 bg-cyan-500/10 rounded-full blur-3xl animate-pulse delay-700"></div>

      <div className="z-10 w-full max-w-md p-8 animate-[fadeIn_0.5s_ease-out_forwards]">
        <div className="backdrop-blur-xl bg-white/5 border border-white/10 rounded-2xl shadow-2xl p-8 relative overflow-hidden">
          {/* Decorative line */}
          <div className="absolute top-0 left-0 w-full h-1 bg-gradient-to-r from-transparent via-blue-500 to-transparent"></div>

          <div className="text-center mb-8">
            <div className="inline-flex items-center justify-center w-16 h-16 rounded-full bg-blue-500/20 mb-4 border border-blue-500/30">
              <svg className="w-8 h-8 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10" />
              </svg>
            </div>
            <Title level={2} className="!text-white !mb-2 tracking-wide font-bold">
              DataBuilder
            </Title>
            <Text className="text-gray-400! text-sm tracking-wider">
              一站式数据工作平台
            </Text>
          </div>

          <Form
            name="login"
            initialValues={{ remember: true, username: 'admin', password: '123456' }}
            onFinish={onFinish}
            layout="vertical"
            size="large"
          >
            <Form.Item
              name="username"
              rules={[{ required: true, message: '请输入账号!' }]}
            >
              <Input
                prefix={<UserOutlined className="text-blue-400" />}
                placeholder="账号"
                className="!bg-white/5 !border-white/10 !text-white placeholder:!text-gray-600 hover:!border-blue-500/50 focus:!border-blue-500 !rounded-lg"
              />
            </Form.Item>
            <Form.Item
              name="password"
              rules={[{ required: true, message: '请输入密码!' }]}
            >
              <Input.Password
                prefix={<LockOutlined className="text-blue-400" />}
                type="password"
                placeholder="密码"
                className="!bg-white/5 !border-white/10 !text-white placeholder:!text-gray-600 hover:!border-blue-500/50 focus:!border-blue-500 !rounded-lg"
              />
            </Form.Item>

            <Form.Item className="mb-2">
              <Button
                type="primary"
                htmlType="submit"
                className="w-full bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 border-none h-12 rounded-lg font-semibold tracking-wide shadow-lg shadow-blue-900/20"
                loading={loading}
              >
                登录系统
              </Button>
            </Form.Item>

            <div className="text-center mt-4">
              <Text className="text-gray-600! text-xs">
                数据处理平台 · 安全接入
              </Text>
            </div>
          </Form>
        </div>
      </div>
    </div>
  );
};
|
||||
|
||||
export default LoginPage;
|
||||
@@ -49,12 +49,21 @@ import EvaluationDetailPage from "@/pages/DataEvaluation/Detail/TaskDetail.tsx";
|
||||
import SynthDataDetail from "@/pages/SynthesisTask/SynthDataDetail.tsx";
|
||||
import Home from "@/pages/Home/Home";
|
||||
import ContentGenerationPage from "@/pages/ContentGeneration/ContentGenerationPage";
|
||||
import LoginPage from "@/pages/Login/LoginPage";
|
||||
import ProtectedRoute from "@/components/ProtectedRoute";
|
||||
|
||||
const router = createBrowserRouter([
|
||||
{
|
||||
path: "/login",
|
||||
Component: LoginPage,
|
||||
},
|
||||
{
|
||||
path: "/",
|
||||
Component: Home,
|
||||
},
|
||||
{
|
||||
Component: ProtectedRoute,
|
||||
children: [
|
||||
{
|
||||
path: "/chat",
|
||||
Component: withErrorBoundary(AgentPage),
|
||||
@@ -286,6 +295,8 @@ const router = createBrowserRouter([
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
}
|
||||
]);
|
||||
|
||||
export default router;
|
||||
@@ -31,7 +31,7 @@ const authSlice = createSlice({
|
||||
initialState: {
|
||||
user: null,
|
||||
token: localStorage.getItem('token'),
|
||||
isAuthenticated: false,
|
||||
isAuthenticated: !!localStorage.getItem('token'),
|
||||
loading: false,
|
||||
error: null,
|
||||
},
|
||||
@@ -49,6 +49,19 @@ const authSlice = createSlice({
|
||||
state.token = action.payload;
|
||||
localStorage.setItem('token', action.payload);
|
||||
},
|
||||
// Reducer: local login check. Compares the submitted username/password with
// the literal pair below and, on a match, marks the session as authenticated.
// NOTE(review): the credentials are hard-coded in client-side code, so anyone
// who can read the bundle can read them - presumably a placeholder until a
// real login endpoint is wired in; confirm before shipping.
// NOTE(review): this reducer calls Date.now() and localStorage.setItem(), i.e.
// it is not a pure function of (state, action).
loginLocal: (state, action) => {
|
||||
const { username, password } = action.payload;
|
||||
if (username === 'admin' && password === '123456') {
|
||||
state.user = { username: 'admin', role: 'admin' };
|
||||
// The token is a fixed prefix plus the current timestamp.
state.token = 'mock-token-' + Date.now();
|
||||
state.isAuthenticated = true;
|
||||
// Persist the token under the same 'token' key that initialState reads.
localStorage.setItem('token', state.token);
|
||||
state.error = null;
|
||||
} else {
|
||||
// Mismatch: record the error and clear the authenticated flag; user and token are left untouched.
state.error = 'Invalid credentials';
|
||||
state.isAuthenticated = false;
|
||||
}
|
||||
},
|
||||
},
|
||||
extraReducers: (builder) => {
|
||||
builder
|
||||
@@ -71,5 +84,5 @@ const authSlice = createSlice({
|
||||
},
|
||||
});
|
||||
|
||||
export const { logout, clearError, setToken } = authSlice.actions;
|
||||
export const { logout, clearError, setToken, loginLocal } = authSlice.actions;
|
||||
export default authSlice.reducer;
|
||||
@@ -1,79 +1,657 @@
|
||||
import { UploadFile } from "antd";
|
||||
import jsSHA from "jssha";
|
||||
|
||||
const CHUNK_SIZE = 1024 * 1024 * 60;
|
||||
// 默认分片大小:5MB(适合大多数网络环境)
|
||||
export const DEFAULT_CHUNK_SIZE = 1024 * 1024 * 5;
|
||||
// 大文件阈值:10MB
|
||||
export const LARGE_FILE_THRESHOLD = 1024 * 1024 * 10;
|
||||
// 最大并发上传数
|
||||
export const MAX_CONCURRENT_UPLOADS = 3;
|
||||
// 文本文件读取块大小:20MB(用于计算 SHA256)
|
||||
const BUFFER_CHUNK_SIZE = 1024 * 1024 * 20;
|
||||
|
||||
export function sliceFile(file, chunkSize = CHUNK_SIZE): Blob[] {
|
||||
/**
|
||||
* 将文件分割为多个分片
|
||||
* @param file 文件对象
|
||||
* @param chunkSize 分片大小(字节),默认 5MB
|
||||
* @returns 分片数组(Blob 列表)
|
||||
*/
|
||||
export function sliceFile(file: Blob, chunkSize = DEFAULT_CHUNK_SIZE): Blob[] {
|
||||
const totalSize = file.size;
|
||||
const chunks: Blob[] = [];
|
||||
|
||||
// 小文件不需要分片
|
||||
if (totalSize <= chunkSize) {
|
||||
return [file];
|
||||
}
|
||||
|
||||
let start = 0;
|
||||
let end = start + chunkSize;
|
||||
const chunks = [];
|
||||
while (start < totalSize) {
|
||||
const end = Math.min(start + chunkSize, totalSize);
|
||||
const blob = file.slice(start, end);
|
||||
chunks.push(blob);
|
||||
|
||||
start = end;
|
||||
end = start + chunkSize;
|
||||
}
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
export function calculateSHA256(file: Blob): Promise<string> {
|
||||
let count = 0;
|
||||
const hash = new jsSHA("SHA-256", "ARRAYBUFFER", { encoding: "UTF8" });
|
||||
/**
|
||||
* 计算文件的 SHA256 哈希值
|
||||
* @param file 文件 Blob
|
||||
* @param onProgress 进度回调(可选)
|
||||
* @returns SHA256 哈希字符串
|
||||
*/
|
||||
export function calculateSHA256(
|
||||
file: Blob,
|
||||
onProgress?: (percent: number) => void
|
||||
): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const hash = new jsSHA("SHA-256", "ARRAYBUFFER", { encoding: "UTF8" });
|
||||
const reader = new FileReader();
|
||||
let processedSize = 0;
|
||||
|
||||
function readChunk(start: number, end: number) {
|
||||
const slice = file.slice(start, end);
|
||||
reader.readAsArrayBuffer(slice);
|
||||
}
|
||||
|
||||
const bufferChunkSize = 1024 * 1024 * 20;
|
||||
|
||||
function processChunk(offset: number) {
|
||||
const start = offset;
|
||||
const end = Math.min(start + bufferChunkSize, file.size);
|
||||
count = end;
|
||||
|
||||
const end = Math.min(start + BUFFER_CHUNK_SIZE, file.size);
|
||||
readChunk(start, end);
|
||||
}
|
||||
|
||||
reader.onloadend = function () {
|
||||
const arraybuffer = reader.result;
|
||||
reader.onloadend = function (e) {
|
||||
const arraybuffer = reader.result as ArrayBuffer;
|
||||
if (!arraybuffer) {
|
||||
reject(new Error("Failed to read file"));
|
||||
return;
|
||||
}
|
||||
|
||||
hash.update(arraybuffer);
|
||||
if (count < file.size) {
|
||||
processChunk(count);
|
||||
processedSize += (e.target as FileReader).result?.byteLength || 0;
|
||||
|
||||
if (onProgress) {
|
||||
const percent = Math.min(100, Math.round((processedSize / file.size) * 100));
|
||||
onProgress(percent);
|
||||
}
|
||||
|
||||
if (processedSize < file.size) {
|
||||
processChunk(processedSize);
|
||||
} else {
|
||||
resolve(hash.getHash("HEX", { outputLen: 256 }));
|
||||
}
|
||||
};
|
||||
|
||||
reader.onerror = () => reject(new Error("File reading failed"));
|
||||
processChunk(0);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Compute the SHA-256 hash of several files, one after another.
 *
 * Files are hashed sequentially, in input order; the next file is not started
 * until the previous hash has resolved.
 *
 * @param files Files to hash.
 * @param onFileProgress Optional callback receiving (fileIndex, percent) while a file is hashed.
 * @returns The hashes, in the same order as `files`.
 */
export async function calculateSHA256Batch(
  files: Blob[],
  onFileProgress?: (index: number, percent: number) => void
): Promise<string[]> {
  const hashes: string[] = [];

  for (const [fileIndex, blob] of files.entries()) {
    // Tag every progress report with the index of the file being hashed.
    const reportProgress = (percent: number) => {
      onFileProgress?.(fileIndex, percent);
    };
    hashes.push(await calculateSHA256(blob, reportProgress));
  }

  return hashes;
}
|
||||
|
||||
/**
|
||||
* 检查文件是否存在(未被修改或删除)
|
||||
* @param fileList 文件列表
|
||||
* @returns 返回第一个不存在的文件,或 null(如果都存在)
|
||||
*/
|
||||
export function checkIsFilesExist(
|
||||
fileList: UploadFile[]
|
||||
): Promise<UploadFile | null> {
|
||||
fileList: Array<{ originFile?: Blob }>
|
||||
): Promise<{ originFile?: Blob } | null> {
|
||||
return new Promise((resolve) => {
|
||||
const loadEndFn = (file: UploadFile, reachEnd: boolean, e) => {
|
||||
const fileNotExist = !e.target.result;
|
||||
if (!fileList.length) {
|
||||
resolve(null);
|
||||
return;
|
||||
}
|
||||
|
||||
let checkedCount = 0;
|
||||
const totalCount = fileList.length;
|
||||
|
||||
const loadEndFn = (file: { originFile?: Blob }, e: ProgressEvent<FileReader>) => {
|
||||
checkedCount++;
|
||||
const fileNotExist = !e.target?.result;
|
||||
if (fileNotExist) {
|
||||
resolve(file);
|
||||
return;
|
||||
}
|
||||
if (reachEnd) {
|
||||
if (checkedCount >= totalCount) {
|
||||
resolve(null);
|
||||
}
|
||||
};
|
||||
|
||||
for (let i = 0; i < fileList.length; i++) {
|
||||
const { originFile: file } = fileList[i];
|
||||
for (const file of fileList) {
|
||||
const fileReader = new FileReader();
|
||||
fileReader.readAsArrayBuffer(file);
|
||||
fileReader.onloadend = (e) =>
|
||||
loadEndFn(fileList[i], i === fileList.length - 1, e);
|
||||
const actualFile = file.originFile;
|
||||
|
||||
if (!actualFile) {
|
||||
checkedCount++;
|
||||
if (checkedCount >= totalCount) {
|
||||
resolve(null);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
fileReader.readAsArrayBuffer(actualFile.slice(0, 1));
|
||||
fileReader.onloadend = (e) => loadEndFn(file, e);
|
||||
fileReader.onerror = () => {
|
||||
checkedCount++;
|
||||
resolve(file);
|
||||
};
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Tell whether a file counts as "large".
 *
 * @param size File size in bytes.
 * @param threshold Size limit in bytes; defaults to LARGE_FILE_THRESHOLD.
 * @returns true when `size` is strictly greater than `threshold`.
 */
export function isLargeFile(size: number, threshold = LARGE_FILE_THRESHOLD): boolean {
  const exceedsThreshold = threshold < size;
  return exceedsThreshold;
}
|
||||
|
||||
/**
 * Format a byte count as a human-readable size string (base 1024).
 *
 * @param bytes Number of bytes. Non-finite, zero or negative values yield "0 B".
 * @param decimals Maximum number of fraction digits; clamped to the 0-100 range
 *   accepted by Number.prototype.toFixed.
 * @returns The size followed by its unit, e.g. "1.5 KB". Trailing zeros are dropped.
 */
export function formatFileSize(bytes: number, decimals = 2): string {
  if (!Number.isFinite(bytes) || bytes <= 0) return "0 B";

  const k = 1024;
  const sizes = ["B", "KB", "MB", "GB", "TB", "PB"];

  // Clamp the unit index: a value below 1 byte would give a negative index and
  // a value of 1024 PB or more would run past the end of the unit table.
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

  // toFixed throws a RangeError outside 0..100.
  const digits = Math.min(100, Math.max(0, Math.floor(decimals)));

  return `${parseFloat((bytes / Math.pow(k, i)).toFixed(digits))} ${sizes[i]}`;
}
|
||||
|
||||
/**
 * Run asynchronous tasks with a bounded number running at the same time.
 *
 * @param tasks Task factories; each is invoked at most once.
 * @param maxConcurrency Upper bound on simultaneously running tasks. Values
 *   below 1 (or NaN) are treated as 1 and fractional values are rounded down,
 *   so every task is always executed.
 * @param onTaskComplete Optional callback invoked with (index, result) as each task resolves.
 * @returns The results, positioned by task index regardless of completion order.
 *   Rejects with the first task error; workers that are still running keep
 *   draining the queue.
 */
export async function runConcurrentTasks<T>(
  tasks: (() => Promise<T>)[],
  maxConcurrency: number,
  onTaskComplete?: (index: number, result: T) => void
): Promise<T[]> {
  const results: T[] = new Array(tasks.length);

  // A limit of 0 used to start no worker at all (returning an array of holes),
  // and a negative or fractional limit made Array(n) throw.
  const limit = Number.isNaN(maxConcurrency) ? 1 : Math.max(1, Math.floor(maxConcurrency));
  const workerCount = Math.min(limit, tasks.length);

  let nextIndex = 0;

  // Each worker pulls the next unclaimed task until the queue is empty. A loop
  // is used instead of recursion so a long queue does not build one nested
  // promise chain per task.
  async function worker(): Promise<void> {
    while (nextIndex < tasks.length) {
      const currentIndex = nextIndex++;
      const result = await tasks[currentIndex]();
      results[currentIndex] = result;
      onTaskComplete?.(currentIndex, result);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
||||
|
||||
/**
 * Split text into lines on LF or CRLF.
 *
 * @param text Text to split.
 * @param skipEmptyLines When true (the default), lines that are empty or
 *   contain only whitespace are dropped.
 * @returns The lines, without their line terminators.
 */
export function splitTextByLines(text: string, skipEmptyLines = true): string[] {
  const rows = text.split(/\r?\n/);
  return skipEmptyLines ? rows.filter((row) => row.trim().length > 0) : rows;
}
|
||||
|
||||
/**
 * Build a descriptor holding a file together with its chunks.
 *
 * @param file Source file or blob.
 * @param chunkSize Chunk size in bytes handed to sliceFile; defaults to DEFAULT_CHUNK_SIZE.
 * @returns The original file, its chunks, a display name ("unnamed" when the
 *   input has no name), its size and the number of chunks.
 */
export function createFileSliceInfo(
  file: File | Blob,
  chunkSize = DEFAULT_CHUNK_SIZE
): {
  originFile: Blob;
  slices: Blob[];
  name: string;
  size: number;
  totalChunks: number;
} {
  const parts = sliceFile(file, chunkSize);
  // A plain Blob has no name property, hence the fallback.
  const displayName = (file as File).name || "unnamed";

  return {
    originFile: file,
    slices: parts,
    name: displayName,
    size: file.size,
    totalChunks: parts.length,
  };
}
|
||||
|
||||
/**
|
||||
* 支持的文本文件 MIME 类型前缀
|
||||
*/
|
||||
export const TEXT_FILE_MIME_PREFIX = "text/";
|
||||
|
||||
/**
|
||||
* 支持的文本文件 MIME 类型集合
|
||||
*/
|
||||
export const TEXT_FILE_MIME_TYPES = new Set([
|
||||
"application/json",
|
||||
"application/xml",
|
||||
"application/csv",
|
||||
"application/ndjson",
|
||||
"application/x-ndjson",
|
||||
"application/x-yaml",
|
||||
"application/yaml",
|
||||
"application/javascript",
|
||||
"application/x-javascript",
|
||||
"application/sql",
|
||||
"application/rtf",
|
||||
"application/xhtml+xml",
|
||||
"application/svg+xml",
|
||||
]);
|
||||
|
||||
/**
|
||||
* 支持的文本文件扩展名集合
|
||||
*/
|
||||
export const TEXT_FILE_EXTENSIONS = new Set([
|
||||
".txt",
|
||||
".md",
|
||||
".markdown",
|
||||
".csv",
|
||||
".tsv",
|
||||
".json",
|
||||
".jsonl",
|
||||
".ndjson",
|
||||
".log",
|
||||
".xml",
|
||||
".yaml",
|
||||
".yml",
|
||||
".sql",
|
||||
".js",
|
||||
".ts",
|
||||
".jsx",
|
||||
".tsx",
|
||||
".html",
|
||||
".htm",
|
||||
".css",
|
||||
".scss",
|
||||
".less",
|
||||
".py",
|
||||
".java",
|
||||
".c",
|
||||
".cpp",
|
||||
".h",
|
||||
".hpp",
|
||||
".go",
|
||||
".rs",
|
||||
".rb",
|
||||
".php",
|
||||
".sh",
|
||||
".bash",
|
||||
".zsh",
|
||||
".ps1",
|
||||
".bat",
|
||||
".cmd",
|
||||
".svg",
|
||||
".rtf",
|
||||
]);
|
||||
|
||||
/**
 * Tell whether an antd UploadFile is a text file.
 *
 * The MIME type is checked first (the "text/" prefix, then the known text MIME
 * types); if it does not match, the decision falls back to the file extension.
 *
 * @param file UploadFile to inspect.
 * @returns true when the MIME type or the extension identifies a text file.
 */
export function isTextUploadFile(file: UploadFile): boolean {
  const mime = (file.type || "").toLowerCase();
  if (mime && (mime.startsWith(TEXT_FILE_MIME_PREFIX) || TEXT_FILE_MIME_TYPES.has(mime))) {
    return true;
  }

  const name = file.name || "";
  const lastDot = name.lastIndexOf(".");
  // No dot means no extension to look up.
  return lastDot >= 0 && TEXT_FILE_EXTENSIONS.has(name.slice(lastDot).toLowerCase());
}
|
||||
|
||||
/**
 * Tell whether a file name has a known text-file extension.
 *
 * @param fileName File name to inspect.
 * @returns true when the lower-cased extension is in TEXT_FILE_EXTENSIONS;
 *   false when the name contains no dot.
 */
export function isTextFileByName(fileName: string): boolean {
  const normalized = fileName.toLowerCase();
  const lastDot = normalized.lastIndexOf(".");

  // Only the extension is considered here; a bare name carries no MIME type.
  return lastDot < 0 ? false : TEXT_FILE_EXTENSIONS.has(normalized.slice(lastDot));
}
|
||||
|
||||
/**
 * Return the extension of a file name.
 *
 * @param fileName File name to inspect.
 * @returns The lower-cased text from the last dot onwards (dot included), or
 *   an empty string when the name contains no dot.
 */
export function getFileExtension(fileName: string): string {
  const lastDot = fileName.lastIndexOf(".");
  return lastDot >= 0 ? fileName.slice(lastDot).toLowerCase() : "";
}
|
||||
|
||||
/**
 * Read a file or blob as text through a FileReader.
 *
 * @param file File or blob to read.
 * @param encoding Text encoding passed to FileReader.readAsText; defaults to UTF-8.
 * @returns A promise resolving to the decoded text; it rejects with
 *   "Failed to read file" when the reader reports an error.
 */
export function readFileAsText(
  file: File | Blob,
  encoding = "UTF-8"
): Promise<string> {
  return new Promise((resolve, reject) => {
    const textReader = new FileReader();

    textReader.onload = () => {
      resolve(textReader.result as string);
    };
    textReader.onerror = () => {
      reject(new Error("Failed to read file"));
    };

    textReader.readAsText(file, encoding);
  });
}
|
||||
|
||||
/**
|
||||
* 流式分割文件并逐行上传
|
||||
* 使用 Blob.slice 逐块读取,避免一次性加载大文件到内存
|
||||
* @param file 文件对象
|
||||
* @param datasetId 数据集ID
|
||||
* @param uploadFn 上传函数,接收 FormData 和配置,返回 Promise
|
||||
* @param onProgress 进度回调 (currentBytes, totalBytes, uploadedLines)
|
||||
* @param chunkSize 每次读取的块大小,默认 1MB
|
||||
* @param options 其他选项
|
||||
* @returns 上传结果统计
|
||||
*/
|
||||
// Options accepted by streamSplitAndUpload.
export interface StreamUploadOptions {
|
||||
// Pre-upload request id. When it is missing (or falsy), streamSplitAndUpload
// obtains one through resolveReqId.
reqId?: number;
|
||||
// Called when reqId is not supplied, with the number of non-blank lines
// (totalFileNum) and the file size in bytes (totalSize); resolves to the request id.
resolveReqId?: (params: { totalFileNum: number; totalSize: number }) => Promise<number>;
|
||||
// Invoked with the id returned by resolveReqId.
onReqIdResolved?: (reqId: number) => void;
|
||||
// Used instead of file.name as the source of the base name and extension of
// the per-line file names.
fileNamePrefix?: string;
|
||||
// NOTE(review): not read by streamSplitAndUpload; presumably consumed by
// callers - confirm.
hasArchive?: boolean;
|
||||
// Sent as the "prefix" form field when it is not undefined.
prefix?: string;
|
||||
// When aborted, processing stops with an "Upload cancelled" error.
signal?: AbortSignal;
|
||||
// Number of in-flight line uploads at which reading pauses; streamSplitAndUpload defaults it to 3.
maxConcurrency?: number;
|
||||
}
|
||||
|
||||
// Summary returned by streamSplitAndUpload.
export interface StreamUploadResult {
|
||||
// Number of lines whose upload completed.
uploadedCount: number;
|
||||
// Size of the source file in bytes.
totalBytes: number;
|
||||
// Number of blank (whitespace-only) lines that were skipped.
skippedEmptyCount: number;
|
||||
}
|
||||
|
||||
/**
 * Read a file chunk by chunk and hand every non-blank line to a callback.
 *
 * Chunks are decoded as UTF-8 with a streaming TextDecoder, so a multi-byte
 * character split across two chunks is reassembled instead of being replaced
 * by U+FFFD. Lines are split on LF or CRLF; text after the last line break is
 * carried over to the next chunk and emitted at end of file.
 *
 * @param file File to read.
 * @param chunkSize Number of bytes read per chunk; must be positive.
 * @param signal Optional abort signal, checked before every chunk and every line.
 * @param onLine Called (and awaited) with each non-blank line and its zero-based
 *   index among non-blank lines.
 * @param onProgress Called after each chunk with (bytesRead, totalBytes, linesSoFar).
 * @returns The number of non-blank lines and the number of blank lines skipped.
 * @throws RangeError when chunkSize is not a positive number.
 * @throws Error "Upload cancelled" when the signal is aborted.
 */
async function processFileLines(
  file: File,
  chunkSize: number,
  signal: AbortSignal | undefined,
  onLine?: (line: string, index: number) => Promise<void> | void,
  onProgress?: (currentBytes: number, totalBytes: number, processedLines: number) => void
): Promise<{ lineCount: number; skippedEmptyCount: number }> {
  // A zero or negative chunk size would never advance the offset.
  if (!(chunkSize > 0)) {
    throw new RangeError("chunkSize must be a positive number");
  }

  const fileSize = file.size;
  const decoder = new TextDecoder("utf-8");
  let offset = 0;
  let buffer = "";
  let skippedEmptyCount = 0;
  let lineIndex = 0;

  while (offset < fileSize) {
    if (signal?.aborted) {
      throw new Error("Upload cancelled");
    }

    const end = Math.min(offset + chunkSize, fileSize);
    const bytes = await file.slice(offset, end).arrayBuffer();
    // stream: true keeps an incomplete trailing byte sequence inside the
    // decoder until the next chunk supplies the rest of the character.
    const text = decoder.decode(bytes, { stream: true });
    const lines = (buffer + text).split(/\r?\n/);
    // The last element is an unterminated line; hold it back for the next chunk.
    buffer = lines.pop() || "";

    for (const line of lines) {
      if (signal?.aborted) {
        throw new Error("Upload cancelled");
      }
      if (!line.trim()) {
        skippedEmptyCount++;
        continue;
      }
      const currentIndex = lineIndex;
      lineIndex += 1;
      if (onLine) {
        await onLine(line, currentIndex);
      }
    }

    offset = end;
    onProgress?.(offset, fileSize, lineIndex);
  }

  // Flush whatever the decoder is still holding (a truncated final character).
  buffer += decoder.decode();

  if (buffer.trim()) {
    const currentIndex = lineIndex;
    lineIndex += 1;
    if (onLine) {
      await onLine(buffer, currentIndex);
    }
  } else if (buffer.length > 0) {
    skippedEmptyCount++;
  }

  return { lineCount: lineIndex, skippedEmptyCount };
}
|
||||
|
||||
/**
 * Split a text file into lines and upload every non-blank line as its own file.
 *
 * The file is read chunk by chunk through processFileLines. When no request id
 * is supplied, the file is first scanned once to count its non-blank lines and
 * that count is passed to options.resolveReqId to obtain the id; the file is
 * then read a second time to perform the uploads. Uploads run concurrently and
 * reading pauses while options.maxConcurrency uploads are in flight.
 *
 * @param file Source file.
 * @param uploadFn Performs one upload; receives the form data of a single line.
 * @param onProgress Called as (bytesRead, totalBytes, uploadedLines) after each
 *   chunk has been read, and once more after every upload has settled.
 * @param chunkSize Number of bytes read per chunk; 1MB by default.
 * @param options See StreamUploadOptions. Defaults to an empty object.
 * @returns The number of uploaded lines, the file size and the number of
 *   blank lines skipped.
 * @throws Error "Upload cancelled" when options.signal is aborted,
 *   "Missing pre-upload request id" / "Failed to resolve pre-upload request id"
 *   when no request id can be obtained, or the first error raised by an
 *   upload - including uploads that were still in flight when reading finished.
 */
export async function streamSplitAndUpload(
  file: File,
  uploadFn: (formData: FormData, config?: { onUploadProgress?: (e: { loaded: number; total: number }) => void }) => Promise<unknown>,
  onProgress?: (currentBytes: number, totalBytes: number, uploadedLines: number) => void,
  chunkSize: number = 1024 * 1024, // 1MB
  // Defaulted so that a required parameter no longer follows an optional one.
  options: StreamUploadOptions = {}
): Promise<StreamUploadResult> {
  const {
    reqId: initialReqId,
    resolveReqId,
    onReqIdResolved,
    fileNamePrefix,
    prefix,
    signal,
    maxConcurrency = 3,
  } = options;

  const fileSize = file.size;
  let uploadedCount = 0;
  let skippedEmptyCount = 0;

  // Split the source name into base name and extension for the per-line names.
  const originalFileName = fileNamePrefix || file.name;
  const lastDotIndex = originalFileName.lastIndexOf(".");
  const baseName = lastDotIndex > 0 ? originalFileName.slice(0, lastDotIndex) : originalFileName;
  const fileExtension = lastDotIndex > 0 ? originalFileName.slice(lastDotIndex) : "";

  let resolvedReqId = initialReqId;
  if (!resolvedReqId) {
    // No request id yet: count the non-blank lines first, then ask for one.
    const scanResult = await processFileLines(file, chunkSize, signal);
    const totalFileNum = scanResult.lineCount;
    skippedEmptyCount = scanResult.skippedEmptyCount;
    if (totalFileNum === 0) {
      return {
        uploadedCount: 0,
        totalBytes: fileSize,
        skippedEmptyCount,
      };
    }
    if (signal?.aborted) {
      throw new Error("Upload cancelled");
    }
    if (!resolveReqId) {
      throw new Error("Missing pre-upload request id");
    }
    resolvedReqId = await resolveReqId({ totalFileNum, totalSize: fileSize });
    if (!resolvedReqId) {
      throw new Error("Failed to resolve pre-upload request id");
    }
    onReqIdResolved?.(resolvedReqId);
  }
  if (!resolvedReqId) {
    throw new Error("Missing pre-upload request id");
  }
  // Captured as a constant so the closures below see a definitely-set value.
  const reqIdField = resolvedReqId.toString();

  /**
   * Upload one line as an independent file: fileNo is the 1-based line
   * number and the file always consists of exactly one chunk.
   */
  async function uploadLine(line: string, index: number): Promise<void> {
    if (signal?.aborted) {
      throw new Error("Upload cancelled");
    }

    if (!line.trim()) {
      skippedEmptyCount++;
      return;
    }

    // Keep the extension of the source file on every generated file.
    const fileIndex = index + 1;
    const newFileName = `${baseName}_${String(fileIndex).padStart(6, "0")}${fileExtension}`;
    const blob = new Blob([line], { type: "text/plain" });
    const lineFile = new File([blob], newFileName, { type: "text/plain" });

    // Only the first slice is hashed and uploaded (a single line normally fits in one chunk).
    const slices = sliceFile(lineFile, DEFAULT_CHUNK_SIZE);
    const checkSum = await calculateSHA256(slices[0]);

    // Hashing is asynchronous, so check for cancellation again.
    if (signal?.aborted) {
      throw new Error("Upload cancelled");
    }

    const formData = new FormData();
    formData.append("file", slices[0]);
    formData.append("reqId", reqIdField);
    formData.append("fileNo", fileIndex.toString());
    formData.append("chunkNo", "1");
    formData.append("fileName", newFileName);
    formData.append("fileSize", lineFile.size.toString());
    formData.append("totalChunkNum", "1");
    formData.append("checkSumHex", checkSum);
    if (prefix !== undefined) {
      formData.append("prefix", prefix);
    }

    await uploadFn(formData, {
      onUploadProgress: () => {
        // A single line is tiny; per-request progress is not reported.
      },
    });
  }

  const inFlight = new Set<Promise<void>>();
  let uploadError: unknown = null;

  // Start the upload of one line; once maxConcurrency uploads are in flight,
  // wait for one of them to settle before letting the reader continue.
  const enqueueUpload = async (line: string, index: number) => {
    if (signal?.aborted) {
      throw new Error("Upload cancelled");
    }
    if (uploadError) {
      throw uploadError;
    }
    const uploadPromise = uploadLine(line, index)
      .then(() => {
        uploadedCount++;
      })
      .catch((err) => {
        // Remember the failure; it is rethrown from the main flow.
        uploadError = err;
      });
    inFlight.add(uploadPromise);
    uploadPromise.finally(() => inFlight.delete(uploadPromise));
    if (inFlight.size >= maxConcurrency) {
      await Promise.race(inFlight);
      if (uploadError) {
        throw uploadError;
      }
    }
  };

  let uploadResult: { lineCount: number; skippedEmptyCount: number } | null = null;
  try {
    uploadResult = await processFileLines(
      file,
      chunkSize,
      signal,
      enqueueUpload,
      (currentBytes, totalBytes) => {
        onProgress?.(currentBytes, totalBytes, uploadedCount);
      }
    );
  } finally {
    // Always let the uploads that were already started settle.
    if (inFlight.size > 0) {
      await Promise.allSettled(inFlight);
    }
  }

  // Checked only after the drain above: an upload that was still in flight
  // when reading finished may have failed in the meantime, and that failure
  // must not be reported as success.
  if (uploadError) {
    throw uploadError;
  }

  if (!uploadResult || (initialReqId && uploadResult.lineCount === 0)) {
    return {
      uploadedCount: 0,
      totalBytes: fileSize,
      skippedEmptyCount: uploadResult?.skippedEmptyCount ?? 0,
    };
  }

  if (!initialReqId) {
    skippedEmptyCount = skippedEmptyCount || uploadResult.skippedEmptyCount;
  } else {
    skippedEmptyCount = uploadResult.skippedEmptyCount;
  }

  // The per-chunk progress calls fire before the last uploads finish; report
  // the final uploaded count once everything has settled.
  onProgress?.(fileSize, fileSize, uploadedCount);

  return {
    uploadedCount,
    totalBytes: fileSize,
    skippedEmptyCount,
  };
}
|
||||
|
||||
/**
 * Decide whether a file should go through the streaming split upload.
 *
 * @param file File to inspect.
 * @param threshold Size limit in bytes; 5MB by default.
 * @returns true when the file is strictly larger than `threshold`.
 */
export function shouldStreamUpload(file: File, threshold: number = 5 * 1024 * 1024): boolean {
  const { size } = file;
  return threshold < size;
}
|
||||
|
||||
@@ -82,6 +82,9 @@ class Request {
|
||||
*/
|
||||
createXHRWithProgress(url, config, onProgress, onDownloadProgress) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const xhr = new XMLHttpRequest();
|
||||
xhr.open(config.method || "POST", url);
|
||||
|
||||
// 设置请求头
|
||||
if (config.headers) {
|
||||
Object.keys(config.headers).forEach((key) => {
|
||||
@@ -89,7 +92,13 @@ class Request {
|
||||
});
|
||||
}
|
||||
|
||||
const xhr = new XMLHttpRequest();
|
||||
// 监听 AbortSignal 来中止请求
|
||||
if (config.signal) {
|
||||
config.signal.addEventListener("abort", () => {
|
||||
xhr.abort();
|
||||
reject(new Error("上传已取消"));
|
||||
});
|
||||
}
|
||||
|
||||
// 监听上传进度
|
||||
xhr.upload.addEventListener("progress", function (event) {
|
||||
@@ -103,14 +112,6 @@ class Request {
|
||||
}
|
||||
});
|
||||
|
||||
// 请求完成
|
||||
// xhr.addEventListener("load", function () {
|
||||
// if (xhr.status >= 200 && xhr.status < 300) {
|
||||
// const response = JSON.parse(xhr.responseText);
|
||||
// resolve(xhr);
|
||||
// }
|
||||
// });
|
||||
|
||||
// 请求完成处理
|
||||
xhr.addEventListener("load", () => {
|
||||
if (xhr.status >= 200 && xhr.status < 300) {
|
||||
@@ -142,16 +143,15 @@ class Request {
|
||||
// 请求错误
|
||||
xhr.addEventListener("error", function () {
|
||||
console.error("网络错误");
|
||||
if (onError) onError(new Error("网络错误"));
|
||||
reject(new Error("网络错误"));
|
||||
});
|
||||
|
||||
// 请求中止
|
||||
xhr.addEventListener("abort", function () {
|
||||
console.log("上传已取消");
|
||||
if (onError) onError(new Error("上传已取消"));
|
||||
reject(new Error("上传已取消"));
|
||||
});
|
||||
|
||||
xhr.open("POST", url);
|
||||
xhr.send(config.body);
|
||||
|
||||
return xhr; // 返回 xhr 对象以便后续控制
|
||||
|
||||
@@ -66,7 +66,7 @@ class Settings(BaseSettings):
|
||||
datamate_backend_base_url: str = "http://datamate-backend:8080/api"
|
||||
|
||||
# 标注编辑器(Label Studio Editor)相关
|
||||
editor_max_text_bytes: int = 2 * 1024 * 1024 # 2MB,避免一次加载超大文本卡死前端
|
||||
editor_max_text_bytes: int = 0 # <=0 表示不限制,正数为最大字节数
|
||||
|
||||
# 全局设置实例
|
||||
settings = Settings()
|
||||
|
||||
@@ -9,10 +9,17 @@ from app.db.session import Base
|
||||
ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
|
||||
ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
|
||||
ANNOTATION_STATUS_IN_PROGRESS = "IN_PROGRESS"
|
||||
ANNOTATION_STATUS_VALUES = {
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
ANNOTATION_STATUS_IN_PROGRESS,
|
||||
}
|
||||
ANNOTATION_STATUS_CLIENT_VALUES = {
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
}
|
||||
|
||||
class AnnotationTemplate(Base):
|
||||
@@ -101,7 +108,7 @@ class AnnotationResult(Base):
|
||||
String(32),
|
||||
nullable=False,
|
||||
default=ANNOTATION_STATUS_ANNOTATED,
|
||||
comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE",
|
||||
comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
|
||||
)
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
|
||||
@@ -19,6 +19,7 @@ from app.db.session import get_db
|
||||
from app.module.annotation.schema.editor import (
|
||||
EditorProjectInfo,
|
||||
EditorTaskListResponse,
|
||||
EditorTaskSegmentResponse,
|
||||
EditorTaskResponse,
|
||||
UpsertAnnotationRequest,
|
||||
UpsertAnnotationResponse,
|
||||
@@ -87,6 +88,21 @@ async def get_editor_task(
|
||||
return StandardResponse(code=200, message="success", data=task)
|
||||
|
||||
|
||||
@router.get(
    "/projects/{project_id}/tasks/{file_id}/segments",
    response_model=StandardResponse[EditorTaskSegmentResponse],
)
async def get_editor_task_segment(
    project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
    file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
    segment_index: int = Query(..., ge=0, alias="segmentIndex", description="段落索引(从0开始)"),
    db: AsyncSession = Depends(get_db),
):
    """Return one segment of a task, selected by its zero-based index.

    The lookup is delegated to ``AnnotationEditorService.get_task_segment``;
    the result is wrapped in a ``StandardResponse`` with code 200.
    """
    segment = await AnnotationEditorService(db).get_task_segment(
        project_id, file_id, segment_index
    )
    return StandardResponse(code=200, message="success", data=segment)
|
||||
|
||||
|
||||
@router.put(
|
||||
"/projects/{project_id}/tasks/{file_id}/annotation",
|
||||
response_model=StandardResponse[UpsertAnnotationResponse],
|
||||
|
||||
@@ -3,7 +3,7 @@ import math
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
@@ -17,6 +17,7 @@ from ..service.template import AnnotationTemplateService
|
||||
from ..schema import (
|
||||
DatasetMappingCreateRequest,
|
||||
DatasetMappingCreateResponse,
|
||||
DatasetMappingUpdateRequest,
|
||||
DeleteDatasetResponse,
|
||||
DatasetMappingResponse,
|
||||
)
|
||||
@@ -28,6 +29,7 @@ router = APIRouter(
|
||||
logger = get_logger(__name__)
|
||||
TEXT_DATASET_TYPE = "TEXT"
|
||||
SOURCE_DOCUMENT_FILE_TYPES = {"pdf", "doc", "docx", "xls", "xlsx"}
|
||||
LABELING_TYPE_CONFIG_KEY = "labeling_type"
|
||||
|
||||
@router.get("/{mapping_id}/login")
|
||||
async def login_label_studio(
|
||||
@@ -81,6 +83,7 @@ async def create_mapping(
|
||||
|
||||
# 如果提供了模板ID,获取模板配置
|
||||
label_config = None
|
||||
template_labeling_type = None
|
||||
if request.template_id:
|
||||
logger.info(f"Using template: {request.template_id}")
|
||||
template = await template_service.get_template(db, request.template_id)
|
||||
@@ -90,6 +93,7 @@ async def create_mapping(
|
||||
detail=f"Template not found: {request.template_id}"
|
||||
)
|
||||
label_config = template.label_config
|
||||
template_labeling_type = getattr(template, "labeling_type", None)
|
||||
logger.debug(f"Template label config loaded for template: {template.name}")
|
||||
|
||||
# 如果直接提供了 label_config (自定义或修改后的),则覆盖模板配置
|
||||
@@ -108,6 +112,8 @@ async def create_mapping(
|
||||
project_configuration["description"] = project_description
|
||||
if dataset_type == TEXT_DATASET_TYPE and request.segmentation_enabled is not None:
|
||||
project_configuration["segmentation_enabled"] = bool(request.segmentation_enabled)
|
||||
if template_labeling_type:
|
||||
project_configuration[LABELING_TYPE_CONFIG_KEY] = template_labeling_type
|
||||
|
||||
labeling_project = LabelingProject(
|
||||
id=str(uuid.uuid4()), # Generate UUID here
|
||||
@@ -144,6 +150,18 @@ async def create_mapping(
|
||||
labeling_project, snapshot_file_ids
|
||||
)
|
||||
|
||||
# 如果启用了分段且为文本数据集,预生成切片结构
|
||||
if dataset_type == TEXT_DATASET_TYPE and request.segmentation_enabled:
|
||||
try:
|
||||
from ..service.editor import AnnotationEditorService
|
||||
editor_service = AnnotationEditorService(db)
|
||||
# 异步预计算切片(不阻塞创建响应)
|
||||
segmentation_result = await editor_service.precompute_segmentation_for_project(labeling_project.id)
|
||||
logger.info(f"Precomputed segmentation for project {labeling_project.id}: {segmentation_result}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to precompute segmentation for project {labeling_project.id}: {e}")
|
||||
# 不影响项目创建,只记录警告
|
||||
|
||||
response_data = DatasetMappingCreateResponse(
|
||||
id=mapping.id,
|
||||
labeling_project_id=str(mapping.labeling_project_id),
|
||||
@@ -382,3 +400,116 @@ async def delete_mapping(
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting mapping: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.put("/{project_id}", response_model=StandardResponse[DatasetMappingResponse])
async def update_mapping(
    project_id: str = Path(..., description="映射UUID(path param)"),
    request: DatasetMappingUpdateRequest = None,
    db: AsyncSession = Depends(get_db)
):
    """
    Update a labeling project.

    The project is selected by the ``project_id`` path parameter. Updatable
    fields:

    - name: project name
    - description: project description (stored inside ``configuration``)
    - template_id: annotation template id; when the template declares a
      labeling type, it is stored inside ``configuration`` as well
    - label_config: Label Studio XML configuration (stored inside
      ``configuration``)

    Fields left as ``None`` are not touched. When nothing changes, the current
    project is returned without writing to the database.

    Raises:
        HTTPException: 400 when the request body is missing, 404 when the
            project or the referenced template does not exist, 500 on any
            other failure.
    """
    try:
        logger.info(f"Update mapping request received: project_id={project_id!r}")

        # The body parameter defaults to None; without this guard a missing
        # body would fail on attribute access and surface as a 500.
        if request is None:
            raise HTTPException(
                status_code=400,
                detail="Request body is required"
            )

        service = DatasetMappingService(db)

        # Load the ORM row directly to get at the raw configuration.
        result = await db.execute(
            select(LabelingProject).where(
                LabelingProject.id == project_id,
                LabelingProject.deleted_at.is_(None)
            )
        )
        mapping_orm = result.scalar_one_or_none()

        if not mapping_orm:
            raise HTTPException(
                status_code=404,
                detail=f"Mapping not found: {project_id}"
            )

        update_values = {}
        if request.name is not None:
            update_values["name"] = request.name

        # description, label_config and the labeling type all live inside the
        # configuration column; work on a copy and track whether it changed.
        configuration = {}
        if isinstance(mapping_orm.configuration, dict):
            configuration = mapping_orm.configuration.copy()
        configuration_changed = False

        if request.description is not None:
            configuration["description"] = request.description
            configuration_changed = True
        if request.label_config is not None:
            configuration["label_config"] = request.label_config
            configuration_changed = True

        if request.template_id is not None:
            update_values["template_id"] = request.template_id
            if request.template_id:
                template_service = AnnotationTemplateService()
                template = await template_service.get_template(db, request.template_id)
                if not template:
                    raise HTTPException(
                        status_code=404,
                        detail=f"Template not found: {request.template_id}"
                    )
                template_labeling_type = getattr(template, "labeling_type", None)
                if template_labeling_type:
                    configuration[LABELING_TYPE_CONFIG_KEY] = template_labeling_type
                    configuration_changed = True

        # Decided only after the template has been processed, so the labeling
        # type is persisted even when the project had no configuration before,
        # and an untouched configuration does not force a write.
        if configuration_changed:
            update_values["configuration"] = configuration

        if not update_values:
            # Nothing to update: return the current data.
            response_data = await service.get_mapping_by_uuid(project_id)
            return StandardResponse(
                code=200,
                message="success",
                data=response_data
            )

        from datetime import datetime
        update_values["updated_at"] = datetime.now()

        result = await db.execute(
            update(LabelingProject)
            .where(LabelingProject.id == project_id)
            .values(**update_values)
        )
        await db.commit()

        if result.rowcount == 0:
            raise HTTPException(
                status_code=500,
                detail="Failed to update mapping"
            )

        # Re-read the project so the response reflects the stored state.
        updated_mapping = await service.get_mapping_by_uuid(project_id)

        logger.info(f"Successfully updated mapping: {project_id}")

        return StandardResponse(
            code=200,
            message="success",
            data=updated_mapping
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating mapping: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@@ -16,6 +16,7 @@ from pydantic import BaseModel, Field, ConfigDict
|
||||
|
||||
from app.db.models.annotation_management import (
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_IN_PROGRESS,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
)
|
||||
@@ -25,6 +26,7 @@ class AnnotationStatus(str, Enum):
|
||||
"""标注状态枚举"""
|
||||
|
||||
ANNOTATED = ANNOTATION_STATUS_ANNOTATED
|
||||
IN_PROGRESS = ANNOTATION_STATUS_IN_PROGRESS
|
||||
NO_ANNOTATION = ANNOTATION_STATUS_NO_ANNOTATION
|
||||
NOT_APPLICABLE = ANNOTATION_STATUS_NOT_APPLICABLE
|
||||
|
||||
@@ -77,12 +79,9 @@ class EditorTaskListResponse(BaseModel):
|
||||
|
||||
|
||||
class SegmentInfo(BaseModel):
|
||||
"""段落信息(用于文本分段标注)"""
|
||||
"""段落摘要(用于文本分段标注)"""
|
||||
|
||||
idx: int = Field(..., description="段落索引")
|
||||
text: str = Field(..., description="段落文本")
|
||||
start: int = Field(..., description="在原文中的起始位置")
|
||||
end: int = Field(..., description="在原文中的结束位置")
|
||||
has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
|
||||
line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)")
|
||||
chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)")
|
||||
@@ -98,7 +97,29 @@ class EditorTaskResponse(BaseModel):
|
||||
|
||||
# 分段相关字段
|
||||
segmented: bool = Field(False, description="是否启用分段模式")
|
||||
segments: Optional[List[SegmentInfo]] = Field(None, description="段落列表")
|
||||
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
|
||||
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class SegmentDetail(BaseModel):
    """Content of a single text segment returned by the editor.

    Fields may be populated either by their Python name or by their
    camelCase alias (``populate_by_name=True``).
    """

    # Position of the segment within the flattened segment list.
    idx: int = Field(..., description="段落索引")
    # Text of this segment.
    text: str = Field(..., description="段落文本")
    # Whether an annotation already exists for this segment.
    has_annotation: bool = Field(
        default=False,
        alias="hasAnnotation",
        description="该段落是否已有标注",
    )
    # Zero-based JSONL line index the segment comes from.
    line_index: int = Field(
        default=0,
        alias="lineIndex",
        description="JSONL 行索引(从0开始)",
    )
    # Zero-based chunk index inside that line.
    chunk_index: int = Field(
        default=0,
        alias="chunkIndex",
        description="行内分片索引(从0开始)",
    )

    model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class EditorTaskSegmentResponse(BaseModel):
|
||||
"""编辑器单段内容响应"""
|
||||
|
||||
segmented: bool = Field(False, description="是否启用分段模式")
|
||||
segment: Optional[SegmentDetail] = Field(None, description="段落内容")
|
||||
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
|
||||
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
|
||||
|
||||
@@ -112,7 +133,7 @@ class UpsertAnnotationRequest(BaseModel):
|
||||
annotation_status: Optional[AnnotationStatus] = Field(
|
||||
None,
|
||||
alias="annotationStatus",
|
||||
description="标注状态(无标注传 NO_ANNOTATION,不适用传 NOT_APPLICABLE)",
|
||||
description="标注状态(无标注传 NO_ANNOTATION,不适用传 NOT_APPLICABLE,IN_PROGRESS 由后端维护)",
|
||||
)
|
||||
expected_updated_at: Optional[datetime] = Field(
|
||||
None,
|
||||
|
||||
@@ -39,9 +39,22 @@ class DatasetMappingCreateResponse(BaseResponseModel):
|
||||
labeling_project_id: str = Field(..., description="Label Studio项目ID")
|
||||
labeling_project_name: str = Field(..., description="Label Studio项目名称")
|
||||
|
||||
class DatasetMappingUpdateRequest(BaseResponseModel):
|
||||
"""数据集映射 更新 请求模型"""
|
||||
dataset_id: Optional[str] = Field(None, description="源数据集ID")
|
||||
class DatasetMappingUpdateRequest(BaseModel):
    """Request model for updating a dataset mapping (labeling project).

    Every field is optional; a field left as ``None`` is treated by the
    update endpoint as "not provided".

    Updatable fields:
    - name: labeling project name
    - description: labeling project description
    - template_id: annotation template ID
    - label_config: Label Studio XML configuration
    """

    name: Optional[str] = Field(None, alias="name", description="标注项目名称")
    description: Optional[str] = Field(None, alias="description", description="标注项目描述")
    template_id: Optional[str] = Field(None, alias="templateId", description="标注模板ID")
    label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML配置")

    # Pydantic v2 deprecates the nested ``class Config``; ``model_config`` is
    # the supported spelling and carries the same setting. A plain dict is
    # accepted here, so no extra import is needed.
    # NOTE(review): ``validate_by_name`` needs pydantic >= 2.11 — on older 2.x
    # the equivalent key is ``populate_by_name``; confirm the pinned version.
    model_config = {"validate_by_name": True}
|
||||
|
||||
class DatasetMappingResponse(BaseModel):
|
||||
"""数据集映射 查询 响应模型"""
|
||||
@@ -52,6 +65,7 @@ class DatasetMappingResponse(BaseModel):
|
||||
name: Optional[str] = Field(None, description="标注项目名称")
|
||||
description: Optional[str] = Field(None, description="标注项目描述")
|
||||
template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID")
|
||||
labeling_type: Optional[str] = Field(None, alias="labelingType", description="标注类型")
|
||||
template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情")
|
||||
label_config: Optional[str] = Field(None, alias="labelConfig", description="实际使用的 Label Studio XML 配置")
|
||||
segmentation_enabled: Optional[bool] = Field(
|
||||
@@ -61,6 +75,7 @@ class DatasetMappingResponse(BaseModel):
|
||||
)
|
||||
total_count: int = Field(0, alias="totalCount", description="数据集总数据量")
|
||||
annotated_count: int = Field(0, alias="annotatedCount", description="已标注数据量")
|
||||
in_progress_count: int = Field(0, alias="inProgressCount", description="分段标注中数据量")
|
||||
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
|
||||
updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间")
|
||||
deleted_at: Optional[datetime] = Field(None, alias="deletedAt", description="删除时间")
|
||||
|
||||
@@ -26,16 +26,19 @@ from app.core.logging import get_logger
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
|
||||
from app.db.models.annotation_management import (
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_IN_PROGRESS,
|
||||
ANNOTATION_STATUS_CLIENT_VALUES,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
ANNOTATION_STATUS_VALUES,
|
||||
)
|
||||
from app.module.annotation.config import LabelStudioTagConfig
|
||||
from app.module.annotation.schema.editor import (
|
||||
EditorProjectInfo,
|
||||
EditorTaskListItem,
|
||||
EditorTaskListResponse,
|
||||
EditorTaskSegmentResponse,
|
||||
EditorTaskResponse,
|
||||
SegmentDetail,
|
||||
SegmentInfo,
|
||||
UpsertAnnotationRequest,
|
||||
UpsertAnnotationResponse,
|
||||
@@ -61,6 +64,7 @@ SEGMENT_INDEX_KEY = "segment_index"
|
||||
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
|
||||
SEGMENTED_KEY = "segmented"
|
||||
SEGMENTS_KEY = "segments"
|
||||
SEGMENT_TOTAL_KEY = "total_segments"
|
||||
SEGMENT_RESULT_KEY = "result"
|
||||
SEGMENT_CREATED_AT_KEY = "created_at"
|
||||
SEGMENT_UPDATED_AT_KEY = "updated_at"
|
||||
@@ -416,6 +420,76 @@ class AnnotationEditorService:
|
||||
result = payload.get(SEGMENT_RESULT_KEY)
|
||||
return isinstance(result, list) and len(result) > 0
|
||||
|
||||
@staticmethod
def _resolve_segment_total(payload: Optional[Dict[str, Any]]) -> Optional[int]:
    """Read a positive segment total from an annotation payload.

    Looks up ``SEGMENT_TOTAL_KEY`` in ``payload`` and accepts an ``int``,
    an integral ``float`` or a decimal-digit string.

    Returns:
        The total as a positive ``int``, or ``None`` when the payload is
        missing / not a dict, or the stored value is absent, non-numeric,
        a boolean, or not greater than zero.
    """
    if not payload or not isinstance(payload, dict):
        return None

    value = payload.get(SEGMENT_TOTAL_KEY)

    # bool is a subclass of int; True/False are flags, never a segment count.
    if isinstance(value, bool):
        return None

    if isinstance(value, float):
        if not value.is_integer():
            return None
        value = int(value)
    elif isinstance(value, str):
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. "²", which makes int() raise ValueError.
        if not value.isdecimal():
            return None
        value = int(value)
    elif not isinstance(value, int):
        return None

    return value if value > 0 else None
|
||||
|
||||
async def _compute_segment_total(
    self,
    project: LabelingProject,
    file_record: DatasetFiles,
    file_id: str,
) -> Optional[int]:
    """Count how many segments the given file splits into.

    Returns ``None`` when the project's dataset is not a TEXT dataset,
    segmentation is disabled for the project, the fetched content is not a
    string, or the file does not need segmentation (a single record whose
    text does not exceed ``SEGMENT_THRESHOLD``). Otherwise returns the
    segment count, never less than 1.
    """
    raw_dataset_type = await self._get_dataset_type(project.dataset_id)
    if self._normalize_dataset_type(raw_dataset_type) != DATASET_TYPE_TEXT:
        return None
    if not self._resolve_segmentation_enabled(project):
        return None

    content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
    if not isinstance(content, str):
        return None

    text_key = self._resolve_primary_text_key(
        await self._resolve_project_label_config(project)
    )
    lowered_name = str(getattr(file_record, "file_name", "")).lower()

    # JSONL files yield one record per line; anything else is at most one
    # JSON record, falling back to the raw text as a single record.
    parsed_records: List[Tuple[Optional[Dict[str, Any]], str]] = []
    if lowered_name.endswith(JSONL_EXTENSION):
        parsed_records = self._parse_jsonl_records(content)
    else:
        json_payload = self._try_parse_json_payload(content)
        if json_payload:
            parsed_records = [(json_payload, content)]
    if not parsed_records:
        parsed_records = [(None, content)]

    texts = []
    for record_payload, record_raw in parsed_records:
        texts.append(self._resolve_primary_text_value(record_payload, record_raw, text_key))
    if not texts:
        texts = [content]

    has_long_text = any(len(item or "") > self.SEGMENT_THRESHOLD for item in texts)
    if len(parsed_records) <= 1 and not has_long_text:
        return None

    splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
    count = 0
    for item in texts:
        body = item or ""
        if len(body) <= self.SEGMENT_THRESHOLD:
            # Short records are a single segment.
            count += 1
            continue
        pieces = splitter.split(body)
        # An empty split result still counts as one segment.
        count += len(pieces) if pieces else 1

    return max(count, 1)
|
||||
|
||||
@classmethod
|
||||
def _build_source_document_filter(cls):
|
||||
file_type_lower = func.lower(DatasetFiles.file_type)
|
||||
@@ -466,6 +540,50 @@ class AnnotationEditorService:
|
||||
return value
|
||||
return raw_text
|
||||
|
||||
def _build_segment_contexts(
    self,
    records: List[Tuple[Optional[Dict[str, Any]], str]],
    record_texts: List[str],
    segment_annotation_keys: set[str],
) -> Tuple[List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]]:
    """Flatten records into parallel lists of segment summaries and contexts.

    Each record whose text exceeds ``SEGMENT_THRESHOLD`` is split into
    chunks; shorter records become one segment. Segments are numbered
    consecutively across all records.

    Returns:
        ``(segments, contexts)`` where ``contexts[i]`` is
        ``(payload, raw_text, segment_text, line_index, chunk_index)`` for
        ``segments[i]``. When nothing was produced, a single empty
        placeholder segment/context pair is returned.
    """
    splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
    infos: List[SegmentInfo] = []
    contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []

    for line_no, (record, text) in enumerate(zip(records, record_texts)):
        record_payload, record_raw = record
        body = text or ""

        if len(body) > self.SEGMENT_THRESHOLD:
            chunk_texts = [piece["text"] for piece in splitter.split(body)]
        else:
            chunk_texts = [body]

        for chunk_no, chunk_text in enumerate(chunk_texts):
            # Both lists grow in lock-step, so the next global index is
            # simply the current length.
            position = len(contexts)
            infos.append(
                SegmentInfo(
                    idx=position,
                    hasAnnotation=str(position) in segment_annotation_keys,
                    lineIndex=line_no,
                    chunkIndex=chunk_no,
                )
            )
            contexts.append((record_payload, record_raw, chunk_text, line_no, chunk_no))

    if infos:
        return infos, contexts

    # Nothing produced: hand back one empty placeholder segment.
    placeholder = SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)
    return [placeholder], [(None, "", "", 0, 0)]
|
||||
|
||||
async def get_project_info(self, project_id: str) -> EditorProjectInfo:
|
||||
project = await self._get_project_or_404(project_id)
|
||||
|
||||
@@ -596,6 +714,124 @@ class AnnotationEditorService:
|
||||
|
||||
return await self._build_text_task(project, file_record, file_id, segment_index)
|
||||
|
||||
async def get_task_segment(
    self,
    project_id: str,
    file_id: str,
    segment_index: int,
) -> EditorTaskSegmentResponse:
    """Return the content of one segment of a text file in a project.

    Args:
        project_id: Labeling project ID.
        file_id: ID of a file belonging to the project's dataset.
        segment_index: Zero-based index into the file's flattened segments.

    Returns:
        A response with ``segmented=True`` and the requested segment, or a
        ``segmented=False`` response (no segment, zero totals) when
        segmentation is disabled or the file does not need segmentation.

    Raises:
        HTTPException: 400 when the dataset is not TEXT or ``segment_index``
            is out of range; 404 when the file is not part of the project's
            dataset; 500 when the file content could not be read as text.
    """

    def _not_segmented() -> EditorTaskSegmentResponse:
        # Shared shape for every "no segmentation applies" outcome.
        return EditorTaskSegmentResponse(
            segmented=False,
            segment=None,
            totalSegments=0,
            currentSegmentIndex=0,
        )

    project = await self._get_project_or_404(project_id)

    dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
    if dataset_type != DATASET_TYPE_TEXT:
        raise HTTPException(
            status_code=400,
            detail="当前仅支持 TEXT 项目的段落内容",
        )

    file_result = await self.db.execute(
        select(DatasetFiles).where(
            DatasetFiles.id == file_id,
            DatasetFiles.dataset_id == project.dataset_id,
        )
    )
    file_record = file_result.scalar_one_or_none()
    if not file_record:
        raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")

    if not self._resolve_segmentation_enabled(project):
        return _not_segmented()

    text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # would let a non-string value reach the parsing code below.
    if not isinstance(text_content, str):
        raise HTTPException(status_code=500, detail=f"无法读取文件文本内容: {file_id}")

    label_config = await self._resolve_project_label_config(project)
    primary_text_key = self._resolve_primary_text_key(label_config)
    file_name = str(getattr(file_record, "file_name", "")).lower()

    # JSONL files yield one record per line; other files are at most one
    # JSON record, falling back to the raw text as a single record.
    records: List[Tuple[Optional[Dict[str, Any]], str]] = []
    if file_name.endswith(JSONL_EXTENSION):
        records = self._parse_jsonl_records(text_content)
    else:
        parsed_payload = self._try_parse_json_payload(text_content)
        if parsed_payload:
            records = [(parsed_payload, text_content)]

    if not records:
        records = [(None, text_content)]

    record_texts = [
        self._resolve_primary_text_value(payload, raw_text, primary_text_key)
        for payload, raw_text in records
    ]
    if not record_texts:
        record_texts = [text_content]

    needs_segmentation = len(records) > 1 or any(
        len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts
    )
    if not needs_segmentation:
        return _not_segmented()

    # Existing per-segment annotations decide each segment's hasAnnotation flag.
    ann_result = await self.db.execute(
        select(AnnotationResult).where(
            AnnotationResult.project_id == project.id,
            AnnotationResult.file_id == file_id,
        )
    )
    ann = ann_result.scalar_one_or_none()
    segment_annotations: Dict[str, Dict[str, Any]] = {}
    if ann and isinstance(ann.annotation, dict):
        segment_annotations = self._extract_segment_annotations(ann.annotation)
    segment_annotation_keys = set(segment_annotations.keys())

    segments, segment_contexts = self._build_segment_contexts(
        records,
        record_texts,
        segment_annotation_keys,
    )

    total_segments = len(segment_contexts)
    if total_segments == 0:
        return _not_segmented()

    if segment_index < 0 or segment_index >= total_segments:
        raise HTTPException(
            status_code=400,
            detail=f"segmentIndex 超出范围: {segment_index}",
        )

    segment_info = segments[segment_index]
    _, _, segment_text, line_index, chunk_index = segment_contexts[segment_index]
    segment_detail = SegmentDetail(
        idx=segment_info.idx,
        text=segment_text,
        hasAnnotation=segment_info.has_annotation,
        lineIndex=line_index,
        chunkIndex=chunk_index,
    )

    return EditorTaskSegmentResponse(
        segmented=True,
        segment=segment_detail,
        totalSegments=total_segments,
        currentSegmentIndex=segment_index,
    )
|
||||
|
||||
async def _build_text_task(
|
||||
self,
|
||||
project: LabelingProject,
|
||||
@@ -651,7 +887,8 @@ class AnnotationEditorService:
|
||||
needs_segmentation = segmentation_enabled and (
|
||||
len(records) > 1 or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
|
||||
)
|
||||
segments: Optional[List[SegmentInfo]] = None
|
||||
segments: List[SegmentInfo] = []
|
||||
segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
|
||||
current_segment_index = 0
|
||||
display_text = record_texts[0] if record_texts else text_content
|
||||
selected_payload = records[0][0] if records else None
|
||||
@@ -660,46 +897,13 @@ class AnnotationEditorService:
|
||||
display_text = "\n".join(record_texts) if record_texts else text_content
|
||||
|
||||
if needs_segmentation:
|
||||
splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
|
||||
segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
|
||||
segments = []
|
||||
segment_cursor = 0
|
||||
|
||||
for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
|
||||
normalized_text = record_text or ""
|
||||
if len(normalized_text) > self.SEGMENT_THRESHOLD:
|
||||
raw_segments = splitter.split(normalized_text)
|
||||
for chunk_index, seg in enumerate(raw_segments):
|
||||
segments.append(SegmentInfo(
|
||||
idx=segment_cursor,
|
||||
text=seg["text"],
|
||||
start=seg["start"],
|
||||
end=seg["end"],
|
||||
hasAnnotation=str(segment_cursor) in segment_annotation_keys,
|
||||
lineIndex=record_index,
|
||||
chunkIndex=chunk_index,
|
||||
))
|
||||
segment_contexts.append((payload, raw_text, seg["text"], record_index, chunk_index))
|
||||
segment_cursor += 1
|
||||
else:
|
||||
segments.append(SegmentInfo(
|
||||
idx=segment_cursor,
|
||||
text=normalized_text,
|
||||
start=0,
|
||||
end=len(normalized_text),
|
||||
hasAnnotation=str(segment_cursor) in segment_annotation_keys,
|
||||
lineIndex=record_index,
|
||||
chunkIndex=0,
|
||||
))
|
||||
segment_contexts.append((payload, raw_text, normalized_text, record_index, 0))
|
||||
segment_cursor += 1
|
||||
|
||||
if not segments:
|
||||
segments = [SegmentInfo(idx=0, text="", start=0, end=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)]
|
||||
segment_contexts = [(None, "", "", 0, 0)]
|
||||
|
||||
_, segment_contexts = self._build_segment_contexts(
|
||||
records,
|
||||
record_texts,
|
||||
segment_annotation_keys,
|
||||
)
|
||||
current_segment_index = segment_index if segment_index is not None else 0
|
||||
if current_segment_index < 0 or current_segment_index >= len(segments):
|
||||
if current_segment_index < 0 or current_segment_index >= len(segment_contexts):
|
||||
current_segment_index = 0
|
||||
|
||||
selected_payload, _, display_text, _, _ = segment_contexts[current_segment_index]
|
||||
@@ -777,8 +981,7 @@ class AnnotationEditorService:
|
||||
task=task,
|
||||
annotationUpdatedAt=annotation_updated_at,
|
||||
segmented=needs_segmentation,
|
||||
segments=segments,
|
||||
totalSegments=len(segments) if segments else 1,
|
||||
totalSegments=len(segment_contexts) if needs_segmentation else 1,
|
||||
currentSegmentIndex=current_segment_index,
|
||||
)
|
||||
|
||||
@@ -920,11 +1123,19 @@ class AnnotationEditorService:
|
||||
|
||||
ls_task_id = self._make_ls_task_id(project_id, file_id)
|
||||
|
||||
segment_total_hint = None
|
||||
if request.segment_index is not None:
|
||||
segment_total_hint = self._resolve_segment_total(annotation_payload)
|
||||
if segment_total_hint is None:
|
||||
segment_total_hint = await self._compute_segment_total(project, file_record, file_id)
|
||||
|
||||
existing_result = await self.db.execute(
|
||||
select(AnnotationResult).where(
|
||||
select(AnnotationResult)
|
||||
.where(
|
||||
AnnotationResult.project_id == project_id,
|
||||
AnnotationResult.file_id == file_id,
|
||||
)
|
||||
.with_for_update()
|
||||
)
|
||||
existing = existing_result.scalar_one_or_none()
|
||||
|
||||
@@ -938,6 +1149,14 @@ class AnnotationEditorService:
|
||||
request.segment_index,
|
||||
annotation_payload,
|
||||
)
|
||||
segment_entries = self._extract_segment_annotations(final_payload)
|
||||
if str(request.segment_index) not in segment_entries:
|
||||
logger.warning(
|
||||
"分段标注合并异常:未找到当前段落 key,project_id=%s file_id=%s segment_index=%s",
|
||||
project_id,
|
||||
file_id,
|
||||
request.segment_index,
|
||||
)
|
||||
else:
|
||||
# 非分段模式:直接使用传入的 annotation
|
||||
annotation_payload["task"] = ls_task_id
|
||||
@@ -946,9 +1165,26 @@ class AnnotationEditorService:
|
||||
final_payload = annotation_payload
|
||||
|
||||
requested_status = request.annotation_status
|
||||
if requested_status is not None and requested_status not in ANNOTATION_STATUS_VALUES:
|
||||
if requested_status is not None and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES:
|
||||
raise HTTPException(status_code=400, detail="annotationStatus 不合法")
|
||||
|
||||
segment_total = None
|
||||
segment_done = None
|
||||
if request.segment_index is not None:
|
||||
segment_total = self._resolve_segment_total(final_payload)
|
||||
if segment_total is None:
|
||||
segment_total = segment_total_hint
|
||||
if segment_total and segment_total > 0:
|
||||
final_payload[SEGMENT_TOTAL_KEY] = segment_total
|
||||
segment_done = len(self._extract_segment_annotations(final_payload))
|
||||
|
||||
if (
|
||||
segment_total is not None
|
||||
and segment_done is not None
|
||||
and segment_done < segment_total
|
||||
):
|
||||
final_status = ANNOTATION_STATUS_IN_PROGRESS
|
||||
else:
|
||||
has_result = self._has_annotation_result(final_payload)
|
||||
if has_result:
|
||||
final_status = ANNOTATION_STATUS_ANNOTATED
|
||||
@@ -960,6 +1196,18 @@ class AnnotationEditorService:
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="未发现标注内容,请确认无标注/不适用后再保存")
|
||||
|
||||
if request.segment_index is not None:
|
||||
segment_entries = self._extract_segment_annotations(final_payload)
|
||||
logger.info(
|
||||
"分段标注保存:project_id=%s file_id=%s segment_index=%s segments=%s total=%s status=%s",
|
||||
project_id,
|
||||
file_id,
|
||||
request.segment_index,
|
||||
len(segment_entries),
|
||||
segment_total,
|
||||
final_status,
|
||||
)
|
||||
|
||||
if existing:
|
||||
if request.expected_updated_at and existing.updated_at:
|
||||
if existing.updated_at != request.expected_updated_at.replace(tzinfo=None):
|
||||
@@ -1029,7 +1277,15 @@ class AnnotationEditorService:
|
||||
if not base.get(SEGMENTED_KEY):
|
||||
base[SEGMENTED_KEY] = True
|
||||
segments = base.get(SEGMENTS_KEY)
|
||||
if not isinstance(segments, dict):
|
||||
if isinstance(segments, dict):
|
||||
# 拷贝一份,避免原地修改导致 SQLAlchemy 变更检测失效
|
||||
segments = dict(segments)
|
||||
base[SEGMENTS_KEY] = segments
|
||||
elif isinstance(segments, list):
|
||||
# 兼容旧的 list 结构,归一化为 dict 结构
|
||||
segments = self._extract_segment_annotations(base)
|
||||
base[SEGMENTS_KEY] = segments
|
||||
else:
|
||||
segments = {}
|
||||
base[SEGMENTS_KEY] = segments
|
||||
|
||||
@@ -1060,3 +1316,195 @@ class AnnotationEditorService:
|
||||
except Exception as exc:
|
||||
logger.warning("标注同步知识管理失败:%s", exc)
|
||||
|
||||
async def precompute_segmentation_for_project(
    self,
    project_id: str,
    max_retries: int = 3
) -> Dict[str, Any]:
    """
    Precompute the segment structure for every text file of a project and
    persist it as an annotation record.

    Files that need segmentation get an annotation payload holding one empty
    entry per segment plus the segment total, with status IN_PROGRESS.
    Files that need no segmentation are counted as succeeded and left
    untouched. Source-document files are excluded up front.

    Args:
        project_id: Labeling project ID.
        max_retries: Number of attempts per file; values below 1 are
            treated as 1 so every file is tried and counted.

    Returns:
        Statistics: ``{"total_files", "succeeded", "failed"}``. All zeros
        when the project is not a TEXT dataset, has segmentation disabled,
        or has no files.
    """
    project = await self._get_project_or_404(project_id)
    dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))

    # Only text datasets are segmented.
    if dataset_type != DATASET_TYPE_TEXT:
        logger.info(f"项目 {project_id} 不是文本数据集,跳过切片预生成")
        return {"total_files": 0, "succeeded": 0, "failed": 0}

    # Segmentation must be enabled on the project.
    if not self._resolve_segmentation_enabled(project):
        logger.info(f"项目 {project_id} 未启用分段,跳过切片预生成")
        return {"total_files": 0, "succeeded": 0, "failed": 0}

    # All files linked to the project within the project's dataset.
    files_result = await self.db.execute(
        select(DatasetFiles)
        .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
        .where(
            LabelingProjectFile.project_id == project_id,
            DatasetFiles.dataset_id == project.dataset_id,
        )
    )
    file_records = files_result.scalars().all()

    if not file_records:
        logger.info(f"项目 {project_id} 没有文件,跳过切片预生成")
        return {"total_files": 0, "succeeded": 0, "failed": 0}

    # Drop source documents (matched by file type or by file-name extension).
    valid_files = []
    for file_record in file_records:
        file_type = str(getattr(file_record, "file_type", "") or "").lower()
        lowered_name = str(getattr(file_record, "file_name", "")).lower()
        is_source_document = (
            file_type in SOURCE_DOCUMENT_TYPES or
            any(lowered_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS)
        )
        if not is_source_document:
            valid_files.append(file_record)

    total_files = len(valid_files)
    succeeded = 0
    failed = 0

    label_config = await self._resolve_project_label_config(project)
    primary_text_key = self._resolve_primary_text_key(label_config)

    # With fewer than one attempt the retry loop would never run and the file
    # would appear in neither counter.
    attempts = max(1, max_retries)

    for file_record in valid_files:
        file_id = str(file_record.id)  # type: ignore
        file_name = str(getattr(file_record, "file_name", ""))

        for retry in range(attempts):
            try:
                # Read the text content.
                text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
                if not isinstance(text_content, str):
                    logger.warning(f"文件 {file_id} 内容不是字符串,跳过切片")
                    failed += 1
                    break

                # Parse records: one per JSONL line, else at most one JSON
                # record, falling back to the raw text.
                records: List[Tuple[Optional[Dict[str, Any]], str]] = []
                if file_name.lower().endswith(JSONL_EXTENSION):
                    records = self._parse_jsonl_records(text_content)
                else:
                    parsed_payload = self._try_parse_json_payload(text_content)
                    if parsed_payload:
                        records = [(parsed_payload, text_content)]

                if not records:
                    records = [(None, text_content)]

                record_texts = [
                    self._resolve_primary_text_value(payload, raw_text, primary_text_key)
                    for payload, raw_text in records
                ]
                if not record_texts:
                    record_texts = [text_content]

                needs_segmentation = len(records) > 1 or any(
                    len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts
                )

                if not needs_segmentation:
                    # Nothing to persist for files that stay unsegmented.
                    succeeded += 1
                    break

                # Only the number of segments matters here: long records
                # contribute one segment per chunk, short records exactly one.
                splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
                segment_total = 0
                for record_text in record_texts:
                    normalized_text = record_text or ""
                    if len(normalized_text) > self.SEGMENT_THRESHOLD:
                        segment_total += sum(1 for _ in splitter.split(normalized_text))
                    else:
                        segment_total += 1

                if segment_total == 0:
                    succeeded += 1
                    break

                # One timestamp for the whole scaffold keeps created/updated
                # identical on freshly generated entries.
                stamp = datetime.utcnow().isoformat() + "Z"
                segments = {
                    str(index): {
                        SEGMENT_RESULT_KEY: [],
                        SEGMENT_CREATED_AT_KEY: stamp,
                        SEGMENT_UPDATED_AT_KEY: stamp,
                    }
                    for index in range(segment_total)
                }

                # Segmented annotation structure.
                final_payload = {
                    SEGMENTED_KEY: True,
                    "version": 1,
                    SEGMENTS_KEY: segments,
                    SEGMENT_TOTAL_KEY: segment_total,
                }

                # Is there already an annotation record for this file?
                existing_result = await self.db.execute(
                    select(AnnotationResult).where(
                        AnnotationResult.project_id == project_id,
                        AnnotationResult.file_id == file_id,
                    )
                )
                existing = existing_result.scalar_one_or_none()

                now = datetime.utcnow()

                if existing:
                    # NOTE(review): this replaces the stored annotation with the
                    # empty scaffold — confirm the method is never run on files
                    # that already carry real annotation results.
                    existing.annotation = final_payload  # type: ignore[assignment]
                    existing.annotation_status = ANNOTATION_STATUS_IN_PROGRESS  # type: ignore[assignment]
                    existing.updated_at = now  # type: ignore[assignment]
                else:
                    # Create a new annotation record.
                    record = AnnotationResult(
                        id=str(uuid.uuid4()),
                        project_id=project_id,
                        file_id=file_id,
                        annotation=final_payload,
                        annotation_status=ANNOTATION_STATUS_IN_PROGRESS,
                        created_at=now,
                        updated_at=now,
                    )
                    self.db.add(record)

                await self.db.commit()
                succeeded += 1
                logger.info(f"成功为文件 {file_id} 预生成 {segment_total} 个切片")
                break

            except Exception as e:
                logger.warning(
                    f"为文件 {file_id} 预生成切片失败 (重试 {retry + 1}/{attempts}): {e}"
                )
                if retry == attempts - 1:
                    failed += 1
                # Roll back after every failed attempt so the next attempt
                # (or the next file) starts from a usable session.
                await self.db.rollback()

    logger.info(
        f"项目 {project_id} 切片预生成完成: 总计 {total_files}, 成功 {succeeded}, 失败 {failed}"
    )
    return {
        "total_files": total_files,
        "succeeded": succeeded,
        "failed": failed,
    }
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import Dataset, DatasetFiles, LabelingProject
|
||||
from app.module.annotation.service.text_fetcher import fetch_text_content_via_download_api
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -77,15 +76,18 @@ class KnowledgeSyncService:
|
||||
|
||||
if set_id:
|
||||
exists = await self._get_knowledge_set(set_id)
|
||||
if exists:
|
||||
if exists and self._metadata_matches_project(exists.get("metadata"), project.id):
|
||||
return set_id
|
||||
logger.warning("知识集不存在,准备重建:set_id=%s", set_id)
|
||||
logger.warning(
|
||||
"知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
|
||||
set_id,
|
||||
project.id,
|
||||
)
|
||||
|
||||
dataset_name = project.name or "annotation-project"
|
||||
base_name = dataset_name.strip() or "annotation-project"
|
||||
project_name = (project.name or "annotation-project").strip() or "annotation-project"
|
||||
metadata = self._build_set_metadata(project)
|
||||
|
||||
existing = await self._find_knowledge_set_by_name(base_name)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
|
||||
if existing:
|
||||
await self._update_project_config(
|
||||
project,
|
||||
@@ -96,19 +98,19 @@ class KnowledgeSyncService:
|
||||
)
|
||||
return existing.get("id")
|
||||
|
||||
created = await self._create_knowledge_set(base_name, metadata)
|
||||
created = await self._create_knowledge_set(project_name, metadata)
|
||||
if not created:
|
||||
created = await self._find_knowledge_set_by_name(base_name)
|
||||
created = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
|
||||
|
||||
if not created:
|
||||
fallback_name = self._build_fallback_set_name(base_name, project.id)
|
||||
existing = await self._find_knowledge_set_by_name(fallback_name)
|
||||
fallback_name = self._build_fallback_set_name(project_name, project.id)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
|
||||
if existing:
|
||||
created = existing
|
||||
else:
|
||||
created = await self._create_knowledge_set(fallback_name, metadata)
|
||||
if not created:
|
||||
created = await self._find_knowledge_set_by_name(fallback_name)
|
||||
created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
|
||||
|
||||
if not created:
|
||||
return None
|
||||
@@ -153,16 +155,18 @@ class KnowledgeSyncService:
|
||||
return []
|
||||
return [item for item in content if isinstance(item, dict)]
|
||||
|
||||
async def _find_knowledge_set_by_name(self, name: str) -> Optional[Dict[str, Any]]:
|
||||
async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]:
|
||||
if not name:
|
||||
return None
|
||||
items = await self._list_knowledge_sets(name)
|
||||
if not items:
|
||||
return None
|
||||
exact_matches = [item for item in items if item.get("name") == name]
|
||||
if not exact_matches:
|
||||
for item in items:
|
||||
if item.get("name") != name:
|
||||
continue
|
||||
if self._metadata_matches_project(item.get("metadata"), project_id):
|
||||
return item
|
||||
return None
|
||||
return exact_matches[0]
|
||||
|
||||
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
|
||||
payload = {
|
||||
@@ -249,16 +253,6 @@ class KnowledgeSyncService:
|
||||
content_type = "MARKDOWN"
|
||||
|
||||
content = annotation_json
|
||||
if dataset_type == "TEXT":
|
||||
try:
|
||||
content = await fetch_text_content_via_download_api(
|
||||
project.dataset_id,
|
||||
str(file_record.id),
|
||||
)
|
||||
content = self._append_annotation_to_content(content, annotation_json, content_type)
|
||||
except Exception as exc:
|
||||
logger.warning("读取文本失败,改为仅存标注JSON:%s", exc)
|
||||
content = annotation_json
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"title": title,
|
||||
@@ -289,13 +283,6 @@ class KnowledgeSyncService:
|
||||
extension = file_type
|
||||
return extension.lower() in {"md", "markdown"}
|
||||
|
||||
def _append_annotation_to_content(self, content: str, annotation_json: str, content_type: str) -> str:
|
||||
if content_type == "MARKDOWN":
|
||||
return (
|
||||
f"{content}\n\n---\n\n## 标注结果\n\n```json\n"
|
||||
f"{annotation_json}\n```")
|
||||
return f"{content}\n\n---\n\n标注结果(JSON):\n{annotation_json}"
|
||||
|
||||
def _strip_extension(self, file_name: str) -> str:
|
||||
if not file_name:
|
||||
return ""
|
||||
@@ -359,6 +346,27 @@ class KnowledgeSyncService:
|
||||
except Exception:
|
||||
return json.dumps({"error": "failed to serialize"}, ensure_ascii=False)
|
||||
|
||||
def _metadata_matches_project(self, metadata: Any, project_id: str) -> bool:
|
||||
if not project_id:
|
||||
return False
|
||||
parsed = self._parse_metadata(metadata)
|
||||
if not parsed:
|
||||
return False
|
||||
return str(parsed.get("project_id") or "").strip() == project_id
|
||||
|
||||
def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]:
|
||||
if metadata is None:
|
||||
return None
|
||||
if isinstance(metadata, dict):
|
||||
return metadata
|
||||
if isinstance(metadata, str):
|
||||
try:
|
||||
payload = json.loads(metadata)
|
||||
except Exception:
|
||||
return None
|
||||
return payload if isinstance(payload, dict) else None
|
||||
return None
|
||||
|
||||
def _safe_response_text(self, response: httpx.Response) -> str:
|
||||
try:
|
||||
return response.text
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user