Compare commits


2 Commits

Author SHA1 Message Date
b5d7c66240 feat(data-management): extend source-document exclusion to support Excel file types
- Extend the source-document type check in the backend service to support XLS and XLSX files
- Update the filtering logic in DatasetFileApplicationService to handle all source-document types uniformly
- Add isSourceDocument and isDerivedFile helper methods for file-type checks
- Update the explanatory comments in the frontend DatasetFileTransfer component
- Add the openpyxl and xlrd libraries to the Python runtime dependencies for Excel file handling
- Update the source-document type set defined in the annotation-project API
- Update the derived-file exclusion logic in the file-operations hook
2026-01-31 11:30:55 +08:00
6c7ea0c25e chore(deps): update Docker image mirror addresses
- Switch the etcd image source from quay.io to quay.nju.edu.cn
- Switch the vLLM-Ascend image source from quay.io to quay.nju.edu.cn
- Standardize on the Nanjing University mirror registry to improve download speed
2026-01-31 11:21:47 +08:00
10 changed files with 213 additions and 122 deletions
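The first commit's exclusion rule boils down to a case-insensitive membership test against a file-type set that now includes the two Excel extensions. A minimal sketch of that check, with illustrative names (the service itself uses the DOCUMENT_TEXT_FILE_TYPES constant and an isSourceDocument helper, shown in the diffs below):

import java.util.Locale;
import java.util.Set;

public class SourceDocumentTypesSketch {
    // Mirrors the file-type set from the diffs; "xls" and "xlsx" are the new entries
    static final Set<String> SOURCE_DOCUMENT_FILE_TYPES =
            Set.of("pdf", "doc", "docx", "xls", "xlsx");

    static boolean isSourceDocument(String fileType) {
        if (fileType == null || fileType.isBlank()) {
            return false;
        }
        // Case-insensitive match, as in the service's helper
        return SOURCE_DOCUMENT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT));
    }

    public static void main(String[] args) {
        System.out.println(isSourceDocument("XLSX")); // true -- newly excluded
        System.out.println(isSourceDocument("txt"));  // false -- derived text stays visible
    }
}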

DatasetFileApplicationService.java

@@ -28,6 +28,7 @@ import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
 import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
 import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
 import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
+import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import jakarta.servlet.http.HttpServletResponse;
@@ -79,6 +80,7 @@ public class DatasetFileApplicationService {
     XLS_FILE_TYPE,
     XLSX_FILE_TYPE
 );
+private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
 private final DatasetFileRepository datasetFileRepository;
 private final DatasetRepository datasetRepository;
@@ -119,7 +121,7 @@ public class DatasetFileApplicationService {
  * @param status status filter
  * @param name fuzzy file-name query
  * @param hasAnnotation whether the file has annotations
- * @param excludeSourceDocuments whether to exclude source documents already converted to TXT (PDF/DOC/DOCX)
+ * @param excludeSourceDocuments whether to exclude source documents (PDF/DOC/DOCX/XLS/XLSX)
  * @param pagingQuery paging parameters
  * @return paged file list
  */
@@ -130,19 +132,15 @@ public class DatasetFileApplicationService {
 IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
 if (excludeSourceDocuments) {
-    // Look up the IDs of all document files that served as sources for derived TXT files
-    List<String> sourceFileIds = datasetFileRepository.findSourceFileIdsWithDerivedFiles(datasetId);
-    if (!sourceFileIds.isEmpty()) {
-        // Filter out the source files
-        List<DatasetFile> filteredRecords = files.getRecords().stream()
-            .filter(file -> !sourceFileIds.contains(file.getId()))
-            .collect(Collectors.toList());
+    // Filter out source-document files (PDF/DOC/DOCX/XLS/XLSX) so annotation scenarios only show derived files
+    List<DatasetFile> filteredRecords = files.getRecords().stream()
+        .filter(file -> !isSourceDocument(file))
+        .collect(Collectors.toList());
     // Rebuild the paged result
     Page<DatasetFile> filteredPage = new Page<>(files.getCurrent(), files.getSize(), files.getTotal());
     filteredPage.setRecords(filteredRecords);
     return PagedResponse.of(filteredPage);
-    }
 }
 return PagedResponse.of(files);
@@ -152,7 +150,7 @@ public class DatasetFileApplicationService {
  * Get the dataset file list
  */
 @Transactional(readOnly = true)
-public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) {
+public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, boolean excludeDerivedFiles, PagingQuery pagingQuery) {
     Dataset dataset = datasetRepository.getById(datasetId);
     int page = Math.max(pagingQuery.getPage(), 1);
     int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 20 : pagingQuery.getSize();
@@ -163,9 +161,17 @@ public class DatasetFileApplicationService {
 Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
 Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
     .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
+Set<String> derivedFilePaths = excludeDerivedFiles
+    ? datasetFilesMap.values().stream()
+        .filter(this::isDerivedFile)
+        .map(DatasetFile::getFilePath)
+        .filter(Objects::nonNull)
+        .collect(Collectors.toSet())
+    : Collections.emptySet();
 try (Stream<Path> pathStream = Files.list(queryPath)) {
     List<Path> allFiles = pathStream
         .filter(path -> path.toString().startsWith(datasetPath))
+        .filter(path -> !excludeDerivedFiles || Files.isDirectory(path) || !derivedFilePaths.contains(path.toString()))
         .sorted(Comparator
             .comparing((Path path) -> !Files.isDirectory(path))
             .thenComparing(path -> path.getFileName().toString()))
@@ -249,6 +255,35 @@ public class DatasetFileApplicationService {
     return datasetFile;
 }

+private boolean isSourceDocument(DatasetFile datasetFile) {
+    if (datasetFile == null) {
+        return false;
+    }
+    String fileType = datasetFile.getFileType();
+    if (fileType == null || fileType.isBlank()) {
+        return false;
+    }
+    return DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT));
+}
+
+private boolean isDerivedFile(DatasetFile datasetFile) {
+    if (datasetFile == null) {
+        return false;
+    }
+    String metadata = datasetFile.getMetadata();
+    if (metadata == null || metadata.isBlank()) {
+        return false;
+    }
+    try {
+        ObjectMapper mapper = new ObjectMapper();
+        Map<String, Object> metadataMap = mapper.readValue(metadata, new TypeReference<Map<String, Object>>() {});
+        return metadataMap.get(DERIVED_METADATA_KEY) != null;
+    } catch (Exception e) {
+        log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e);
+        return false;
+    }
+}
 /**
  * Get file details
  */
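The new isDerivedFile helper treats a file as derived when its metadata JSON contains the derived_from_file_id key. A standalone sketch of that check, runnable outside the service (the sample JSON strings are made up; the Jackson calls mirror the helper above):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Map;

public class DerivedFileCheckSketch {
    private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
    // Reused mapper; the committed helper constructs one per call, which also works
    private static final ObjectMapper MAPPER = new ObjectMapper();

    static boolean hasDerivedMarker(String metadataJson) {
        if (metadataJson == null || metadataJson.isBlank()) {
            return false;
        }
        try {
            Map<String, Object> metadata =
                    MAPPER.readValue(metadataJson, new TypeReference<Map<String, Object>>() {});
            return metadata.get(DERIVED_METADATA_KEY) != null;
        } catch (Exception e) {
            return false; // unparsable metadata is treated as "not derived"
        }
    }

    public static void main(String[] args) {
        System.out.println(hasDerivedMarker("{\"derived_from_file_id\":\"f-123\"}")); // true
        System.out.println(hasDerivedMarker("{\"source\":\"upload\"}"));              // false
        System.out.println(hasDerivedMarker("not json"));                             // false
    }
}

ObjectMapper is thread-safe once configured, so a shared instance avoids per-call construction; the diff's per-invocation mapper is functionally equivalent.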

DatasetFileController.java

@@ -52,11 +52,17 @@ public class DatasetFileController {
     @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
     @RequestParam(value = "status", required = false) String status,
     @RequestParam(value = "hasAnnotation", required = false) Boolean hasAnnotation,
-    @RequestParam(value = "excludeSourceDocuments", required = false, defaultValue = "false") Boolean excludeSourceDocuments) {
+    @RequestParam(value = "excludeSourceDocuments", required = false, defaultValue = "false") Boolean excludeSourceDocuments,
+    @RequestParam(value = "excludeDerivedFiles", required = false, defaultValue = "false") Boolean excludeDerivedFiles) {
     PagingQuery pagingQuery = new PagingQuery(page, size);
     PagedResponse<DatasetFile> filesPage;
     if (isWithDirectory) {
-        filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
+        filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(
+            datasetId,
+            prefix,
+            Boolean.TRUE.equals(excludeDerivedFiles),
+            pagingQuery
+        );
     } else {
         filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, status, null, hasAnnotation,
             Boolean.TRUE.equals(excludeSourceDocuments), pagingQuery);
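For reference, a hypothetical client call exercising the new flag. The host, port, dataset id, and the /datasets/{id}/files path are assumptions (the controller's request mapping is not part of this diff); only the query-parameter names come from the signature above:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ListFilesSketch {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        // Directory listing with derived files filtered out, e.g. for a TEXT dataset
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/datasets/ds-1/files"
                        + "?isWithDirectory=true&prefix=&excludeDerivedFiles=true&page=1&size=20"))
                .GET()
                .build();
        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode());
        System.out.println(response.body());
    }
}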

Docker Compose file (milvus-etcd)

@@ -1,7 +1,7 @@
 services:
   etcd:
     container_name: milvus-etcd
-    image: quay.io/coreos/etcd:v3.5.18
+    image: quay.nju.edu.cn/coreos/etcd:v3.5.18
     environment:
       - ETCD_AUTO_COMPACTION_MODE=revision
       - ETCD_AUTO_COMPACTION_RETENTION=1000

DatasetFileTransfer component (TypeScript)

@@ -23,8 +23,7 @@ interface DatasetFileTransferProps
   datasetTypeFilter?: DatasetType;
   hasAnnotationFilter?: boolean;
   /**
-   * Whether to exclude source-document files (PDF/DOC/DOCX) that have already been converted to TXT.
-   * Defaults to true; enabled automatically when datasetTypeFilter is TEXT.
+   * Whether to exclude source-document files (PDF/DOC/DOCX/XLS/XLSX); enabled by default for text annotation.
    */
   excludeSourceDocuments?: boolean;
 }

CreateAnnotationTask component (TypeScript)

@@ -282,7 +282,7 @@ export default function CreateAnnotationTask({
     }
     setDatasetPreviewLoading(true);
     try {
-      // For text datasets, exclude source-document files (PDF/DOC/DOCX) that have been converted to TXT
+      // For text datasets, exclude source-document files (PDF/DOC/DOCX/XLS/XLSX)
       const params: { page: number; size: number; excludeSourceDocuments?: boolean } = { page: 0, size: 10 };
       if (isTextDataset) {
         params.excludeSourceDocuments = true;

useFilesOperation hook (TypeScript)

@@ -2,6 +2,7 @@ import type {
   Dataset,
   DatasetFile,
 } from "@/pages/DataManagement/dataset.model";
+import { DatasetType } from "@/pages/DataManagement/dataset.model";
 import { App } from "antd";
 import { useState } from "react";
 import {
@@ -51,12 +52,14 @@ export function useFilesOperation(dataset: Dataset) {
   ) => {
     // If prefix was explicitly passed (including the empty string), use it; otherwise use the current pagination.prefix
     const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
+    const shouldExcludeDerivedFiles = dataset?.datasetType === DatasetType.TEXT;
     const params: DatasetFilesQueryParams = {
       page: current !== undefined ? current : pagination.current,
       size: pageSize !== undefined ? pageSize : pagination.pageSize,
       isWithDirectory: true,
       prefix: targetPrefix,
+      ...(shouldExcludeDerivedFiles ? { excludeDerivedFiles: true } : {}),
     };
     const { data } = await queryDatasetFilesUsingGet(id!, params);
@@ -245,4 +248,5 @@ interface DatasetFilesQueryParams {
   size: number;
   isWithDirectory: boolean;
   prefix: string;
+  excludeDerivedFiles?: boolean;
 }

Label Studio mapping router (Python)

@@ -27,6 +27,7 @@ router = APIRouter(
 )
 logger = get_logger(__name__)
 TEXT_DATASET_TYPE = "TEXT"
+SOURCE_DOCUMENT_FILE_TYPES = {"pdf", "doc", "docx", "xls", "xlsx"}
 @router.get("/{mapping_id}/login")
 async def login_label_studio(
@@ -123,18 +124,14 @@ async def create_mapping(
     file_records = file_result.scalars().all()
     snapshot_file_ids: list[str] = []
     if dataset_type == TEXT_DATASET_TYPE:
-        derived_source_ids = set()
-        for file_record in file_records:
-            metadata = getattr(file_record, "dataset_filemetadata", None)
-            if isinstance(metadata, dict):
-                source_id = metadata.get("derived_from_file_id")
-                if source_id:
-                    derived_source_ids.add(str(source_id))
-        snapshot_file_ids = [
-            str(file_record.id)
-            for file_record in file_records
-            if file_record.id and str(file_record.id) not in derived_source_ids
-        ]
+        snapshot_file_ids = []
+        for file_record in file_records:
+            if not file_record.id:
+                continue
+            file_type = str(getattr(file_record, "file_type", "") or "").lower()
+            if file_type in SOURCE_DOCUMENT_FILE_TYPES:
+                continue
+            snapshot_file_ids.append(str(file_record.id))
     else:
         snapshot_file_ids = [
             str(file_record.id)

poetry.lock

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.3.1 and should not be changed by hand.
 [[package]]
 name = "aiofiles"
@@ -704,6 +704,18 @@ files = [
 [package.extras]
 dev = ["coverage", "pytest (>=7.4.4)"]

+[[package]]
+name = "et-xmlfile"
+version = "2.0.0"
+description = "An implementation of lxml.xmlfile for the standard library"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"},
+    {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"},
+]
+
 [[package]]
 name = "fastapi"
 version = "0.124.0"
@@ -1353,7 +1365,7 @@ files = [
 [package.dependencies]
 attrs = ">=22.2.0"
-jsonschema-specifications = ">=2023.03.6"
+jsonschema-specifications = ">=2023.3.6"
 referencing = ">=0.28.4"
 rpds-py = ">=0.7.1"
@@ -2155,6 +2167,21 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
 realtime = ["websockets (>=13,<16)"]
 voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"]

+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"},
+    {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"},
+]
+
+[package.dependencies]
+et-xmlfile = "*"
+
 [[package]]
 name = "orjson"
 version = "3.11.4"
@@ -3329,12 +3356,14 @@ optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
+    {file = "sqlalchemy-2.0.45-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c64772786d9eee72d4d3784c28f0a636af5b0a29f3fe26ff11f55efe90c0bd85"},
     {file = "sqlalchemy-2.0.45-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ae64ebf7657395824a19bca98ab10eb9a3ecb026bf09524014f1bb81cb598d4"},
     {file = "sqlalchemy-2.0.45-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f02325709d1b1a1489f23a39b318e175a171497374149eae74d612634b234c0"},
     {file = "sqlalchemy-2.0.45-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2c3684fca8a05f0ac1d9a21c1f4a266983a7ea9180efb80ffeb03861ecd01a0"},
     {file = "sqlalchemy-2.0.45-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:040f6f0545b3b7da6b9317fc3e922c9a98fc7243b2a1b39f78390fc0942f7826"},
     {file = "sqlalchemy-2.0.45-cp310-cp310-win32.whl", hash = "sha256:830d434d609fe7bfa47c425c445a8b37929f140a7a44cdaf77f6d34df3a7296a"},
     {file = "sqlalchemy-2.0.45-cp310-cp310-win_amd64.whl", hash = "sha256:0209d9753671b0da74da2cfbb9ecf9c02f72a759e4b018b3ab35f244c91842c7"},
+    {file = "sqlalchemy-2.0.45-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e90a344c644a4fa871eb01809c32096487928bd2038bf10f3e4515cb688cc56"},
     {file = "sqlalchemy-2.0.45-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8c8b41b97fba5f62349aa285654230296829672fc9939cd7f35aab246d1c08b"},
     {file = "sqlalchemy-2.0.45-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12c694ed6468333a090d2f60950e4250b928f457e4962389553d6ba5fe9951ac"},
     {file = "sqlalchemy-2.0.45-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f7d27a1d977a1cfef38a0e2e1ca86f09c4212666ce34e6ae542f3ed0a33bc606"},
@@ -3363,12 +3392,14 @@ files = [
{file = "sqlalchemy-2.0.45-cp314-cp314-win_amd64.whl", hash = "sha256:4748601c8ea959e37e03d13dcda4a44837afcd1b21338e637f7c935b8da06177"}, {file = "sqlalchemy-2.0.45-cp314-cp314-win_amd64.whl", hash = "sha256:4748601c8ea959e37e03d13dcda4a44837afcd1b21338e637f7c935b8da06177"},
{file = "sqlalchemy-2.0.45-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd337d3526ec5298f67d6a30bbbe4ed7e5e68862f0bf6dd21d289f8d37b7d60b"}, {file = "sqlalchemy-2.0.45-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd337d3526ec5298f67d6a30bbbe4ed7e5e68862f0bf6dd21d289f8d37b7d60b"},
{file = "sqlalchemy-2.0.45-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9a62b446b7d86a3909abbcd1cd3cc550a832f99c2bc37c5b22e1925438b9367b"}, {file = "sqlalchemy-2.0.45-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9a62b446b7d86a3909abbcd1cd3cc550a832f99c2bc37c5b22e1925438b9367b"},
{file = "sqlalchemy-2.0.45-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5964f832431b7cdfaaa22a660b4c7eb1dfcd6ed41375f67fd3e3440fd95cb3cc"},
{file = "sqlalchemy-2.0.45-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee580ab50e748208754ae8980cec79ec205983d8cf8b3f7c39067f3d9f2c8e22"}, {file = "sqlalchemy-2.0.45-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee580ab50e748208754ae8980cec79ec205983d8cf8b3f7c39067f3d9f2c8e22"},
{file = "sqlalchemy-2.0.45-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13e27397a7810163440c6bfed6b3fe46f1bfb2486eb540315a819abd2c004128"}, {file = "sqlalchemy-2.0.45-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13e27397a7810163440c6bfed6b3fe46f1bfb2486eb540315a819abd2c004128"},
{file = "sqlalchemy-2.0.45-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ed3635353e55d28e7f4a95c8eda98a5cdc0a0b40b528433fbd41a9ae88f55b3d"}, {file = "sqlalchemy-2.0.45-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ed3635353e55d28e7f4a95c8eda98a5cdc0a0b40b528433fbd41a9ae88f55b3d"},
{file = "sqlalchemy-2.0.45-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:db6834900338fb13a9123307f0c2cbb1f890a8656fcd5e5448ae3ad5bbe8d312"}, {file = "sqlalchemy-2.0.45-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:db6834900338fb13a9123307f0c2cbb1f890a8656fcd5e5448ae3ad5bbe8d312"},
{file = "sqlalchemy-2.0.45-cp38-cp38-win32.whl", hash = "sha256:1d8b4a7a8c9b537509d56d5cd10ecdcfbb95912d72480c8861524efecc6a3fff"}, {file = "sqlalchemy-2.0.45-cp38-cp38-win32.whl", hash = "sha256:1d8b4a7a8c9b537509d56d5cd10ecdcfbb95912d72480c8861524efecc6a3fff"},
{file = "sqlalchemy-2.0.45-cp38-cp38-win_amd64.whl", hash = "sha256:ebd300afd2b62679203435f596b2601adafe546cb7282d5a0cd3ed99e423720f"}, {file = "sqlalchemy-2.0.45-cp38-cp38-win_amd64.whl", hash = "sha256:ebd300afd2b62679203435f596b2601adafe546cb7282d5a0cd3ed99e423720f"},
{file = "sqlalchemy-2.0.45-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d29b2b99d527dbc66dd87c3c3248a5dd789d974a507f4653c969999fc7c1191b"},
{file = "sqlalchemy-2.0.45-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:59a8b8bd9c6bedf81ad07c8bd5543eedca55fe9b8780b2b628d495ba55f8db1e"}, {file = "sqlalchemy-2.0.45-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:59a8b8bd9c6bedf81ad07c8bd5543eedca55fe9b8780b2b628d495ba55f8db1e"},
{file = "sqlalchemy-2.0.45-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd93c6f5d65f254ceabe97548c709e073d6da9883343adaa51bf1a913ce93f8e"}, {file = "sqlalchemy-2.0.45-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd93c6f5d65f254ceabe97548c709e073d6da9883343adaa51bf1a913ce93f8e"},
{file = "sqlalchemy-2.0.45-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d0beadc2535157070c9c17ecf25ecec31e13c229a8f69196d7590bde8082bf1"}, {file = "sqlalchemy-2.0.45-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d0beadc2535157070c9c17ecf25ecec31e13c229a8f69196d7590bde8082bf1"},
@@ -4133,6 +4164,23 @@ files = [
 [package.extras]
 dev = ["pytest", "setuptools"]

+[[package]]
+name = "xlrd"
+version = "2.0.2"
+description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+groups = ["main"]
+files = [
+    {file = "xlrd-2.0.2-py2.py3-none-any.whl", hash = "sha256:ea762c3d29f4cca48d82df517b6d89fbce4db3107f9d78713e48cd321d5c9aa9"},
+    {file = "xlrd-2.0.2.tar.gz", hash = "sha256:08b5e25de58f21ce71dc7db3b3b8106c1fa776f3024c54e45b45b374e89234c9"},
+]
+
+[package.extras]
+build = ["twine", "wheel"]
+docs = ["sphinx"]
+test = ["pytest", "pytest-cov"]
+
 [[package]]
 name = "xxhash"
 version = "3.6.0"
@@ -4538,9 +4586,9 @@ files = [
 ]
 [package.extras]
-cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]
+cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b0) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12,<4.0.0"
-content-hash = "996ab9a6b957607afb6d493b0a5dd1fec8f65f600f41bb5e99ee1e16fcb1f7b8"
+content-hash = "906ee4a17768bc92cf160032c185fd9a9d530ca56082081c1d85b2311b409df3"

pyproject.toml

@@ -25,6 +25,8 @@ dependencies = [
"jsonschema (>=4.25.1,<5.0.0)", "jsonschema (>=4.25.1,<5.0.0)",
"greenlet (>=3.3.0,<4.0.0)", "greenlet (>=3.3.0,<4.0.0)",
"docx2txt (>=0.9,<0.10)", "docx2txt (>=0.9,<0.10)",
"openpyxl (>=3.1.5,<4.0.0)",
"xlrd (>=2.0.1,<3.0.0)",
"jq (>=1.10.0,<2.0.0)", "jq (>=1.10.0,<2.0.0)",
"openai (>=2.9.0,<3.0.0)", "openai (>=2.9.0,<3.0.0)",
"langchain-openai (>=1.1.1,<2.0.0)", "langchain-openai (>=1.1.1,<2.0.0)",

Dockerfile (vLLM-Ascend base image)

@@ -1,6 +1,6 @@
 # Base image: choose either vLLM or LMDeploy as needed; requires an ARM(AArch64) CPU + Ascend NPU.
 # Base image containing the vLLM inference environment, requiring ARM(AArch64) CPU + Ascend NPU.
-FROM quay.io/ascend/vllm-ascend:v0.11.0rc2
+FROM quay.nju.edu.cn/ascend/vllm-ascend:v0.11.0rc2
 # Base image containing the LMDeploy inference environment, requiring ARM(AArch64) CPU + Ascend NPU.
 # FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ascend:mineru-a2