You've already forked DataMate
feat(data-management): 扩展源文档排除功能支持Excel文件类型
- 在后端服务中扩展源文档类型检查,新增对XLS和XLSX文件的支持 - 修改DatasetFileApplicationService中的过滤逻辑,统一处理所有源文档类型 - 新增isSourceDocument和isDerivedFile辅助方法进行文件类型判断 - 更新前端DatasetFileTransfer组件中的注释说明 - 在Python运行时依赖中添加openpyxl和xlrd库以支持Excel文件处理 - 修改标注项目接口中源文档类型的集合定义 - 更新文件操作钩子中的派生文件排除逻辑
This commit is contained in:
@@ -28,6 +28,7 @@ import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import jakarta.servlet.http.HttpServletResponse;
|
||||
@@ -79,6 +80,7 @@ public class DatasetFileApplicationService {
|
||||
XLS_FILE_TYPE,
|
||||
XLSX_FILE_TYPE
|
||||
);
|
||||
private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
|
||||
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
private final DatasetRepository datasetRepository;
|
||||
@@ -119,7 +121,7 @@ public class DatasetFileApplicationService {
|
||||
* @param status 状态过滤
|
||||
* @param name 文件名模糊查询
|
||||
* @param hasAnnotation 是否有标注
|
||||
* @param excludeSourceDocuments 是否排除已被转换为TXT的源文档(PDF/DOC/DOCX)
|
||||
* @param excludeSourceDocuments 是否排除源文档(PDF/DOC/DOCX/XLS/XLSX)
|
||||
* @param pagingQuery 分页参数
|
||||
* @return 分页文件列表
|
||||
*/
|
||||
@@ -130,12 +132,9 @@ public class DatasetFileApplicationService {
|
||||
IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
|
||||
|
||||
if (excludeSourceDocuments) {
|
||||
// 查询所有作为衍生TXT文件源的文档文件ID
|
||||
List<String> sourceFileIds = datasetFileRepository.findSourceFileIdsWithDerivedFiles(datasetId);
|
||||
if (!sourceFileIds.isEmpty()) {
|
||||
// 过滤掉源文件
|
||||
// 过滤掉源文档文件(PDF/DOC/DOCX/XLS/XLSX),用于标注场景只展示派生文件
|
||||
List<DatasetFile> filteredRecords = files.getRecords().stream()
|
||||
.filter(file -> !sourceFileIds.contains(file.getId()))
|
||||
.filter(file -> !isSourceDocument(file))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// 重新构建分页结果
|
||||
@@ -143,7 +142,6 @@ public class DatasetFileApplicationService {
|
||||
filteredPage.setRecords(filteredRecords);
|
||||
return PagedResponse.of(filteredPage);
|
||||
}
|
||||
}
|
||||
|
||||
return PagedResponse.of(files);
|
||||
}
|
||||
@@ -152,7 +150,7 @@ public class DatasetFileApplicationService {
|
||||
* 获取数据集文件列表
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) {
|
||||
public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, boolean excludeDerivedFiles, PagingQuery pagingQuery) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
int page = Math.max(pagingQuery.getPage(), 1);
|
||||
int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 20 : pagingQuery.getSize();
|
||||
@@ -163,9 +161,17 @@ public class DatasetFileApplicationService {
|
||||
Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
|
||||
Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
|
||||
.stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
|
||||
Set<String> derivedFilePaths = excludeDerivedFiles
|
||||
? datasetFilesMap.values().stream()
|
||||
.filter(this::isDerivedFile)
|
||||
.map(DatasetFile::getFilePath)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toSet())
|
||||
: Collections.emptySet();
|
||||
try (Stream<Path> pathStream = Files.list(queryPath)) {
|
||||
List<Path> allFiles = pathStream
|
||||
.filter(path -> path.toString().startsWith(datasetPath))
|
||||
.filter(path -> !excludeDerivedFiles || Files.isDirectory(path) || !derivedFilePaths.contains(path.toString()))
|
||||
.sorted(Comparator
|
||||
.comparing((Path path) -> !Files.isDirectory(path))
|
||||
.thenComparing(path -> path.getFileName().toString()))
|
||||
@@ -249,6 +255,35 @@ public class DatasetFileApplicationService {
|
||||
return datasetFile;
|
||||
}
|
||||
|
||||
private boolean isSourceDocument(DatasetFile datasetFile) {
|
||||
if (datasetFile == null) {
|
||||
return false;
|
||||
}
|
||||
String fileType = datasetFile.getFileType();
|
||||
if (fileType == null || fileType.isBlank()) {
|
||||
return false;
|
||||
}
|
||||
return DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
private boolean isDerivedFile(DatasetFile datasetFile) {
|
||||
if (datasetFile == null) {
|
||||
return false;
|
||||
}
|
||||
String metadata = datasetFile.getMetadata();
|
||||
if (metadata == null || metadata.isBlank()) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
Map<String, Object> metadataMap = mapper.readValue(metadata, new TypeReference<Map<String, Object>>() {});
|
||||
return metadataMap.get(DERIVED_METADATA_KEY) != null;
|
||||
} catch (Exception e) {
|
||||
log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件详情
|
||||
*/
|
||||
|
||||
@@ -52,11 +52,17 @@ public class DatasetFileController {
|
||||
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
|
||||
@RequestParam(value = "status", required = false) String status,
|
||||
@RequestParam(value = "hasAnnotation", required = false) Boolean hasAnnotation,
|
||||
@RequestParam(value = "excludeSourceDocuments", required = false, defaultValue = "false") Boolean excludeSourceDocuments) {
|
||||
@RequestParam(value = "excludeSourceDocuments", required = false, defaultValue = "false") Boolean excludeSourceDocuments,
|
||||
@RequestParam(value = "excludeDerivedFiles", required = false, defaultValue = "false") Boolean excludeDerivedFiles) {
|
||||
PagingQuery pagingQuery = new PagingQuery(page, size);
|
||||
PagedResponse<DatasetFile> filesPage;
|
||||
if (isWithDirectory) {
|
||||
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
|
||||
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(
|
||||
datasetId,
|
||||
prefix,
|
||||
Boolean.TRUE.equals(excludeDerivedFiles),
|
||||
pagingQuery
|
||||
);
|
||||
} else {
|
||||
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, status, null, hasAnnotation,
|
||||
Boolean.TRUE.equals(excludeSourceDocuments), pagingQuery);
|
||||
|
||||
@@ -23,8 +23,7 @@ interface DatasetFileTransferProps
|
||||
datasetTypeFilter?: DatasetType;
|
||||
hasAnnotationFilter?: boolean;
|
||||
/**
|
||||
* 是否排除已被转换为TXT的源文档文件(PDF/DOC/DOCX)
|
||||
* 默认为 true,当 datasetTypeFilter 为 TEXT 时自动启用
|
||||
* 是否排除源文档文件(PDF/DOC/DOCX/XLS/XLSX),文本标注默认启用
|
||||
*/
|
||||
excludeSourceDocuments?: boolean;
|
||||
}
|
||||
|
||||
@@ -282,7 +282,7 @@ export default function CreateAnnotationTask({
|
||||
}
|
||||
setDatasetPreviewLoading(true);
|
||||
try {
|
||||
// 对于文本数据集,排除已被转换为TXT的源文档文件(PDF/DOC/DOCX)
|
||||
// 对于文本数据集,排除源文档文件(PDF/DOC/DOCX/XLS/XLSX)
|
||||
const params: { page: number; size: number; excludeSourceDocuments?: boolean } = { page: 0, size: 10 };
|
||||
if (isTextDataset) {
|
||||
params.excludeSourceDocuments = true;
|
||||
|
||||
@@ -2,6 +2,7 @@ import type {
|
||||
Dataset,
|
||||
DatasetFile,
|
||||
} from "@/pages/DataManagement/dataset.model";
|
||||
import { DatasetType } from "@/pages/DataManagement/dataset.model";
|
||||
import { App } from "antd";
|
||||
import { useState } from "react";
|
||||
import {
|
||||
@@ -51,12 +52,14 @@ export function useFilesOperation(dataset: Dataset) {
|
||||
) => {
|
||||
// 如果明确传了 prefix(包括空字符串),使用传入的值;否则使用当前 pagination.prefix
|
||||
const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
|
||||
const shouldExcludeDerivedFiles = dataset?.datasetType === DatasetType.TEXT;
|
||||
|
||||
const params: DatasetFilesQueryParams = {
|
||||
page: current !== undefined ? current : pagination.current,
|
||||
size: pageSize !== undefined ? pageSize : pagination.pageSize,
|
||||
isWithDirectory: true,
|
||||
prefix: targetPrefix,
|
||||
...(shouldExcludeDerivedFiles ? { excludeDerivedFiles: true } : {}),
|
||||
};
|
||||
|
||||
const { data } = await queryDatasetFilesUsingGet(id!, params);
|
||||
@@ -245,4 +248,5 @@ interface DatasetFilesQueryParams {
|
||||
size: number;
|
||||
isWithDirectory: boolean;
|
||||
prefix: string;
|
||||
excludeDerivedFiles?: boolean;
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ router = APIRouter(
|
||||
)
|
||||
logger = get_logger(__name__)
|
||||
TEXT_DATASET_TYPE = "TEXT"
|
||||
SOURCE_DOCUMENT_FILE_TYPES = {"pdf", "doc", "docx", "xls", "xlsx"}
|
||||
|
||||
@router.get("/{mapping_id}/login")
|
||||
async def login_label_studio(
|
||||
@@ -123,18 +124,14 @@ async def create_mapping(
|
||||
file_records = file_result.scalars().all()
|
||||
snapshot_file_ids: list[str] = []
|
||||
if dataset_type == TEXT_DATASET_TYPE:
|
||||
derived_source_ids = set()
|
||||
snapshot_file_ids = []
|
||||
for file_record in file_records:
|
||||
metadata = getattr(file_record, "dataset_filemetadata", None)
|
||||
if isinstance(metadata, dict):
|
||||
source_id = metadata.get("derived_from_file_id")
|
||||
if source_id:
|
||||
derived_source_ids.add(str(source_id))
|
||||
snapshot_file_ids = [
|
||||
str(file_record.id)
|
||||
for file_record in file_records
|
||||
if file_record.id and str(file_record.id) not in derived_source_ids
|
||||
]
|
||||
if not file_record.id:
|
||||
continue
|
||||
file_type = str(getattr(file_record, "file_type", "") or "").lower()
|
||||
if file_type in SOURCE_DOCUMENT_FILE_TYPES:
|
||||
continue
|
||||
snapshot_file_ids.append(str(file_record.id))
|
||||
else:
|
||||
snapshot_file_ids = [
|
||||
str(file_record.id)
|
||||
|
||||
56
runtime/datamate-python/poetry.lock
generated
56
runtime/datamate-python/poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.3.1 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiofiles"
|
||||
@@ -704,6 +704,18 @@ files = [
|
||||
[package.extras]
|
||||
dev = ["coverage", "pytest (>=7.4.4)"]
|
||||
|
||||
[[package]]
|
||||
name = "et-xmlfile"
|
||||
version = "2.0.0"
|
||||
description = "An implementation of lxml.xmlfile for the standard library"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"},
|
||||
{file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastapi"
|
||||
version = "0.124.0"
|
||||
@@ -1353,7 +1365,7 @@ files = [
|
||||
|
||||
[package.dependencies]
|
||||
attrs = ">=22.2.0"
|
||||
jsonschema-specifications = ">=2023.03.6"
|
||||
jsonschema-specifications = ">=2023.3.6"
|
||||
referencing = ">=0.28.4"
|
||||
rpds-py = ">=0.7.1"
|
||||
|
||||
@@ -2155,6 +2167,21 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
|
||||
realtime = ["websockets (>=13,<16)"]
|
||||
voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "openpyxl"
|
||||
version = "3.1.5"
|
||||
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"},
|
||||
{file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
et-xmlfile = "*"
|
||||
|
||||
[[package]]
|
||||
name = "orjson"
|
||||
version = "3.11.4"
|
||||
@@ -3329,12 +3356,14 @@ optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c64772786d9eee72d4d3784c28f0a636af5b0a29f3fe26ff11f55efe90c0bd85"},
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ae64ebf7657395824a19bca98ab10eb9a3ecb026bf09524014f1bb81cb598d4"},
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f02325709d1b1a1489f23a39b318e175a171497374149eae74d612634b234c0"},
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2c3684fca8a05f0ac1d9a21c1f4a266983a7ea9180efb80ffeb03861ecd01a0"},
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:040f6f0545b3b7da6b9317fc3e922c9a98fc7243b2a1b39f78390fc0942f7826"},
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-win32.whl", hash = "sha256:830d434d609fe7bfa47c425c445a8b37929f140a7a44cdaf77f6d34df3a7296a"},
|
||||
{file = "sqlalchemy-2.0.45-cp310-cp310-win_amd64.whl", hash = "sha256:0209d9753671b0da74da2cfbb9ecf9c02f72a759e4b018b3ab35f244c91842c7"},
|
||||
{file = "sqlalchemy-2.0.45-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e90a344c644a4fa871eb01809c32096487928bd2038bf10f3e4515cb688cc56"},
|
||||
{file = "sqlalchemy-2.0.45-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8c8b41b97fba5f62349aa285654230296829672fc9939cd7f35aab246d1c08b"},
|
||||
{file = "sqlalchemy-2.0.45-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12c694ed6468333a090d2f60950e4250b928f457e4962389553d6ba5fe9951ac"},
|
||||
{file = "sqlalchemy-2.0.45-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f7d27a1d977a1cfef38a0e2e1ca86f09c4212666ce34e6ae542f3ed0a33bc606"},
|
||||
@@ -3363,12 +3392,14 @@ files = [
|
||||
{file = "sqlalchemy-2.0.45-cp314-cp314-win_amd64.whl", hash = "sha256:4748601c8ea959e37e03d13dcda4a44837afcd1b21338e637f7c935b8da06177"},
|
||||
{file = "sqlalchemy-2.0.45-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd337d3526ec5298f67d6a30bbbe4ed7e5e68862f0bf6dd21d289f8d37b7d60b"},
|
||||
{file = "sqlalchemy-2.0.45-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9a62b446b7d86a3909abbcd1cd3cc550a832f99c2bc37c5b22e1925438b9367b"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5964f832431b7cdfaaa22a660b4c7eb1dfcd6ed41375f67fd3e3440fd95cb3cc"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee580ab50e748208754ae8980cec79ec205983d8cf8b3f7c39067f3d9f2c8e22"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13e27397a7810163440c6bfed6b3fe46f1bfb2486eb540315a819abd2c004128"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ed3635353e55d28e7f4a95c8eda98a5cdc0a0b40b528433fbd41a9ae88f55b3d"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:db6834900338fb13a9123307f0c2cbb1f890a8656fcd5e5448ae3ad5bbe8d312"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-win32.whl", hash = "sha256:1d8b4a7a8c9b537509d56d5cd10ecdcfbb95912d72480c8861524efecc6a3fff"},
|
||||
{file = "sqlalchemy-2.0.45-cp38-cp38-win_amd64.whl", hash = "sha256:ebd300afd2b62679203435f596b2601adafe546cb7282d5a0cd3ed99e423720f"},
|
||||
{file = "sqlalchemy-2.0.45-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d29b2b99d527dbc66dd87c3c3248a5dd789d974a507f4653c969999fc7c1191b"},
|
||||
{file = "sqlalchemy-2.0.45-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:59a8b8bd9c6bedf81ad07c8bd5543eedca55fe9b8780b2b628d495ba55f8db1e"},
|
||||
{file = "sqlalchemy-2.0.45-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd93c6f5d65f254ceabe97548c709e073d6da9883343adaa51bf1a913ce93f8e"},
|
||||
{file = "sqlalchemy-2.0.45-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d0beadc2535157070c9c17ecf25ecec31e13c229a8f69196d7590bde8082bf1"},
|
||||
@@ -4133,6 +4164,23 @@ files = [
|
||||
[package.extras]
|
||||
dev = ["pytest", "setuptools"]
|
||||
|
||||
[[package]]
|
||||
name = "xlrd"
|
||||
version = "2.0.2"
|
||||
description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "xlrd-2.0.2-py2.py3-none-any.whl", hash = "sha256:ea762c3d29f4cca48d82df517b6d89fbce4db3107f9d78713e48cd321d5c9aa9"},
|
||||
{file = "xlrd-2.0.2.tar.gz", hash = "sha256:08b5e25de58f21ce71dc7db3b3b8106c1fa776f3024c54e45b45b374e89234c9"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
build = ["twine", "wheel"]
|
||||
docs = ["sphinx"]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "xxhash"
|
||||
version = "3.6.0"
|
||||
@@ -4538,9 +4586,9 @@ files = [
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]
|
||||
cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and python_version < \"3.14\"", "cffi (>=2.0.0b0) ; platform_python_implementation != \"PyPy\" and python_version >= \"3.14\""]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.12,<4.0.0"
|
||||
content-hash = "996ab9a6b957607afb6d493b0a5dd1fec8f65f600f41bb5e99ee1e16fcb1f7b8"
|
||||
content-hash = "906ee4a17768bc92cf160032c185fd9a9d530ca56082081c1d85b2311b409df3"
|
||||
|
||||
@@ -25,6 +25,8 @@ dependencies = [
|
||||
"jsonschema (>=4.25.1,<5.0.0)",
|
||||
"greenlet (>=3.3.0,<4.0.0)",
|
||||
"docx2txt (>=0.9,<0.10)",
|
||||
"openpyxl (>=3.1.5,<4.0.0)",
|
||||
"xlrd (>=2.0.1,<3.0.0)",
|
||||
"jq (>=1.10.0,<2.0.0)",
|
||||
"openai (>=2.9.0,<3.0.0)",
|
||||
"langchain-openai (>=1.1.1,<2.0.0)",
|
||||
|
||||
Reference in New Issue
Block a user