Files
DataMate/runtime/datamate-python/app/module/annotation/service/text_fetcher.py
Jerry Yan 7092c3f955 feat(annotation): 调整文本编辑器大小限制配置
- 将editor_max_text_bytes默认值从2MB改为0,表示不限制
- 更新文本获取服务中的大小检查逻辑,只在max_bytes大于0时进行限制
- 修改错误提示信息中的字节限制显示
- 优化配置参数的条件判断流程
2026-02-02 17:53:09 +08:00

58 lines
2.2 KiB
Python

from __future__ import annotations
import httpx
from fastapi import HTTPException
from app.core.config import settings
from app.core.logging import get_logger
# Module-level logger obtained from the project's logging helper, keyed by this module's name.
logger = get_logger(__name__)
async def fetch_text_content_via_download_api(dataset_id: str, file_id: str) -> str:
    """Read a dataset file's text content through the backend download endpoint.

    The response body is streamed rather than fetched in one piece, so the
    configured size limit (``settings.editor_max_text_bytes``) is enforced
    *while* downloading: an oversized file is rejected as soon as the limit is
    crossed instead of after the whole payload has been buffered in memory.
    A limit of ``0`` (or any non-positive value) disables the size check.

    Args:
        dataset_id: Identifier of the dataset that owns the file.
        file_id: Identifier of the file to download.

    Returns:
        The file content decoded as UTF-8; undecodable bytes are replaced
        with the Unicode replacement character.

    Raises:
        HTTPException: 413 when the file exceeds the configured limit;
            502 when the download endpoint answers with an error status or
            the request fails for any other reason.
    """
    base = settings.datamate_backend_base_url.rstrip("/")
    url = f"{base}/data-management/datasets/{dataset_id}/files/{file_id}/download"
    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            # Stream so that headers can be inspected before the body is read.
            async with client.stream("GET", url) as resp:
                resp.raise_for_status()
                max_bytes = settings.editor_max_text_bytes
                limit_enabled = max_bytes > 0

                content_length = resp.headers.get("content-length")
                if limit_enabled and content_length:
                    try:
                        declared_size = int(content_length)
                    except ValueError:
                        # Malformed content-length: ignore it and rely on the
                        # actual byte count below.
                        declared_size = None
                    if declared_size is not None and declared_size > max_bytes:
                        raise HTTPException(
                            status_code=413,
                            detail=f"文本文件过大,限制 {max_bytes} 字节",
                        )

                data = bytearray()
                async for chunk in resp.aiter_bytes():
                    data.extend(chunk)
                    # Abort early instead of buffering an oversized body.
                    if limit_enabled and len(data) > max_bytes:
                        raise HTTPException(
                            status_code=413,
                            detail=f"文本文件过大,限制 {max_bytes} 字节",
                        )

        # TEXT POC: decode as UTF-8 by default, substituting the replacement
        # character for bytes that cannot be decoded.
        return data.decode("utf-8", errors="replace")
    except HTTPException:
        # Our own 413 must reach the caller untouched, not be turned into a 502.
        raise
    except httpx.HTTPStatusError as exc:
        logger.error(
            "读取文本失败: dataset=%s, file=%s, http=%s",
            dataset_id,
            file_id,
            exc.response.status_code,
        )
        raise HTTPException(status_code=502, detail="读取文本失败(下载接口返回错误)") from exc
    except Exception as exc:
        # Boundary handler: any transport/unknown failure is reported as 502.
        logger.error("读取文本失败: dataset=%s, file=%s, err=%s", dataset_id, file_id, exc)
        raise HTTPException(status_code=502, detail="读取文本失败(下载接口调用异常)") from exc