from __future__ import annotations

import httpx
from fastapi import HTTPException

from app.core.config import settings
from app.core.logging import get_logger

logger = get_logger(__name__)


def _text_too_large(max_bytes: int) -> HTTPException:
    """Build the 413 error raised when the file exceeds ``max_bytes`` bytes."""
    return HTTPException(
        status_code=413,
        detail=f"文本文件过大,限制 {max_bytes} 字节",
    )


async def fetch_text_content_via_download_api(dataset_id: str, file_id: str) -> str:
    """Read a dataset file's text content through the backend download endpoint.

    The body is streamed and the size limit is enforced while reading, so an
    oversized file is rejected without first being buffered in full.

    Args:
        dataset_id: Dataset identifier, interpolated into the download URL.
        file_id: File identifier, interpolated into the download URL.

    Returns:
        The response body decoded as UTF-8; undecodable bytes are replaced
        with the Unicode replacement character.

    Raises:
        HTTPException: 413 when the declared or actual body size exceeds
            ``settings.editor_max_text_bytes`` (a value <= 0 disables the
            limit); 502 when the download endpoint answers with an error
            status or the call fails for any other reason.
    """
    base = settings.datamate_backend_base_url.rstrip("/")
    url = f"{base}/data-management/datasets/{dataset_id}/files/{file_id}/download"

    try:
        max_bytes = settings.editor_max_text_bytes
        limited = max_bytes > 0

        async with httpx.AsyncClient(
            timeout=30.0, follow_redirects=True
        ) as client, client.stream("GET", url) as resp:
            resp.raise_for_status()

            # Fast path: reject based on the declared size before reading.
            content_length = resp.headers.get("content-length")
            if limited and content_length:
                try:
                    declared = int(content_length)
                except ValueError:
                    # Malformed content-length: ignore it and rely on the
                    # actual byte count below.
                    declared = None
                if declared is not None and declared > max_bytes:
                    raise _text_too_large(max_bytes)

            # Read incrementally so a missing or understated content-length
            # cannot force the whole oversized body into memory.
            buffer = bytearray()
            async for chunk in resp.aiter_bytes():
                buffer.extend(chunk)
                if limited and len(buffer) > max_bytes:
                    raise _text_too_large(max_bytes)

        # TEXT POC: decode as UTF-8 by default, substituting the replacement
        # character for bytes that cannot be decoded.
        return buffer.decode("utf-8", errors="replace")
    except HTTPException:
        # Our own 413 must reach the caller untouched, not be remapped to 502.
        raise
    except httpx.HTTPStatusError as exc:
        logger.error(
            "读取文本失败: dataset=%s, file=%s, http=%s",
            dataset_id,
            file_id,
            exc.response.status_code,
        )
        raise HTTPException(
            status_code=502, detail="读取文本失败(下载接口返回错误)"
        ) from exc
    except Exception as exc:
        logger.error(
            "读取文本失败: dataset=%s, file=%s, err=%s", dataset_id, file_id, exc
        )
        raise HTTPException(
            status_code=502, detail="读取文本失败(下载接口调用异常)"
        ) from exc