You've already forked DataMate
feat(annotation): 实现标注结果同步到知识管理功能
- 在知识条目实体中新增来源数据集ID和文件ID字段
- 实现标注编辑器中同步标注结果到知识管理的服务逻辑
- 添加知识同步服务类处理标注到知识条目的转换和同步
- 实现通过下载接口获取文本内容的独立服务模块
- 更新知识条目查询接口支持按来源数据集和文件ID过滤
- 自动创建和关联标注项目对应的知识集
- 支持文本和Markdown文件的内容合并标注结果
- 添加同步过程中的错误处理和日志记录机制
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
|
||||
# Module-level logger obtained from the project's logging helper, keyed by this module's name.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
async def fetch_text_content_via_download_api(dataset_id: str, file_id: str) -> str:
    """Read a dataset file's text content through the backend download API.

    The response body is streamed and the ``settings.editor_max_text_bytes``
    limit is enforced while reading, so an oversized file is rejected
    without first being buffered in full.

    Args:
        dataset_id: ID of the dataset that owns the file.
        file_id: ID of the file to download.

    Returns:
        The downloaded bytes decoded as UTF-8; undecodable bytes are
        replaced with the Unicode replacement character.

    Raises:
        HTTPException: 413 when the content exceeds the configured limit;
            502 when the download API answers with an error status or the
            call fails for any other reason.
    """
    # NOTE(review): the IDs are interpolated into the URL path unescaped;
    # confirm that callers only pass validated identifiers.
    base = settings.datamate_backend_base_url.rstrip("/")
    url = f"{base}/data-management/datasets/{dataset_id}/files/{file_id}/download"
    max_bytes = settings.editor_max_text_bytes
    too_large_detail = f"文本文件过大,限制 {max_bytes} 字节"

    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            # Stream instead of client.get(): get() loads the entire body
            # into memory before any size check can run.
            async with client.stream("GET", url) as resp:
                resp.raise_for_status()

                # Fast rejection based on the declared length, when present.
                declared = resp.headers.get("content-length")
                if declared:
                    try:
                        declared_size = int(declared)
                    except ValueError:
                        # Invalid Content-Length: ignore it and rely on the
                        # actual byte count below.
                        declared_size = None
                    if declared_size is not None and declared_size > max_bytes:
                        raise HTTPException(status_code=413, detail=too_large_detail)

                # Authoritative check: count bytes as they arrive and stop
                # as soon as the limit is crossed.
                data = bytearray()
                async for chunk in resp.aiter_bytes():
                    data.extend(chunk)
                    if len(data) > max_bytes:
                        raise HTTPException(status_code=413, detail=too_large_detail)

        # TEXT POC: decode as UTF-8 by default; undecodable bytes become
        # replacement characters.
        return data.decode("utf-8", errors="replace")

    except HTTPException:
        # Our own 413 must reach the caller unchanged, not be rewrapped as 502.
        raise
    except httpx.HTTPStatusError as exc:
        logger.error(
            "读取文本失败: dataset=%s, file=%s, http=%s",
            dataset_id,
            file_id,
            exc.response.status_code,
        )
        raise HTTPException(status_code=502, detail="读取文本失败(下载接口返回错误)") from exc
    except Exception as exc:
        # Boundary handler: any transport or unexpected failure maps to 502.
        logger.error("读取文本失败: dataset=%s, file=%s, err=%s", dataset_id, file_id, exc)
        raise HTTPException(status_code=502, detail="读取文本失败(下载接口调用异常)") from exc
|
||||
Reference in New Issue
Block a user