Compare commits

...

2 Commits

Author SHA1 Message Date
07a901043a refactor(annotation): 移除文本内容获取相关功能
- 删除了 fetch_text_content_via_download_api 导入
- 移除了 TEXT 类型数据集的文本内容获取逻辑
- 删除了 _append_annotation_to_content 方法实现
- 简化了知识同步服务的内容处理流程
2026-02-02 15:39:06 +08:00
32e3fc97c6 feat(annotation): 增强知识库同步服务以支持项目隔离
- 在知识库查找时添加项目ID验证,确保知识库归属正确
- 修改日志消息以显示项目ID信息便于调试
- 重构知识库查找逻辑,从按名称查找改为按名称和项目ID组合查找
- 新增_metadata_matches_project方法验证元数据中的项目归属
- 新增_parse_metadata方法安全解析元数据JSON字符串
- 更新回退命名逻辑以确保项目级别的唯一性
- 在所有知识库操作中统一使用项目名称和项目ID进行验证
2026-02-02 15:28:33 +08:00

View File

@@ -11,7 +11,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings from app.core.config import settings
from app.core.logging import get_logger from app.core.logging import get_logger
from app.db.models import Dataset, DatasetFiles, LabelingProject from app.db.models import Dataset, DatasetFiles, LabelingProject
from app.module.annotation.service.text_fetcher import fetch_text_content_via_download_api
logger = get_logger(__name__) logger = get_logger(__name__)
@@ -77,15 +76,18 @@ class KnowledgeSyncService:
if set_id: if set_id:
exists = await self._get_knowledge_set(set_id) exists = await self._get_knowledge_set(set_id)
if exists: if exists and self._metadata_matches_project(exists.get("metadata"), project.id):
return set_id return set_id
logger.warning("知识集不存在,准备重建:set_id=%s", set_id) logger.warning(
"知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
set_id,
project.id,
)
dataset_name = project.name or "annotation-project" project_name = (project.name or "annotation-project").strip() or "annotation-project"
base_name = dataset_name.strip() or "annotation-project"
metadata = self._build_set_metadata(project) metadata = self._build_set_metadata(project)
existing = await self._find_knowledge_set_by_name(base_name) existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
if existing: if existing:
await self._update_project_config( await self._update_project_config(
project, project,
@@ -96,19 +98,19 @@ class KnowledgeSyncService:
) )
return existing.get("id") return existing.get("id")
created = await self._create_knowledge_set(base_name, metadata) created = await self._create_knowledge_set(project_name, metadata)
if not created: if not created:
created = await self._find_knowledge_set_by_name(base_name) created = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
if not created: if not created:
fallback_name = self._build_fallback_set_name(base_name, project.id) fallback_name = self._build_fallback_set_name(project_name, project.id)
existing = await self._find_knowledge_set_by_name(fallback_name) existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
if existing: if existing:
created = existing created = existing
else: else:
created = await self._create_knowledge_set(fallback_name, metadata) created = await self._create_knowledge_set(fallback_name, metadata)
if not created: if not created:
created = await self._find_knowledge_set_by_name(fallback_name) created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
if not created: if not created:
return None return None
@@ -153,16 +155,18 @@ class KnowledgeSyncService:
return [] return []
return [item for item in content if isinstance(item, dict)] return [item for item in content if isinstance(item, dict)]
async def _find_knowledge_set_by_name(self, name: str) -> Optional[Dict[str, Any]]: async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]:
if not name: if not name:
return None return None
items = await self._list_knowledge_sets(name) items = await self._list_knowledge_sets(name)
if not items: if not items:
return None return None
exact_matches = [item for item in items if item.get("name") == name] for item in items:
if not exact_matches: if item.get("name") != name:
continue
if self._metadata_matches_project(item.get("metadata"), project_id):
return item
return None return None
return exact_matches[0]
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]: async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
payload = { payload = {
@@ -249,16 +253,6 @@ class KnowledgeSyncService:
content_type = "MARKDOWN" content_type = "MARKDOWN"
content = annotation_json content = annotation_json
if dataset_type == "TEXT":
try:
content = await fetch_text_content_via_download_api(
project.dataset_id,
str(file_record.id),
)
content = self._append_annotation_to_content(content, annotation_json, content_type)
except Exception as exc:
logger.warning("读取文本失败,改为仅存标注JSON:%s", exc)
content = annotation_json
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"title": title, "title": title,
@@ -289,13 +283,6 @@ class KnowledgeSyncService:
extension = file_type extension = file_type
return extension.lower() in {"md", "markdown"} return extension.lower() in {"md", "markdown"}
def _append_annotation_to_content(self, content: str, annotation_json: str, content_type: str) -> str:
if content_type == "MARKDOWN":
return (
f"{content}\n\n---\n\n## 标注结果\n\n```json\n"
f"{annotation_json}\n```")
return f"{content}\n\n---\n\n标注结果(JSON):\n{annotation_json}"
def _strip_extension(self, file_name: str) -> str: def _strip_extension(self, file_name: str) -> str:
if not file_name: if not file_name:
return "" return ""
@@ -359,6 +346,27 @@ class KnowledgeSyncService:
except Exception: except Exception:
return json.dumps({"error": "failed to serialize"}, ensure_ascii=False) return json.dumps({"error": "failed to serialize"}, ensure_ascii=False)
def _metadata_matches_project(self, metadata: Any, project_id: str) -> bool:
    """Return True when ``metadata`` carries a ``project_id`` equal to ``project_id``.

    Args:
        metadata: Raw metadata; handed to ``self._parse_metadata`` to obtain
            a dict.
        project_id: Identifier the metadata must match. It is converted with
            ``str()`` and stripped before comparison so that a non-string or
            whitespace-padded id is compared on the same footing as the
            metadata value.

    Returns:
        False when ``project_id`` is falsy or blank, when the metadata
        cannot be parsed into a non-empty dict, or when the ids differ;
        True otherwise.
    """
    if not project_id:
        return False
    expected = str(project_id).strip()
    if not expected:
        return False
    parsed = self._parse_metadata(metadata)
    if not parsed:
        return False
    # A missing or None "project_id" becomes "" and therefore never matches.
    actual = str(parsed.get("project_id") or "").strip()
    return actual == expected
def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]:
if metadata is None:
return None
if isinstance(metadata, dict):
return metadata
if isinstance(metadata, str):
try:
payload = json.loads(metadata)
except Exception:
return None
return payload if isinstance(payload, dict) else None
return None
def _safe_response_text(self, response: httpx.Response) -> str: def _safe_response_text(self, response: httpx.Response) -> str:
try: try:
return response.text return response.text