feat(annotation): 增强知识库同步服务以支持项目隔离

- 在知识库查找时添加项目ID验证,确保知识库归属正确
- 修改日志消息以显示项目ID信息便于调试
- 重构知识库查找逻辑,从按名称查找改为按名称和项目ID组合查找
- 新增_metadata_matches_project方法验证元数据中的项目归属
- 新增_parse_metadata方法安全解析元数据JSON字符串
- 更新回退命名逻辑以确保项目级别的唯一性
- 在所有知识库操作中统一使用项目名称和项目ID进行验证
This commit is contained in:
2026-02-02 15:28:33 +08:00
parent a73571bd73
commit 32e3fc97c6

View File

@@ -77,15 +77,18 @@ class KnowledgeSyncService:
if set_id: if set_id:
exists = await self._get_knowledge_set(set_id) exists = await self._get_knowledge_set(set_id)
if exists: if exists and self._metadata_matches_project(exists.get("metadata"), project.id):
return set_id return set_id
logger.warning("知识集不存在,准备重建:set_id=%s", set_id) logger.warning(
"知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
set_id,
project.id,
)
dataset_name = project.name or "annotation-project" project_name = (project.name or "annotation-project").strip() or "annotation-project"
base_name = dataset_name.strip() or "annotation-project"
metadata = self._build_set_metadata(project) metadata = self._build_set_metadata(project)
existing = await self._find_knowledge_set_by_name(base_name) existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
if existing: if existing:
await self._update_project_config( await self._update_project_config(
project, project,
@@ -96,19 +99,19 @@ class KnowledgeSyncService:
) )
return existing.get("id") return existing.get("id")
created = await self._create_knowledge_set(base_name, metadata) created = await self._create_knowledge_set(project_name, metadata)
if not created: if not created:
created = await self._find_knowledge_set_by_name(base_name) created = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
if not created: if not created:
fallback_name = self._build_fallback_set_name(base_name, project.id) fallback_name = self._build_fallback_set_name(project_name, project.id)
existing = await self._find_knowledge_set_by_name(fallback_name) existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
if existing: if existing:
created = existing created = existing
else: else:
created = await self._create_knowledge_set(fallback_name, metadata) created = await self._create_knowledge_set(fallback_name, metadata)
if not created: if not created:
created = await self._find_knowledge_set_by_name(fallback_name) created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
if not created: if not created:
return None return None
@@ -153,16 +156,18 @@ class KnowledgeSyncService:
return [] return []
return [item for item in content if isinstance(item, dict)] return [item for item in content if isinstance(item, dict)]
async def _find_knowledge_set_by_name(self, name: str) -> Optional[Dict[str, Any]]: async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]:
if not name: if not name:
return None return None
items = await self._list_knowledge_sets(name) items = await self._list_knowledge_sets(name)
if not items: if not items:
return None return None
exact_matches = [item for item in items if item.get("name") == name] for item in items:
if not exact_matches: if item.get("name") != name:
continue
if self._metadata_matches_project(item.get("metadata"), project_id):
return item
return None return None
return exact_matches[0]
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]: async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
payload = { payload = {
@@ -359,6 +364,27 @@ class KnowledgeSyncService:
except Exception: except Exception:
return json.dumps({"error": "failed to serialize"}, ensure_ascii=False) return json.dumps({"error": "failed to serialize"}, ensure_ascii=False)
def _metadata_matches_project(self, metadata: Any, project_id: str) -> bool:
if not project_id:
return False
parsed = self._parse_metadata(metadata)
if not parsed:
return False
return str(parsed.get("project_id") or "").strip() == project_id
def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]:
if metadata is None:
return None
if isinstance(metadata, dict):
return metadata
if isinstance(metadata, str):
try:
payload = json.loads(metadata)
except Exception:
return None
return payload if isinstance(payload, dict) else None
return None
def _safe_response_text(self, response: httpx.Response) -> str: def _safe_response_text(self, response: httpx.Response) -> str:
try: try:
return response.text return response.text