From 32e3fc97c66520c5d7c116322da1a295c16b23de Mon Sep 17 00:00:00 2001
From: Jerry Yan <792602257@qq.com>
Date: Mon, 2 Feb 2026 15:28:33 +0800
Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E5=A2=9E=E5=BC=BA=E7=9F=A5?=
 =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=90=8C=E6=AD=A5=E6=9C=8D=E5=8A=A1=E4=BB=A5?=
 =?UTF-8?q?=E6=94=AF=E6=8C=81=E9=A1=B9=E7=9B=AE=E9=9A=94=E7=A6=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 在知识库查找时添加项目ID验证,确保知识库归属正确
- 修改日志消息以显示项目ID信息便于调试
- 重构知识库查找逻辑,从按名称查找改为按名称和项目ID组合查找
- 新增_metadata_matches_project方法验证元数据中的项目归属
- 新增_parse_metadata方法安全解析元数据JSON字符串
- 更新回退命名逻辑以确保项目级别的唯一性
- 在所有知识库操作中统一使用项目名称和项目ID进行验证
---
Review amendment (not part of the commit message):
- _metadata_matches_project normalizes project_id with str(...).strip(),
  mirroring how the stored value is normalized before comparison.
- Docstrings added to the three helper methods touched by this patch.
- Hunk counts and diffstat are updated accordingly; the post-image blob id on
  the "index" line still refers to the pre-amendment content.

 .../annotation/service/knowledge_sync.py | 75 +++++++++++++++----
 1 file changed, 60 insertions(+), 15 deletions(-)

diff --git a/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py b/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py
index 369b78d..850a470 100644
--- a/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py
+++ b/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py
@@ -77,15 +77,18 @@ class KnowledgeSyncService:
 
         if set_id:
             exists = await self._get_knowledge_set(set_id)
-            if exists:
+            if exists and self._metadata_matches_project(exists.get("metadata"), project.id):
                 return set_id
-            logger.warning("知识集不存在,准备重建:set_id=%s", set_id)
+            logger.warning(
+                "知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
+                set_id,
+                project.id,
+            )
 
-        dataset_name = project.name or "annotation-project"
-        base_name = dataset_name.strip() or "annotation-project"
+        project_name = (project.name or "annotation-project").strip() or "annotation-project"
         metadata = self._build_set_metadata(project)
 
-        existing = await self._find_knowledge_set_by_name(base_name)
+        existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
         if existing:
             await self._update_project_config(
                 project,
@@ -96,19 +99,19 @@ class KnowledgeSyncService:
             )
             return existing.get("id")
 
-        created = await self._create_knowledge_set(base_name, metadata)
+        created = await self._create_knowledge_set(project_name, metadata)
         if not created:
-            created = await self._find_knowledge_set_by_name(base_name)
+            created = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
 
         if not created:
-            fallback_name = self._build_fallback_set_name(base_name, project.id)
-            existing = await self._find_knowledge_set_by_name(fallback_name)
+            fallback_name = self._build_fallback_set_name(project_name, project.id)
+            existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
             if existing:
                 created = existing
             else:
                 created = await self._create_knowledge_set(fallback_name, metadata)
                 if not created:
-                    created = await self._find_knowledge_set_by_name(fallback_name)
+                    created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
 
         if not created:
             return None
@@ -153,16 +156,25 @@ class KnowledgeSyncService:
             return []
         return [item for item in content if isinstance(item, dict)]
 
-    async def _find_knowledge_set_by_name(self, name: str) -> Optional[Dict[str, Any]]:
+    async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]:
+        """Find the knowledge set with this exact name that belongs to the project.
+
+        Returns the first item from ``_list_knowledge_sets(name)`` whose ``name``
+        equals ``name`` and whose ``metadata`` passes
+        ``_metadata_matches_project``; ``None`` if ``name`` is empty or no item
+        qualifies.
+        """
         if not name:
             return None
         items = await self._list_knowledge_sets(name)
         if not items:
             return None
-        exact_matches = [item for item in items if item.get("name") == name]
-        if not exact_matches:
-            return None
-        return exact_matches[0]
+        for item in items:
+            if item.get("name") != name:
+                continue
+            if self._metadata_matches_project(item.get("metadata"), project_id):
+                return item
+        return None
 
     async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
         payload = {
@@ -359,6 +371,39 @@ class KnowledgeSyncService:
         except Exception:
             return json.dumps({"error": "failed to serialize"}, ensure_ascii=False)
 
+    def _metadata_matches_project(self, metadata: Any, project_id: str) -> bool:
+        """Return True when ``metadata`` names ``project_id`` as its project.
+
+        ``metadata`` is decoded with ``_parse_metadata``. Both the stored
+        ``project_id`` value and the argument are compared as stripped
+        strings, so a non-string or padded id does not cause a false mismatch.
+        """
+        expected = str(project_id or "").strip()
+        if not expected:
+            return False
+        parsed = self._parse_metadata(metadata)
+        if not parsed:
+            return False
+        return str(parsed.get("project_id") or "").strip() == expected
+
+    def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]:
+        """Return ``metadata`` as a dict, decoding JSON when it is a string.
+
+        Returns ``None`` for ``None``, for invalid JSON, for JSON whose top-level
+        value is not an object, and for any other input type.
+        """
+        if metadata is None:
+            return None
+        if isinstance(metadata, dict):
+            return metadata
+        if isinstance(metadata, str):
+            try:
+                payload = json.loads(metadata)
+            except Exception:
+                return None
+            return payload if isinstance(payload, dict) else None
+        return None
+
     def _safe_response_text(self, response: httpx.Response) -> str:
         try:
             return response.text