diff --git a/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py b/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py index 369b78d..850a470 100644 --- a/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py +++ b/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py @@ -77,15 +77,18 @@ class KnowledgeSyncService: if set_id: exists = await self._get_knowledge_set(set_id) - if exists: + if exists and self._metadata_matches_project(exists.get("metadata"), project.id): return set_id - logger.warning("知识集不存在,准备重建:set_id=%s", set_id) + logger.warning( + "知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s", + set_id, + project.id, + ) - dataset_name = project.name or "annotation-project" - base_name = dataset_name.strip() or "annotation-project" + project_name = (project.name or "annotation-project").strip() or "annotation-project" metadata = self._build_set_metadata(project) - existing = await self._find_knowledge_set_by_name(base_name) + existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id) if existing: await self._update_project_config( project, @@ -96,19 +99,19 @@ class KnowledgeSyncService: ) return existing.get("id") - created = await self._create_knowledge_set(base_name, metadata) + created = await self._create_knowledge_set(project_name, metadata) if not created: - created = await self._find_knowledge_set_by_name(base_name) + created = await self._find_knowledge_set_by_name_and_project(project_name, project.id) if not created: - fallback_name = self._build_fallback_set_name(base_name, project.id) - existing = await self._find_knowledge_set_by_name(fallback_name) + fallback_name = self._build_fallback_set_name(project_name, project.id) + existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id) if existing: created = existing else: created = await self._create_knowledge_set(fallback_name, metadata) if not created: - created = await self._find_knowledge_set_by_name(fallback_name) + created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id) if not created: return None @@ -153,16 +156,18 @@ class KnowledgeSyncService: return [] return [item for item in content if isinstance(item, dict)] - async def _find_knowledge_set_by_name(self, name: str) -> Optional[Dict[str, Any]]: + async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]: if not name: return None items = await self._list_knowledge_sets(name) if not items: return None - exact_matches = [item for item in items if item.get("name") == name] - if not exact_matches: - return None - return exact_matches[0] + for item in items: + if item.get("name") != name: + continue + if self._metadata_matches_project(item.get("metadata"), project_id): + return item + return None async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]: payload = { @@ -359,6 +364,27 @@ class KnowledgeSyncService: except Exception: return json.dumps({"error": "failed to serialize"}, ensure_ascii=False) + def _metadata_matches_project(self, metadata: Any, project_id: str) -> bool: + if not project_id: + return False + parsed = self._parse_metadata(metadata) + if not parsed: + return False + return str(parsed.get("project_id") or "").strip() == project_id + + def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]: + if metadata is None: + return None + if isinstance(metadata, dict): + return metadata + if isinstance(metadata, str): + try: + payload = json.loads(metadata) + except Exception: + return None + return payload if isinstance(payload, dict) else None + return None + def _safe_response_text(self, response: httpx.Response) -> str: try: return response.text