From cb71712f513b2d17afda701b4107812042962fed Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Thu, 22 Jan 2026 17:48:27 +0800 Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E4=BC=98=E5=8C=96=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E9=9B=86=E5=90=88=E5=90=8C=E6=AD=A5=E6=9C=8D=E5=8A=A1?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加 KNOWLEDGE_SET_LIST_SIZE 常量配置知识集列表大小为50 - 实现现有知识集名称检查避免重复创建 - 添加 _list_knowledge_sets 方法用于查询知识集列表 - 实现 _parse_metadata 和 _metadata_matches_project 方法解析匹配项目元数据 - 添加 _find_knowledge_set_by_name 方法按名称查找知识集 - 优化知识集创建流程增加重试和回退机制 - 实现项目配置更新保存知识集ID和名称信息 --- .../annotation/service/knowledge_sync.py | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py b/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py index 7dd846a..4c904a8 100644 --- a/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py +++ b/runtime/datamate-python/app/module/annotation/service/knowledge_sync.py @@ -21,6 +21,7 @@ class KnowledgeSyncService: CONFIG_KEY_SET_ID = "knowledge_set_id" CONFIG_KEY_SET_NAME = "knowledge_set_name" + KNOWLEDGE_SET_LIST_SIZE = 50 def __init__(self, db: AsyncSession): self.db = db @@ -84,10 +85,30 @@ class KnowledgeSyncService: base_name = dataset_name.strip() or "annotation-project" metadata = self._build_set_metadata(project) + existing = await self._find_knowledge_set_by_name(base_name, project) + if existing: + await self._update_project_config( + project, + { + self.CONFIG_KEY_SET_ID: existing.get("id"), + self.CONFIG_KEY_SET_NAME: existing.get("name"), + }, + ) + return existing.get("id") + created = await self._create_knowledge_set(base_name, metadata) + if not created: + created = await self._find_knowledge_set_by_name(base_name, project) + if not created: fallback_name = self._build_fallback_set_name(base_name, project.id) - created = await self._create_knowledge_set(fallback_name, metadata) + existing = await self._find_knowledge_set_by_name(fallback_name, project) + if existing: + created = existing + else: + created = await self._create_knowledge_set(fallback_name, metadata) + if not created: + created = await self._find_knowledge_set_by_name(fallback_name, project) if not created: return None @@ -109,6 +130,66 @@ class KnowledgeSyncService: return None raise + async def _list_knowledge_sets(self, keyword: Optional[str]) -> list[Dict[str, Any]]: + params: Dict[str, Any] = { + "page": 0, + "size": self.KNOWLEDGE_SET_LIST_SIZE, + } + if keyword: + params["keyword"] = keyword + try: + data = await self._request("GET", "/data-management/knowledge-sets", params=params) + except httpx.HTTPStatusError as exc: + logger.warning( + "查询知识集失败:keyword=%s status=%s", + keyword, + exc.response.status_code, + ) + return [] + if not isinstance(data, dict): + return [] + content = data.get("content") + if not isinstance(content, list): + return [] + return [item for item in content if isinstance(item, dict)] + + def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]: + if not isinstance(metadata, str) or not metadata.strip(): + return None + try: + parsed = json.loads(metadata) + except Exception: + return None + return parsed if isinstance(parsed, dict) else None + + def _metadata_matches_project(self, metadata: Any, project: LabelingProject) -> bool: + parsed = self._parse_metadata(metadata) + if not parsed: + return False + return ( + parsed.get("source") == "annotation" + and parsed.get("project_id") == project.id + and parsed.get("dataset_id") == project.dataset_id + ) + + async def _find_knowledge_set_by_name( + self, + name: str, + project: LabelingProject, + ) -> Optional[Dict[str, Any]]: + if not name: + return None + items = await self._list_knowledge_sets(name) + if not items: + return None + exact_matches = [item for item in items if item.get("name") == name] + if not exact_matches: + return None + for item in exact_matches: + if self._metadata_matches_project(item.get("metadata"), project): + return item + return exact_matches[0] + async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]: payload = { "name": name,