You've already forked DataMate
feat(annotation): 增强知识库同步服务以支持项目隔离
- 在知识库查找时添加项目ID验证,确保知识库归属正确 - 修改日志消息以显示项目ID信息便于调试 - 重构知识库查找逻辑,从按名称查找改为按名称和项目ID组合查找 - 新增_metadata_matches_project方法验证元数据中的项目归属 - 新增_parse_metadata方法安全解析元数据JSON字符串 - 更新回退命名逻辑以确保项目级别的唯一性 - 在所有知识库操作中统一使用项目名称和项目ID进行验证
This commit is contained in:
@@ -77,15 +77,18 @@ class KnowledgeSyncService:
|
||||
|
||||
if set_id:
|
||||
exists = await self._get_knowledge_set(set_id)
|
||||
if exists:
|
||||
if exists and self._metadata_matches_project(exists.get("metadata"), project.id):
|
||||
return set_id
|
||||
logger.warning("知识集不存在,准备重建:set_id=%s", set_id)
|
||||
logger.warning(
|
||||
"知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
|
||||
set_id,
|
||||
project.id,
|
||||
)
|
||||
|
||||
dataset_name = project.name or "annotation-project"
|
||||
base_name = dataset_name.strip() or "annotation-project"
|
||||
project_name = (project.name or "annotation-project").strip() or "annotation-project"
|
||||
metadata = self._build_set_metadata(project)
|
||||
|
||||
existing = await self._find_knowledge_set_by_name(base_name)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
|
||||
if existing:
|
||||
await self._update_project_config(
|
||||
project,
|
||||
@@ -96,19 +99,19 @@ class KnowledgeSyncService:
|
||||
)
|
||||
return existing.get("id")
|
||||
|
||||
created = await self._create_knowledge_set(base_name, metadata)
|
||||
created = await self._create_knowledge_set(project_name, metadata)
|
||||
if not created:
|
||||
created = await self._find_knowledge_set_by_name(base_name)
|
||||
created = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
|
||||
|
||||
if not created:
|
||||
fallback_name = self._build_fallback_set_name(base_name, project.id)
|
||||
existing = await self._find_knowledge_set_by_name(fallback_name)
|
||||
fallback_name = self._build_fallback_set_name(project_name, project.id)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
|
||||
if existing:
|
||||
created = existing
|
||||
else:
|
||||
created = await self._create_knowledge_set(fallback_name, metadata)
|
||||
if not created:
|
||||
created = await self._find_knowledge_set_by_name(fallback_name)
|
||||
created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
|
||||
|
||||
if not created:
|
||||
return None
|
||||
@@ -153,16 +156,18 @@ class KnowledgeSyncService:
|
||||
return []
|
||||
return [item for item in content if isinstance(item, dict)]
|
||||
|
||||
async def _find_knowledge_set_by_name(self, name: str) -> Optional[Dict[str, Any]]:
|
||||
async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]:
|
||||
if not name:
|
||||
return None
|
||||
items = await self._list_knowledge_sets(name)
|
||||
if not items:
|
||||
return None
|
||||
exact_matches = [item for item in items if item.get("name") == name]
|
||||
if not exact_matches:
|
||||
return None
|
||||
return exact_matches[0]
|
||||
for item in items:
|
||||
if item.get("name") != name:
|
||||
continue
|
||||
if self._metadata_matches_project(item.get("metadata"), project_id):
|
||||
return item
|
||||
return None
|
||||
|
||||
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
|
||||
payload = {
|
||||
@@ -359,6 +364,27 @@ class KnowledgeSyncService:
|
||||
except Exception:
|
||||
return json.dumps({"error": "failed to serialize"}, ensure_ascii=False)
|
||||
|
||||
def _metadata_matches_project(self, metadata: Any, project_id: str) -> bool:
|
||||
if not project_id:
|
||||
return False
|
||||
parsed = self._parse_metadata(metadata)
|
||||
if not parsed:
|
||||
return False
|
||||
return str(parsed.get("project_id") or "").strip() == project_id
|
||||
|
||||
def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]:
|
||||
if metadata is None:
|
||||
return None
|
||||
if isinstance(metadata, dict):
|
||||
return metadata
|
||||
if isinstance(metadata, str):
|
||||
try:
|
||||
payload = json.loads(metadata)
|
||||
except Exception:
|
||||
return None
|
||||
return payload if isinstance(payload, dict) else None
|
||||
return None
|
||||
|
||||
def _safe_response_text(self, response: httpx.Response) -> str:
|
||||
try:
|
||||
return response.text
|
||||
|
||||
Reference in New Issue
Block a user