You've already forked DataMate
feat(annotation): 优化知识集合同步服务逻辑
- 添加 KNOWLEDGE_SET_LIST_SIZE 常量,配置知识集列表大小为 50
- 实现现有知识集名称检查,避免重复创建
- 添加 _list_knowledge_sets 方法用于查询知识集列表
- 实现 _parse_metadata 和 _metadata_matches_project 方法,解析并匹配项目元数据
- 添加 _find_knowledge_set_by_name 方法按名称查找知识集
- 优化知识集创建流程,增加重试和回退机制
- 实现项目配置更新,保存知识集 ID 和名称信息
This commit is contained in:
@@ -21,6 +21,7 @@ class KnowledgeSyncService:
|
|||||||
|
|
||||||
CONFIG_KEY_SET_ID = "knowledge_set_id"
|
CONFIG_KEY_SET_ID = "knowledge_set_id"
|
||||||
CONFIG_KEY_SET_NAME = "knowledge_set_name"
|
CONFIG_KEY_SET_NAME = "knowledge_set_name"
|
||||||
|
KNOWLEDGE_SET_LIST_SIZE = 50
|
||||||
|
|
||||||
def __init__(self, db: AsyncSession):
|
def __init__(self, db: AsyncSession):
|
||||||
self.db = db
|
self.db = db
|
||||||
@@ -84,10 +85,30 @@ class KnowledgeSyncService:
|
|||||||
base_name = dataset_name.strip() or "annotation-project"
|
base_name = dataset_name.strip() or "annotation-project"
|
||||||
metadata = self._build_set_metadata(project)
|
metadata = self._build_set_metadata(project)
|
||||||
|
|
||||||
|
existing = await self._find_knowledge_set_by_name(base_name, project)
|
||||||
|
if existing:
|
||||||
|
await self._update_project_config(
|
||||||
|
project,
|
||||||
|
{
|
||||||
|
self.CONFIG_KEY_SET_ID: existing.get("id"),
|
||||||
|
self.CONFIG_KEY_SET_NAME: existing.get("name"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return existing.get("id")
|
||||||
|
|
||||||
created = await self._create_knowledge_set(base_name, metadata)
|
created = await self._create_knowledge_set(base_name, metadata)
|
||||||
|
if not created:
|
||||||
|
created = await self._find_knowledge_set_by_name(base_name, project)
|
||||||
|
|
||||||
if not created:
|
if not created:
|
||||||
fallback_name = self._build_fallback_set_name(base_name, project.id)
|
fallback_name = self._build_fallback_set_name(base_name, project.id)
|
||||||
|
existing = await self._find_knowledge_set_by_name(fallback_name, project)
|
||||||
|
if existing:
|
||||||
|
created = existing
|
||||||
|
else:
|
||||||
created = await self._create_knowledge_set(fallback_name, metadata)
|
created = await self._create_knowledge_set(fallback_name, metadata)
|
||||||
|
if not created:
|
||||||
|
created = await self._find_knowledge_set_by_name(fallback_name, project)
|
||||||
|
|
||||||
if not created:
|
if not created:
|
||||||
return None
|
return None
|
||||||
@@ -109,6 +130,66 @@ class KnowledgeSyncService:
|
|||||||
return None
|
return None
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
async def _list_knowledge_sets(self, keyword: Optional[str]) -> list[Dict[str, Any]]:
    """Query the first page of knowledge sets, optionally filtered by keyword.

    Sends a GET to ``/data-management/knowledge-sets`` with ``page=0`` and
    ``size=self.KNOWLEDGE_SET_LIST_SIZE``; ``keyword`` is added to the query
    only when it is truthy.

    Returns:
        The dict entries found under the response's ``content`` key. An empty
        list is returned when the request fails with an HTTP status error,
        when the response is not a dict, or when ``content`` is not a list.
        Non-dict entries inside ``content`` are dropped.
    """
    query: Dict[str, Any] = {"page": 0, "size": self.KNOWLEDGE_SET_LIST_SIZE}
    if keyword:
        query["keyword"] = keyword

    try:
        payload = await self._request("GET", "/data-management/knowledge-sets", params=query)
    except httpx.HTTPStatusError as exc:
        # Best effort: a failed lookup is logged and reported as "nothing found".
        logger.warning(
            "查询知识集失败:keyword=%s status=%s",
            keyword,
            exc.response.status_code,
        )
        return []

    entries = payload.get("content") if isinstance(payload, dict) else None
    if not isinstance(entries, list):
        return []
    return [entry for entry in entries if isinstance(entry, dict)]
|
||||||
|
|
||||||
|
def _parse_metadata(self, metadata: Any) -> Optional[Dict[str, Any]]:
    """Decode knowledge-set metadata into a dictionary.

    Args:
        metadata: Either a JSON-encoded string or an already-decoded dict.
            Any other type is treated as "no metadata".

    Returns:
        The metadata as a dict, or ``None`` when the value is missing, blank,
        not valid JSON, or decodes to something other than a JSON object.
    """
    # Tolerate payloads that were already decoded upstream.
    if isinstance(metadata, dict):
        return metadata
    if not isinstance(metadata, str) or not metadata.strip():
        return None
    try:
        parsed = json.loads(metadata)
    except (ValueError, RecursionError):
        # json.JSONDecodeError subclasses ValueError; RecursionError covers
        # pathologically nested documents. Other errors are left to propagate.
        return None
    return parsed if isinstance(parsed, dict) else None
|
||||||
|
|
||||||
|
def _metadata_matches_project(self, metadata: Any, project: LabelingProject) -> bool:
    """Tell whether knowledge-set metadata refers to the given project.

    The metadata is decoded with ``_parse_metadata``. It matches only when its
    ``source`` is ``"annotation"`` and its ``project_id`` and ``dataset_id``
    equal ``project.id`` and ``project.dataset_id`` respectively. Missing or
    undecodable metadata never matches.
    """
    decoded = self._parse_metadata(metadata)
    if not decoded:
        return False
    if decoded.get("source") != "annotation":
        return False
    if decoded.get("project_id") != project.id:
        return False
    return decoded.get("dataset_id") == project.dataset_id
|
||||||
|
|
||||||
|
async def _find_knowledge_set_by_name(
    self,
    name: str,
    project: LabelingProject,
) -> Optional[Dict[str, Any]]:
    """Look up a knowledge set whose name equals ``name`` exactly.

    ``name`` is passed to ``_list_knowledge_sets`` as the keyword, and the
    listing is then narrowed to entries whose ``name`` field is identical.
    Among those, the first entry whose metadata matches ``project`` (per
    ``_metadata_matches_project``) is preferred; if none matches, the first
    same-named entry is returned.

    Returns:
        The chosen knowledge-set dict, or ``None`` when ``name`` is empty or
        no entry carries exactly that name.
    """
    if not name:
        return None

    listed = await self._list_knowledge_sets(name)
    if not listed:
        return None

    # The listing is queried by keyword, so keep only identical names.
    same_name = [entry for entry in listed if entry.get("name") == name]
    if not same_name:
        return None

    owned = next(
        (
            entry
            for entry in same_name
            if self._metadata_matches_project(entry.get("metadata"), project)
        ),
        None,
    )
    return owned if owned is not None else same_name[0]
|
||||||
|
|
||||||
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
|
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
|
||||||
payload = {
|
payload = {
|
||||||
"name": name,
|
"name": name,
|
||||||
|
|||||||
Reference in New Issue
Block a user