fix: 修复codex review发现的问题

问题1 - 行锁持有时间过长:
- 采用双重检查锁定模式,将HTTP调用移到锁范围外
- 新增 _update_knowledge_set_config 方法专门处理加锁更新

问题2 - 清理不完整:
- _list_knowledge_sets 方法添加分页参数
- 新增 _list_all_knowledge_sets 方法遍历所有知识集
- 清理方法使用新的全量查询方法

问题3 - 文件删除逻辑可能误删:
- deleteKnowledgeItemFile 方法增加严格的 sourceType 检查
- 只有当 sourceType 为 FILE_UPLOAD 或 MANUAL 时才删除文件
- 避免误删 DATASET_FILE 类型的数据集文件

涉及文件:
- knowledge_sync.py
- KnowledgeItemApplicationService.java
This commit is contained in:
2026-02-05 04:07:40 +08:00
parent 99bd83d312
commit 4143bc75f9
2 changed files with 81 additions and 48 deletions

View File

@@ -809,8 +809,8 @@ public class KnowledgeItemApplicationService {
if (knowledgeItem == null) { if (knowledgeItem == null) {
return; return;
} }
if (knowledgeItem.getSourceType() == KnowledgeSourceType.FILE_UPLOAD KnowledgeSourceType sourceType = knowledgeItem.getSourceType();
|| knowledgeItem.getContentType() == KnowledgeContentType.FILE) { if (sourceType == KnowledgeSourceType.FILE_UPLOAD || sourceType == KnowledgeSourceType.MANUAL) {
String relativePath = knowledgeItem.getContent(); String relativePath = knowledgeItem.getContent();
if (StringUtils.isNotBlank(relativePath)) { if (StringUtils.isNotBlank(relativePath)) {
try { try {

View File

@@ -73,65 +73,46 @@ class KnowledgeSyncService:
logger.warning("标注同步到知识管理失败:%s", exc) logger.warning("标注同步到知识管理失败:%s", exc)
async def _ensure_knowledge_set(self, project: LabelingProject) -> Optional[str]: async def _ensure_knowledge_set(self, project: LabelingProject) -> Optional[str]:
result = await self.db.execute( # 第一次检查:无锁查询配置
select(LabelingProject)
.where(LabelingProject.id == project.id)
.with_for_update()
)
locked_project = result.scalar_one_or_none()
if not locked_project:
logger.warning("标注同步失败:无法锁定项目:project_id=%s", project.id)
return None
config = ( config = (
locked_project.configuration project.configuration if isinstance(project.configuration, dict) else {}
if isinstance(locked_project.configuration, dict)
else {}
) )
set_id = config.get(self.CONFIG_KEY_SET_ID) set_id = config.get(self.CONFIG_KEY_SET_ID)
if set_id: if set_id:
exists = await self._get_knowledge_set(set_id) exists = await self._get_knowledge_set(set_id)
if exists and self._metadata_matches_project( if exists and self._metadata_matches_project(
exists.get("metadata"), locked_project.id exists.get("metadata"), project.id
): ):
return set_id return set_id
logger.warning( logger.warning(
"知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s", "知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
set_id, set_id,
locked_project.id, project.id,
) )
set_id = None
project_name = ( project_name = (
locked_project.name or "annotation-project" project.name or "annotation-project"
).strip() or "annotation-project" ).strip() or "annotation-project"
metadata = self._build_set_metadata(locked_project) metadata = self._build_set_metadata(project)
existing = await self._find_knowledge_set_by_name_and_project( existing = await self._find_knowledge_set_by_name_and_project(
project_name, locked_project.id project_name, project.id
) )
if existing: if existing:
await self._update_project_config( return await self._update_knowledge_set_config(project, existing)
locked_project,
{
self.CONFIG_KEY_SET_ID: existing.get("id"),
self.CONFIG_KEY_SET_NAME: existing.get("name"),
},
)
return existing.get("id")
created = await self._create_knowledge_set(project_name, metadata) created = await self._create_knowledge_set(project_name, metadata)
if not created: if not created:
created = await self._find_knowledge_set_by_name_and_project( created = await self._find_knowledge_set_by_name_and_project(
project_name, locked_project.id project_name, project.id
) )
if not created: if not created:
fallback_name = self._build_fallback_set_name( fallback_name = self._build_fallback_set_name(project_name, project.id)
project_name, locked_project.id
)
existing = await self._find_knowledge_set_by_name_and_project( existing = await self._find_knowledge_set_by_name_and_project(
fallback_name, locked_project.id fallback_name, project.id
) )
if existing: if existing:
created = existing created = existing
@@ -139,20 +120,13 @@ class KnowledgeSyncService:
created = await self._create_knowledge_set(fallback_name, metadata) created = await self._create_knowledge_set(fallback_name, metadata)
if not created: if not created:
created = await self._find_knowledge_set_by_name_and_project( created = await self._find_knowledge_set_by_name_and_project(
fallback_name, locked_project.id fallback_name, project.id
) )
if not created: if not created:
return None return None
await self._update_project_config( return await self._update_knowledge_set_config(project, created)
locked_project,
{
self.CONFIG_KEY_SET_ID: created.get("id"),
self.CONFIG_KEY_SET_NAME: created.get("name"),
},
)
return created.get("id")
async def _get_knowledge_set(self, set_id: str) -> Optional[Dict[str, Any]]: async def _get_knowledge_set(self, set_id: str) -> Optional[Dict[str, Any]]:
try: try:
@@ -165,11 +139,14 @@ class KnowledgeSyncService:
raise raise
async def _list_knowledge_sets( async def _list_knowledge_sets(
self, keyword: Optional[str] self,
keyword: Optional[str],
page: Optional[int] = None,
size: Optional[int] = None,
) -> list[Dict[str, Any]]: ) -> list[Dict[str, Any]]:
params: Dict[str, Any] = { params: Dict[str, Any] = {
"page": 1, "page": page if page is not None else 1,
"size": self.KNOWLEDGE_SET_LIST_SIZE, "size": size if size is not None else self.KNOWLEDGE_SET_LIST_SIZE,
} }
if keyword: if keyword:
params["keyword"] = keyword params["keyword"] = keyword
@@ -191,12 +168,29 @@ class KnowledgeSyncService:
return [] return []
return [item for item in content if isinstance(item, dict)] return [item for item in content if isinstance(item, dict)]
async def _list_all_knowledge_sets(
    self, keyword: Optional[str] = None
) -> list[Dict[str, Any]]:
    """Collect knowledge sets from every page of the listing endpoint.

    Pages are requested through ``self._list_knowledge_sets`` starting at
    page 1 with ``self.KNOWLEDGE_SET_LIST_SIZE`` items per page.

    Args:
        keyword: Optional search keyword forwarded unchanged to
            ``_list_knowledge_sets``; ``None`` requests an unfiltered list.

    Returns:
        All items of all fetched pages, in the order they were returned.
        Iteration stops at the first empty page, the first page shorter
        than the page size, or a full page identical to the previous one.
    """
    import logging

    page_size = self.KNOWLEDGE_SET_LIST_SIZE
    collected: list[Dict[str, Any]] = []
    previous_page: Optional[list[Dict[str, Any]]] = None
    page = 1
    while True:
        items = await self._list_knowledge_sets(keyword, page=page, size=page_size)
        if not items:
            break
        # A full page identical to the previous one means the backend is
        # not advancing (e.g. it ignores ``page``). Without this guard the
        # loop would never terminate and ``collected`` would grow forever.
        if items == previous_page:
            logging.getLogger(__name__).warning(
                "知识集分页查询返回重复页,停止遍历:page=%s", page
            )
            break
        collected.extend(items)
        # A short page is the last page.
        if len(items) < page_size:
            break
        previous_page = items
        page += 1
    return collected
async def _find_knowledge_set_by_name_and_project( async def _find_knowledge_set_by_name_and_project(
self, name: str, project_id: str self, name: str, project_id: str
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
if not name: if not name:
return None return None
items = await self._list_knowledge_sets(name) items = await self._list_all_knowledge_sets(name)
if not items: if not items:
return None return None
for item in items: for item in items:
@@ -278,7 +272,7 @@ class KnowledgeSyncService:
async def _cleanup_knowledge_set_for_project(self, project_id: str) -> None: async def _cleanup_knowledge_set_for_project(self, project_id: str) -> None:
"""清理项目关联的知识集及其所有知识条目""" """清理项目关联的知识集及其所有知识条目"""
items = await self._list_knowledge_sets(None) items = await self._list_all_knowledge_sets()
for item in items: for item in items:
if self._metadata_matches_project(item.get("metadata"), project_id): if self._metadata_matches_project(item.get("metadata"), project_id):
set_id = item.get("id") set_id = item.get("id")
@@ -303,7 +297,7 @@ class KnowledgeSyncService:
self, dataset_id: str, file_id: str self, dataset_id: str, file_id: str
) -> None: ) -> None:
"""清理文件的知识条目""" """清理文件的知识条目"""
items = await self._list_knowledge_sets(None) items = await self._list_all_knowledge_sets()
for set_item in items: for set_item in items:
set_id = set_item.get("id") set_id = set_item.get("id")
if not set_id: if not set_id:
@@ -427,6 +421,45 @@ class KnowledgeSyncService:
short_id = project_id.replace("-", "")[:8] short_id = project_id.replace("-", "")[:8]
return f"{base_name}-annotation-{short_id}" return f"{base_name}-annotation-{short_id}"
async def _update_knowledge_set_config(
    self, project: LabelingProject, knowledge_set: Dict[str, Any]
) -> Optional[str]:
    """Store a knowledge set's id and name in the project configuration.

    The project row is re-selected with ``FOR UPDATE`` and its
    configuration is checked again before writing, so that a set id
    written concurrently by someone else is kept instead of overwritten.

    Args:
        project: The project as seen by the caller before locking. Its
            configuration supplies the set id the caller already judged
            unusable (if any).
        knowledge_set: Mapping whose ``"id"`` and ``"name"`` values are
            written to the configuration.

    Returns:
        The set id now associated with the project: the concurrently
        written id if one was found, otherwise ``knowledge_set["id"]``.
        ``None`` when the project row could not be selected.
    """
    # Read the caller's view of the set id *before* issuing the locking
    # SELECT. If ``project`` and the locked row are the same ORM object,
    # the SELECT may refresh it; reading afterwards could lose this value.
    seen_config = project.configuration if isinstance(project.configuration, dict) else {}
    seen_set_id = seen_config.get(self.CONFIG_KEY_SET_ID)

    result = await self.db.execute(
        select(LabelingProject)
        .where(LabelingProject.id == project.id)
        .with_for_update()
    )
    locked_project = result.scalar_one_or_none()
    # NOTE(review): the early-return paths below neither commit nor roll
    # back, so presumably the row lock stays held until the caller ends
    # the transaction — confirm this is intended.
    if not locked_project:
        logger.warning("标注同步失败:无法锁定项目:project_id=%s", project.id)
        return None

    config = (
        locked_project.configuration
        if isinstance(locked_project.configuration, dict)
        else {}
    )
    current_set_id = config.get(self.CONFIG_KEY_SET_ID)
    # Only a set id that differs from what the caller saw counts as a
    # concurrent update. An id equal to the caller's pre-lock value is the
    # one the caller is replacing; returning it would leave the invalid id
    # in place on every call.
    if current_set_id and current_set_id != seen_set_id:
        logger.info(
            "知识集配置已被其他进程更新:set_id=%s project_id=%s",
            current_set_id,
            locked_project.id,
        )
        return current_set_id

    # Assign a fresh dict instead of mutating the loaded one in place, so
    # the write does not depend on the column tracking in-place mutation.
    # NOTE(review): confirm whether the column uses MutableDict.
    locked_project.configuration = {
        **config,
        self.CONFIG_KEY_SET_ID: knowledge_set.get("id"),
        self.CONFIG_KEY_SET_NAME: knowledge_set.get("name"),
    }
    await self.db.commit()
    await self.db.refresh(locked_project)
    return knowledge_set.get("id")
async def _update_project_config( async def _update_project_config(
self, project: LabelingProject, updates: Dict[str, Any] self, project: LabelingProject, updates: Dict[str, Any]
) -> None: ) -> None: