You've already forked DataMate
fix: 修复知识库同步的并发控制、数据清理、文件事务和COCO导出问题
问题1 - 并发控制缺失:
- 在 _ensure_knowledge_set 方法中添加数据库行锁(with_for_update)
- 修改 _update_project_config 方法,使用行锁保护配置更新

问题3 - 数据清理机制缺失:
- 添加 _cleanup_knowledge_set_for_project 方法,项目删除时清理知识集
- 添加 _cleanup_knowledge_item_for_file 方法,文件删除时清理知识条目
- 在 delete_mapping 接口中调用清理方法

问题4 - 文件操作事务问题:
- 修改 uploadKnowledgeItems,添加事务失败后的文件清理逻辑
- 修改 deleteKnowledgeItem,删除记录前先删除关联文件
- 新增 deleteKnowledgeItemFile 辅助方法

问题5 - COCO导出格式问题:
- 添加 _get_image_dimensions 方法读取图片实际宽高
- 将百分比坐标转换为像素坐标
- 在 AnnotationExportItem 中添加 file_path 字段

涉及文件:
- knowledge_sync.py
- project.py
- KnowledgeItemApplicationService.java
- export.py
- export schema.py
This commit is contained in:
@@ -43,7 +43,9 @@ class KnowledgeSyncService:
|
||||
logger.warning("标注同步失败:无法获取知识集")
|
||||
return
|
||||
|
||||
item = await self._get_item_by_source(set_id, project.dataset_id, str(file_record.id))
|
||||
item = await self._get_item_by_source(
|
||||
set_id, project.dataset_id, str(file_record.id)
|
||||
)
|
||||
if item and item.get("status") in {"PUBLISHED", "ARCHIVED", "DEPRECATED"}:
|
||||
logger.info(
|
||||
"知识条目为只读状态,跳过同步:item_id=%s status=%s",
|
||||
@@ -71,26 +73,46 @@ class KnowledgeSyncService:
|
||||
logger.warning("标注同步到知识管理失败:%s", exc)
|
||||
|
||||
async def _ensure_knowledge_set(self, project: LabelingProject) -> Optional[str]:
|
||||
config = project.configuration if isinstance(project.configuration, dict) else {}
|
||||
result = await self.db.execute(
|
||||
select(LabelingProject)
|
||||
.where(LabelingProject.id == project.id)
|
||||
.with_for_update()
|
||||
)
|
||||
locked_project = result.scalar_one_or_none()
|
||||
if not locked_project:
|
||||
logger.warning("标注同步失败:无法锁定项目:project_id=%s", project.id)
|
||||
return None
|
||||
|
||||
config = (
|
||||
locked_project.configuration
|
||||
if isinstance(locked_project.configuration, dict)
|
||||
else {}
|
||||
)
|
||||
set_id = config.get(self.CONFIG_KEY_SET_ID)
|
||||
|
||||
if set_id:
|
||||
exists = await self._get_knowledge_set(set_id)
|
||||
if exists and self._metadata_matches_project(exists.get("metadata"), project.id):
|
||||
if exists and self._metadata_matches_project(
|
||||
exists.get("metadata"), locked_project.id
|
||||
):
|
||||
return set_id
|
||||
logger.warning(
|
||||
"知识集不存在或归属不匹配,准备重建:set_id=%s project_id=%s",
|
||||
set_id,
|
||||
project.id,
|
||||
locked_project.id,
|
||||
)
|
||||
|
||||
project_name = (project.name or "annotation-project").strip() or "annotation-project"
|
||||
metadata = self._build_set_metadata(project)
|
||||
project_name = (
|
||||
locked_project.name or "annotation-project"
|
||||
).strip() or "annotation-project"
|
||||
metadata = self._build_set_metadata(locked_project)
|
||||
|
||||
existing = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(
|
||||
project_name, locked_project.id
|
||||
)
|
||||
if existing:
|
||||
await self._update_project_config(
|
||||
project,
|
||||
locked_project,
|
||||
{
|
||||
self.CONFIG_KEY_SET_ID: existing.get("id"),
|
||||
self.CONFIG_KEY_SET_NAME: existing.get("name"),
|
||||
@@ -100,23 +122,31 @@ class KnowledgeSyncService:
|
||||
|
||||
created = await self._create_knowledge_set(project_name, metadata)
|
||||
if not created:
|
||||
created = await self._find_knowledge_set_by_name_and_project(project_name, project.id)
|
||||
created = await self._find_knowledge_set_by_name_and_project(
|
||||
project_name, locked_project.id
|
||||
)
|
||||
|
||||
if not created:
|
||||
fallback_name = self._build_fallback_set_name(project_name, project.id)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
|
||||
fallback_name = self._build_fallback_set_name(
|
||||
project_name, locked_project.id
|
||||
)
|
||||
existing = await self._find_knowledge_set_by_name_and_project(
|
||||
fallback_name, locked_project.id
|
||||
)
|
||||
if existing:
|
||||
created = existing
|
||||
else:
|
||||
created = await self._create_knowledge_set(fallback_name, metadata)
|
||||
if not created:
|
||||
created = await self._find_knowledge_set_by_name_and_project(fallback_name, project.id)
|
||||
created = await self._find_knowledge_set_by_name_and_project(
|
||||
fallback_name, locked_project.id
|
||||
)
|
||||
|
||||
if not created:
|
||||
return None
|
||||
|
||||
await self._update_project_config(
|
||||
project,
|
||||
locked_project,
|
||||
{
|
||||
self.CONFIG_KEY_SET_ID: created.get("id"),
|
||||
self.CONFIG_KEY_SET_NAME: created.get("name"),
|
||||
@@ -126,13 +156,17 @@ class KnowledgeSyncService:
|
||||
|
||||
async def _get_knowledge_set(self, set_id: str) -> Optional[Dict[str, Any]]:
    """Fetch a single knowledge set by id.

    Returns whatever ``self._request`` yields for the GET call. An HTTP 404
    is treated as "not found" and reported as ``None``; any other
    ``httpx.HTTPStatusError`` is re-raised to the caller.
    """
    endpoint = f"/data-management/knowledge-sets/{set_id}"
    try:
        return await self._request("GET", endpoint)
    except httpx.HTTPStatusError as exc:
        # Only a missing set is an expected outcome; everything else bubbles up.
        if exc.response.status_code != 404:
            raise
        return None
|
||||
|
||||
async def _list_knowledge_sets(self, keyword: Optional[str]) -> list[Dict[str, Any]]:
|
||||
async def _list_knowledge_sets(
|
||||
self, keyword: Optional[str]
|
||||
) -> list[Dict[str, Any]]:
|
||||
params: Dict[str, Any] = {
|
||||
"page": 1,
|
||||
"size": self.KNOWLEDGE_SET_LIST_SIZE,
|
||||
@@ -140,7 +174,9 @@ class KnowledgeSyncService:
|
||||
if keyword:
|
||||
params["keyword"] = keyword
|
||||
try:
|
||||
data = await self._request("GET", "/data-management/knowledge-sets", params=params)
|
||||
data = await self._request(
|
||||
"GET", "/data-management/knowledge-sets", params=params
|
||||
)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
logger.warning(
|
||||
"查询知识集失败:keyword=%s status=%s",
|
||||
@@ -155,7 +191,9 @@ class KnowledgeSyncService:
|
||||
return []
|
||||
return [item for item in content if isinstance(item, dict)]
|
||||
|
||||
async def _find_knowledge_set_by_name_and_project(self, name: str, project_id: str) -> Optional[Dict[str, Any]]:
|
||||
async def _find_knowledge_set_by_name_and_project(
|
||||
self, name: str, project_id: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
if not name:
|
||||
return None
|
||||
items = await self._list_knowledge_sets(name)
|
||||
@@ -168,7 +206,9 @@ class KnowledgeSyncService:
|
||||
return item
|
||||
return None
|
||||
|
||||
async def _create_knowledge_set(self, name: str, metadata: str) -> Optional[Dict[str, Any]]:
|
||||
async def _create_knowledge_set(
|
||||
self, name: str, metadata: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
payload = {
|
||||
"name": name,
|
||||
"description": "标注项目自动创建的知识集",
|
||||
@@ -176,7 +216,9 @@ class KnowledgeSyncService:
|
||||
"metadata": metadata,
|
||||
}
|
||||
try:
|
||||
return await self._request("POST", "/data-management/knowledge-sets", json=payload)
|
||||
return await self._request(
|
||||
"POST", "/data-management/knowledge-sets", json=payload
|
||||
)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
logger.warning(
|
||||
"创建知识集失败:name=%s status=%s detail=%s",
|
||||
@@ -199,7 +241,9 @@ class KnowledgeSyncService:
|
||||
"sourceFileId": file_id,
|
||||
}
|
||||
try:
|
||||
data = await self._request("GET", f"/data-management/knowledge-sets/{set_id}/items", params=params)
|
||||
data = await self._request(
|
||||
"GET", f"/data-management/knowledge-sets/{set_id}/items", params=params
|
||||
)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
logger.warning(
|
||||
"查询知识条目失败:set_id=%s status=%s",
|
||||
@@ -216,9 +260,13 @@ class KnowledgeSyncService:
|
||||
return content[0]
|
||||
|
||||
async def _create_item(self, set_id: str, payload: Dict[str, Any]) -> None:
    """POST ``payload`` as a new item under the knowledge set ``set_id``.

    The response from ``self._request`` is discarded; errors raised by it
    propagate unchanged.
    """
    endpoint = f"/data-management/knowledge-sets/{set_id}/items"
    await self._request("POST", endpoint, json=payload)
|
||||
|
||||
async def _update_item(self, set_id: str, item_id: str, payload: Dict[str, Any]) -> None:
|
||||
async def _update_item(
|
||||
self, set_id: str, item_id: str, payload: Dict[str, Any]
|
||||
) -> None:
|
||||
update_payload = dict(payload)
|
||||
update_payload.pop("sourceDatasetId", None)
|
||||
update_payload.pop("sourceFileId", None)
|
||||
@@ -228,6 +276,62 @@ class KnowledgeSyncService:
|
||||
json=update_payload,
|
||||
)
|
||||
|
||||
async def _cleanup_knowledge_set_for_project(self, project_id: str) -> None:
    """Delete the knowledge sets whose metadata matches ``project_id``.

    Sets are taken from ``self._list_knowledge_sets(None)`` and filtered with
    ``self._metadata_matches_project``. Each match is removed with a DELETE
    request. Deletion is best-effort: a failure is logged as a warning and
    the loop moves on to the next set.

    NOTE(review): only the set itself is deleted here; whether its items are
    removed as well depends on the remote service -- confirm.
    """
    for knowledge_set in await self._list_knowledge_sets(None):
        if not self._metadata_matches_project(
            knowledge_set.get("metadata"), project_id
        ):
            continue

        set_id = knowledge_set.get("id")
        if not set_id:
            # Nothing to address the DELETE to.
            continue

        try:
            await self._request(
                "DELETE", f"/data-management/knowledge-sets/{set_id}"
            )
            logger.info(
                "已删除知识集:set_id=%s project_id=%s", set_id, project_id
            )
        except Exception as exc:
            # Cleanup must not abort on one failing set.
            logger.warning(
                "删除知识集失败:set_id=%s project_id=%s error=%s",
                set_id,
                project_id,
                exc,
            )
|
||||
|
||||
async def _cleanup_knowledge_item_for_file(
    self, dataset_id: str, file_id: str
) -> None:
    """Delete the knowledge item that originates from one dataset file.

    Every set returned by ``self._list_knowledge_sets(None)`` is probed with
    ``self._get_item_by_source`` for an item matching ``dataset_id`` and
    ``file_id``. A matching item is removed with a DELETE request. Deletion
    is best-effort: failures are logged as warnings and the remaining sets
    are still processed.
    """
    knowledge_sets = await self._list_knowledge_sets(None)
    for knowledge_set in knowledge_sets:
        set_id = knowledge_set.get("id")
        if not set_id:
            continue

        entry = await self._get_item_by_source(set_id, dataset_id, file_id)
        if not entry or not entry.get("id"):
            # No item for this file in this set.
            continue

        try:
            await self._request(
                "DELETE",
                f"/data-management/knowledge-sets/{set_id}/items/{entry['id']}",
            )
            logger.info(
                "已删除知识条目:item_id=%s set_id=%s dataset_id=%s file_id=%s",
                entry.get("id"),
                set_id,
                dataset_id,
                file_id,
            )
        except Exception as exc:
            # Keep going so one failing set does not block the others.
            logger.warning(
                "删除知识条目失败:item_id=%s set_id=%s dataset_id=%s file_id=%s error=%s",
                entry.get("id"),
                set_id,
                dataset_id,
                file_id,
                exc,
            )
|
||||
|
||||
async def _build_item_payload(
|
||||
self,
|
||||
project: LabelingProject,
|
||||
@@ -323,12 +427,28 @@ class KnowledgeSyncService:
|
||||
short_id = project_id.replace("-", "")[:8]
|
||||
return f"{base_name}-annotation-{short_id}"
|
||||
|
||||
async def _update_project_config(
    self, project: LabelingProject, updates: Dict[str, Any]
) -> None:
    """Merge ``updates`` into the project's configuration under a row lock.

    The project row is re-read with ``SELECT ... FOR UPDATE`` so that the
    merge is applied to the currently stored configuration. If the row
    cannot be found, a warning is logged and nothing is written.

    Args:
        project: Project whose ``id`` identifies the row to update.
        updates: Keys and values to merge into the stored configuration;
            existing keys are overwritten.

    Side effects:
        Commits the session (which also ends the row lock) and refreshes
        the locked project instance.
    """
    result = await self.db.execute(
        select(LabelingProject)
        .where(LabelingProject.id == project.id)
        .with_for_update()
    )
    locked_project = result.scalar_one_or_none()
    if not locked_project:
        logger.warning("更新项目配置失败:无法锁定项目:project_id=%s", project.id)
        return

    current = locked_project.configuration
    # Build a NEW dict rather than mutating the loaded one in place. If the
    # column is a plain JSON type without mutation tracking (assumption --
    # confirm against the model), re-assigning the same mutated object makes
    # old and new values compare equal, so the ORM may skip the UPDATE.
    if isinstance(current, dict):
        config = {**current, **updates}
    else:
        # A non-dict (e.g. NULL) configuration is replaced by the updates.
        config = dict(updates)

    locked_project.configuration = config
    await self.db.commit()
    await self.db.refresh(locked_project)
|
||||
|
||||
async def _request(self, method: str, path: str, **kwargs) -> Any:
|
||||
url = f"{self.base_url}{path}"
|
||||
|
||||
Reference in New Issue
Block a user