You've already forked DataMate
feature: LabelStudio jumps without login (#201)
This commit is contained in:
@@ -1,11 +1,12 @@
|
||||
import httpx
|
||||
import re
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
|
||||
from .schema import (
|
||||
LabelStudioProject,
|
||||
LabelStudioProject,
|
||||
LabelStudioCreateProjectRequest,
|
||||
LabelStudioCreateTaskRequest
|
||||
)
|
||||
@@ -14,11 +15,11 @@ logger = get_logger(__name__)
|
||||
|
||||
class Client:
|
||||
"""Label Studio服务客户端
|
||||
|
||||
|
||||
使用 HTTP REST API 直接与 Label Studio 交互
|
||||
认证方式:使用 Authorization: Token {token} 头部进行认证
|
||||
"""
|
||||
|
||||
|
||||
# 默认标注配置模板
|
||||
DEFAULT_LABEL_CONFIGS = {
|
||||
"image": """
|
||||
@@ -57,15 +58,15 @@ class Client:
|
||||
</View>
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_url: Optional[str] = None,
|
||||
self,
|
||||
base_url: Optional[str] = None,
|
||||
token: Optional[str] = None,
|
||||
timeout: float = 30.0
|
||||
):
|
||||
"""初始化 Label Studio 客户端
|
||||
|
||||
|
||||
Args:
|
||||
base_url: Label Studio 服务地址
|
||||
token: API Token(使用 Authorization: Token {token} 头部)
|
||||
@@ -74,10 +75,10 @@ class Client:
|
||||
self.base_url = (base_url or settings.label_studio_base_url).rstrip("/")
|
||||
self.token = token or settings.label_studio_user_token
|
||||
self.timeout = timeout
|
||||
|
||||
|
||||
if not self.token:
|
||||
raise ValueError("Label Studio API token is required")
|
||||
|
||||
|
||||
# 初始化 HTTP 客户端
|
||||
self.client = httpx.AsyncClient(
|
||||
base_url=self.base_url,
|
||||
@@ -87,46 +88,80 @@ class Client:
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
logger.debug(f"Label Studio client initialized: {self.base_url}")
|
||||
|
||||
|
||||
def get_label_config_by_type(self, data_type: str) -> str:
|
||||
"""根据数据类型获取标注配置"""
|
||||
return self.DEFAULT_LABEL_CONFIGS.get(data_type.lower(), self.DEFAULT_LABEL_CONFIGS["image"])
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get_csrf_token(html: str) -> str:
|
||||
m = re.search(r'name="csrfmiddlewaretoken"\s+value="([^"]+)"', html)
|
||||
if not m:
|
||||
raise IOError("CSRF Token not found")
|
||||
return m.group(1)
|
||||
|
||||
async def login_label_studio(self):
|
||||
try:
|
||||
response = await self.client.get("/user/login/")
|
||||
response.raise_for_status()
|
||||
body = response.text
|
||||
set_cookie_headers = response.headers.get_list("set-cookie")
|
||||
cookie_header = "; ".join(set_cookie_headers)
|
||||
form = {
|
||||
"email": settings.label_studio_username,
|
||||
"password": settings.label_studio_password,
|
||||
"csrfmiddlewaretoken": self.get_csrf_token(body),
|
||||
}
|
||||
headers = {
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Cookie": cookie_header,
|
||||
}
|
||||
login_response = await self.client.post("/user/login/", data=form, headers=headers)
|
||||
logger.info(f"response is: {login_response}, {login_response.text}")
|
||||
return login_response
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Login failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error while login: {e}", e)
|
||||
return None
|
||||
|
||||
|
||||
async def create_project(
|
||||
self,
|
||||
title: str,
|
||||
description: str = "",
|
||||
self,
|
||||
title: str,
|
||||
description: str = "",
|
||||
label_config: Optional[str] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""创建Label Studio项目"""
|
||||
try:
|
||||
logger.debug(f"Creating Label Studio project: {title}")
|
||||
logger.debug(f"Label Studio URL: {self.base_url}/api/projects")
|
||||
|
||||
|
||||
project_data = {
|
||||
"title": title,
|
||||
"description": description,
|
||||
"label_config": label_config or "<View></View>"
|
||||
}
|
||||
|
||||
|
||||
# Log the request body for debugging
|
||||
logger.debug(f"Request body: {project_data}")
|
||||
logger.debug(f"Label config being sent:\n{project_data['label_config']}")
|
||||
|
||||
|
||||
response = await self.client.post("/api/projects", json=project_data)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
project = response.json()
|
||||
project_id = project.get("id")
|
||||
|
||||
|
||||
if not project_id:
|
||||
raise Exception("Label Studio response does not contain project ID")
|
||||
|
||||
|
||||
logger.debug(f"Project created successfully, ID: {project_id}")
|
||||
return project
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(
|
||||
f"Create project failed - HTTP {e.response.status_code}\n"
|
||||
@@ -151,7 +186,7 @@ class Client:
|
||||
except Exception as e:
|
||||
logger.error(f"Error while creating Label Studio project: {str(e)}", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
async def import_tasks(
|
||||
self,
|
||||
project_id: int,
|
||||
@@ -162,7 +197,7 @@ class Client:
|
||||
"""批量导入任务到Label Studio项目"""
|
||||
try:
|
||||
logger.debug(f"Importing {len(tasks)} tasks into project {project_id}")
|
||||
|
||||
|
||||
response = await self.client.post(
|
||||
f"/api/projects/{project_id}/import",
|
||||
json=tasks,
|
||||
@@ -172,20 +207,20 @@ class Client:
|
||||
}
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
result = response.json()
|
||||
task_count = result.get("task_count", len(tasks))
|
||||
|
||||
|
||||
logger.debug(f"Tasks imported successfully: {task_count}")
|
||||
return result
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Import tasks failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error while importing tasks: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def create_tasks_batch(
|
||||
self,
|
||||
project_id: str,
|
||||
@@ -201,7 +236,7 @@ class Client:
|
||||
except Exception as e:
|
||||
logger.error(f"Error while creating tasks in batch: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def create_task(
|
||||
self,
|
||||
project_id: str,
|
||||
@@ -213,13 +248,13 @@ class Client:
|
||||
task = {"data": data}
|
||||
if meta:
|
||||
task["meta"] = meta
|
||||
|
||||
|
||||
return await self.create_tasks_batch(project_id, [task])
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error while creating single task: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def get_project_tasks(
|
||||
self,
|
||||
project_id: str,
|
||||
@@ -227,12 +262,12 @@ class Client:
|
||||
page_size: int = 1000
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""获取项目任务信息
|
||||
|
||||
|
||||
Args:
|
||||
project_id: 项目ID
|
||||
page: 页码(从1开始)。如果为None,则获取所有任务
|
||||
page_size: 每页大小
|
||||
|
||||
|
||||
Returns:
|
||||
如果指定了page参数,返回包含分页信息的字典:
|
||||
{
|
||||
@@ -242,9 +277,9 @@ class Client:
|
||||
"project_id": 项目ID,
|
||||
"tasks": 当前页的任务列表
|
||||
}
|
||||
|
||||
|
||||
如果page为None,返回包含所有任务的字典:
|
||||
|
||||
|
||||
"count": 总任务数,
|
||||
"project_id": 项目ID,
|
||||
"tasks": 所有任务列表
|
||||
@@ -252,11 +287,11 @@ class Client:
|
||||
"""
|
||||
try:
|
||||
pid = int(project_id)
|
||||
|
||||
|
||||
# 如果指定了page,直接获取单页任务
|
||||
if page is not None:
|
||||
logger.debug(f"Fetching tasks for project {pid}, page {page} (page_size={page_size})")
|
||||
|
||||
|
||||
response = await self.client.get(
|
||||
f"/api/tasks",
|
||||
params={
|
||||
@@ -266,9 +301,9 @@ class Client:
|
||||
}
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
result = response.json()
|
||||
|
||||
|
||||
# 返回单页结果,包含分页信息
|
||||
return {
|
||||
"count": result.get("total", len(result.get("tasks", []))),
|
||||
@@ -277,11 +312,11 @@ class Client:
|
||||
"project_id": pid,
|
||||
"tasks": result.get("tasks", [])
|
||||
}
|
||||
|
||||
|
||||
# 如果未指定page,获取所有任务
|
||||
logger.debug(f"(page) not specified, fetching all tasks.")
|
||||
all_tasks = []
|
||||
|
||||
|
||||
response = await self.client.get(
|
||||
f"/api/tasks",
|
||||
params={
|
||||
@@ -289,31 +324,31 @@ class Client:
|
||||
}
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
result = response.json()
|
||||
tasks = result.get("tasks", [])
|
||||
|
||||
|
||||
if not tasks:
|
||||
logger.debug(f"No tasks found for this project.")
|
||||
|
||||
|
||||
|
||||
all_tasks.extend(tasks)
|
||||
logger.debug(f"Fetched {len(tasks)} tasks.")
|
||||
|
||||
|
||||
# 返回所有任务,不包含分页信息
|
||||
return {
|
||||
"count": len(all_tasks),
|
||||
"project_id": pid,
|
||||
"tasks": all_tasks
|
||||
}
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"获取项目任务失败 HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"获取项目任务时发生错误: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def delete_task(
|
||||
self,
|
||||
task_id: int
|
||||
@@ -321,20 +356,20 @@ class Client:
|
||||
"""删除单个任务"""
|
||||
try:
|
||||
logger.debug(f"Deleting task: {task_id}")
|
||||
|
||||
|
||||
response = await self.client.delete(f"/api/tasks/{task_id}")
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
logger.debug(f"Task deleted: {task_id}")
|
||||
return True
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Delete task {task_id} failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Error while deleting task {task_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def delete_tasks_batch(
|
||||
self,
|
||||
task_ids: List[int]
|
||||
@@ -342,24 +377,24 @@ class Client:
|
||||
"""批量删除任务"""
|
||||
try:
|
||||
logger.debug(f"Deleting {len(task_ids)} tasks in batch")
|
||||
|
||||
|
||||
successful_deletions = 0
|
||||
failed_deletions = 0
|
||||
|
||||
|
||||
for task_id in task_ids:
|
||||
if await self.delete_task(task_id):
|
||||
successful_deletions += 1
|
||||
else:
|
||||
failed_deletions += 1
|
||||
|
||||
|
||||
logger.debug(f"Batch deletion finished: success {successful_deletions}, failed {failed_deletions}")
|
||||
|
||||
|
||||
return {
|
||||
"successful": successful_deletions,
|
||||
"failed": failed_deletions,
|
||||
"total": len(task_ids)
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error while deleting tasks in batch: {e}")
|
||||
return {
|
||||
@@ -367,72 +402,72 @@ class Client:
|
||||
"failed": len(task_ids),
|
||||
"total": len(task_ids)
|
||||
}
|
||||
|
||||
|
||||
async def get_project(self, project_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""获取项目信息"""
|
||||
try:
|
||||
logger.debug(f"Fetching project info: {project_id}")
|
||||
|
||||
|
||||
response = await self.client.get(f"/api/projects/{project_id}")
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
return response.json()
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Get project info failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error while getting project info: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def delete_project(self, project_id: int) -> bool:
|
||||
"""删除项目"""
|
||||
try:
|
||||
logger.debug(f"Deleting project: {project_id}")
|
||||
|
||||
|
||||
response = await self.client.delete(f"/api/projects/{project_id}")
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
logger.debug(f"Project deleted: {project_id}")
|
||||
return True
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Delete project {project_id} failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Error while deleting project {project_id}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def get_task_annotations(
|
||||
self,
|
||||
task_id: int
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
"""获取任务的标注结果
|
||||
|
||||
|
||||
Args:
|
||||
task_id: 任务ID
|
||||
|
||||
|
||||
Returns:
|
||||
标注结果列表,每个标注包含完整的annotation信息
|
||||
"""
|
||||
try:
|
||||
logger.debug(f"Fetching annotations for task: {task_id}")
|
||||
|
||||
|
||||
response = await self.client.get(f"/api/tasks/{task_id}/annotations")
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
annotations = response.json()
|
||||
logger.debug(f"Fetched {len(annotations)} annotations for task {task_id}")
|
||||
|
||||
|
||||
return annotations
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Get task annotations failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error while getting task annotations: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def create_annotation(
|
||||
self,
|
||||
task_id: int,
|
||||
@@ -440,111 +475,111 @@ class Client:
|
||||
completed_by: Optional[int] = None
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""为任务创建新的标注
|
||||
|
||||
|
||||
Args:
|
||||
task_id: 任务ID
|
||||
result: 标注结果列表
|
||||
completed_by: 完成标注的用户ID(可选)
|
||||
|
||||
|
||||
Returns:
|
||||
创建的标注信息,失败返回None
|
||||
"""
|
||||
try:
|
||||
logger.debug(f"Creating annotation for task: {task_id}")
|
||||
|
||||
|
||||
annotation_data = {
|
||||
"result": result,
|
||||
"task": task_id
|
||||
}
|
||||
|
||||
|
||||
if completed_by:
|
||||
annotation_data["completed_by"] = completed_by
|
||||
|
||||
|
||||
response = await self.client.post(
|
||||
f"/api/tasks/{task_id}/annotations",
|
||||
json=annotation_data
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
annotation = response.json()
|
||||
logger.debug(f"Created annotation {annotation.get('id')} for task {task_id}")
|
||||
|
||||
|
||||
return annotation
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Create annotation failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error while creating annotation: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def update_annotation(
|
||||
self,
|
||||
annotation_id: int,
|
||||
result: List[Dict[str, Any]]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""更新已存在的标注
|
||||
|
||||
|
||||
Args:
|
||||
annotation_id: 标注ID
|
||||
result: 新的标注结果列表
|
||||
|
||||
|
||||
Returns:
|
||||
更新后的标注信息,失败返回None
|
||||
"""
|
||||
try:
|
||||
logger.debug(f"Updating annotation: {annotation_id}")
|
||||
|
||||
|
||||
annotation_data = {
|
||||
"result": result
|
||||
}
|
||||
|
||||
|
||||
response = await self.client.patch(
|
||||
f"/api/annotations/{annotation_id}",
|
||||
json=annotation_data
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
annotation = response.json()
|
||||
logger.debug(f"Updated annotation {annotation_id}")
|
||||
|
||||
|
||||
return annotation
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Update annotation failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error while updating annotation: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def delete_annotation(
|
||||
self,
|
||||
annotation_id: int
|
||||
) -> bool:
|
||||
"""删除标注
|
||||
|
||||
|
||||
Args:
|
||||
annotation_id: 标注ID
|
||||
|
||||
|
||||
Returns:
|
||||
成功返回True,失败返回False
|
||||
"""
|
||||
try:
|
||||
logger.debug(f"Deleting annotation: {annotation_id}")
|
||||
|
||||
|
||||
response = await self.client.delete(f"/api/annotations/{annotation_id}")
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
logger.debug(f"Deleted annotation {annotation_id}")
|
||||
return True
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Delete annotation failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Error while deleting annotation: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def create_local_storage(
|
||||
self,
|
||||
project_id: int,
|
||||
@@ -555,7 +590,7 @@ class Client:
|
||||
description: Optional[str] = None
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""创建本地文件存储配置
|
||||
|
||||
|
||||
Args:
|
||||
project_id: Label Studio 项目 ID
|
||||
path: 本地文件路径(在 Label Studio 容器中的路径)
|
||||
@@ -563,37 +598,37 @@ class Client:
|
||||
use_blob_urls: 是否使用 blob URLs(建议 True)
|
||||
regex_filter: 文件过滤正则表达式(可选)
|
||||
description: 存储描述(可选)
|
||||
|
||||
|
||||
Returns:
|
||||
创建的存储配置信息,失败返回 None
|
||||
"""
|
||||
try:
|
||||
logger.debug(f"Creating local storage for project {project_id}: {path}")
|
||||
|
||||
|
||||
storage_data = {
|
||||
"project": project_id,
|
||||
"path": path,
|
||||
"title": title,
|
||||
"use_blob_urls": use_blob_urls
|
||||
}
|
||||
|
||||
|
||||
if regex_filter:
|
||||
storage_data["regex_filter"] = regex_filter
|
||||
if description:
|
||||
storage_data["description"] = description
|
||||
|
||||
|
||||
response = await self.client.post(
|
||||
"/api/storages/localfiles/",
|
||||
json=storage_data
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
|
||||
storage = response.json()
|
||||
storage_id = storage.get("id")
|
||||
|
||||
|
||||
logger.debug(f"Local storage created successfully, ID: {storage_id}")
|
||||
return storage
|
||||
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Create local storage failed HTTP {e.response.status_code}: {e.response.text}")
|
||||
return None
|
||||
|
||||
@@ -2,7 +2,7 @@ from typing import Optional
|
||||
import math
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path, Response
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
@@ -29,6 +29,39 @@ router = APIRouter(
|
||||
)
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@router.get("/{mapping_id}/login")
|
||||
async def list_mappings(
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
try:
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
target_response = await ls_client.login_label_studio()
|
||||
headers = dict(target_response.headers)
|
||||
set_cookies = target_response.headers.get_list("set-cookie")
|
||||
|
||||
# 删除合并的 Set-Cookie
|
||||
if "set-cookie" in headers:
|
||||
del headers["set-cookie"]
|
||||
|
||||
# 创建新响应,添加多个 Set-Cookie
|
||||
response = Response(
|
||||
content=target_response.content,
|
||||
status_code=target_response.status_code,
|
||||
headers=headers
|
||||
)
|
||||
|
||||
# 分别添加每个 Set-Cookie
|
||||
for cookie in set_cookies:
|
||||
response.headers.append("set-cookie", cookie)
|
||||
|
||||
return response
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error while logining in LabelStudio: {e}", e)
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@router.post("", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
|
||||
async def create_mapping(
|
||||
request: DatasetMappingCreateRequest,
|
||||
@@ -36,12 +69,12 @@ async def create_mapping(
|
||||
):
|
||||
"""
|
||||
创建数据集映射
|
||||
|
||||
|
||||
根据指定的DM程序中的数据集,创建Label Studio中的数据集,
|
||||
在数据库中记录这一关联关系,返回Label Studio数据集的ID
|
||||
|
||||
|
||||
注意:一个数据集可以创建多个标注项目
|
||||
|
||||
|
||||
支持通过 template_id 指定标注模板,如果提供了模板ID,则使用模板的配置
|
||||
"""
|
||||
try:
|
||||
@@ -51,9 +84,9 @@ async def create_mapping(
|
||||
mapping_service = DatasetMappingService(db)
|
||||
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
||||
template_service = AnnotationTemplateService()
|
||||
|
||||
|
||||
logger.info(f"Create dataset mapping request: {request.dataset_id}")
|
||||
|
||||
|
||||
# 从DM服务获取数据集信息
|
||||
dataset_info = await dm_client.get_dataset(request.dataset_id)
|
||||
if not dataset_info:
|
||||
@@ -61,11 +94,11 @@ async def create_mapping(
|
||||
status_code=404,
|
||||
detail=f"Dataset not found in DM service: {request.dataset_id}"
|
||||
)
|
||||
|
||||
|
||||
project_name = request.name or \
|
||||
dataset_info.name or \
|
||||
"A new project from DataMate"
|
||||
|
||||
|
||||
project_description = request.description or \
|
||||
dataset_info.description or \
|
||||
f"Imported from DM dataset {dataset_info.name} ({dataset_info.id})"
|
||||
@@ -89,15 +122,15 @@ async def create_mapping(
|
||||
description=project_description,
|
||||
label_config=label_config # 传递模板配置
|
||||
)
|
||||
|
||||
|
||||
if not project_data:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Fail to create Label Studio project."
|
||||
)
|
||||
|
||||
|
||||
project_id = project_data["id"]
|
||||
|
||||
|
||||
# 配置本地存储:dataset/<id>
|
||||
local_storage_path = f"{settings.label_studio_local_document_root}/{request.dataset_id}"
|
||||
storage_result = await ls_client.create_local_storage(
|
||||
@@ -107,7 +140,7 @@ async def create_mapping(
|
||||
use_blob_urls=True,
|
||||
description=f"Local storage for dataset {dataset_info.name}"
|
||||
)
|
||||
|
||||
|
||||
if not storage_result:
|
||||
# 本地存储配置失败,记录警告但不中断流程
|
||||
logger.warning(f"Failed to configure local storage for project {project_id}")
|
||||
@@ -124,28 +157,28 @@ async def create_mapping(
|
||||
|
||||
# 创建映射关系,包含项目名称(先持久化映射以获得 mapping.id)
|
||||
mapping = await mapping_service.create_mapping(labeling_project)
|
||||
|
||||
|
||||
# 进行一次同步,使用创建后的 mapping.id
|
||||
await sync_service.sync_dataset_files(mapping.id, 100)
|
||||
|
||||
|
||||
response_data = DatasetMappingCreateResponse(
|
||||
id=mapping.id,
|
||||
labeling_project_id=str(mapping.labeling_project_id),
|
||||
labeling_project_name=mapping.name or project_name
|
||||
)
|
||||
|
||||
|
||||
return StandardResponse(
|
||||
code=201,
|
||||
message="success",
|
||||
data=response_data
|
||||
)
|
||||
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error while creating dataset mapping: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
|
||||
@router.get("", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
||||
async def list_mappings(
|
||||
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
||||
@@ -155,10 +188,10 @@ async def list_mappings(
|
||||
):
|
||||
"""
|
||||
查询所有映射关系(分页)
|
||||
|
||||
|
||||
返回所有有效的数据集映射关系(未被软删除的),支持分页查询。
|
||||
可选择是否包含完整的标注模板信息(默认不包含,以提高列表查询性能)。
|
||||
|
||||
|
||||
参数:
|
||||
- page: 页码(从1开始)
|
||||
- pageSize: 每页记录数
|
||||
@@ -166,12 +199,12 @@ async def list_mappings(
|
||||
"""
|
||||
try:
|
||||
service = DatasetMappingService(db)
|
||||
|
||||
|
||||
# 计算 skip
|
||||
skip = (page - 1) * page_size
|
||||
|
||||
|
||||
logger.info(f"List mappings: page={page}, page_size={page_size}, include_template={include_template}")
|
||||
|
||||
|
||||
# 获取数据和总数
|
||||
mappings, total = await service.get_all_mappings_with_count(
|
||||
skip=skip,
|
||||
@@ -179,10 +212,10 @@ async def list_mappings(
|
||||
include_deleted=False,
|
||||
include_template=include_template
|
||||
)
|
||||
|
||||
|
||||
# 计算总页数
|
||||
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
||||
|
||||
|
||||
# 构造分页响应
|
||||
paginated_data = PaginatedData(
|
||||
page=page,
|
||||
@@ -191,15 +224,15 @@ async def list_mappings(
|
||||
total_pages=total_pages,
|
||||
content=mappings
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"List mappings: page={page}, returned {len(mappings)}/{total}, templates_included: {include_template}")
|
||||
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data=paginated_data
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error listing mappings: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
@@ -211,7 +244,7 @@ async def get_mapping(
|
||||
):
|
||||
"""
|
||||
根据 UUID 查询单个映射关系(包含关联的标注模板详情)
|
||||
|
||||
|
||||
返回数据集映射关系以及关联的完整标注模板信息,包括:
|
||||
- 映射基本信息
|
||||
- 数据集信息
|
||||
@@ -220,26 +253,26 @@ async def get_mapping(
|
||||
"""
|
||||
try:
|
||||
service = DatasetMappingService(db)
|
||||
|
||||
|
||||
logger.info(f"Get mapping with template details: {mapping_id}")
|
||||
|
||||
|
||||
# 获取映射,并包含完整的模板信息
|
||||
mapping = await service.get_mapping_by_uuid(mapping_id, include_template=True)
|
||||
|
||||
|
||||
if not mapping:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Mapping not found: {mapping_id}"
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Found mapping: {mapping.id}, template_included: {mapping.template is not None}")
|
||||
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data=mapping
|
||||
)
|
||||
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
@@ -256,10 +289,10 @@ async def get_mappings_by_source(
|
||||
):
|
||||
"""
|
||||
根据源数据集 ID 查询所有映射关系(分页,包含模板详情)
|
||||
|
||||
|
||||
返回该数据集创建的所有标注项目(不包括已删除的),支持分页查询。
|
||||
默认包含关联的完整标注模板信息。
|
||||
|
||||
|
||||
参数:
|
||||
- dataset_id: 数据集ID
|
||||
- page: 页码(从1开始)
|
||||
@@ -268,12 +301,12 @@ async def get_mappings_by_source(
|
||||
"""
|
||||
try:
|
||||
service = DatasetMappingService(db)
|
||||
|
||||
|
||||
# 计算 skip
|
||||
skip = (page - 1) * page_size
|
||||
|
||||
|
||||
logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, page_size={page_size}, include_template={include_template}")
|
||||
|
||||
|
||||
# 获取数据和总数(包含模板信息)
|
||||
mappings, total = await service.get_mappings_by_source_with_count(
|
||||
dataset_id=dataset_id,
|
||||
@@ -281,10 +314,10 @@ async def get_mappings_by_source(
|
||||
limit=page_size,
|
||||
include_template=include_template
|
||||
)
|
||||
|
||||
|
||||
# 计算总页数
|
||||
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
||||
|
||||
|
||||
# 构造分页响应
|
||||
paginated_data = PaginatedData(
|
||||
page=page,
|
||||
@@ -293,15 +326,15 @@ async def get_mappings_by_source(
|
||||
total_pages=total_pages,
|
||||
content=mappings
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}, templates_included: {include_template}")
|
||||
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data=paginated_data
|
||||
)
|
||||
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
@@ -328,24 +361,24 @@ async def delete_mapping(
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
service = DatasetMappingService(db)
|
||||
|
||||
|
||||
# 使用 mapping UUID 查询映射记录
|
||||
logger.debug(f"Deleting by mapping UUID: {project_id}")
|
||||
mapping = await service.get_mapping_by_uuid(project_id)
|
||||
|
||||
logger.debug(f"Mapping lookup result: {mapping}")
|
||||
|
||||
|
||||
if not mapping:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Mapping either not found or not specified."
|
||||
)
|
||||
|
||||
|
||||
id = mapping.id
|
||||
labeling_project_id = mapping.labeling_project_id
|
||||
|
||||
logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}")
|
||||
|
||||
|
||||
# 1. 删除 Label Studio 项目
|
||||
try:
|
||||
logger.debug(f"Deleting Label Studio project: {labeling_project_id}")
|
||||
@@ -357,11 +390,11 @@ async def delete_mapping(
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting Label Studio project: {e}")
|
||||
# 继续执行,即使 Label Studio 项目删除失败也要删除映射记录
|
||||
|
||||
|
||||
# 2. 软删除映射记录
|
||||
soft_delete_success = await service.soft_delete_mapping(id)
|
||||
logger.debug(f"Soft delete result for mapping {id}: {soft_delete_success}")
|
||||
|
||||
|
||||
if not soft_delete_success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
@@ -378,7 +411,7 @@ async def delete_mapping(
|
||||
status="success"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user