Files
DataMate/runtime/datamate-python/app/module/annotation/interface/task.py
Jerry Yan 6a4c4ae3d7 feat(auth): 为数据管理和RAG服务增加资源访问控制
- 在DatasetApplicationService中注入ResourceAccessService并添加所有权验证
- 在KnowledgeSetApplicationService中注入ResourceAccessService并添加所有权验证
- 修改DatasetRepository接口和实现类,增加按创建者过滤的方法
- 修改KnowledgeSetRepository接口和实现类,增加按创建者过滤的方法
- 在RAG索引器服务中添加知识库访问权限检查和作用域过滤
- 更新实体元对象处理器以使用请求用户上下文获取当前用户
- 在前端设置页面添加用户权限管理功能和角色权限控制
- 为Python标注服务增加用户上下文和数据集访问权限验证
2026-02-06 14:58:46 +08:00

247 lines
8.7 KiB
Python

from fastapi import APIRouter, Depends, HTTPException, Query, Path
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional, Dict, Any
from datetime import datetime
from pydantic import BaseModel, Field, ConfigDict
from app.db.session import get_db
from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from app.core.config import settings
from ..security import (
RequestUserContext,
assert_dataset_access,
get_request_user_context,
)
from ..service.mapping import DatasetMappingService
from ..schema import (
SyncDatasetRequest,
SyncDatasetResponse,
SyncAnnotationsRequest,
SyncAnnotationsResponse,
UpdateFileTagsRequest,
UpdateFileTagsResponse,
UpdateFileTagsRequest,
UpdateFileTagsResponse
)
# Router that groups the annotation task endpoints; every route declared in
# this module is mounted under the "/task" prefix.
router = APIRouter(prefix="/task", tags=["annotation/task"])

# Logger obtained for this module's name; shared by all handlers below.
logger = get_logger(__name__)
@router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
async def sync_dataset_content(
    request: SyncDatasetRequest,
    db: AsyncSession = Depends(get_db),
    user_context: RequestUserContext = Depends(get_request_user_context),
):
    """
    Sync dataset content (files and annotations).

    Embedded editor mode: the task list reads the DataMate dataset files
    directly, so no synchronisation with a Label Studio server happens here.
    The handler only validates the mapping and the caller's access to the
    underlying dataset, then reports the dataset's file count.

    Args:
        request: Sync request; ``request.id`` is the mapping UUID to look up.
        db: Async database session injected by FastAPI.
        user_context: Context of the requesting user, passed to the dataset
            access check.

    Returns:
        A ``StandardResponse`` with code 200 wrapping a ``SyncDatasetResponse``
        whose ``synced_files`` is always 0 and whose ``total_files`` comes from
        the dataset's ``fileCount`` attribute (0 when absent or falsy).

    Raises:
        HTTPException: 404 when no mapping matches ``request.id``; 500 with a
            generic message on any unexpected error. HTTPExceptions raised by
            the access check propagate unchanged.
    """
    try:
        mappings = DatasetMappingService(db)

        logger.debug(f"Sync dataset content request: mapping_id={request.id}, sync_annotations={request.sync_annotations}")

        # Resolve the mapping first; everything else hangs off its dataset id.
        mapping = await mappings.get_mapping_by_uuid(request.id)
        if not mapping:
            raise HTTPException(
                status_code=404,
                detail=f"Mapping not found: {request.id}",
            )

        await assert_dataset_access(db, mapping.dataset_id, user_context)

        dataset_service = DatasetManagementService(db)
        dataset = await dataset_service.get_dataset(mapping.dataset_id)

        # A missing dataset or a missing/None/zero fileCount all count as 0.
        if dataset:
            total_files = int(getattr(dataset, "fileCount", 0) or 0)
        else:
            total_files = 0

        payload = SyncDatasetResponse(
            id=mapping.id,
            status="success",
            synced_files=0,
            total_files=total_files,
            message="内嵌编辑器模式:任务列表直接读取数据集文件,无需同步(已忽略 syncAnnotations 等参数)",
        )

        logger.info(f"Embedded editor mode: sync is a no-op, mapping={mapping.id}, total_files={total_files}")

        return StandardResponse(code=200, message="success", data=payload)

    except HTTPException:
        # Deliberate HTTP errors (404 above, access check) pass through as-is.
        raise
    except Exception as exc:
        logger.error(f"Error syncing dataset content: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/annotation/sync", response_model=StandardResponse[SyncAnnotationsResponse])
async def sync_annotations(
    request: SyncAnnotationsRequest,
    db: AsyncSession = Depends(get_db),
    user_context: RequestUserContext = Depends(get_request_user_context),
):
    """
    Sync annotations only (compatibility endpoint, currently a no-op).

    In embedded editor mode the annotation results are stored by DataMate
    directly, so nothing is exchanged with Label Studio. The handler validates
    the mapping and the caller's access to its dataset, then returns a response
    whose counters are all zero. The request fields ``direction``,
    ``overwrite`` and ``overwrite_labeling_project`` are only logged.

    Args:
        request: Sync request; ``request.id`` is the mapping UUID to look up.
        db: Async database session injected by FastAPI.
        user_context: Context of the requesting user, passed to the dataset
            access check.

    Returns:
        A ``StandardResponse`` with code 200 wrapping a
        ``SyncAnnotationsResponse`` with all counters set to 0.

    Raises:
        HTTPException: 404 when no mapping matches ``request.id``; 500 with a
            generic message on any unexpected error. HTTPExceptions raised by
            the access check propagate unchanged.
    """
    try:
        mapping_service = DatasetMappingService(db)

        logger.info(f"Sync annotations request: mapping_id={request.id}, direction={request.direction}, overwrite={request.overwrite}, overwrite_ls={request.overwrite_labeling_project}")

        # Make sure the mapping exists before checking access to its dataset.
        mapping = await mapping_service.get_mapping_by_uuid(request.id)
        if not mapping:
            raise HTTPException(
                status_code=404,
                detail=f"Mapping not found: {request.id}"
            )

        await assert_dataset_access(db, mapping.dataset_id, user_context)

        result = SyncAnnotationsResponse(
            id=mapping.id,
            status="success",
            synced_to_dm=0,
            synced_to_ls=0,
            skipped=0,
            failed=0,
            conflicts_resolved=0,
            message="内嵌编辑器模式:标注结果由 DataMate 直接存储,无需与 Label Studio 同步(该接口为兼容保留,当前为 no-op)",
        )

        logger.info(f"Embedded editor mode: annotation sync is a no-op, mapping={mapping.id}")

        return StandardResponse(
            code=200,
            message="success",
            data=result
        )

    except HTTPException:
        # Deliberate HTTP errors (404 above, access check) pass through as-is.
        raise
    except Exception as exc:
        # Keep the full traceback in the server log, but do not echo internal
        # exception text to the client (matches sync_dataset_content).
        logger.exception(f"Error syncing annotations: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error") from exc
@router.get("/check-ls-connection")
async def check_label_studio_connection():
    """
    Report that the Label Studio connection diagnostic is disabled.

    The service runs in embedded editor mode and does not need a Label Studio
    server, so this endpoint performs no connection check. It is kept so that
    existing callers still get a well-formed answer.

    Returns:
        A ``StandardResponse`` with code 200 whose ``data`` is a dict with
        ``status`` set to ``"disabled"`` and an explanatory ``message``.

    Raises:
        HTTPException: 500 with a generic message if building the response
            fails unexpectedly.
    """
    try:
        return StandardResponse(
            code=200,
            message="success",
            data={
                "status": "disabled",
                "message": "当前为内嵌编辑器模式:不需要 Label Studio Server,该诊断接口已停用",
            },
        )
    except Exception as exc:
        # Log the details server-side; the client only gets a generic message
        # so internal exception text is not exposed.
        logger.exception(f"Error checking Label Studio connection: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error") from exc
@router.put(
    "/{file_id}",
    response_model=StandardResponse[UpdateFileTagsResponse],
)
async def update_file_tags(
    request: UpdateFileTagsRequest,
    file_id: str = Path(..., description="文件ID"),
    db: AsyncSession = Depends(get_db),
    user_context: RequestUserContext = Depends(get_request_user_context),
):
    """
    Update file tags (partial update with automatic format conversion).

    Hands the submitted tags to ``update_file_tags_partial`` for the given
    file and returns the file's tag list as stored afterwards. Per the
    original endpoint description, only the submitted tags are modified and
    ``tags_updated_at`` is refreshed; that logic lives in the service.

    Two tag formats are accepted:

    1. Simplified format (submitted by external users):
       [{"from_name": "label", "to_name": "image", "values": ["cat", "dog"]}]

    2. Full format (internal storage):
       [{"id": "...", "from_name": "label", "to_name": "image", "type": "choices",
         "value": {"choices": ["cat", "dog"]}}]

    When a template is associated with the file's dataset, its id is passed to
    the service so the simplified format can be converted to the full format.

    Args:
        request: Body carrying the tags to apply (``request.tags``).
        file_id: Id of the file whose tags are updated.
        db: Async database session injected by FastAPI.
        user_context: Context of the requesting user, passed to the dataset
            access check.

    Returns:
        A ``StandardResponse`` with code 200 wrapping an
        ``UpdateFileTagsResponse`` (file id, current tags, update timestamp).

    Raises:
        HTTPException: 404 when the file does not exist (before or after the
            update) or when the service reports a "not found" error; 500 for
            any other service-reported failure. HTTPExceptions raised by the
            access check propagate unchanged.
    """
    dataset_service = DatasetManagementService(db)

    from sqlalchemy.future import select
    from app.db.models import DatasetFiles
    from ..service.mapping import DatasetMappingService

    async def _require_file():
        # Load the file row by id, or fail with 404 when it does not exist.
        rows = await db.execute(
            select(DatasetFiles).where(DatasetFiles.id == file_id)
        )
        record = rows.scalar_one_or_none()
        if not record:
            raise HTTPException(status_code=404, detail=f"File not found: {file_id}")
        return record

    # The owning dataset is needed for the access check and template lookup.
    current_file = await _require_file()
    dataset_id = str(current_file.dataset_id)  # type: ignore - Convert Column to str
    await assert_dataset_access(db, dataset_id, user_context)

    # Look up the template associated with the dataset, if any.
    template_id = await DatasetMappingService(db).get_template_id_by_dataset_id(dataset_id)
    if not template_id:
        logger.warning(f"No template found for dataset {dataset_id}, tags must be in full format")
    else:
        logger.info(f"Found template {template_id} for dataset {dataset_id}, will auto-convert tag format")

    # Apply the partial update; a template id enables automatic conversion.
    ok, failure, updated_at = await dataset_service.update_file_tags_partial(
        file_id=file_id,
        new_tags=request.tags,
        template_id=template_id,
    )

    if not ok:
        reported_missing = "not found" in (failure or "").lower()
        if reported_missing:
            raise HTTPException(status_code=404, detail=failure)
        raise HTTPException(status_code=500, detail=failure or "更新标签失败")

    # Re-read the row so the response carries the complete, stored tag list.
    refreshed_file = await _require_file()

    return StandardResponse(
        code=200,
        message="标签更新成功",
        data=UpdateFileTagsResponse(
            fileId=file_id,
            tags=refreshed_file.tags or [],  # type: ignore
            tagsUpdatedAt=updated_at or datetime.now(),
        ),
    )