feat(dataset): 添加PDF文本提取功能支持

- 新增dataset模块路由配置
- 添加PdfTextExtractRequest和PdfTextExtractResponse数据传输对象
- 实现PDF文本提取接口，支持从PDF文件中提取文本内容
- 集成数据库会话管理和异步处理能力
This commit is contained in:
2026-01-29 11:44:50 +08:00
parent 5eafcf0145
commit ab957ab53d
7 changed files with 198 additions and 52 deletions

View File

@@ -0,0 +1,22 @@
from fastapi import APIRouter, Depends
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.module.dataset.schema.pdf_extract import PdfTextExtractRequest, PdfTextExtractResponse
from app.module.dataset.service.pdf_extract import PdfTextExtractService
from app.module.shared.schema.common import StandardResponse
# Router for the dataset module's endpoints. The prefix is left empty, so
# routes are registered at their literal paths relative to wherever this
# router is included; every route is tagged "dataset".
router = APIRouter(prefix="", tags=["dataset"])
@router.post("/pdf-text-extract", response_model=StandardResponse[PdfTextExtractResponse])
async def extract_pdf_text(
    request: PdfTextExtractRequest,
    db: AsyncSession = Depends(get_db),
):
    """Handle ``POST /pdf-text-extract``.

    Builds a ``PdfTextExtractService`` bound to the injected database
    session, awaits ``extract_pdf_to_text`` with the request's
    ``dataset_id`` and ``file_id``, and wraps whatever the service returns
    in a ``StandardResponse``.

    Args:
        request: Request body carrying ``dataset_id`` and ``file_id``.
        db: Async database session supplied by the ``get_db`` dependency.

    Returns:
        A ``StandardResponse`` with ``code=200``, ``message="Success"`` and
        the service result as ``data``.

    Note:
        No exceptions are caught here; anything raised by the service
        propagates to the framework's error handling.
    """
    extracted = await PdfTextExtractService(db).extract_pdf_to_text(
        request.dataset_id,
        request.file_id,
    )
    return StandardResponse(code=200, message="Success", data=extracted)