You've already forked DataMate
feat(dataset): 添加PDF文本提取功能支持
- 新增dataset模块路由配置
- 添加PdfTextExtractRequest和PdfTextExtractResponse数据传输对象
- 实现PDF文本提取接口,支持从PDF文件中提取文本内容
- 集成数据库会话管理和异步处理能力
This commit is contained in:
@@ -6,6 +6,7 @@ from .ratio.interface import router as ratio_router
|
||||
from .generation.interface import router as generation_router
|
||||
from .evaluation.interface import router as evaluation_router
|
||||
from .collection.interface import router as collection_route
|
||||
from .dataset.interface import router as dataset_router
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/api"
|
||||
@@ -17,5 +18,6 @@ router.include_router(ratio_router)
|
||||
router.include_router(generation_router)
|
||||
router.include_router(evaluation_router)
|
||||
router.include_router(collection_route)
|
||||
router.include_router(dataset_router)
|
||||
|
||||
__all__ = ["router"]
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.module.dataset.schema.pdf_extract import PdfTextExtractRequest, PdfTextExtractResponse
|
||||
from app.module.dataset.service.pdf_extract import PdfTextExtractService
|
||||
from app.module.shared.schema.common import StandardResponse
|
||||
|
||||
# Router for the dataset module's endpoints; it adds no path prefix of its
# own and groups its routes under the "dataset" tag.
router = APIRouter(prefix="", tags=["dataset"])
|
||||
|
||||
|
||||
@router.post("/pdf-text-extract", response_model=StandardResponse[PdfTextExtractResponse])
async def extract_pdf_text(
    request: PdfTextExtractRequest,
    db: AsyncSession = Depends(get_db),
):
    """Handle POST ``/pdf-text-extract`` on the dataset router.

    Args:
        request: Request body; its ``dataset_id`` and ``file_id`` fields are
            forwarded to the extraction service.
        db: Async database session injected through the ``get_db`` dependency.

    Returns:
        A ``StandardResponse`` with ``code=200``, ``message="Success"`` and the
        service result as ``data``.

    NOTE(review): no exceptions are caught here, so anything raised by the
    service propagates to the framework's handlers; confirm that is intended.
    """
    # Bind the service to this request's session and delegate the whole
    # operation to it; the handler itself holds no extraction logic.
    extracted = await PdfTextExtractService(db).extract_pdf_to_text(
        request.dataset_id,
        request.file_id,
    )
    return StandardResponse(code=200, message="Success", data=extracted)
|
||||
Reference in New Issue
Block a user