Files
DataMate/scripts/images/unstructured/app.py
2025-10-21 23:00:48 +08:00

62 lines
1.7 KiB
Python

import asyncio
import os
from typing import Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from unstructured.partition.auto import partition
# ASGI application object; the route decorator below registers on it and the
# __main__ block at the bottom of the file passes it to uvicorn.
app = FastAPI(title="unstructured")
class FileProcessingRequest(BaseModel):
    """Request body for the file-processing endpoint."""

    # Path of the file to process. Declared optional so that a missing value
    # reaches the handler, which rejects it with its own 400 response.
    # NOTE(review): the field is named ``file_path`` while the handler's error
    # message and response use ``filePath`` - confirm which spelling clients
    # actually send.
    file_path: Optional[str] = None

    # Further optional fields can be added here.
@app.post("/process", tags=["文件处理"])
async def process_file(request_data: FileProcessingRequest):
    """Extract the text content of a file and return it.

    Args:
        request_data: Request body; its ``file_path`` names the file to
            process.

    Returns:
        A dict with ``filePath`` (the path as received), ``text`` (the
        extracted text) and ``status`` (always ``"success"``).

    Raises:
        HTTPException: 400 when ``file_path`` is missing or empty, 404 when
            it does not refer to an existing regular file, 500 when the
            extraction itself fails.
    """
    # NOTE(review): the path is taken verbatim from the request body, so any
    # file readable by this process can be extracted. Restrict it to an
    # allowed base directory if untrusted clients can reach this endpoint.
    file_path = request_data.file_path
    if not file_path:
        raise HTTPException(status_code=400, detail="缺少必要参数: filePath")

    # isfile() rather than exists(): a directory also "exists" but is not a
    # file to extract, so reject it here instead of letting it surface as a
    # generic 500 from the extraction step.
    if not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail=f"文件不存在: {file_path}")

    # Only the extraction is guarded; the validation errors above are raised
    # directly and never pass through the generic 500 handler.
    try:
        text_content = await process_file_async(file_path)
    except HTTPException:
        # Pass through any HTTP error unchanged (bare raise keeps the traceback).
        raise
    except Exception as exc:
        # Chain the cause so the original traceback is kept.
        raise HTTPException(
            status_code=500, detail=f"process failed: {exc!s}"
        ) from exc

    return {
        "filePath": file_path,
        "text": text_content,
        "status": "success",
    }
async def process_file_async(file_path: str) -> str:
    """Run the blocking extraction off the event loop and await its result.

    Args:
        file_path: Path of the file to process.

    Returns:
        The text produced by ``partition_file_sync`` for ``file_path``.
    """
    # get_running_loop() is the supported way to obtain the loop from inside
    # a coroutine; get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    # Passing None as the executor selects the loop's default executor.
    return await loop.run_in_executor(None, partition_file_sync, file_path)
def partition_file_sync(file_path: str) -> str:
    """Partition a file synchronously and return its elements as one string.

    Called from ``process_file_async`` through an executor.

    Args:
        file_path: Path handed to ``partition`` as ``filename``.

    Returns:
        The string form of every element, separated by blank lines.
    """
    pieces = map(str, partition(filename=file_path))
    return "\n\n".join(pieces)
if __name__ == "__main__":
    # Imported here so uvicorn is only required when the module is run as a
    # script rather than imported.
    import uvicorn

    # Listen on all interfaces, port 8000.
    uvicorn.run(app=app, host="0.0.0.0", port=8000)