feature: add unstructured formatter operator for doc/docx (#17)

* feature: add UnstructuredFormatter

* feature: add UnstructuredFormatter in db

* feature: add unstructured[docx]==0.18.15

* feature: support doc

---------

Co-authored-by: Startalker <438747480@qq.com>
This commit is contained in:
Startalker
2025-10-23 16:49:03 +08:00
committed by GitHub
parent c52702b073
commit f86d4fae25
7 changed files with 63 additions and 3 deletions

View File

@@ -7,7 +7,7 @@ ENV PYTHONPATH=/opt/runtime/datamate/
# Rewrite the apt source list to use mirrors.huaweicloud.com instead of
# deb.debian.org, install the system packages the runtime needs, then run
# `apt clean` and delete /var/lib/apt/lists/* within the same RUN instruction.
# NOTE(review): the two `apt install` lines below differ only by the trailing
# `libreoffice`; they look like the before/after sides of this diff hunk, so
# confirm that only the second one is present in the actual Dockerfile.
# NOTE(review): libreoffice is presumably what the new .doc support relies on
# for conversion — confirm against the UnstructuredFormatter operator.
RUN sed -i 's/deb.debian.org/mirrors.huaweicloud.com/g' /etc/apt/sources.list.d/debian.sources \
&& apt update \
&& apt install -y libgl1 libglib2.0-0 vim poppler-utils tesseract-ocr tesseract-ocr-chi-sim libmagic1t64 \
&& apt install -y libgl1 libglib2.0-0 vim poppler-utils tesseract-ocr tesseract-ocr-chi-sim libmagic1t64 libreoffice\
&& apt clean \
&& rm -rf /var/lib/apt/lists/*