feature: add external PDF extraction operator using MinerU (#36)

* feature: add UnstructuredFormatter

* feature: add UnstructuredFormatter in db

* feature: add unstructured[docx]==0.18.15

* feature: support doc

* feature: add mineru

* feature: add external PDF extraction operator using MinerU

* feature: fix MinerU Docker installation bug

---------

Co-authored-by: Startalker <438747480@qq.com>
This commit is contained in:
Startalker
2025-10-30 15:55:10 +08:00
committed by GitHub
parent 2f7341dc1f
commit 155603b1ca
12 changed files with 370 additions and 3 deletions

View File

@@ -77,6 +77,8 @@ head:
value: "password"
- name: MYSQL_DATABASE
value: "datamate"
- name: PDF_FORMATTER_BASE_URL
value: "http://datamate-mineru:9001"
# - name: EXAMPLE_ENV
# value: "1"
envFrom: []
@@ -154,6 +156,8 @@ head:
value: "password"
- name: MYSQL_DATABASE
value: "datamate"
- name: PDF_FORMATTER_BASE_URL
value: "http://datamate-mineru:9001"
ports:
- containerPort: 8081
volumeMounts:
@@ -221,6 +225,8 @@ worker:
value: "password"
- name: MYSQL_DATABASE
value: "datamate"
- name: PDF_FORMATTER_BASE_URL
value: "http://datamate-mineru:9001"
# - name: EXAMPLE_ENV
# value: "1"
envFrom: []