You've already forked DataMate
feature: add external pdf extract operator by using mineru (#36)
* feature: add UnstructuredFormatter
* feature: add UnstructuredFormatter in db
* feature: add unstructured[docx]==0.18.15
* feature: support doc
* feature: add mineru
* feature: add external pdf extract operator by using mineru
* feature: mineru docker install bugfix

---------

Co-authored-by: Startalker <438747480@qq.com>
This commit is contained in:
@@ -60,6 +60,7 @@ services:
|
||||
MYSQL_USER: "root"
|
||||
MYSQL_PASSWORD: "password"
|
||||
MYSQL_DATABASE: "datamate"
|
||||
PDF_FORMATTER_BASE_URL: "http://datamate-mineru:9001"
|
||||
command:
|
||||
- python
|
||||
- /opt/runtime/datamate/operator_runtime.py
|
||||
@@ -72,6 +73,27 @@ services:
|
||||
- flow_volume:/flow
|
||||
networks: [ datamate ]
|
||||
|
||||
# 4) mineru
# PDF extraction API service. It is assigned to the `mineru` Compose profile,
# so it only starts when that profile is enabled. The runtime service points
# at it through PDF_FORMATTER_BASE_URL (http://datamate-mineru:9001).
datamate-mineru:
  image: datamate-mineru
  container_name: datamate-mineru
  profiles:
    - mineru
  networks:
    - datamate
  restart: on-failure
  # NOTE(review): privileged mode is presumably for accelerator device access
  # (cuda/npu); it looks unnecessary for the default cpu mode. Confirm.
  privileged: true
  environment:
    MINERU_BACKEND_MODE: pipeline
    MINERU_DEVICE_MODE: cpu  # cpu|cuda|npu|mps
    MINERU_MODEL_SOURCE: local
  # The port passed here must match the one in PDF_FORMATTER_BASE_URL.
  command: ["python", "/opt/runtime/datamate/mineru/mineru_api.py", "--port", "9001"]
  volumes:
    - dataset_volume:/dataset
    # NOTE(review): mineru_log_volume must be declared under the top-level
    # `volumes:` key; that declaration is not visible in this diff. Verify.
    - mineru_log_volume:/var/log/datamate/mineru
|
||||
|
||||
volumes:
|
||||
dataset_volume:
|
||||
name: datamate-dataset-volume
|
||||
|
||||
@@ -77,6 +77,8 @@ head:
|
||||
value: "password"
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
- name: PDF_FORMATTER_BASE_URL
|
||||
value: "http://datamate-mineru:9001"
|
||||
# - name: EXAMPLE_ENV
|
||||
# value: "1"
|
||||
envFrom: []
|
||||
@@ -154,6 +156,8 @@ head:
|
||||
value: "password"
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
- name: PDF_FORMATTER_BASE_URL
|
||||
value: "http://datamate-mineru:9001"
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
volumeMounts:
|
||||
@@ -221,6 +225,8 @@ worker:
|
||||
value: "password"
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
- name: PDF_FORMATTER_BASE_URL
|
||||
value: "http://datamate-mineru:9001"
|
||||
# - name: EXAMPLE_ENV
|
||||
# value: "1"
|
||||
envFrom: []
|
||||
|
||||
70
deployment/kubernetes/mineru/deploy.yaml
Normal file
70
deployment/kubernetes/mineru/deploy.yaml
Normal file
@@ -0,0 +1,70 @@
|
||||
# Deployment for the MinerU PDF extraction API: one replica, listening on
# container port 9001.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datamate-mineru
  labels: {app: datamate, tier: mineru}
spec:
  replicas: 1
  selector:
    # Must match the pod template labels below.
    matchLabels: {app: datamate, tier: mineru}
  template:
    metadata:
      labels: {app: datamate, tier: mineru}
    spec:
      volumes:
        # Both volumes are hostPath directories, created on the node if absent.
        - name: dataset-volume
          hostPath:
            type: DirectoryOrCreate
            path: /opt/datamate/data/dataset
        - name: log-volume
          hostPath:
            type: DirectoryOrCreate
            path: /opt/datamate/data/log
      containers:
        - name: mineru
          image: datamate-mineru
          imagePullPolicy: IfNotPresent
          # The --port value must equal containerPort below.
          command: ["python", "/opt/runtime/datamate/mineru/mineru_api.py", "--port", "9001"]
          env:
            - {name: MINERU_MODEL_SOURCE, value: local}
            - {name: MINERU_DEVICE_MODE, value: cpu}
            - {name: MINERU_BACKEND_MODE, value: pipeline}
          ports:
            - containerPort: 9001
          volumeMounts:
            - mountPath: /dataset
              name: dataset-volume
            # Only the `mineru` subdirectory of the log volume is mounted.
            - mountPath: /var/log/datamate/mineru
              name: log-volume
              subPath: mineru
|
||||
|
||||
---
|
||||
# ClusterIP Service exposing port 9001 of the pods labelled
# app=datamate, tier=mineru under the name `datamate-mineru`.
apiVersion: v1
kind: Service
metadata:
  name: datamate-mineru
  labels: {app: datamate, tier: mineru}
spec:
  type: ClusterIP
  selector:
    app: datamate
    tier: mineru
  ports:
    - protocol: TCP
      port: 9001
      targetPort: 9001
|
||||
Reference in New Issue
Block a user