You've already forked DataMate
* feature: 增加data-juicer算子 * feat: 支持运行data-juicer算子 * feat: 支持data-juicer任务下发 * feat: 支持data-juicer结果数据集归档 * feat: 支持data-juicer结果数据集归档
185 lines
4.7 KiB
YAML
185 lines
4.7 KiB
YAML
services:
  # All services join the `datamate` network and restart on failure.
  # Images prefixed with ${REGISTRY:-} resolve to the bare image name when
  # REGISTRY is unset.

  # 1) backend
  datamate-backend:
    container_name: datamate-backend
    image: ${REGISTRY:-}datamate-backend
    restart: on-failure
    privileged: true
    volumes:
      - dataset_volume:/dataset
      - flow_volume:/flow
      - log_volume:/var/log/datamate
      - operator-upload-volume:/operators/upload
      # Same volume that datamate-runtime mounts at /opt/runtime/datamate/ops/user.
      - operator-runtime-volume:/operators/extract
    networks: [datamate]
    depends_on:
      - datamate-database

  # 2) backend (Python)
  datamate-backend-python:
    container_name: datamate-backend-python
    image: ${REGISTRY:-}datamate-backend-python
    restart: on-failure
    privileged: true
    environment:
      log_level: DEBUG
    volumes:
      - dataset_volume:/dataset
      - flow_volume:/flow
      - log_volume:/var/log/datamate
    networks: [datamate]
    depends_on:
      - datamate-database

  # 3) gateway
  datamate-gateway:
    container_name: datamate-gateway
    image: ${REGISTRY:-}datamate-gateway
    restart: on-failure
    privileged: true
    networks: [datamate]

  # 4) frontend (published on host port 30000)
  datamate-frontend:
    container_name: datamate-frontend
    image: ${REGISTRY:-}datamate-frontend
    restart: on-failure
    ports:
      - "30000:80"  # host port 30000 -> container port 80
    volumes:
      - frontend_log_volume:/var/log/datamate/frontend
      # Bind-mounts the nginx config that sits next to this compose file.
      - ./backend.conf:/etc/nginx/conf.d/backend.conf
    networks: [datamate]
    depends_on:
      - datamate-backend
      - datamate-backend-python

  # 5) database
  datamate-database:
    container_name: datamate-database
    image: ${REGISTRY:-}datamate-database
    restart: on-failure
    environment:
      # Defaults to the previous literal value; override through the shell
      # environment or an .env file. datamate-runtime reads the same variable.
      # NOTE(review): the backend services receive no DB credentials in this
      # file; presumably they are configured inside their images - verify
      # before overriding the default.
      MYSQL_ROOT_PASSWORD: "${MYSQL_ROOT_PASSWORD:-password}"
    # Give the mysql user ownership of the mounted log directory, then exec
    # docker-entrypoint.sh with mysqld so it replaces the wrapper shell.
    command:
      - sh
      - "-c"
      - >-
        chown mysql:mysql /var/log/datamate/database &&
        chmod 755 /var/log/datamate/database &&
        exec docker-entrypoint.sh mysqld
    volumes:
      - mysql_volume:/var/lib/mysql
      - database_log_volume:/var/log/datamate/database
    ports:
      - "3306:3306"
    networks: [datamate]

  # 6) runtime
  # NOTE(review): this service is pointed at datamate-database via MYSQL_HOST
  # but declares no depends_on - confirm start-up order does not matter.
  datamate-runtime:
    container_name: datamate-runtime
    image: ${REGISTRY:-}datamate-runtime
    restart: on-failure
    environment:
      RAY_DEDUP_LOGS: "0"
      RAY_TQDM_PATCH_PRINT: "0"
      MYSQL_HOST: "datamate-database"
      MYSQL_PORT: "3306"
      MYSQL_USER: "root"
      # Kept in sync with MYSQL_ROOT_PASSWORD of datamate-database.
      MYSQL_PASSWORD: "${MYSQL_ROOT_PASSWORD:-password}"
      MYSQL_DATABASE: "datamate"
    command:
      - python
      - /opt/runtime/datamate/operator_runtime.py
      - "--port"
      - "8081"
    volumes:
      - ray_log_volume:/tmp/ray
      - log_volume:/var/log/datamate
      - dataset_volume:/dataset
      - flow_volume:/flow
      # Same volume that datamate-backend mounts at /operators/extract.
      - operator-runtime-volume:/opt/runtime/datamate/ops/user
      - operator-packages-volume:/usr/local/lib/ops/site-packages
    networks: [datamate]

  # 7) mineru (only started when the `mineru` profile is enabled)
  datamate-mineru:
    container_name: datamate-mineru
    # NOTE(review): unlike the other datamate-* images this one carries no
    # REGISTRY prefix - confirm that is intentional.
    image: datamate-mineru
    restart: on-failure
    environment:
      MINERU_MODEL_SOURCE: local
      MINERU_DEVICE_MODE: npu  # cpu|cuda|npu|mps
      VLLM_WORKER_MULTIPROC_METHOD: spawn
    privileged: true
    entrypoint: mineru-openai-server
    # Arguments appended to the entrypoint above.
    command:
      - "--engine"
      - vllm
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
    volumes:
      - dataset_volume:/dataset
      - mineru_log_volume:/var/log/datamate/mineru
      # Host paths bind-mounted for the NPU device mode selected above.
      - /var/log/npu/:/usr/slog
      - /usr/local/dcmi:/usr/local/dcmi
      - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
      - /usr/local/Ascend/driver:/usr/local/Ascend/driver
    networks: [datamate]
    profiles: [mineru]
    devices:
      - /dev/davinci0
      - /dev/davinci_manager
      - /dev/devmm_svm
      - /dev/hisi_hdc

  # 8) redis
  datamate-redis:
    container_name: datamate-redis
    image: redis:8.2.3
    restart: on-failure
    ports:
      - "6379:6379"
    networks: [datamate]

  # 9) data-juicer (only started when the `data-juicer` profile is enabled)
  datamate-data-juicer:
    container_name: datamate-data-juicer
    image: datajuicer/data-juicer:v1.4.4
    restart: on-failure
    command:
      - uvicorn
      - "service:app"
      - "--host"
      - "0.0.0.0"
    volumes:
      - dataset_volume:/dataset
      - flow_volume:/flow
    networks: [datamate]
    profiles: [data-juicer]
volumes:
  # Every volume sets an explicit `name`, so the volume created on the host
  # uses exactly that name.

  # Data shared between services.
  dataset_volume:
    name: datamate-dataset-volume
  flow_volume:
    name: datamate-flow-volume

  # Operator artefacts exchanged between datamate-backend and datamate-runtime.
  operator-upload-volume:
    name: datamate-operator-upload-volume
  operator-runtime-volume:
    name: datamate-operator-runtime-volume
  operator-packages-volume:
    name: datamate-operator-packages-volume

  # Database storage.
  mysql_volume:
    name: datamate-mysql-volume

  # Logs.
  log_volume:
    name: datamate-log-volume
  ray_log_volume:
    name: datamate-ray-log-volume
  frontend_log_volume:
    name: datamate-frontend-log-volume
  database_log_volume:
    name: datamate-database-log-volume
  mineru_log_volume:
    # NOTE(review): this name mixes underscores with hyphens, unlike its
    # siblings; it is kept as-is because renaming would point the service at a
    # new, empty volume.
    name: datamate-mineru_log_volume
networks:
  # Bridge network joined by every service defined above; the explicit `name`
  # fixes the network name on the host.
  datamate:
    name: datamate-network
    driver: bridge