feat: 支持运行data-juicer算子 (#215)

* feature: 增加data-juicer算子

* feat: 支持运行data-juicer算子

* feat: 支持data-juicer任务下发

* feat: 支持data-juicer结果数据集归档

* feat: 支持data-juicer结果数据集归档
This commit is contained in:
hhhhsc701
2025-12-31 09:20:41 +08:00
committed by GitHub
parent 63f4e3e447
commit 6a1eb85e8e
26 changed files with 709 additions and 120 deletions

View File

@@ -139,6 +139,21 @@ services:
- "6379:6379"
networks: [ datamate ]
# data-juicer operator service. It is assigned to the `data-juicer` profile
# and joins the shared `datamate` network.
datamate-data-juicer:
  image: datajuicer/data-juicer:v1.4.4
  container_name: datamate-data-juicer
  profiles:
    - data-juicer
  restart: on-failure
  networks:
    - datamate
  # Serve the ASGI application `service:app` with uvicorn on all interfaces.
  command:
    - uvicorn
    - service:app
    - --host
    - "0.0.0.0"
  # dataset_volume is mounted at /dataset and flow_volume at /flow.
  volumes:
    - dataset_volume:/dataset
    - flow_volume:/flow
volumes:
dataset_volume:
name: datamate-dataset-volume

View File

@@ -66,15 +66,3 @@ Name of image
{{- $name }}:{{ $tag }}
{{- end }}
{{- end }}
{{/*
Name of sidecar image.

Resolves the image reference in two steps:
  1. Take .Values.head.sidecarContainers.image when it is set; otherwise fall
     back to "<.Values.image.repository>:<.Values.image.tag>".
  2. When .Values.global.image.repository is set, prefix the result with it
     (a trailing "/" on the global repository is trimmed first); otherwise
     emit the name unchanged.

NOTE(review): the pod template iterates .Values.head.sidecarContainers with
`range`, which suggests it is a list; confirm that the `.image` lookup on it
here is valid.
*/}}
{{- define "ray-cluster-sidecar.image" -}}
{{- $name := default (printf "%s:%s" .Values.image.repository .Values.image.tag) .Values.head.sidecarContainers.image }}
{{- if .Values.global.image.repository }}
{{- .Values.global.image.repository | trimSuffix "/" }}/{{ $name }}
{{- else }}
{{- $name }}
{{- end }}
{{- end }}

View File

@@ -96,7 +96,7 @@ spec:
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- $defult := printf "%s:%s" .Values.image.repository .Values.image.tag }}
{{- $defult := include "ray-cluster.image" . -}}
{{- with .Values.head.sidecarContainers }}
{{- range $index, $container := . }}
{{- $image := default $defult $container.image -}}
@@ -313,10 +313,14 @@ spec:
- name: ray-worker
{{- if $values.image }}
image: {{ $values.image.repository }}:{{ $values.image.tag }}
{{- if $values.image.pullPolicy }}
imagePullPolicy: {{ $values.image.pullPolicy }}
{{- else }}
image: {{ $.Values.image.repository }}:{{ $.Values.image.tag }}
imagePullPolicy: {{ $.Values.image.pullPolicy }}
imagePullPolicy: {{ default $.Values.image.pullPolicy $.Values.global.image.pullPolicy }}
{{- end }}
{{- else }}
image: {{ include "ray-cluster.image" $ }}
imagePullPolicy: {{ default $.Values.image.pullPolicy $.Values.global.image.pullPolicy }}
{{- end }}
{{- with $values.command }}
command:

View File

@@ -248,7 +248,6 @@ ray-cluster:
subPath: site-packages
sidecarContainers:
- name: runtime
image: datamate-runtime
imagePullPolicy: IfNotPresent
args: *runtimeArgs
env: *runtimeEnv
@@ -338,6 +337,9 @@ ray-cluster:
- *flowVolume
- *logVolume
- *operatorVolume
- name: ascend
hostPath:
path: /usr/local/Ascend
volumeMounts:
- mountPath: /tmp/ray
name: log-volume
@@ -352,3 +354,5 @@ ray-cluster:
- mountPath: /usr/local/lib/ops/site-packages
name: operator-volume
subPath: site-packages
- mountPath: /usr/local/Ascend
name: ascend

View File

@@ -0,0 +1,74 @@
# Deployment that runs the data-juicer operator service as a single replica.
# The container starts uvicorn serving the ASGI app `service:app` and mounts
# the dataset, flow and log persistent volume claims.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datamate-data-juicer
  labels:
    app: datamate
    tier: data-juicer
spec:
  replicas: 1
  selector:
    matchLabels:
      app: datamate
      tier: data-juicer
  template:
    metadata:
      labels:
        app: datamate
        tier: data-juicer
    spec:
      containers:
        - name: data-juicer
          image: datajuicer/data-juicer:v1.4.4
          imagePullPolicy: IfNotPresent
          command:
            - uvicorn
          args:
            - service:app
            - --host
            - "0.0.0.0"
            # Pin the listen port explicitly so it always matches
            # containerPort below and the Service targetPort, instead of
            # relying on uvicorn's implicit default.
            - --port
            - "8000"
          ports:
            - containerPort: 8000
          # Only route Service traffic to the pod once uvicorn accepts
          # TCP connections on the serving port.
          readinessProbe:
            tcpSocket:
              port: 8000
            periodSeconds: 10
          resources:
            limits:
              cpu: 8
              memory: 32Gi
            requests:
              cpu: 100m
              memory: 100Mi
          volumeMounts:
            - name: dataset-volume
              mountPath: /dataset
            # Only the `data-juicer` sub-directory of the log claim is mounted.
            - name: log-volume
              mountPath: /var/log/datamate/data-juicer
              subPath: data-juicer
            - name: flow-volume
              mountPath: /flow
      volumes:
        - name: dataset-volume
          persistentVolumeClaim:
            claimName: datamate-dataset-pvc
        - name: flow-volume
          persistentVolumeClaim:
            claimName: datamate-flow-pvc
        - name: log-volume
          persistentVolumeClaim:
            claimName: datamate-log-pvc
---
# ClusterIP Service for the data-juicer pods: forwards TCP port 8000 to
# port 8000 on pods labelled app=datamate, tier=data-juicer.
apiVersion: v1
kind: Service
metadata:
  name: datamate-data-juicer
  labels:
    app: datamate
    tier: data-juicer
spec:
  type: ClusterIP
  selector:
    app: datamate
    tier: data-juicer
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000