# DataMate/deployment/helm/datamate/charts/ray-cluster/values.yaml

# Default values for ray-cluster.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# The KubeRay community welcomes PRs to expose additional configuration
# in this Helm chart.
image:
  repository: datamate-runtime
  tag: latest
  pullPolicy: IfNotPresent
nameOverride: "kuberay"
fullnameOverride: "datamate-raycluster"
imagePullSecrets: []
# - name: an-existing-secret
# common defined values shared between the head and worker
common:
  # containerEnv specifies environment variables for the Ray head and worker containers.
  # Follows standard K8s container env schema.
  containerEnv: []
  # - name: BLAH
  #   value: VAL
head:
  # rayVersion determines the autoscaler's image version.
  # It should match the Ray version in the image of the containers.
  # rayVersion: 2.46.0
  # If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
  # Ray autoscaler integration is supported only for Ray versions >= 1.11.0
  # Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
  # enableInTreeAutoscaling: true
  # autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
  # The example configuration shown below represents the DEFAULT values.
  # autoscalerOptions:
  #   upscalingMode: Default
  #   # idleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
  #   idleTimeoutSeconds: 60
  #   # imagePullPolicy optionally overrides the autoscaler container's default image pull policy (IfNotPresent).
  #   imagePullPolicy: IfNotPresent
  #   # Optionally specify the autoscaler container's securityContext.
  #   securityContext: {}
  #   env: []
  #   envFrom: []
  #   # resources specifies optional resource request and limit overrides for the autoscaler container.
  #   # For large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required.
  #   resources:
  #     limits:
  #       cpu: "500m"
  #       memory: "512Mi"
  #     requests:
  #       cpu: "500m"
  #       memory: "512Mi"
  initContainers: []
  labels: {}
  # Note: From KubeRay v0.6.0, users need to create the ServiceAccount by themselves if they specify the `serviceAccountName`
  # in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details.
  serviceAccountName: ""
  restartPolicy: ""
  rayStartParams: {}
  # containerEnv specifies environment variables for the Ray container.
  # Follows standard K8s container env schema.
  containerEnv: []
  # - name: EXAMPLE_ENV
  #   value: "1"
  envFrom: []
  # - secretRef:
  #     name: my-env-secret
  # ports optionally allows specifying ports for the Ray container.
  # ports: []
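  # A hedged example of exposing Ray's metrics endpoint (the port name is an assumption;
  # Ray's default metrics export port is 8080, adjust to your setup):
  # ports:
  #   - containerPort: 8080
  #     name: metrics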
  # resource requests and limits for the Ray head container.
  # Modify as needed for your application.
  # Note that the resources in this example are much too small for production;
  # we don't recommend allocating less than 8G memory for a Ray pod in production.
  # Ray pods should be sized to take up entire K8s nodes when possible.
  # Always set CPU and memory limits for Ray pods.
  # It is usually best to set requests equal to limits.
  # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
  # for further guidance.
  resources: {}
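  # Illustrative production-style sizing (an assumption, not a chart default); in line with the
  # guidance above, size the head pod to fill most of its node, e.g. an 8-CPU / 32Gi node:
  # resources:
  #   limits:
  #     cpu: "8"
  #     memory: "32Gi"
  #   requests:
  #     cpu: "8"
  #     memory: "32Gi"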
  annotations: {}
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Pod security context.
  podSecurityContext: {}
  # Ray container security context.
  securityContext: {}
  # Optional: The following volumes/volumeMounts configurations are optional but recommended because
  # Ray writes logs to /tmp/ray/session_latest/logs instead of stdout/stderr.
  volumes: []
  volumeMounts: []
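  # A minimal sketch (an assumption, not a chart default): keep head logs on an emptyDir so they
  # survive container restarts, mirroring the hostPath pattern used by the additional worker group below.
  # volumes:
  #   - name: ray-logs
  #     emptyDir: {}
  # volumeMounts:
  #   - mountPath: /tmp/ray
  #     name: ray-logs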
  # sidecarContainers specifies additional containers to attach to the Ray pod.
  # Follows standard K8s container spec.
  sidecarContainers: []
  # See docs/guidance/pod-command.md for more details about how to specify
  # the container command for the head Pod.
  command: []
  args: []
  # Optional, for the user to provide any additional fields to the service.
  # See https://pkg.go.dev/k8s.io/Kubernetes/pkg/api/v1#Service
  headService: {}
  # metadata:
  #   annotations:
  #     prometheus.io/scrape: "true"
  # Custom pod DNS configuration
  # See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config
  # dnsConfig:
  #   nameservers:
  #     - 8.8.8.8
  #   searches:
  #     - example.local
  #   options:
  #     - name: ndots
  #       value: "2"
  #     - name: edns0
  topologySpreadConstraints: []
worker:
  # If you want to disable the default workergroup, uncomment the line below.
  # disabled: true
  groupName: workergroup
  replicas: 1
  minReplicas: 1
  maxReplicas: 1
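  # Illustrative elastic bounds (an assumption, not the chart default): with
  # head.enableInTreeAutoscaling set to true, the Ray autoscaler scales this group
  # between minReplicas and maxReplicas, so a typical elastic setup might use:
  # replicas: 1
  # minReplicas: 1
  # maxReplicas: 4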
  labels: {}
  serviceAccountName: ""
  restartPolicy: ""
  rayStartParams: {}
  initContainers: []
  # containerEnv specifies environment variables for the Ray container.
  # Follows standard K8s container env schema.
  containerEnv: []
  # - name: EXAMPLE_ENV
  #   value: "1"
  envFrom: []
  # - secretRef:
  #     name: my-env-secret
  # ports optionally allows specifying ports for the Ray container.
  # ports: []
  # resource requests and limits for the Ray worker container.
  # Modify as needed for your application.
  # Note that the resources in this example are much too small for production;
  # we don't recommend allocating less than 8G memory for a Ray pod in production.
  # Ray pods should be sized to take up entire K8s nodes when possible.
  # Always set CPU and memory limits for Ray pods.
  # It is usually best to set requests equal to limits.
  # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
  # for further guidance.
  resources: {}
  annotations: {}
  nodeSelector: {}
  tolerations: []
  affinity: {}
  # Pod security context.
  podSecurityContext: {}
  # Ray container security context.
  securityContext: {}
  # Optional: The following volumes/volumeMounts configurations are optional but recommended because
  # Ray writes logs to /tmp/ray/session_latest/logs instead of stdout/stderr.
  volumes: []
  volumeMounts: []
  # sidecarContainers specifies additional containers to attach to the Ray pod.
  # Follows standard K8s container spec.
  sidecarContainers: []
  # See docs/guidance/pod-command.md for more details about how to specify
  # the container command for the worker Pod.
  command: []
  args: []
  topologySpreadConstraints: []
  # Custom pod DNS configuration
  # See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config
  # dnsConfig:
  #   nameservers:
  #     - 8.8.8.8
  #   searches:
  #     - example.local
  #   options:
  #     - name: ndots
  #       value: "2"
  #     - name: edns0
# The map's key is used as the groupName.
# For example, the key `smallGroup` in the map below
# will be used as the groupName.
additionalWorkerGroups:
  smallGroup:
    # Disabled by default
    disabled: true
    replicas: 0
    minReplicas: 0
    maxReplicas: 3
    labels: {}
    serviceAccountName: ""
    restartPolicy: ""
    rayStartParams: {}
    # containerEnv specifies environment variables for the Ray container.
    # Follows standard K8s container env schema.
    containerEnv: []
    # - name: EXAMPLE_ENV
    #   value: "1"
    envFrom: []
    # - secretRef:
    #     name: my-env-secret
    # ports optionally allows specifying ports for the Ray container.
    # ports: []
    # resource requests and limits for the Ray worker container.
    # Modify as needed for your application.
    # Note that the resources in this example are much too small for production;
    # we don't recommend allocating less than 8G memory for a Ray pod in production.
    # Ray pods should be sized to take up entire K8s nodes when possible.
    # Always set CPU and memory limits for Ray pods.
    # It is usually best to set requests equal to limits.
    # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
    # for further guidance.
    resources:
      limits:
        cpu: 1
        memory: "1G"
      requests:
        cpu: 1
        memory: "1G"
    annotations: {}
    nodeSelector: {}
    tolerations: []
    affinity: {}
    # Pod security context.
    podSecurityContext: {}
    # Ray container security context.
    securityContext: {}
    # Optional: The following volumes/volumeMounts configurations are optional but recommended because
    # Ray writes logs to /tmp/ray/session_latest/logs instead of stdout/stderr.
    volumes:
      - name: log-volume
        hostPath:
          path: /opt/datamate/data/log
          type: DirectoryOrCreate
      - name: dataset-volume
        hostPath:
          path: /opt/datamate/data/dataset
          type: DirectoryOrCreate
      - name: flow-volume
        hostPath:
          path: /opt/datamate/data/flow
          type: DirectoryOrCreate
    volumeMounts:
      - mountPath: /tmp/ray
        name: log-volume
        subPath: ray
      - mountPath: /dataset
        name: dataset-volume
      - mountPath: /flow
        name: flow-volume
    sidecarContainers: []
    # See docs/guidance/pod-command.md for more details about how to specify
    # the container command for the worker Pod.
    command: []
    args: []
    # Topology Spread Constraints for worker pods
    # See: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
    topologySpreadConstraints: []
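    # A minimal sketch (an assumption, not a chart default): spread this group's workers evenly
    # across nodes; the label selector is illustrative and must match the labels actually applied
    # to the worker pods in your cluster.
    # topologySpreadConstraints:
    #   - maxSkew: 1
    #     topologyKey: kubernetes.io/hostname
    #     whenUnsatisfiable: ScheduleAnyway
    #     labelSelector:
    #       matchLabels:
    #         app.kubernetes.io/name: kuberay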
    # Custom pod DNS configuration
    # See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config
    # dnsConfig:
    #   nameservers:
    #     - 8.8.8.8
    #   searches:
    #     - example.local
    #   options:
    #     - name: ndots
    #       value: "2"
    #     - name: edns0
# Configuration for Head's Kubernetes Service
service:
  # This is optional; the Kubernetes default is ClusterIP, but this chart exposes the head service via NodePort.
  type: NodePort