You've already forked DataMate
bugfix: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits (#172)
* feature: unstructured支持简单pdf处理 * feature: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits
This commit is contained in:
@@ -58,25 +58,10 @@ head:
|
||||
# in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details.
|
||||
serviceAccountName: ""
|
||||
restartPolicy: ""
|
||||
rayStartParams:
|
||||
object-store-memory: '78643200'
|
||||
rayStartParams: {}
|
||||
# containerEnv specifies environment variables for the Ray container,
|
||||
# Follows standard K8s container env schema.
|
||||
containerEnv:
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name: RAY_TQDM_PATCH_PRINT
|
||||
value: "0"
|
||||
- name: MYSQL_HOST
|
||||
value: "datamate-database"
|
||||
- name: MYSQL_PORT
|
||||
value: "3306"
|
||||
- name: MYSQL_USER
|
||||
value: "root"
|
||||
- name: MYSQL_PASSWORD
|
||||
value: "password"
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
containerEnv: []
|
||||
# - name: EXAMPLE_ENV
|
||||
# value: "1"
|
||||
envFrom: []
|
||||
@@ -93,14 +78,7 @@ head:
|
||||
# It is usually best to set requests equal to limits.
|
||||
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
|
||||
# for further guidance.
|
||||
resources:
|
||||
limits:
|
||||
cpu: "2"
|
||||
# To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
|
||||
memory: "8G"
|
||||
requests:
|
||||
cpu: "1"
|
||||
memory: "2G"
|
||||
resources: {}
|
||||
annotations: {}
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
@@ -156,21 +134,7 @@ worker:
|
||||
initContainers: []
|
||||
# containerEnv specifies environment variables for the Ray container,
|
||||
# Follows standard K8s container env schema.
|
||||
containerEnv:
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name: RAY_TQDM_PATCH_PRINT
|
||||
value: "0"
|
||||
- name: MYSQL_HOST
|
||||
value: "datamate-database"
|
||||
- name: MYSQL_PORT
|
||||
value: "3306"
|
||||
- name: MYSQL_USER
|
||||
value: "root"
|
||||
- name: MYSQL_PASSWORD
|
||||
value: "password"
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
containerEnv: []
|
||||
# - name: EXAMPLE_ENV
|
||||
# value: "1"
|
||||
envFrom: []
|
||||
@@ -187,13 +151,7 @@ worker:
|
||||
# It is usually best to set requests equal to limits.
|
||||
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
|
||||
# for further guidance.
|
||||
resources:
|
||||
limits:
|
||||
cpu: "4"
|
||||
memory: "8G"
|
||||
requests:
|
||||
cpu: "1"
|
||||
memory: "1G"
|
||||
resources: {}
|
||||
annotations: {}
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
|
||||
@@ -77,6 +77,10 @@ database:
|
||||
subPath: database
|
||||
|
||||
backend:
|
||||
securityContext:
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_ADMIN
|
||||
env:
|
||||
- name: DB_PASSWORD
|
||||
value: *dbPass
|
||||
@@ -170,6 +174,31 @@ runtime:
|
||||
ray-cluster:
|
||||
enabled: true
|
||||
head:
|
||||
rayStartParams:
|
||||
object-store-memory: '78643200'
|
||||
num-cpus: '0'
|
||||
containerEnv:
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name: RAY_TQDM_PATCH_PRINT
|
||||
value: "0"
|
||||
- name: MYSQL_HOST
|
||||
value: "datamate-database"
|
||||
- name: MYSQL_PORT
|
||||
value: "3306"
|
||||
- name: MYSQL_USER
|
||||
value: "root"
|
||||
- name: MYSQL_PASSWORD
|
||||
value: *dbPass
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
resources:
|
||||
limits:
|
||||
cpu: "2"
|
||||
memory: "8G"
|
||||
requests:
|
||||
cpu: "1"
|
||||
memory: "2G"
|
||||
volumes:
|
||||
- *datasetVolume
|
||||
- *flowVolume
|
||||
@@ -196,6 +225,28 @@ ray-cluster:
|
||||
- containerPort: 8081
|
||||
volumeMounts: *runtimeVolumeMounts
|
||||
worker:
|
||||
containerEnv:
|
||||
- name: RAY_DEDUP_LOGS
|
||||
value: "0"
|
||||
- name: RAY_TQDM_PATCH_PRINT
|
||||
value: "0"
|
||||
- name: MYSQL_HOST
|
||||
value: "datamate-database"
|
||||
- name: MYSQL_PORT
|
||||
value: "3306"
|
||||
- name: MYSQL_USER
|
||||
value: "root"
|
||||
- name: MYSQL_PASSWORD
|
||||
value: *dbPass
|
||||
- name: MYSQL_DATABASE
|
||||
value: "datamate"
|
||||
resources:
|
||||
limits:
|
||||
cpu: "8"
|
||||
memory: "64G"
|
||||
requests:
|
||||
cpu: "1"
|
||||
memory: "2G"
|
||||
volumes:
|
||||
- *datasetVolume
|
||||
- *flowVolume
|
||||
|
||||
@@ -146,10 +146,10 @@ class BaseOp:
|
||||
def read_file(self, sample):
|
||||
filepath = sample[self.filepath_key]
|
||||
filetype = sample[self.filetype_key]
|
||||
if filetype in ["ppt", "pptx", "docx", "doc", "xlsx"]:
|
||||
if filetype in ["ppt", "pptx", "docx", "doc", "xlsx", "csv", "md", "pdf"]:
|
||||
elements = partition(filename=filepath)
|
||||
sample[self.text_key] = "\n\n".join([str(el) for el in elements])
|
||||
elif filetype in ["txt", "md", "markdown", "xml", "html", "csv", "json", "jsonl"]:
|
||||
elif filetype in ["txt", "md", "markdown", "xml", "html", "json", "jsonl"]:
|
||||
with open(filepath, 'rb') as f:
|
||||
content = f.read()
|
||||
sample[self.text_key] = content.decode("utf-8-sig").replace("\r\n", "\n")
|
||||
|
||||
@@ -21,7 +21,7 @@ dependencies = [
|
||||
"loguru>=0.7.3",
|
||||
"opencv-python-headless>=4.12.0.88",
|
||||
"ray[data,default]==2.52.1",
|
||||
"unstructured[csv,docx,pptx,xlsx]==0.18.15",
|
||||
"unstructured[csv,docx,pptx,xlsx,pdf,md]==0.18.15",
|
||||
"uvicorn[standard]>=0.38.0",
|
||||
]
|
||||
|
||||
|
||||
@@ -21,7 +21,9 @@ WORKDIR /opt/runtime
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install -e . --system \
|
||||
&& uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
|
||||
&& UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
|
||||
&& uv pip uninstall torch torchvision triton --system \
|
||||
&& uv pip list | grep -E '^nvidia-' | awk '{print $1}' | xargs -r uv pip uninstall --system \
|
||||
&& python -m spacy download zh_core_web_sm
|
||||
|
||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
|
||||
|
||||
Reference in New Issue
Block a user