You've already forked DataMate
bugfix: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits (#172)
* feature: unstructured supports simple PDF processing (unstructured支持简单pdf处理)
* feature: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits
This commit is contained in:
@@ -58,25 +58,10 @@ head:
|
|||||||
# in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details.
|
# in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details.
|
||||||
serviceAccountName: ""
|
serviceAccountName: ""
|
||||||
restartPolicy: ""
|
restartPolicy: ""
|
||||||
rayStartParams:
|
rayStartParams: {}
|
||||||
object-store-memory: '78643200'
|
|
||||||
# containerEnv specifies environment variables for the Ray container,
|
# containerEnv specifies environment variables for the Ray container,
|
||||||
# Follows standard K8s container env schema.
|
# Follows standard K8s container env schema.
|
||||||
containerEnv:
|
containerEnv: []
|
||||||
- name: RAY_DEDUP_LOGS
|
|
||||||
value: "0"
|
|
||||||
- name: RAY_TQDM_PATCH_PRINT
|
|
||||||
value: "0"
|
|
||||||
- name: MYSQL_HOST
|
|
||||||
value: "datamate-database"
|
|
||||||
- name: MYSQL_PORT
|
|
||||||
value: "3306"
|
|
||||||
- name: MYSQL_USER
|
|
||||||
value: "root"
|
|
||||||
- name: MYSQL_PASSWORD
|
|
||||||
value: "password"
|
|
||||||
- name: MYSQL_DATABASE
|
|
||||||
value: "datamate"
|
|
||||||
# - name: EXAMPLE_ENV
|
# - name: EXAMPLE_ENV
|
||||||
# value: "1"
|
# value: "1"
|
||||||
envFrom: []
|
envFrom: []
|
||||||
@@ -93,14 +78,7 @@ head:
|
|||||||
# It is usually best to set requests equal to limits.
|
# It is usually best to set requests equal to limits.
|
||||||
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
|
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
|
||||||
# for further guidance.
|
# for further guidance.
|
||||||
resources:
|
resources: {}
|
||||||
limits:
|
|
||||||
cpu: "2"
|
|
||||||
# To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
|
|
||||||
memory: "8G"
|
|
||||||
requests:
|
|
||||||
cpu: "1"
|
|
||||||
memory: "2G"
|
|
||||||
annotations: {}
|
annotations: {}
|
||||||
nodeSelector: {}
|
nodeSelector: {}
|
||||||
tolerations: []
|
tolerations: []
|
||||||
@@ -156,21 +134,7 @@ worker:
|
|||||||
initContainers: []
|
initContainers: []
|
||||||
# containerEnv specifies environment variables for the Ray container,
|
# containerEnv specifies environment variables for the Ray container,
|
||||||
# Follows standard K8s container env schema.
|
# Follows standard K8s container env schema.
|
||||||
containerEnv:
|
containerEnv: []
|
||||||
- name: RAY_DEDUP_LOGS
|
|
||||||
value: "0"
|
|
||||||
- name: RAY_TQDM_PATCH_PRINT
|
|
||||||
value: "0"
|
|
||||||
- name: MYSQL_HOST
|
|
||||||
value: "datamate-database"
|
|
||||||
- name: MYSQL_PORT
|
|
||||||
value: "3306"
|
|
||||||
- name: MYSQL_USER
|
|
||||||
value: "root"
|
|
||||||
- name: MYSQL_PASSWORD
|
|
||||||
value: "password"
|
|
||||||
- name: MYSQL_DATABASE
|
|
||||||
value: "datamate"
|
|
||||||
# - name: EXAMPLE_ENV
|
# - name: EXAMPLE_ENV
|
||||||
# value: "1"
|
# value: "1"
|
||||||
envFrom: []
|
envFrom: []
|
||||||
@@ -187,13 +151,7 @@ worker:
|
|||||||
# It is usually best to set requests equal to limits.
|
# It is usually best to set requests equal to limits.
|
||||||
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
|
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
|
||||||
# for further guidance.
|
# for further guidance.
|
||||||
resources:
|
resources: {}
|
||||||
limits:
|
|
||||||
cpu: "4"
|
|
||||||
memory: "8G"
|
|
||||||
requests:
|
|
||||||
cpu: "1"
|
|
||||||
memory: "1G"
|
|
||||||
annotations: {}
|
annotations: {}
|
||||||
nodeSelector: {}
|
nodeSelector: {}
|
||||||
tolerations: []
|
tolerations: []
|
||||||
|
|||||||
@@ -77,6 +77,10 @@ database:
|
|||||||
subPath: database
|
subPath: database
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- SYS_ADMIN
|
||||||
env:
|
env:
|
||||||
- name: DB_PASSWORD
|
- name: DB_PASSWORD
|
||||||
value: *dbPass
|
value: *dbPass
|
||||||
@@ -170,6 +174,31 @@ runtime:
|
|||||||
ray-cluster:
|
ray-cluster:
|
||||||
enabled: true
|
enabled: true
|
||||||
head:
|
head:
|
||||||
|
rayStartParams:
|
||||||
|
object-store-memory: '78643200'
|
||||||
|
num-cpus: '0'
|
||||||
|
containerEnv:
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: RAY_TQDM_PATCH_PRINT
|
||||||
|
value: "0"
|
||||||
|
- name: MYSQL_HOST
|
||||||
|
value: "datamate-database"
|
||||||
|
- name: MYSQL_PORT
|
||||||
|
value: "3306"
|
||||||
|
- name: MYSQL_USER
|
||||||
|
value: "root"
|
||||||
|
- name: MYSQL_PASSWORD
|
||||||
|
value: *dbPass
|
||||||
|
- name: MYSQL_DATABASE
|
||||||
|
value: "datamate"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "2"
|
||||||
|
memory: "8G"
|
||||||
|
requests:
|
||||||
|
cpu: "1"
|
||||||
|
memory: "2G"
|
||||||
volumes:
|
volumes:
|
||||||
- *datasetVolume
|
- *datasetVolume
|
||||||
- *flowVolume
|
- *flowVolume
|
||||||
@@ -196,6 +225,28 @@ ray-cluster:
|
|||||||
- containerPort: 8081
|
- containerPort: 8081
|
||||||
volumeMounts: *runtimeVolumeMounts
|
volumeMounts: *runtimeVolumeMounts
|
||||||
worker:
|
worker:
|
||||||
|
containerEnv:
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: RAY_TQDM_PATCH_PRINT
|
||||||
|
value: "0"
|
||||||
|
- name: MYSQL_HOST
|
||||||
|
value: "datamate-database"
|
||||||
|
- name: MYSQL_PORT
|
||||||
|
value: "3306"
|
||||||
|
- name: MYSQL_USER
|
||||||
|
value: "root"
|
||||||
|
- name: MYSQL_PASSWORD
|
||||||
|
value: *dbPass
|
||||||
|
- name: MYSQL_DATABASE
|
||||||
|
value: "datamate"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: "8"
|
||||||
|
memory: "64G"
|
||||||
|
requests:
|
||||||
|
cpu: "1"
|
||||||
|
memory: "2G"
|
||||||
volumes:
|
volumes:
|
||||||
- *datasetVolume
|
- *datasetVolume
|
||||||
- *flowVolume
|
- *flowVolume
|
||||||
|
|||||||
@@ -146,10 +146,10 @@ class BaseOp:
|
|||||||
def read_file(self, sample):
|
def read_file(self, sample):
|
||||||
filepath = sample[self.filepath_key]
|
filepath = sample[self.filepath_key]
|
||||||
filetype = sample[self.filetype_key]
|
filetype = sample[self.filetype_key]
|
||||||
if filetype in ["ppt", "pptx", "docx", "doc", "xlsx"]:
|
if filetype in ["ppt", "pptx", "docx", "doc", "xlsx", "csv", "md", "pdf"]:
|
||||||
elements = partition(filename=filepath)
|
elements = partition(filename=filepath)
|
||||||
sample[self.text_key] = "\n\n".join([str(el) for el in elements])
|
sample[self.text_key] = "\n\n".join([str(el) for el in elements])
|
||||||
elif filetype in ["txt", "md", "markdown", "xml", "html", "csv", "json", "jsonl"]:
|
elif filetype in ["txt", "md", "markdown", "xml", "html", "json", "jsonl"]:
|
||||||
with open(filepath, 'rb') as f:
|
with open(filepath, 'rb') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
sample[self.text_key] = content.decode("utf-8-sig").replace("\r\n", "\n")
|
sample[self.text_key] = content.decode("utf-8-sig").replace("\r\n", "\n")
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ dependencies = [
|
|||||||
"loguru>=0.7.3",
|
"loguru>=0.7.3",
|
||||||
"opencv-python-headless>=4.12.0.88",
|
"opencv-python-headless>=4.12.0.88",
|
||||||
"ray[data,default]==2.52.1",
|
"ray[data,default]==2.52.1",
|
||||||
"unstructured[csv,docx,pptx,xlsx]==0.18.15",
|
"unstructured[csv,docx,pptx,xlsx,pdf,md]==0.18.15",
|
||||||
"uvicorn[standard]>=0.38.0",
|
"uvicorn[standard]>=0.38.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,9 @@ WORKDIR /opt/runtime
|
|||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install -e . --system \
|
uv pip install -e . --system \
|
||||||
&& uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
|
&& UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
|
||||||
|
&& uv pip uninstall torch torchvision triton --system \
|
||||||
|
&& uv pip list | grep -E '^nvidia-' | awk '{print $1}' | xargs -r uv pip uninstall --system \
|
||||||
&& python -m spacy download zh_core_web_sm
|
&& python -m spacy download zh_core_web_sm
|
||||||
|
|
||||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
|
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
|
||||||
|
|||||||
Reference in New Issue
Block a user