You've already forked DataMate
23
deployment/helm/milvus/charts/tei/.helmignore
Normal file
23
deployment/helm/milvus/charts/tei/.helmignore
Normal file
@@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
16
deployment/helm/milvus/charts/tei/Chart.yaml
Normal file
16
deployment/helm/milvus/charts/tei/Chart.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
# Chart metadata for the text-embeddings-inference (TEI) sub-chart.
#
# NOTE: the `type` field only exists in apiVersion v2 charts; `helm lint`
# rejects it on an apiVersion v1 chart. It is therefore omitted here
# (a chart without `type` is treated as an application chart).
apiVersion: v1
name: tei
description: A text-embeddings-inference Helm chart for Kubernetes
# Chart version (SemVer).
version: 1.6.0
# Version of the packaged TEI application; quoted so it is always a string.
appVersion: "1.6.0"
home: https://github.com/huggingface/text-embeddings-inference
icon: https://avatars.githubusercontent.com/u/25720743?s=200&v=4
keywords:
  - embeddings
  - inference
maintainers:
  - name: zilliz
    email: devops@zilliz.com
sources:
  - https://github.com/huggingface/text-embeddings-inference
|
||||
169
deployment/helm/milvus/charts/tei/README.md
Normal file
169
deployment/helm/milvus/charts/tei/README.md
Normal file
@@ -0,0 +1,169 @@
|
||||
# Text Embeddings Inference (TEI) Integration Guide
|
||||
|
||||
This document describes how to use Text Embeddings Inference (TEI) service with Milvus Helm Chart, and how to integrate TEI with Milvus. TEI is an open-source project developed by Hugging Face, available at [https://github.com/huggingface/text-embeddings-inference](https://github.com/huggingface/text-embeddings-inference).
|
||||
|
||||
## Overview
|
||||
|
||||
Text Embeddings Inference (TEI) is a high-performance text embedding model inference service that converts text into vector representations. Milvus is a vector database that can store and retrieve these vectors. By combining the two, you can build powerful semantic search and retrieval systems.
|
||||
|
||||
## Deployment Methods
|
||||
|
||||
This guide provides two ways to use TEI:
|
||||
1. Deploy TEI service directly through the Milvus Helm Chart
|
||||
2. Use external TEI service with Milvus integration
|
||||
|
||||
## Deploy TEI through Milvus Helm Chart
|
||||
|
||||
### Basic Configuration
|
||||
|
||||
```yaml
|
||||
enabled: true # Enable the TEI service
modelId: "BAAI/bge-large-en-v1.5" # Specify the model to use
|
||||
```
|
||||
|
||||
This is the simplest configuration: set `enabled: true` and choose the desired `modelId`. All other options fall back to their defaults.
|
||||
|
||||
### Complete Configuration Options
|
||||
|
||||
```yaml
|
||||
modelId: "BAAI/bge-large-en-v1.5" # Model ID
|
||||
extraArgs: [] # Additional command line arguments for TEI, such as "--max-batch-tokens=16384", "--max-client-batch-size=32", "--max-concurrent-requests=128", etc.
|
||||
replicaCount: 1 # Number of TEI replicas
|
||||
image:
|
||||
repository: ghcr.io/huggingface/text-embeddings-inference # Image repository
|
||||
tag: cpu-1.6 # Image tag (CPU version)
|
||||
pullPolicy: IfNotPresent # Image pull policy
|
||||
service:
|
||||
type: ClusterIP # Service type
|
||||
port: 8080 # Service port
|
||||
annotations: {} # Service annotations
|
||||
labels: {} # Service labels
|
||||
resources: # Resource configuration
|
||||
requests:
|
||||
cpu: "4" # CPU request
|
||||
memory: "8Gi" # Memory request
|
||||
limits:
|
||||
cpu: "8" # CPU limit
|
||||
memory: "16Gi" # Memory limit
|
||||
persistence: # Persistence storage configuration
|
||||
enabled: true # Enable persistence storage
|
||||
mountPath: "/data" # Mount path
|
||||
annotations: {} # Storage annotations
|
||||
persistentVolumeClaim: # PVC configuration
|
||||
existingClaim: "" # Use existing PVC
|
||||
storageClass: # Storage class
|
||||
accessModes: ReadWriteOnce # Access modes
|
||||
size: 50Gi # Storage size
|
||||
subPath: "" # Sub path
|
||||
nodeSelector: {} # Node selector
|
||||
affinity: {} # Affinity configuration
|
||||
tolerations: [] # Tolerations
|
||||
topologySpreadConstraints: [] # Topology spread constraints
|
||||
extraEnv: [] # Additional environment variables
|
||||
```
|
||||
|
||||
### Using GPU Acceleration
|
||||
|
||||
If you have GPU resources, you can use the GPU version of the TEI image to accelerate inference:
|
||||
|
||||
```yaml
|
||||
enabled: true
|
||||
modelId: "BAAI/bge-large-en-v1.5"
|
||||
image:
|
||||
repository: ghcr.io/huggingface/text-embeddings-inference
|
||||
tag: "1.6" # GPU version (quoted so YAML does not parse the tag as a float)
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1 # Allocate 1 GPU
|
||||
```
|
||||
|
||||
|
||||
## Frequently Asked Questions
|
||||
|
||||
### How to determine the embedding dimension of a model?
|
||||
|
||||
Different models have different embedding dimensions. Here are the dimensions of some commonly used models:
|
||||
- BAAI/bge-large-en-v1.5: 1024
|
||||
- BAAI/bge-base-en-v1.5: 768
|
||||
- nomic-ai/nomic-embed-text-v1: 768
|
||||
- sentence-transformers/all-mpnet-base-v2: 768
|
||||
|
||||
You can find this information in the model's documentation or get it through the TEI service's API.
|
||||
|
||||
### How to test if the TEI service is working properly?
|
||||
|
||||
After deploying the TEI service, you can use the following commands to test if the service is working properly:
|
||||
|
||||
```bash
|
||||
# Get the TEI service endpoint
|
||||
export TEI_SERVICE=$(kubectl get svc -l app.kubernetes.io/name=tei -o jsonpath='{.items[0].metadata.name}')
|
||||
|
||||
# Test the embedding functionality
|
||||
kubectl run -it --rm curl --image=curlimages/curl -- curl -X POST "http://${TEI_SERVICE}:8080/embed" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"inputs":"This is a test text"}'
|
||||
```
|
||||
|
||||
### How to use TEI-generated embeddings in Milvus?
|
||||
|
||||
In Milvus, you can use TEI-generated embeddings for the following operations:
|
||||
|
||||
1. When creating a collection, specify the vector dimension to match the TEI model output dimension
|
||||
2. Before inserting data, use the TEI service to convert text to vectors
|
||||
3. When searching, similarly use the TEI service to convert query text to vectors
|
||||
|
||||
## Using Milvus Text Embedding Function
|
||||
|
||||
Milvus provides a text embedding function feature that allows you to generate vector embeddings directly within Milvus. You can configure Milvus to use TEI as the backend for this function.
|
||||
|
||||
### Using the Text Embedding Function in Milvus
|
||||
|
||||
1. Specify the embedding function when creating a collection:
|
||||
|
||||
```python
|
||||
from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType
|
||||
|
||||
# Connect to Milvus
|
||||
connections.connect(host="localhost", port="19530")
|
||||
|
||||
# Define collection schema
|
||||
fields = [
|
||||
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
|
||||
FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=1000),
|
||||
FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=1024) # Dimension must match the model output (BAAI/bge-large-en-v1.5 produces 1024-dimensional vectors)
|
||||
]
|
||||
schema = CollectionSchema(fields=fields, description="Text collection with embedding function")
|
||||
|
||||
# Create collection and specify embedding function
|
||||
collection = Collection(
|
||||
name="text_collection",
|
||||
schema=schema,
|
||||
embedding_field="text", # Specify the field to embed
|
||||
vector_field="vector", # Specify the field to store embedding vectors
|
||||
embedding_config={
|
||||
"provider": "tei",
|
||||
"model_id": "BAAI/bge-large-en-v1.5",
|
||||
"endpoint": "http://tei-service:8080"
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
2. Automatically generate embeddings when inserting data:
|
||||
|
||||
```python
|
||||
# Insert data, Milvus will automatically call the TEI service to generate embedding vectors
|
||||
collection.insert([
|
||||
{"id": 1, "text": "This is a sample document about artificial intelligence."},
|
||||
{"id": 2, "text": "Vector databases are designed to handle embeddings efficiently."}
|
||||
])
|
||||
```
|
||||
|
||||
3. Automatically generate query embeddings when searching:
|
||||
|
||||
```python
|
||||
# Search directly using text, Milvus will automatically call the TEI service to generate query vectors
|
||||
results = collection.search(
|
||||
query_texts=["Tell me about AI technology"],
|
||||
embedding_field="text",
|
||||
limit=3
|
||||
)
|
||||
```
|
||||
12
deployment/helm/milvus/charts/tei/ci/tei-mininum-values.yaml
Normal file
12
deployment/helm/milvus/charts/tei/ci/tei-mininum-values.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# Value overrides for this chart kept under ci/.
# Lowers the CPU/memory requests and the PVC size relative to the chart defaults.

# Container resource requests and limits.
resources:
  limits:
    cpu: "8"
    memory: "16Gi"
  requests:
    cpu: "0.1"
    memory: "2Gi"

# Keep persistence on, but with a smaller volume.
persistence:
  enabled: true
  persistentVolumeClaim:
    size: 10Gi
|
||||
62
deployment/helm/milvus/charts/tei/templates/_helpers.tpl
Normal file
62
deployment/helm/milvus/charts/tei/templates/_helpers.tpl
Normal file
@@ -0,0 +1,62 @@
|
||||
{{/*
Short chart name: `nameOverride` when set, otherwise the chart's own name,
cut to 63 characters with any trailing "-" removed.
*/}}
{{- define "tei.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
|
||||
|
||||
{{/*
Fully qualified app name.
Resolution order:
  1. `fullnameOverride`, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
The result is cut to 63 characters (some Kubernetes name fields are limited
to this by the DNS naming spec) and any trailing "-" is removed.
*/}}
{{- define "tei.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{ .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- $fullName := printf "%s-%s" .Release.Name $chartName -}}
{{- if contains $chartName .Release.Name -}}
{{- $fullName = .Release.Name -}}
{{- end -}}
{{ $fullName | trunc 63 | trimSuffix "-" }}
{{- end -}}
{{- end }}
|
||||
|
||||
{{/*
"<chart name>-<chart version>" value used for the helm.sh/chart label.
"+" is replaced with "_", then the value is cut to 63 characters and any
trailing "-" is removed.
*/}}
{{- define "tei.chart" -}}
{{- list .Chart.Name .Chart.Version | join "-" | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
|
||||
|
||||
{{/*
Common labels: chart label, the selector labels, the app version (only when
the chart declares one) and the managing service.
*/}}
{{- define "tei.labels" -}}
helm.sh/chart: {{ include "tei.chart" . }}
{{ include "tei.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
|
||||
|
||||
{{/*
Selector labels: the chart name and the release instance.
Used both in the common labels and in the Service / StatefulSet selectors.
*/}}
{{- define "tei.selectorLabels" -}}
app.kubernetes.io/name: {{ template "tei.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
|
||||
|
||||
{{/*
Name of the service account used by the pods.
`serviceAccount.name` wins when set; otherwise the fallback is the chart's
full name when the account is created by this chart, or "default" when not.
*/}}
{{- define "tei.serviceAccountName" -}}
{{- $fallback := "default" -}}
{{- if .Values.serviceAccount.create -}}
{{- $fallback = include "tei.fullname" . -}}
{{- end -}}
{{ .Values.serviceAccount.name | default $fallback }}
{{- end }}
|
||||
@@ -0,0 +1,15 @@
|
||||
{{- /*
Headless Service (clusterIP: None) named "<fullname>-headless".
The StatefulSet references it through `serviceName`.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ printf "%s-headless" (include "tei.fullname" .) }}
  labels: {{- include "tei.labels" . | nindent 4 }}
spec:
  clusterIP: None
  selector: {{- include "tei.selectorLabels" . | nindent 4 }}
  ports:
    - name: http
      protocol: TCP
      port: {{ .Values.service.port }}
      # Resolved against the container port named "http".
      targetPort: http
|
||||
19
deployment/helm/milvus/charts/tei/templates/service.yaml
Normal file
19
deployment/helm/milvus/charts/tei/templates/service.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
{{- /*
Service exposing the TEI pods on `service.port`.
Extra labels from `service.labels` and annotations from `service.annotations`
are merged into the metadata when provided.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "tei.fullname" . }}
  labels:
    {{- include "tei.labels" . | nindent 4 }}
    {{- /* service.labels is declared in values.yaml; render it so it is not silently ignored. */}}
    {{- with .Values.service.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- with .Values.service.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      # Resolved against the container port named "http".
      targetPort: http
      protocol: TCP
      name: http
  selector:
    {{- include "tei.selectorLabels" . | nindent 4 }}
|
||||
@@ -0,0 +1,13 @@
|
||||
{{- /*
ServiceAccount for the TEI pods; rendered only when `serviceAccount.create`
is true.
*/ -}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
metadata:
  name: {{ include "tei.serviceAccountName" . }}
  labels: {{- include "tei.labels" . | nindent 4 }}
  {{- if .Values.serviceAccount.annotations }}
  annotations: {{- toYaml .Values.serviceAccount.annotations | nindent 4 }}
  {{- end }}
{{- end }}
|
||||
103
deployment/helm/milvus/charts/tei/templates/statefulset.yaml
Normal file
103
deployment/helm/milvus/charts/tei/templates/statefulset.yaml
Normal file
@@ -0,0 +1,103 @@
|
||||
{{- /*
StatefulSet running the text-embeddings-inference (TEI) server.

Storage for the "data-volume" mount is chosen as follows:
  - persistence.enabled=false                 -> emptyDir
  - persistence.enabled=true + existingClaim  -> the named, pre-existing PVC
  - persistence.enabled=true (default)        -> one PVC per replica through
                                                 volumeClaimTemplates

Values honoured here that were previously declared but ignored:
topologySpreadConstraints, persistence.annotations, and
persistence.persistentVolumeClaim.{accessModes,existingClaim,subPath}.
With the default values the rendered volumeClaimTemplates spec is unchanged.
*/ -}}
{{- $pvc := .Values.persistence.persistentVolumeClaim | default dict -}}
{{- $useClaimTemplate := and .Values.persistence.enabled (not $pvc.existingClaim) -}}
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{ include "tei.fullname" . }}
  labels:
    {{- include "tei.labels" . | nindent 4 }}
    {{- with .Values.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- with .Values.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  serviceName: {{ include "tei.fullname" . }}-headless
  podManagementPolicy: Parallel
  {{- /* `default` treats 0 as empty, so only fall back to 1 when the value is unset. */}}
  replicas: {{ if kindIs "invalid" .Values.replicaCount }}1{{ else }}{{ .Values.replicaCount }}{{ end }}
  selector:
    matchLabels:
      {{- include "tei.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "tei.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "tei.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            - "--model-id"
            - {{ .Values.modelId | quote }}
            {{- range .Values.extraArgs }}
            - {{ . | quote }}
            {{- end }}
          ports:
            # The Services target this port by its name ("http").
            - containerPort: 80
              name: http
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          {{- /* Entries must sit below the container's own keys; a shallower indent would turn them into sibling containers. */}}
          {{- with .Values.extraEnv }}
          env:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumeMounts:
            - name: data-volume
              mountPath: {{ .Values.persistence.mountPath }}
              {{- with $pvc.subPath }}
              subPath: {{ . | quote }}
              {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
      {{- /* Emit `volumes:` only when it will have at least one entry. */}}
      {{- if or (not $useClaimTemplate) .Values.volumes }}
      volumes:
        {{- if not .Values.persistence.enabled }}
        - name: data-volume
          emptyDir: {}
        {{- else if $pvc.existingClaim }}
        - name: data-volume
          persistentVolumeClaim:
            claimName: {{ $pvc.existingClaim | quote }}
        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}
  {{- if $useClaimTemplate }}
  volumeClaimTemplates:
    - metadata:
        name: data-volume
        {{- with .Values.persistence.annotations }}
        annotations:
          {{- toYaml . | nindent 10 }}
        {{- end }}
      spec:
        accessModes:
          {{- /* Accept either a single mode (string) or a list of modes. */}}
          {{- if kindIs "slice" $pvc.accessModes }}
          {{- toYaml $pvc.accessModes | nindent 10 }}
          {{- else }}
          - {{ $pvc.accessModes | default "ReadWriteOnce" | quote }}
          {{- end }}
        {{- if $pvc.storageClass }}
        {{- /* "-" requests an explicitly empty storageClassName. */}}
        {{- if eq "-" $pvc.storageClass }}
        storageClassName: ""
        {{- else }}
        storageClassName: {{ $pvc.storageClass | quote }}
        {{- end }}
        {{- end }}
        resources:
          requests:
            storage: {{ $pvc.size }}
  {{- end }}
|
||||
85
deployment/helm/milvus/charts/tei/values.yaml
Normal file
85
deployment/helm/milvus/charts/tei/values.yaml
Normal file
@@ -0,0 +1,85 @@
|
||||
# Default values for tei.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# TEI model configuration: passed to the server as `--model-id`.
modelId: "BAAI/bge-large-en-v1.5"
# Additional command line arguments appended after `--model-id`.
extraArgs: []

# Number of StatefulSet replicas.
replicaCount: 1

image:
  repository: ghcr.io/huggingface/text-embeddings-inference
  # When empty, the chart's appVersion is used as the tag.
  tag: cpu-1.6
  pullPolicy: IfNotPresent
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# Extra labels / annotations added to the StatefulSet metadata.
labels: {}
annotations: {}

# Extra annotations / labels added to the pod template.
podAnnotations: {}
podLabels: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: ClusterIP
  port: 8080
  annotations: {}
  labels: {}

resources:
  requests:
    cpu: "4"
    memory: "8Gi"
  limits:
    cpu: "8"
    memory: "16Gi"

persistence:
  # When false, the data volume is an emptyDir.
  enabled: true
  # Path inside the container where the data volume is mounted.
  mountPath: "/data"
  annotations: {}
  persistentVolumeClaim:
    existingClaim: ""
    # Empty: no storageClassName is rendered.
    # "-": storageClassName is rendered as "" (empty string).
    storageClass: ""
    accessModes: ReadWriteOnce
    size: 50Gi
    subPath: ""

nodeSelector: {}
affinity: {}
tolerations: []
topologySpreadConstraints: []
extraEnv: []

# Additional volumes on the output StatefulSet definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output StatefulSet definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true
|
||||
Reference in New Issue
Block a user