diff --git a/.github/workflows/docker-image-backend.yml b/.github/workflows/docker-image-backend.yml index 9a3fa58..f638298 100644 --- a/.github/workflows/docker-image-backend.yml +++ b/.github/workflows/docker-image-backend.yml @@ -5,6 +5,7 @@ on: branches: [ "main" ] paths: - 'backend/**' + - '!backend/api-gateway/**' - 'scripts/images/backend/**' - '.github/workflows/docker-image-backend.yml' - '.github/workflows/docker-images-reusable.yml' @@ -12,6 +13,7 @@ on: branches: [ "main" ] paths: - 'backend/**' + - '!backend/api-gateway/**' - 'scripts/images/backend/**' - '.github/workflows/docker-image-backend.yml' - '.github/workflows/docker-images-reusable.yml' diff --git a/.github/workflows/docker-image-gateway.yml b/.github/workflows/docker-image-gateway.yml index a3ba63b..0c214f2 100644 --- a/.github/workflows/docker-image-gateway.yml +++ b/.github/workflows/docker-image-gateway.yml @@ -4,14 +4,14 @@ on: push: branches: [ "main" ] paths: - - 'backend/**' + - 'backend/api-gateway/**' - 'scripts/images/gateway/**' - '.github/workflows/docker-image-gateway.yml' - '.github/workflows/docker-images-reusable.yml' pull_request: branches: [ "main" ] paths: - - 'backend/**' + - 'backend/api-gateway/**' - 'scripts/images/gateway/**' - '.github/workflows/docker-image-gateway.yml' - '.github/workflows/docker-images-reusable.yml' diff --git a/.github/workflows/docker-image-label-studio.yml b/.github/workflows/docker-image-label-studio.yml new file mode 100644 index 0000000..09634b1 --- /dev/null +++ b/.github/workflows/docker-image-label-studio.yml @@ -0,0 +1,28 @@ +name: Label Studio Docker Image CI + +on: + push: + branches: [ "main" ] + paths: + - 'scripts/images/label-studio/**' + - '.github/workflows/docker-image-label-studio.yml' + - '.github/workflows/docker-images-reusable.yml' + pull_request: + branches: [ "main" ] + paths: + - 'scripts/images/label-studio/**' + - '.github/workflows/docker-image-label-studio.yml' + - '.github/workflows/docker-images-reusable.yml' + 
workflow_dispatch: + workflow_call: + +jobs: + call-docker-build: + name: Build and Push Label Studio Docker Image + uses: ./.github/workflows/docker-images-reusable.yml + permissions: + contents: read + packages: write + with: + service_name: label-studio + build_dir: . diff --git a/.github/workflows/enterprise-package.yml b/.github/workflows/enterprise-package.yml deleted file mode 100644 index 5efb39f..0000000 --- a/.github/workflows/enterprise-package.yml +++ /dev/null @@ -1,110 +0,0 @@ -name: DataMate Package - -on: - workflow_dispatch: - inputs: - milvus: - type: boolean - description: 是否打包milvus - required: false - default: true - deer-flow: - type: boolean - description: 是否打包deer-flow - required: false - default: false - version: - type: string - description: 版本号 - required: false - default: 1.0.0 - -jobs: - package-all: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Clone DataMate-Deploy - uses: GuillaumeFalourd/clone-github-repo-action@v2.3 - with: - owner: ModelEngine-Group - repository: 'DataMate-Deploy' - access-token: ${{ secrets.ACCESS_TOKEN }} - - - name: Copy tools - run: | - cp -r DataMate-Deploy/tools tools - - - name: DataMate Package - run: | - mkdir helm - cp -r deployment/helm/datamate helm/datamate - sed -i "s#^\(\s*repository:\s*\).*#\1\"\"#" helm/datamate/values.yaml - sed -i "s#^\(\s*type:\s*\).*#\1ClusterIP#" helm/datamate/values.yaml - - - name: DeerFlow Package - if: inputs.deer-flow == true - run: | - cp runtime/deer-flow/.env deployment/helm/deer-flow/charts/public/.env - cp runtime/deer-flow/conf.yaml deployment/helm/deer-flow/charts/public/conf.yaml - cp -r deployment/helm/deer-flow helm/deer-flow - - - name: Milvus Package - if: inputs.milvus == true - run: | - cp -r deployment/helm/milvus helm/milvus - sed -i "s#^\(\s*workload:\s*\).*#\1deployment#" helm/milvus/values.yaml - - - name: Download DataMate Image - run: | - mkdir -p images/datamate - LOWERCASE_REPO=$(echo "${{ 
github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - services=("runtime" "backend" "frontend" "database" "backend-python" "gateway") - for service in "${services[@]}"; do - docker pull ghcr.io/$LOWERCASE_REPO/datamate-$service:latest --platform arm64 - docker tag ghcr.io/$LOWERCASE_REPO/datamate-$service:latest datamate-$service:latest - docker save -o images/datamate/datamate-$service.tar datamate-$service:latest - docker rmi ghcr.io/$LOWERCASE_REPO/datamate-$service:latest datamate-$service:latest - done - docker pull quay.io/kuberay/operator:v1.4.2 --platform arm64 - docker save -o images/datamate/kuberay-operator.tar quay.io/kuberay/operator:v1.4.2 - docker rmi quay.io/kuberay/operator:v1.4.2 - - - name: Download DeerFlow Image - if: inputs.deer-flow == true - run: | - mkdir -p images/deer-flow - LOWERCASE_REPO=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - services=("backend" "frontend") - for service in "${services[@]}"; do - docker pull ghcr.io/$LOWERCASE_REPO/deer-flow-$service:latest --platform arm64 - docker tag ghcr.io/$LOWERCASE_REPO/deer-flow-$service:latest deer-flow-$service:latest - docker save -o images/deer-flow/deer-flow-$service.tar deer-flow-$service:latest - docekr rmi ghcr.io/$LOWERCASE_REPO/deer-flow-$service:latest deer-flow-$service:latest - done - - - name: Download Milvus Image - if: inputs.milvus == true - run: | - mkdir -p images/milvus - docker pull milvusdb/milvus:v2.6.5 --platform arm64 - docker save -o images/milvus/milvus.tar milvusdb/milvus:v2.6.5 - docker rmi milvusdb/milvus:v2.6.5 - docker pull minio/minio:RELEASE.2024-12-18T13-15-44Z --platform arm64 - docker save -o images/milvus/minio.tar minio/minio:RELEASE.2024-12-18T13-15-44Z - docker rmi minio/minio:RELEASE.2024-12-18T13-15-44Z - docker pull milvusdb/etcd:3.5.18-r1 --platform arm64 - docker save -o images/milvus/etcd.tar milvusdb/etcd:3.5.18-r1 - docker rmi milvusdb/etcd:3.5.18-r1 - - - name: Upload Package - uses: 
actions/upload-artifact@v4 - with: - name: DataMate_${{ inputs.version }}_Aarch64 - include-hidden-files: true - path: | - helm/ - images/ - tools/ diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml deleted file mode 100644 index 9719933..0000000 --- a/.github/workflows/package.yml +++ /dev/null @@ -1,78 +0,0 @@ -name: Package All - -on: - workflow_dispatch: - -jobs: - backend-docker-build: - name: Build and Push Backend Docker Image - uses: ./.github/workflows/docker-image-save.yml - with: - service_name: backend - - frontend-docker-build: - name: Build and Push Frontend Docker Image - uses: ./.github/workflows/docker-image-save.yml - with: - service_name: frontend - - database-docker-build: - name: Build and Push Database Docker Image - uses: ./.github/workflows/docker-image-save.yml - with: - service_name: database - - runtime-docker-build: - name: Build and Push Runtime Docker Image - uses: ./.github/workflows/docker-image-save.yml - with: - service_name: runtime - - backend-python-docker-build: - name: Build and Push Backend Python Docker Image - uses: ./.github/workflows/docker-image-save.yml - with: - service_name: backend-python - - gateway-docker-build: - name: Build and Push Frontend Docker Image - uses: ./.github/workflows/docker-image-save.yml - with: - service_name: gateway - - package-all: - needs: - - backend-docker-build - - frontend-docker-build - - database-docker-build - - backend-python-docker-build - - runtime-docker-build - - gateway-docker-build - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Download Image - uses: actions/download-artifact@v5 - with: - path: images - pattern: datamate-* - merge-multiple: true - - - name: Package - run: | - cp runtime/deer-flow/.env deployment/docker/deer-flow/.env - cp runtime/deer-flow/conf.yaml deployment/docker/deer-flow/conf.yaml - cp runtime/deer-flow/.env deployment/helm/deer-flow/charts/public/.env - cp runtime/deer-flow/conf.yaml 
deployment/helm/deer-flow/charts/public/conf.yaml - tar -cvf datamate.tar deployment/ images - - - name: Upload Package - uses: actions/upload-artifact@v4 - with: - name: DataMate - include-hidden-files: true - path: | - deployment/ - images/ \ No newline at end of file diff --git a/Makefile b/Makefile index b8f648e..08f3240 100644 --- a/Makefile +++ b/Makefile @@ -155,7 +155,7 @@ endef # ========== Build Targets ========== # Valid build targets -VALID_BUILD_TARGETS := backend database frontend runtime backend-python deer-flow mineru mineru-npu gateway +VALID_BUILD_TARGETS := backend database frontend runtime backend-python deer-flow mineru mineru-npu gateway label-studio # Generic docker build target with service name as parameter # Automatically prefixes image names with "datamate-" unless it's deer-flow @@ -231,7 +231,6 @@ else fi @$(MAKE) label-studio-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \ $(MAKE) milvus-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \ - $(MAKE) datamate-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \ $(MAKE) deer-flow-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \ $(MAKE) datamate-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE endif @@ -321,7 +320,9 @@ VALID_K8S_TARGETS := mineru datamate deer-flow milvus label-studio done; \ exit 1; \ fi - @if [ "$*" = "mineru" ]; then \ + @if [ "$*" = "label-studio" ]; then \ + helm upgrade label-studio deployment/helm/label-studio/ -n $(NAMESPACE) --install; \ + elif [ "$*" = "mineru" ]; then \ kubectl apply -f deployment/kubernetes/mineru/deploy.yaml -n $(NAMESPACE); \ elif [ "$*" = "datamate" ]; then \ helm upgrade datamate deployment/helm/datamate/ -n $(NAMESPACE) --install --set global.image.repository=$(REGISTRY); \ diff --git a/deployment/helm/label-studio/templates/deployment.yaml b/deployment/helm/label-studio/templates/deployment.yaml index 202a95b..5ef7ec3 100644 --- 
a/deployment/helm/label-studio/templates/deployment.yaml +++ b/deployment/helm/label-studio/templates/deployment.yaml @@ -21,7 +21,11 @@ spec: spec: containers: - name: label-studio + {{- if .Values.global.imageRegistry }} + image: "{{ .Values.global.imageRegistry | trimSuffix "/" }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" + {{- else }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + {{- end }} imagePullPolicy: {{ .Values.image.pullPolicy }} args: ["label-studio-uwsgi"] env: diff --git a/deployment/helm/label-studio/templates/postgres.yaml b/deployment/helm/label-studio/templates/postgres.yaml index 1f9655a..47ee2d3 100644 --- a/deployment/helm/label-studio/templates/postgres.yaml +++ b/deployment/helm/label-studio/templates/postgres.yaml @@ -31,7 +31,11 @@ spec: spec: containers: - name: postgres + {{- if .Values.global.imageRegistry }} + image: "{{ .Values.global.imageRegistry | trimSuffix "/" }}/{{ .Values.postgres.image.repository }}:{{ .Values.postgres.image.tag }}" + {{- else }} image: "{{ .Values.postgres.image.repository }}:{{ .Values.postgres.image.tag }}" + {{- end }} imagePullPolicy: {{ .Values.postgres.image.pullPolicy }} env: - name: POSTGRES_HOST_AUTH_METHOD diff --git a/deployment/helm/label-studio/values.yaml b/deployment/helm/label-studio/values.yaml index 696e5a4..c16f0d8 100644 --- a/deployment/helm/label-studio/values.yaml +++ b/deployment/helm/label-studio/values.yaml @@ -4,6 +4,9 @@ fullnameOverride: label-studio replicaCount: 1 +global: + imageRegistry: "" + image: repository: heartexlabs/label-studio tag: "latest" diff --git a/scripts/images/label-studio/Dockerfile b/scripts/images/label-studio/Dockerfile new file mode 100644 index 0000000..ee7dc93 --- /dev/null +++ b/scripts/images/label-studio/Dockerfile @@ -0,0 +1,188 @@ +ARG NODE_VERSION=22 +ARG PYTHON_VERSION=3.13 +ARG POETRY_VERSION=2.1.4 +ARG VERSION_OVERRIDE +ARG BRANCH_OVERRIDE + +ARG GIT_REPO=https://github.com/ModelEngine-Group/Label-Studio.git 
+ARG GIT_BRANCH=ls-release/1.21.0.wsi + +################################ Stage: downloader (Source Code) +FROM alpine/git AS downloader +ARG GIT_REPO +ARG GIT_BRANCH +WORKDIR /label-studio + +RUN git clone --branch ${GIT_BRANCH} ${GIT_REPO} . + +################################ Overview + +# This Dockerfile builds a Label Studio environment. +# After the "downloader" stage above clones the sources, it consists of five main stages: +# 1. "frontend-builder" - Compiles the frontend assets using Node. +# 2. "frontend-version-generator" - Generates version files for frontend sources. +# 3. "venv-builder" - Prepares the virtualenv environment. +# 4. "py-version-generator" - Generates version files for python sources. +# 5. "production" - Creates the final production image with Label Studio, Nginx, and other dependencies. + +################################ Stage: frontend-builder (build frontend assets) +FROM --platform=${BUILDPLATFORM} node:${NODE_VERSION}-trixie AS frontend-builder +ENV BUILD_NO_SERVER=true \ + BUILD_NO_HASH=true \ + BUILD_NO_CHUNKS=true \ + BUILD_MODULE=true \ + YARN_CACHE_FOLDER=/root/web/.yarn \ + NX_CACHE_DIRECTORY=/root/web/.nx \ + NODE_ENV=production + +WORKDIR /label-studio/web + +# Fix Docker Arm64 Build +RUN yarn config set registry https://registry.npmjs.org/ +RUN yarn config set network-timeout 1200000 # HTTP timeout used when downloading packages, set to 20 minutes + +# [Modified] Copy package.json and the lock file from the downloader stage +COPY --from=downloader /label-studio/web/package.json . +COPY --from=downloader /label-studio/web/yarn.lock . +COPY --from=downloader /label-studio/web/tools tools + +RUN --mount=type=cache,target=/root/web/.yarn,id=yarn-cache,sharing=locked \ + --mount=type=cache,target=/root/web/.nx,id=nx-cache,sharing=locked \ + yarn install --prefer-offline --no-progress --pure-lockfile --frozen-lockfile --ignore-engines --non-interactive --production=false + +# [Modified] Copy the frontend sources from the downloader stage +COPY --from=downloader /label-studio/web/ .
+# [Modified] Copy pyproject.toml from the downloader stage +COPY --from=downloader /label-studio/pyproject.toml ../pyproject.toml + +RUN --mount=type=cache,target=/root/web/.yarn,id=yarn-cache,sharing=locked \ + --mount=type=cache,target=/root/web/.nx,id=nx-cache,sharing=locked \ + yarn run build + +################################ Stage: frontend-version-generator +FROM frontend-builder AS frontend-version-generator +# [Modified] Previously the local .git was bind-mounted; now the .git directory from the downloader stage must be copied in +# Note: WORKDIR is currently /label-studio/web, so .git is placed in the parent directory to match the original logic +COPY --from=downloader /label-studio/.git ../.git + +RUN --mount=type=cache,target=/root/web/.yarn,id=yarn-cache,sharing=locked \ + --mount=type=cache,target=/root/web/.nx,id=nx-cache,sharing=locked \ + yarn version:libs + +################################ Stage: venv-builder (prepare the virtualenv) +FROM python:${PYTHON_VERSION}-slim-trixie AS venv-builder +ARG POETRY_VERSION + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + PIP_CACHE_DIR="/.cache" \ + POETRY_CACHE_DIR="/.poetry-cache" \ + POETRY_HOME="/opt/poetry" \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON=true \ + PATH="/opt/poetry/bin:$PATH" + +ADD https://install.python-poetry.org /tmp/install-poetry.py +RUN python /tmp/install-poetry.py + +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + set -eux; \ + apt-get update; \ + apt-get install --no-install-recommends -y \ + build-essential git; \ + apt-get autoremove -y + +WORKDIR /label-studio + +ENV VENV_PATH="/label-studio/.venv" +ENV PATH="$VENV_PATH/bin:$PATH" + +## Starting from this line all packages will be installed in $VENV_PATH + +COPY --from=downloader /label-studio/pyproject.toml /label-studio/poetry.lock /label-studio/README.md ./ + +# Set a default build argument for including dev dependencies +ARG INCLUDE_DEV=false + +#
Install dependencies +RUN --mount=type=cache,target=/.poetry-cache,id=poetry-cache,sharing=locked \ + poetry check --lock && \ + if [ "$INCLUDE_DEV" = "true" ]; then \ + poetry install --no-root --extras uwsgi --with test; \ + else \ + poetry install --no-root --without test --extras uwsgi; \ + fi + +# Install LS +COPY --from=downloader /label-studio/label_studio label_studio +RUN --mount=type=cache,target=/.poetry-cache,id=poetry-cache,sharing=locked \ + poetry install --only-root --extras uwsgi && \ + python3 label_studio/manage.py collectstatic --no-input + +################################ Stage: py-version-generator +FROM venv-builder AS py-version-generator +ARG VERSION_OVERRIDE +ARG BRANCH_OVERRIDE + +COPY --from=downloader /label-studio/.git ./.git + +# Create version_.py and ls-version_.py +RUN VERSION_OVERRIDE=${VERSION_OVERRIDE} BRANCH_OVERRIDE=${BRANCH_OVERRIDE} poetry run python label_studio/core/version.py + +################################### Stage: prod +FROM python:${PYTHON_VERSION}-slim-trixie AS production + +ENV LS_DIR=/label-studio \ + HOME=/label-studio \ + LABEL_STUDIO_BASE_DATA_DIR=/label-studio/data \ + OPT_DIR=/opt/heartex/instance-data/etc \ + PATH="/label-studio/.venv/bin:$PATH" \ + DJANGO_SETTINGS_MODULE=core.settings.label_studio \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +WORKDIR $LS_DIR + +# install prerequisites for app +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \ + set -eux; \ + apt-get update; \ + apt-get upgrade -y; \ + apt-get install --no-install-recommends -y libexpat1 libgl1 libglx-mesa0 libglib2.0-0t64 \ + gnupg2 curl nginx \ + libopenjp2-7 libnuma1 libspeex1 libvdpau1 libgomp1; \ + apt-get autoremove -y + +RUN set -eux; \ + mkdir -p $LS_DIR $LABEL_STUDIO_BASE_DATA_DIR $OPT_DIR && \ + chown -R 1001:0 $LS_DIR $LABEL_STUDIO_BASE_DATA_DIR $OPT_DIR /var/log/nginx /etc/nginx + +COPY --chown=1001:0 --from=downloader 
/label-studio/deploy/default.conf /etc/nginx/nginx.conf + +# Copy essential files for installing Label Studio and its dependencies +COPY --chown=1001:0 --from=downloader /label-studio/pyproject.toml . +COPY --chown=1001:0 --from=downloader /label-studio/poetry.lock . +COPY --chown=1001:0 --from=downloader /label-studio/README.md . +COPY --chown=1001:0 --from=downloader /label-studio/LICENSE LICENSE +COPY --chown=1001:0 --from=downloader /label-studio/licenses licenses +COPY --chown=1001:0 --from=downloader /label-studio/deploy deploy + +# Copy files from build stages +COPY --chown=1001:0 --from=venv-builder $LS_DIR $LS_DIR +COPY --chown=1001:0 --from=py-version-generator $LS_DIR/label_studio/core/version_.py $LS_DIR/label_studio/core/version_.py +COPY --chown=1001:0 --from=frontend-builder $LS_DIR/web/dist $LS_DIR/web/dist +COPY --chown=1001:0 --from=frontend-version-generator $LS_DIR/web/dist/apps/labelstudio/version.json $LS_DIR/web/dist/apps/labelstudio/version.json +COPY --chown=1001:0 --from=frontend-version-generator $LS_DIR/web/dist/libs/editor/version.json $LS_DIR/web/dist/libs/editor/version.json +COPY --chown=1001:0 --from=frontend-version-generator $LS_DIR/web/dist/libs/datamanager/version.json $LS_DIR/web/dist/libs/datamanager/version.json + +USER 1001 + +EXPOSE 8080 + +ENTRYPOINT ["./deploy/docker-entrypoint.sh"] +CMD ["label-studio"] \ No newline at end of file