refactor: modify data collection to python implementation (#214)

* feature: LabelStudio jumps without login

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* fix: remove terrabase dependency

* feature: add the collection task executions page and the collection template page

* fix: fix the collection task creation

* fix: fix the collection task creation
This commit is contained in:
hefanli
2025-12-30 18:48:43 +08:00
committed by GitHub
parent 80d4dfd285
commit 63f4e3e447
71 changed files with 1861 additions and 2557 deletions

View File

@@ -1,9 +1,26 @@
# Builder stage: compiles Alibaba DataX from source with Maven on JDK 8.
# Only the packaged distribution under /DataX/target is meant to leave this stage.
FROM maven:3-eclipse-temurin-8 AS datax-builder
# git is needed only to fetch the sources; ca-certificates is listed explicitly
# because --no-install-recommends would otherwise not guarantee it for the HTTPS clone.
# A shallow clone is enough since only the tip of the default branch is built.
# The apt lists are removed in the same layer so they never persist.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates git && \
    rm -rf /var/lib/apt/lists/* && \
    git clone --depth 1 https://github.com/alibaba/DataX.git /DataX
# Overlay the project's own DataX files on top of the upstream checkout.
COPY runtime/datax/ /DataX/
WORKDIR /DataX
# Rewrite the MySQL driver class name to the com.mysql.cj one, then build the
# DataX assembly with tests skipped.
RUN sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
        plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
    mvn -U clean package assembly:assembly -Dmaven.test.skip=true
# Runtime stage: Python 3.12 on the slim base image.
FROM python:3.12-slim
# Final stage of a multi-stage build (the datax-builder stage above supplies DataX).
# Build cache optimization relies on BuildKit cache mounts.
# Note: to use the cache mount syntax you must build with BuildKit enabled:
# DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python
# Install a headless Java 21 runtime without recommended extras and drop the apt
# lists in the same layer. Presumably the JRE is here to run the DataX
# distribution copied into this image later — confirm.
RUN apt-get update \
&& apt-get install -y --no-install-recommends openjdk-21-jre-headless \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
# Poetry configuration
@@ -12,7 +29,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
POETRY_VIRTUALENVS_CREATE=false \
POETRY_CACHE_DIR=/tmp/poetry_cache
# Prepend /root/.local/bin to PATH (presumably where pipx places installed tools — confirm).
ENV PATH="/root/.local/bin:$PATH"
# NOTE(review): this JAVA_HOME path ends in "-amd64", so it is architecture-specific;
# confirm the image is only ever built for amd64 or derive the path at build time.
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
# NOTE(review): PATH is assigned a second time here, now also adding $JAVA_HOME/bin.
# The earlier PATH line looks like the pre-change side of the diff — confirm only one should remain.
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"
# Working directory for the instructions that follow.
WORKDIR /app
@@ -22,6 +41,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
&& pip install --root-user-action=ignore pipx \
&& pipx install "poetry==$POETRY_VERSION"
# Bring the DataX distribution produced by the datax-builder stage into /opt/datax.
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
# Copy only dependency files first (leverages layer caching when dependencies don't change)
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/

View File

@@ -1,28 +1,19 @@
# Build stage for the API gateway: Maven on Temurin JDK 21.
FROM maven:3-eclipse-temurin-21 AS builder
# Clones Terrabase, checks out the pyh/feat_terrabase_develop branch, then packages and
# installs it with tests skipped.
# NOTE(review): the commit message says the Terrabase dependency was removed; this
# instruction looks like the pre-change side of the diff — confirm before relying on it.
RUN apt-get update && \
apt-get install -y git && \
git clone https://github.com/ModelEngine-Group/Terrabase.git && \
cd Terrabase && \
git -c core.quotepath=false -c log.showSignature=false checkout -b pyh/feat_terrabase_develop origin/pyh/feat_terrabase_develop -- && \
mvn -U clean package install -Dmaven.test.skip=true
# Copy the backend sources into the build location.
COPY backend/ /opt/gateway
# Package the api-gateway module with tests skipped, then list the target directory.
RUN cd /opt/gateway/api-gateway && \
mvn -U clean package -Dmaven.test.skip=true && \
ls /opt/gateway/api-gateway/target
# NOTE(review): the next line repeats the Maven command outside any instruction. It appears
# to be the post-change form of the RUN above (without the trailing ls) — confirm.
mvn -U clean package -Dmaven.test.skip=true
# Runtime stage for the API gateway, based on the Temurin 21 JDK image.
FROM eclipse-temurin:21-jdk
# Install helper tools, then clean the apt cache and lists in the same layer.
# NOTE(review): two apt-get install lines appear back to back. The first (with the
# python packages) looks like the pre-change side of the diff — confirm only one should remain.
RUN apt-get update && \
apt-get install -y vim wget curl python3 python3-pip python-is-python3 dos2unix && \
apt-get install -y vim wget curl dos2unix && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Copy the gateway jar produced by the builder stage.
COPY --from=builder /opt/gateway/api-gateway/target/gateway.jar /opt/gateway/gateway.jar
# NOTE(review): this copies Terrabase jars from the builder stage; it only works if the
# builder still produces /Terrabase, which the commit message suggests it no longer does — confirm.
COPY --from=builder /Terrabase/enterprise-impl-commercial/target/*.jar /opt/terrabase/
# Copy the start script from the build context.
COPY scripts/images/gateway/start.sh /opt/gateway/start.sh