feature: add mysql collection and starrocks collection (#222)

* fix: fix the path for backend-python image building

* feature: add mysql collection and starrocks collection

* feature: add mysql collection and starrocks collection

* fix: change the permission of files collected from nfs to 754

* fix: delete collected files, config files and log files while deleting collection task

* fix: add the collection task detail api

* fix: change the log of collecting for dataset

* fix: add collection task selecting while creating and updating dataset

* fix: set the umask value to 0022 for java process
This commit is contained in:
hefanli
2026-01-04 19:05:08 +08:00
committed by GitHub
parent 8d61eb28c3
commit ccfb84c034
13 changed files with 208 additions and 115 deletions

View File

@@ -17,19 +17,18 @@ FROM python:3.12-slim
# Note: to use the cache mount syntax you must build with BuildKit enabled:
# DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python
# NOTE(review): the two RUN instructions below look like the before/after versions of a
# single changed step (diff markers stripped) — confirm that only one of them is present
# in the real Dockerfile.
# Variant A: install only the headless Java 21 runtime, then drop the apt package lists
# in the same layer.
RUN apt-get update \
&& apt-get install -y --no-install-recommends openjdk-21-jre-headless \
&& rm -rf /var/lib/apt/lists/*
# Variant B: install vim, the full Java 21 runtime, nfs-common and rsync, then drop the
# apt package lists in the same layer.
RUN apt-get update && \
apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common rsync && \
rm -rf /var/lib/apt/lists/*
# Python interpreter flags (no .pyc files, unbuffered output) plus Poetry settings.
# POETRY_VERSION is the value consumed by the `pipx install "poetry==$POETRY_VERSION"` step.
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
# Poetry configuration
POETRY_VERSION=2.2.1 \
POETRY_NO_INTERACTION=1 \
POETRY_VIRTUALENVS_CREATE=false \
POETRY_CACHE_DIR=/tmp/poetry_cache
# NOTE(review): two JAVA_HOME assignments appear back to back; if both were really present
# the second would override the first. They look like the before/after of a diff — verify
# that the directory finally chosen actually exists in the image for the target architecture.
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk
# Put /root/.local/bin and the JDK's bin directory ahead of the inherited PATH.
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"
@@ -42,6 +41,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
&& pipx install "poetry==$POETRY_VERSION"
# Bring the DataX distribution produced by the stage (or image) named datax-builder
# into /opt/datax.
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
# Duplicate the mysqlreader plugin's mysql* library files into the starrocksreader
# plugin's libs directory.
# NOTE(review): presumably so starrocksreader can load the MySQL JDBC driver — confirm.
RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/
# Copy only dependency files first (leverages layer caching when dependencies don't change)
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/

View File

@@ -1,16 +1,3 @@
# Stage datax-builder: builds DataX from source on a Maven image with Temurin JDK 8.
FROM maven:3-eclipse-temurin-8 AS datax-builder
# Install git and clone the upstream DataX repository. No branch, tag or commit is
# specified, so the build uses whatever the default branch points at when it runs.
RUN apt-get update && \
apt-get install -y git && \
git clone https://github.com/alibaba/DataX.git
# Overlay the repository's runtime/datax/ files onto the cloned DataX tree.
COPY runtime/datax/ DataX/
# Rewrite the MySQL driver class name in DataBaseType.java from com.mysql.jdbc.Driver to
# com.mysql.cj.jdbc.Driver, then build the DataX assembly with tests skipped.
RUN cd DataX && \
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
mvn -U clean package assembly:assembly -Dmaven.test.skip=true
FROM maven:3-eclipse-temurin-21 AS builder
COPY backend/ /opt/backend
@@ -22,12 +9,11 @@ RUN cd /opt/backend/services && \
# Runtime stage for the backend, based on the Temurin 21 JDK image.
FROM eclipse-temurin:21-jdk
# Install OS tools and Python, then clean the apt caches in the same layer.
# NOTE(review): the two `apt-get install` lines differ only by `nfs-common`; they look like
# the before/after versions of one changed line (diff markers stripped) — confirm that
# only one of them is present in the real Dockerfile.
RUN apt-get update && \
apt-get install -y vim wget curl nfs-common rsync python3 python3-pip python-is-python3 dos2unix && \
apt-get install -y vim wget curl rsync python3 python3-pip python-is-python3 dos2unix && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Application jar built in the `builder` stage.
COPY --from=builder /opt/backend/services/main-application/target/datamate.jar /opt/backend/datamate.jar
# DataX distribution taken from the `datax-builder` stage.
# NOTE(review): this requires a datax-builder stage to still be defined in this Dockerfile — verify.
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
# Container start script.
COPY scripts/images/backend/start.sh /opt/backend/start.sh

View File

@@ -2,7 +2,7 @@
# Abort the script as soon as any command exits with a non-zero status.
set -e
# Launch rpcbind before the main process.
# NOTE(review): presumably needed for NFS client support; this line and the `umask` line
# below may be the before/after of one changed line — confirm against the real script.
rpcbind
# Clear the group/other write bits on files created by this script and by the process it
# execs (the umask is inherited across exec).
umask 0022
echo "Starting main application..."
# Replace this shell with the command passed as arguments so it receives signals directly.
exec "$@"
# NOTE(review): not reached when the exec above succeeds; looks like a duplicate left by
# the diff view (same line with and without a trailing newline) — confirm.
exec "$@"