You've already forked DataMate
feature: add mysql collection and starrocks collection (#222)
* fix: fix the path for backend-python imaage building * feature: add mysql collection and starrocks collection * feature: add mysql collection and starrocks collection * fix: change the permission of those files which collected from nfs to 754 * fix: delete collected files, config files and log files while deleting collection task * fix: add the collection task detail api * fix: change the log of collecting for dataset * fix: add collection task selecting while creating and updating dataset * fix: set the umask value to 0022 for java process
This commit is contained in:
@@ -73,5 +73,7 @@ CREATE TABLE t_dc_collection_templates (
|
||||
) COMMENT='数据归集模板配置表';
|
||||
|
||||
INSERT IGNORE INTO t_dc_collection_templates(id, name, description, source_type, source_name, target_type, target_name, template_content, built_in, created_by, updated_by)
|
||||
VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {}, "reader": {}, "writer": {}}', True, 'system', 'system'),
|
||||
('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input"},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input"},"accessKey": {"name": "访问密钥","description": "OBS访问密钥。","type": "input"},"secretKey": {"name": "密钥","description": "OBS密钥。","type": "input"},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input"}}, "reader": {}, "writer": {}}', True, 'system', 'system');
|
||||
VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {"ip": {"name": "NAS地址","description": "NAS服务的地址,可以为IP或者域名。","type": "input", "required": true, "index": 1}, "path": {"name": "共享路径","description": "NAS服务的共享路径。","type": "input", "required": true, "index": 2}, "files": {"name": "文件列表","description": "指定文件列表进行归集。","type": "selectTag", "required": false, "index": 3}}, "reader": {}, "writer": {}}', True, 'system', 'system'),
|
||||
('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input", "required": true, "index": 1},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input", "required": true, "index": 2},"accessKey": {"name": "AK","description": "OBS访问密钥。","type": "input", "required": true, "index": 3},"secretKey": {"name": "SK","description": "OBS密钥。","type": "password", "required": true, "index": 4},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input", "required": true, "index": 5}}, "reader": {}, "writer": {}}', True, 'system', 'system'),
|
||||
('3', 'MYSQL归集模板', '将MYSQL数据库中的数据以csv文件的形式归集到DataMate平台上。', 'mysqlreader', 'mysqlreader', 'txtfilewriter', 'txtfilewriter', '{"parameter": {}, "reader": {"username": {"name": "用户名","description": "数据库的用户名。","type": "input", "required": true, "index": 2}, "password": {"name": "密码","description": "数据库的密码。","type": "password", "required": true, "index": 3}, "connection": {"name": "数据库连接信息", "description": "数据库连接信息。", "type": "multipleList", "size": 1, "index": 1, "properties": {"jdbcUrl": {"type": "inputList", "name": "数据库连接", "description": "数据库连接url。", "required": true, "index": 1}, "querySql": {"type": "inputList", "name": "查询sql", "description": "输入符合语法的sql查询语句。", "required": true, "index": 2}}}}, "writer": {"header": {"name": "列名","description": "查询结果的列名,最终会体现为csv文件的表头。","type": "selectTag", "required": false}}}', True, 'system', 'system'),
|
||||
('4', 'StarRocks归集模板', '将StarRocks中的数据以csv文件的形式归集到DataMate平台上。', 'starrocksreader', 'starrocksreader', 'txtfilewriter', 'txtfilewriter', '{"parameter": {}, "reader": {"username": {"name": "用户名","description": "数据库的用户名。","type": "input", "required": true, "index": 2}, "password": {"name": "密码","description": "数据库的密码。","type": "password", "required": true, "index": 3}, "connection": {"name": "数据库连接信息", "description": "数据库连接信息。", "type": "multipleList", "size": 1, "index": 1, "properties": {"jdbcUrl": {"type": "inputList", "name": "数据库连接", "description": "数据库连接url。", "required": true, "index": 1}, "querySql": {"type": "inputList", "name": "查询sql", "description": "输入符合语法的sql查询语句。", "required": true, "index": 2}}}}, "writer": {"header": {"name": "列名","description": "查询结果的列名,最终会体现为csv文件的表头。","type": "selectTag", "required": false}}}', True, 'system', 'system');
|
||||
|
||||
@@ -17,19 +17,18 @@ FROM python:3.12-slim
|
||||
# Note: to use the cache mount syntax you must build with BuildKit enabled:
|
||||
# DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends openjdk-21-jre-headless \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common rsync && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
# Poetry configuration
|
||||
POETRY_VERSION=2.2.1 \
|
||||
POETRY_NO_INTERACTION=1 \
|
||||
POETRY_VIRTUALENVS_CREATE=false \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
|
||||
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk
|
||||
|
||||
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"
|
||||
|
||||
@@ -42,6 +41,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
&& pipx install "poetry==$POETRY_VERSION"
|
||||
|
||||
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
|
||||
RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/
|
||||
|
||||
# Copy only dependency files first (leverages layer caching when dependencies don't change)
|
||||
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/
|
||||
|
||||
@@ -1,16 +1,3 @@
|
||||
FROM maven:3-eclipse-temurin-8 AS datax-builder
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y git && \
|
||||
git clone https://github.com/alibaba/DataX.git
|
||||
|
||||
COPY runtime/datax/ DataX/
|
||||
|
||||
RUN cd DataX && \
|
||||
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
|
||||
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
|
||||
mvn -U clean package assembly:assembly -Dmaven.test.skip=true
|
||||
|
||||
FROM maven:3-eclipse-temurin-21 AS builder
|
||||
|
||||
COPY backend/ /opt/backend
|
||||
@@ -22,12 +9,11 @@ RUN cd /opt/backend/services && \
|
||||
FROM eclipse-temurin:21-jdk
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y vim wget curl nfs-common rsync python3 python3-pip python-is-python3 dos2unix && \
|
||||
apt-get install -y vim wget curl rsync python3 python3-pip python-is-python3 dos2unix && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /opt/backend/services/main-application/target/datamate.jar /opt/backend/datamate.jar
|
||||
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
|
||||
|
||||
COPY scripts/images/backend/start.sh /opt/backend/start.sh
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
set -e
|
||||
|
||||
rpcbind
|
||||
umask 0022
|
||||
|
||||
echo "Starting main application..."
|
||||
exec "$@"
|
||||
exec "$@"
|
||||
|
||||
Reference in New Issue
Block a user