You've already forked DataMate
Develop op (#35)
* refactor: enhance CleaningTaskService and related components with validation and repository updates
* feature: 支持算子上传创建 (support creating operators via upload)
This commit is contained in:
@@ -3,7 +3,7 @@ USE datamate;
|
||||
CREATE TABLE IF NOT EXISTS t_clean_template
|
||||
(
|
||||
id varchar(64) primary key not null unique,
|
||||
name varchar(64),
|
||||
name varchar(64) unique,
|
||||
description varchar(256),
|
||||
created_at timestamp default current_timestamp,
|
||||
updated_at timestamp default current_timestamp,
|
||||
@@ -13,7 +13,7 @@ CREATE TABLE IF NOT EXISTS t_clean_template
|
||||
CREATE TABLE IF NOT EXISTS t_clean_task
|
||||
(
|
||||
id varchar(64) primary key,
|
||||
name varchar(64),
|
||||
name varchar(64) unique,
|
||||
description varchar(256),
|
||||
status varchar(256),
|
||||
src_dataset_id varchar(64),
|
||||
|
||||
@@ -3,7 +3,7 @@ USE datamate;
|
||||
CREATE TABLE IF NOT EXISTS t_operator
|
||||
(
|
||||
id varchar(64) primary key,
|
||||
name varchar(64),
|
||||
name varchar(64) unique,
|
||||
description varchar(256),
|
||||
version varchar(256),
|
||||
inputs varchar(256),
|
||||
@@ -18,15 +18,17 @@ CREATE TABLE IF NOT EXISTS t_operator
|
||||
|
||||
CREATE TABLE IF NOT EXISTS t_operator_category
|
||||
(
|
||||
id int primary key auto_increment,
|
||||
name varchar(64),
|
||||
id varchar(64) primary key,
|
||||
name varchar(64) unique ,
|
||||
value varchar(64) unique ,
|
||||
type varchar(64),
|
||||
parent_id int
|
||||
parent_id varchar(64),
|
||||
created_at timestamp default current_timestamp
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS t_operator_category_relation
|
||||
(
|
||||
category_id int,
|
||||
category_id varchar(64),
|
||||
operator_id varchar(64),
|
||||
primary key (category_id, operator_id)
|
||||
);
|
||||
@@ -41,7 +43,7 @@ SELECT o.id AS operator_id,
|
||||
runtime,
|
||||
settings,
|
||||
is_star,
|
||||
created_at,
|
||||
o.created_at AS created_at,
|
||||
updated_at,
|
||||
toc.id AS category_id,
|
||||
toc.name AS category_name
|
||||
@@ -49,21 +51,21 @@ FROM t_operator_category_relation tocr
|
||||
LEFT JOIN t_operator o ON tocr.operator_id = o.id
|
||||
LEFT JOIN t_operator_category toc ON tocr.category_id = toc.id;
|
||||
|
||||
INSERT IGNORE INTO t_operator_category(id, name, type, parent_id)
|
||||
VALUES (1, '模态', 'predefined', 0),
|
||||
(2, '语言', 'predefined', 0),
|
||||
(3, '文本', 'predefined', 1),
|
||||
(4, '图片', 'predefined', 1),
|
||||
(5, '音频', 'predefined', 1),
|
||||
(6, '视频', 'predefined', 1),
|
||||
(7, '多模态', 'predefined', 1),
|
||||
(8, 'Python', 'predefined', 2),
|
||||
(9, 'Java', 'predefined', 2),
|
||||
(10, '来源', 'predefined', 0),
|
||||
(11, '系统预置', 'predefined', 10),
|
||||
(12, '用户上传', 'predefined', 10),
|
||||
(13, '收藏状态', 'predefined', 0),
|
||||
(14, '已收藏', 'predefined', 13);
|
||||
INSERT IGNORE INTO t_operator_category(id, name, value, type, parent_id)
|
||||
VALUES ('64465bec-b46b-11f0-8291-00155d0e4808', '模态', 'modal', 'predefined', '0'),
|
||||
('873000a2-65b3-474b-8ccc-4813c08c76fb', '语言', 'language', 'predefined', '0'),
|
||||
('d8a5df7a-52a9-42c2-83c4-01062e60f597', '文本', 'text', 'predefined', '64465bec-b46b-11f0-8291-00155d0e4808'),
|
||||
('de36b61c-9e8a-4422-8c31-d30585c7100f', '图片', 'image', 'predefined', '64465bec-b46b-11f0-8291-00155d0e4808'),
|
||||
('42dd9392-73e4-458c-81ff-41751ada47b5', '音频', 'audio', 'predefined', '64465bec-b46b-11f0-8291-00155d0e4808'),
|
||||
('a233d584-73c8-4188-ad5d-8f7c8dda9c27', '视频', 'video', 'predefined', '64465bec-b46b-11f0-8291-00155d0e4808'),
|
||||
('4d7dbd77-0a92-44f3-9056-2cd62d4a71e4', '多模态', 'multimodal', 'predefined', '64465bec-b46b-11f0-8291-00155d0e4808'),
|
||||
('9eda9d5d-072b-499b-916c-797a0a8750e1', 'Python', 'python', 'predefined', '873000a2-65b3-474b-8ccc-4813c08c76fb'),
|
||||
('b5bfc548-8ef6-417c-b8a6-a4197c078249', 'Java', 'java', 'predefined', '873000a2-65b3-474b-8ccc-4813c08c76fb'),
|
||||
('16e2d99e-eafb-44fc-acd0-f35a2bad28f8', '来源', 'origin', 'predefined', '0'),
|
||||
('96a3b07a-3439-4557-a835-525faad60ca3', '系统预置', 'predefined', 'predefined', '16e2d99e-eafb-44fc-acd0-f35a2bad28f8'),
|
||||
('ec2cdd17-8b93-4a81-88c4-ac9e98d10757', '用户上传', 'customized', 'predefined', '16e2d99e-eafb-44fc-acd0-f35a2bad28f8'),
|
||||
('d8482257-7ee6-41a0-a914-8363c7db1db0', '收藏状态', 'starStatus', 'predefined', '0'),
|
||||
('79f2d35a-3b6c-4846-a892-2f2015f48f24', '已收藏', 'isStar', 'predefined', 'd8482257-7ee6-41a0-a914-8363c7db1db0');
|
||||
|
||||
INSERT IGNORE INTO t_operator
|
||||
(id, name, description, version, inputs, outputs, runtime, settings, file_name, is_star)
|
||||
@@ -116,7 +118,7 @@ INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
||||
SELECT c.id, o.id
|
||||
FROM t_operator_category c
|
||||
CROSS JOIN t_operator o
|
||||
WHERE c.id IN (3, 8, 11)
|
||||
WHERE c.id IN ('d8a5df7a-52a9-42c2-83c4-01062e60f597', '9eda9d5d-072b-499b-916c-797a0a8750e1', '96a3b07a-3439-4557-a835-525faad60ca3')
|
||||
AND o.id IN ('TextFormatter', 'FileWithShortOrLongLengthFilter', 'FileWithHighRepeatPhraseRateFilter',
|
||||
'FileWithHighRepeatWordRateFilter', 'FileWithHighSpecialCharRateFilter', 'FileWithManySensitiveWordsFilter',
|
||||
'DuplicateFilesFilter', 'DuplicateSentencesFilter', 'AnonymizedCreditCardNumber', 'AnonymizedIdNumber',
|
||||
@@ -129,7 +131,7 @@ INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
||||
SELECT c.id, o.id
|
||||
FROM t_operator_category c
|
||||
CROSS JOIN t_operator o
|
||||
WHERE c.id IN (4, 8, 11)
|
||||
WHERE c.id IN ('de36b61c-9e8a-4422-8c31-d30585c7100f', '9eda9d5d-072b-499b-916c-797a0a8750e1', '96a3b07a-3439-4557-a835-525faad60ca3')
|
||||
AND o.id IN ('ImgFormatter', 'ImgBlurredImagesCleaner', 'ImgBrightness', 'ImgContrast', 'ImgDenoise',
|
||||
'ImgDuplicatedImagesCleaner', 'ImgPerspectiveTransformation', 'ImgResize', 'ImgSaturation',
|
||||
'ImgShadowRemove', 'ImgSharpness', 'ImgSimilarImagesCleaner', 'ImgTypeUnify');
|
||||
@@ -138,5 +140,5 @@ INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
||||
SELECT c.id, o.id
|
||||
FROM t_operator_category c
|
||||
CROSS JOIN t_operator o
|
||||
WHERE c.id IN (7, 8, 11)
|
||||
AND o.id IN ('FileExporter', 'UnstructuredFormatter', 'ExternalPDFFormatter');
|
||||
WHERE c.id IN ('4d7dbd77-0a92-44f3-9056-2cd62d4a71e4', '9eda9d5d-072b-499b-916c-797a0a8750e1', '96a3b07a-3439-4557-a835-525faad60ca3')
|
||||
AND o.id IN ('FileExporter', 'UnstructuredFormatter');
|
||||
|
||||
@@ -14,10 +14,9 @@ RUN cd DataX && \
|
||||
FROM maven:3-amazoncorretto-21-debian AS builder
|
||||
|
||||
COPY backend/ /opt/backend
|
||||
COPY scripts/images/backend/settings.xml /opt/backend
|
||||
|
||||
RUN cd /opt/backend && \
|
||||
mvn -U clean package -s settings.xml -Dmaven.test.skip=true
|
||||
mvn -U clean package -Dmaven.test.skip=true
|
||||
|
||||
|
||||
FROM openjdk:21-jdk-slim
|
||||
@@ -25,7 +24,7 @@ FROM openjdk:21-jdk-slim
|
||||
RUN apt-get update && \
|
||||
apt-get install -y vim wget curl nfs-common rsync python3 python3-pip python-is-python3 dos2unix && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apy/lists/*
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /opt/backend/services/main-application/target/data-mate.jar /opt/backend/data-mate.jar
|
||||
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 https://maven.apache.org/xsd/settings-1.0.0.xsd">
|
||||
<!-- 本地仓库路径(可选,默认在 ~/.m2/repository) -->
|
||||
<localRepository>${user.home}/.m2/repository</localRepository>
|
||||
|
||||
<!-- 阿里云镜像配置 -->
|
||||
<mirrors>
|
||||
<mirror>
|
||||
<id>aliyun-maven</id>
|
||||
<name>Aliyun Maven Repository</name>
|
||||
<url>https://maven.aliyun.com/repository/public</url>
|
||||
<mirrorOf>central,jcenter,google,spring,spring-plugin,gradle-plugin</mirrorOf>
|
||||
</mirror>
|
||||
</mirrors>
|
||||
|
||||
<!-- 使用 Java 21 编译配置(可选,但推荐) -->
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>java21</id>
|
||||
<activation>
|
||||
<activeByDefault>true</activeByDefault>
|
||||
<jdk>21</jdk>
|
||||
</activation>
|
||||
<properties>
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
</profile>
|
||||
<!-- 激活阿里云仓库(可选,增强依赖解析) -->
|
||||
<profile>
|
||||
<id>aliyun-repos</id>
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>aliyun-public</id>
|
||||
<name>Aliyun Public Repository</name>
|
||||
<url>https://maven.aliyun.com/repository/public</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled> <!-- 默认关闭快照版本 -->
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
<pluginRepositories>
|
||||
<pluginRepository>
|
||||
<id>aliyun-plugin</id>
|
||||
<name>Aliyun Plugin Repository</name>
|
||||
<url>https://maven.aliyun.com/repository/public</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</pluginRepository>
|
||||
</pluginRepositories>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
||||
<activeProfiles>
|
||||
<activeProfile>aliyun-repos</activeProfile> <!-- 激活阿里云仓库 -->
|
||||
<activeProfile>java21</activeProfile> <!-- 激活 Java 21 配置 -->
|
||||
</activeProfiles>
|
||||
</settings>
|
||||
@@ -2,6 +2,8 @@ FROM python:3.11
|
||||
|
||||
COPY runtime/python-executor /opt/runtime
|
||||
COPY runtime/ops /opt/runtime/datamate/ops
|
||||
COPY runtime/ops/user /opt/runtime/user
|
||||
COPY scripts/images/runtime/start.sh /opt/runtime/start.sh
|
||||
|
||||
ENV PYTHONPATH=/opt/runtime/datamate/
|
||||
|
||||
@@ -12,12 +14,13 @@ RUN apt update \
|
||||
|
||||
WORKDIR /opt/runtime
|
||||
|
||||
ENV HF_HUB_DISABLE_XET=1
|
||||
|
||||
RUN pip install -e . \
|
||||
&& pip install -r /opt/runtime/datamate/ops/requirements.txt \
|
||||
RUN pip install -e . --trusted-host mirrors.huaweicloud.com -i https://mirrors.huaweicloud.com/repository/pypi/simple \
|
||||
&& pip install -r /opt/runtime/datamate/ops/requirements.txt --trusted-host mirrors.huaweicloud.com -i https://mirrors.huaweicloud.com/repository/pypi/simple \
|
||||
&& pip cache purge
|
||||
|
||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
|
||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
|
||||
&& chmod +x /opt/runtime/start.sh
|
||||
|
||||
EXPOSE 8081
|
||||
|
||||
ENTRYPOINT ["/opt/runtime/start.sh"]
|
||||
|
||||
8
scripts/images/runtime/start.sh
Normal file
8
scripts/images/runtime/start.sh
Normal file
@@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
cp -r /opt/runtime/user/* /opt/runtime/datamate/ops/user
|
||||
|
||||
echo "Starting main application..."
|
||||
exec "$@"
|
||||
Reference in New Issue
Block a user