You've already forked DataMate
[Feature] Refactor project to use 'datamate' naming convention for services and configurations (#14)
* Enhance CleaningTaskService to track cleaning process progress and update ExecutorType to DATAMATE * Refactor project to use 'datamate' naming convention for services and configurations
This commit is contained in:
80
Makefile
80
Makefile
@@ -33,7 +33,7 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: install
|
.PHONY: install
|
||||||
install: install-data-mate
|
install: install-datamate
|
||||||
|
|
||||||
.PHONY: uninstall-%
|
.PHONY: uninstall-%
|
||||||
uninstall-%:
|
uninstall-%:
|
||||||
@@ -54,32 +54,20 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: uninstall
|
.PHONY: uninstall
|
||||||
uninstall: uninstall-data-mate
|
uninstall: uninstall-datamate
|
||||||
|
|
||||||
# build
|
# build
|
||||||
.PHONY: mineru-docker-build
|
|
||||||
mineru-docker-build:
|
|
||||||
docker build -t mineru:$(VERSION) . -f scripts/images/mineru/Dockerfile
|
|
||||||
|
|
||||||
.PHONY: datax-docker-build
|
|
||||||
datax-docker-build:
|
|
||||||
docker build -t datax:$(VERSION) . -f scripts/images/datax/Dockerfile
|
|
||||||
|
|
||||||
.PHONY: unstructured-docker-build
|
|
||||||
unstructured-docker-build:
|
|
||||||
docker build -t unstructured:$(VERSION) . -f scripts/images/unstructured/Dockerfile
|
|
||||||
|
|
||||||
.PHONY: backend-docker-build
|
.PHONY: backend-docker-build
|
||||||
backend-docker-build:
|
backend-docker-build:
|
||||||
docker build -t backend:$(VERSION) . -f scripts/images/backend/Dockerfile
|
docker build -t datamate-backend:$(VERSION) . -f scripts/images/backend/Dockerfile
|
||||||
|
|
||||||
.PHONY: frontend-docker-build
|
.PHONY: frontend-docker-build
|
||||||
frontend-docker-build:
|
frontend-docker-build:
|
||||||
docker build -t frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
|
docker build -t datamate-frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
|
||||||
|
|
||||||
.PHONY: runtime-docker-build
|
.PHONY: runtime-docker-build
|
||||||
runtime-docker-build:
|
runtime-docker-build:
|
||||||
docker build -t runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
|
docker build -t datamate-runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
|
||||||
|
|
||||||
.PHONY: label-studio-adapter-docker-build
|
.PHONY: label-studio-adapter-docker-build
|
||||||
label-studio-adapter-docker-build:
|
label-studio-adapter-docker-build:
|
||||||
@@ -87,55 +75,57 @@ label-studio-adapter-docker-build:
|
|||||||
|
|
||||||
.PHONY: backend-docker-install
|
.PHONY: backend-docker-install
|
||||||
backend-docker-install:
|
backend-docker-install:
|
||||||
cd deployment/docker/data-mate && docker-compose up -d backend
|
cd deployment/docker/datamate && docker-compose up -d backend
|
||||||
|
|
||||||
.PHONY: backend-docker-uninstall
|
.PHONY: backend-docker-uninstall
|
||||||
backend-docker-uninstall:
|
backend-docker-uninstall:
|
||||||
cd deployment/docker/data-mate && docker-compose down backend
|
cd deployment/docker/datamate && docker-compose down backend
|
||||||
|
|
||||||
.PHONY: frontend-docker-install
|
.PHONY: frontend-docker-install
|
||||||
frontend-docker-install:
|
frontend-docker-install:
|
||||||
cd deployment/docker/data-mate && docker-compose up -d frontend
|
cd deployment/docker/datamate && docker-compose up -d frontend
|
||||||
|
|
||||||
.PHONY: frontend-docker-uninstall
|
.PHONY: frontend-docker-uninstall
|
||||||
frontend-docker-uninstall:
|
frontend-docker-uninstall:
|
||||||
cd deployment/docker/data-mate && docker-compose down frontend
|
cd deployment/docker/datamate && docker-compose down frontend
|
||||||
|
|
||||||
.PHONY: runtime-docker-install
|
.PHONY: runtime-docker-install
|
||||||
runtime-docker-install:
|
runtime-docker-install:
|
||||||
cd deployment/docker/data-mate && docker-compose up -d runtime
|
cd deployment/docker/datamate && docker-compose up -d runtime
|
||||||
|
|
||||||
.PHONY: runtime-docker-uninstall
|
.PHONY: runtime-docker-uninstall
|
||||||
runtime-docker-uninstall:
|
runtime-docker-uninstall:
|
||||||
cd deployment/docker/data-mate && docker-compose down runtime
|
cd deployment/docker/datamate && docker-compose down runtime
|
||||||
|
|
||||||
.PHONY: runtime-k8s-install
|
.PHONY: runtime-k8s-install
|
||||||
runtime-k8s-install: create-namespace
|
runtime-k8s-install: create-namespace
|
||||||
helm upgrade kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
|
helm upgrade datamate-kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
|
||||||
helm upgrade raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
|
helm upgrade datamate-raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
|
||||||
kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
|
kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
|
||||||
|
|
||||||
.PHONY: runtime-k8s-uninstall
|
.PHONY: runtime-k8s-uninstall
|
||||||
runtime-k8s-uninstall:
|
runtime-k8s-uninstall:
|
||||||
helm uninstall raycluster -n $(NAMESPACE)
|
helm uninstall datamate-raycluster -n $(NAMESPACE)
|
||||||
helm uninstall kuberay-operator -n $(NAMESPACE)
|
helm uninstall datamate-kuberay-operator -n $(NAMESPACE)
|
||||||
kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
|
kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
|
||||||
|
|
||||||
.PHONY: unstructured-k8s-install
|
|
||||||
unstructured-k8s-install: create-namespace
|
|
||||||
kubectl apply -f deployment/kubernetes/unstructured/deploy.yaml -n $(NAMESPACE)
|
|
||||||
|
|
||||||
.PHONY: mysql-k8s-install
|
.PHONY: mysql-k8s-install
|
||||||
mysql-k8s-install: create-namespace
|
mysql-k8s-install: create-namespace
|
||||||
kubectl create configmap init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
|
kubectl create configmap datamate-init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
|
||||||
kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
|
kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
|
||||||
kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
|
kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
|
||||||
|
|
||||||
.PHONY: mysql-k8s-uninstall
|
.PHONY: mysql-k8s-uninstall
|
||||||
mysql-k8s-uninstall:
|
mysql-k8s-uninstall:
|
||||||
kubectl delete configmap init-sql -n $(NAMESPACE)
|
kubectl delete configmap datamate-init-sql -n $(NAMESPACE) --ignore-not-found
|
||||||
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
|
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) --ignore-not-found
|
||||||
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
|
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) --ignore-not-found
|
||||||
|
|
||||||
|
.PHONY: database-k8s-install
|
||||||
|
database-k8s-install: mysql-k8s-install
|
||||||
|
|
||||||
|
.PHONY: database-k8s-uninstall
|
||||||
|
database-k8s-uninstall: mysql-k8s-uninstall
|
||||||
|
|
||||||
.PHONY: backend-k8s-install
|
.PHONY: backend-k8s-install
|
||||||
backend-k8s-install: create-namespace
|
backend-k8s-install: create-namespace
|
||||||
@@ -143,7 +133,7 @@ backend-k8s-install: create-namespace
|
|||||||
|
|
||||||
.PHONY: backend-k8s-uninstall
|
.PHONY: backend-k8s-uninstall
|
||||||
backend-k8s-uninstall:
|
backend-k8s-uninstall:
|
||||||
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
|
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
|
||||||
|
|
||||||
.PHONY: frontend-k8s-install
|
.PHONY: frontend-k8s-install
|
||||||
frontend-k8s-install: create-namespace
|
frontend-k8s-install: create-namespace
|
||||||
@@ -151,18 +141,18 @@ frontend-k8s-install: create-namespace
|
|||||||
|
|
||||||
.PHONY: frontend-k8s-uninstall
|
.PHONY: frontend-k8s-uninstall
|
||||||
frontend-k8s-uninstall:
|
frontend-k8s-uninstall:
|
||||||
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
|
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
|
||||||
|
|
||||||
.PHONY: data-mate-docker-install
|
.PHONY: datamate-docker-install
|
||||||
data-mate-docker-install:
|
datamate-docker-install:
|
||||||
cd deployment/docker/datamate && docker-compose up -d
|
cd deployment/docker/datamate && docker-compose up -d
|
||||||
|
|
||||||
.PHONY: data-mate-docker-uninstall
|
.PHONY: datamate-docker-uninstall
|
||||||
data-mate-docker-uninstall:
|
datamate-docker-uninstall:
|
||||||
cd deployment/docker/datamate && docker-compose down
|
cd deployment/docker/datamate && docker-compose down
|
||||||
|
|
||||||
.PHONY: data-mate-k8s-install
|
.PHONY: datamate-k8s-install
|
||||||
data-mate-k8s-install: create-namespace mysql-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
|
datamate-k8s-install: create-namespace database-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
|
||||||
|
|
||||||
.PHONY: data-mate-k8s-uninstall
|
.PHONY: datamate-k8s-uninstall
|
||||||
data-mate-k8s-uninstall: mysql-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall
|
datamate-k8s-uninstall: database-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import java.time.Duration;
|
|||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class RuntimeClient {
|
public class RuntimeClient {
|
||||||
private static final String BASE_URL = "http://runtime:8081/api";
|
private static final String BASE_URL = "http://datamate-runtime:8081/api";
|
||||||
|
|
||||||
private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";
|
private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import com.datamate.cleaning.domain.model.TaskProcess;
|
|||||||
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
|
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
|
||||||
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
|
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
|
||||||
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
|
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
|
||||||
|
import com.datamate.cleaning.interfaces.dto.CleaningProcess;
|
||||||
import com.datamate.cleaning.interfaces.dto.CleaningTask;
|
import com.datamate.cleaning.interfaces.dto.CleaningTask;
|
||||||
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
|
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
|
||||||
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
|
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
|
||||||
@@ -55,7 +56,14 @@ public class CleaningTaskService {
|
|||||||
|
|
||||||
public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
|
public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
|
||||||
Integer offset = page * size;
|
Integer offset = page * size;
|
||||||
return cleaningTaskMapper.findTasks(status, keywords, size, offset);
|
List<CleaningTask> tasks = cleaningTaskMapper.findTasks(status, keywords, size, offset);
|
||||||
|
tasks.forEach(this::setProcess);
|
||||||
|
return tasks;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setProcess(CleaningTask task) {
|
||||||
|
int count = cleaningResultMapper.countByInstanceId(task.getId());
|
||||||
|
task.setProgress(CleaningProcess.of(task.getFileCount(), count));
|
||||||
}
|
}
|
||||||
|
|
||||||
public int countTasks(String status, String keywords) {
|
public int countTasks(String status, String keywords) {
|
||||||
@@ -80,6 +88,7 @@ public class CleaningTaskService {
|
|||||||
task.setDestDatasetId(destDataset.getId());
|
task.setDestDatasetId(destDataset.getId());
|
||||||
task.setDestDatasetName(destDataset.getName());
|
task.setDestDatasetName(destDataset.getName());
|
||||||
task.setBeforeSize(srcDataset.getTotalSize());
|
task.setBeforeSize(srcDataset.getTotalSize());
|
||||||
|
task.setFileCount(srcDataset.getFileCount());
|
||||||
cleaningTaskMapper.insertTask(task);
|
cleaningTaskMapper.insertTask(task);
|
||||||
|
|
||||||
List<OperatorInstancePo> instancePos = request.getInstance().stream()
|
List<OperatorInstancePo> instancePos = request.getInstance().stream()
|
||||||
@@ -93,7 +102,9 @@ public class CleaningTaskService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public CleaningTask getTask(String taskId) {
|
public CleaningTask getTask(String taskId) {
|
||||||
return cleaningTaskMapper.findTaskById(taskId);
|
CleaningTask task = cleaningTaskMapper.findTaskById(taskId);
|
||||||
|
setProcess(task);
|
||||||
|
return task;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Transactional
|
@Transactional
|
||||||
@@ -113,7 +124,7 @@ public class CleaningTaskService {
|
|||||||
process.setDatasetId(task.getDestDatasetId());
|
process.setDatasetId(task.getDestDatasetId());
|
||||||
process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
|
process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
|
||||||
process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
|
process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
|
||||||
process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
|
process.setExecutorType(ExecutorType.DATAMATE.getValue());
|
||||||
process.setProcess(instances.stream()
|
process.setProcess(instances.stream()
|
||||||
.map(instance -> Map.of(instance.getId(), instance.getOverrides()))
|
.map(instance -> Map.of(instance.getId(), instance.getOverrides()))
|
||||||
.toList());
|
.toList());
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import lombok.Getter;
|
|||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
public enum ExecutorType {
|
public enum ExecutorType {
|
||||||
DATA_PLATFORM("data_platform"),
|
DATAMATE("datamate"),
|
||||||
DATA_JUICER_RAY("ray"),
|
DATA_JUICER_RAY("ray"),
|
||||||
DATA_JUICER_DEFAULT("default");
|
DATA_JUICER_DEFAULT("default");
|
||||||
|
|
||||||
|
|||||||
@@ -6,4 +6,6 @@ import org.apache.ibatis.annotations.Param;
|
|||||||
@Mapper
|
@Mapper
|
||||||
public interface CleaningResultMapper {
|
public interface CleaningResultMapper {
|
||||||
void deleteByInstanceId(@Param("instanceId") String instanceId);
|
void deleteByInstanceId(@Param("instanceId") String instanceId);
|
||||||
|
|
||||||
|
int countByInstanceId(@Param("instanceId") String instanceId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,9 @@ package com.datamate.cleaning.interfaces.dto;
|
|||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.math.RoundingMode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CleaningProcess
|
* CleaningProcess
|
||||||
*/
|
*/
|
||||||
@@ -16,5 +19,20 @@ public class CleaningProcess {
|
|||||||
private Integer totalFileNum;
|
private Integer totalFileNum;
|
||||||
|
|
||||||
private Integer finishedFileNum;
|
private Integer finishedFileNum;
|
||||||
|
|
||||||
|
public CleaningProcess(int totalFileNum, int finishedFileNum) {
|
||||||
|
this.totalFileNum = totalFileNum;
|
||||||
|
this.finishedFileNum = finishedFileNum;
|
||||||
|
if (totalFileNum == 0) {
|
||||||
|
this.process = 0.0f;
|
||||||
|
} else {
|
||||||
|
this.process = BigDecimal.valueOf(finishedFileNum * 100L)
|
||||||
|
.divide(BigDecimal.valueOf(totalFileNum), 2, RoundingMode.HALF_UP).floatValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CleaningProcess of(int totalFileNum, int finishedFileNum) {
|
||||||
|
return new CleaningProcess(totalFileNum, finishedFileNum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,8 @@ public class CleaningTask {
|
|||||||
|
|
||||||
private long afterSize;
|
private long afterSize;
|
||||||
|
|
||||||
|
private int fileCount;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 任务当前状态
|
* 任务当前状态
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -5,4 +5,8 @@
|
|||||||
DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
|
DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
|
||||||
</delete>
|
</delete>
|
||||||
|
|
||||||
|
<select id="countByInstanceId" resultType="java.lang.Integer">
|
||||||
|
SELECT COUNT(1) FROM t_clean_result WHERE instance_id = #{instanceId}
|
||||||
|
</select>
|
||||||
|
|
||||||
</mapper>
|
</mapper>
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
|
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
|
||||||
<sql id="Base_Column_List">
|
<sql id="Base_Column_List">
|
||||||
id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
|
id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
|
||||||
after_size, status, created_at, started_at, finished_at
|
after_size, file_count, status, created_at, started_at, finished_at
|
||||||
</sql>
|
</sql>
|
||||||
|
|
||||||
<select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
|
<select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
|
||||||
@@ -28,9 +28,9 @@
|
|||||||
|
|
||||||
<insert id="insertTask">
|
<insert id="insertTask">
|
||||||
INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
|
INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
|
||||||
dest_dataset_name, before_size, after_size, created_at)
|
dest_dataset_name, before_size, after_size, file_count, created_at)
|
||||||
VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
|
VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
|
||||||
#{destDatasetName}, ${beforeSize}, ${afterSize}, NOW())
|
#{destDatasetName}, #{beforeSize}, #{afterSize}, #{fileCount}, NOW())
|
||||||
</insert>
|
</insert>
|
||||||
|
|
||||||
<update id="updateTask">
|
<update id="updateTask">
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ spring:
|
|||||||
# 数据源配置
|
# 数据源配置
|
||||||
datasource:
|
datasource:
|
||||||
driver-class-name: com.mysql.cj.jdbc.Driver
|
driver-class-name: com.mysql.cj.jdbc.Driver
|
||||||
url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
|
url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
|
||||||
username: ${DB_USERNAME:root}
|
username: ${DB_USERNAME:root}
|
||||||
password: ${DB_PASSWORD:Huawei@123}
|
password: ${DB_PASSWORD:Huawei@123}
|
||||||
hikari:
|
hikari:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<Configuration status="WARN" monitorInterval="30">
|
<Configuration status="WARN" monitorInterval="30">
|
||||||
<Properties>
|
<Properties>
|
||||||
<Property name="LOG_PATH">/var/log/data-mate/backend</Property>
|
<Property name="LOG_PATH">/var/log/datamate/backend</Property>
|
||||||
<Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
|
<Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
|
||||||
<Property name="MAX_FILE_SIZE">100MB</Property>
|
<Property name="MAX_FILE_SIZE">100MB</Property>
|
||||||
<Property name="MAX_HISTORY">30</Property>
|
<Property name="MAX_HISTORY">30</Property>
|
||||||
|
|||||||
@@ -1,63 +1,70 @@
|
|||||||
services:
|
services:
|
||||||
# 1) backend
|
# 1) backend
|
||||||
backend:
|
datamate-backend:
|
||||||
container_name: backend
|
container_name: datamate-backend
|
||||||
image: backend
|
image: datamate-backend
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
privileged: true
|
privileged: true
|
||||||
ports:
|
ports:
|
||||||
- "8080:8080"
|
- "8080"
|
||||||
volumes:
|
volumes:
|
||||||
- dataset_volume:/dataset
|
- dataset_volume:/dataset
|
||||||
- flow_volume:/flow
|
- flow_volume:/flow
|
||||||
- log_volume:/var/log/data-mate
|
- log_volume:/var/log/datamate
|
||||||
networks: [ edatamate ]
|
networks: [ datamate ]
|
||||||
depends_on:
|
depends_on:
|
||||||
- mysql
|
- datamate-database
|
||||||
|
|
||||||
# 2) frontend(NodePort 30000)
|
# 2) frontend(NodePort 30000)
|
||||||
frontend:
|
datamate-frontend:
|
||||||
container_name: frontend
|
container_name: datamate-frontend
|
||||||
image: frontend
|
image: datamate-frontend
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
ports:
|
ports:
|
||||||
- "30000:80" # nodePort → hostPort
|
- "30000:80" # nodePort → hostPort
|
||||||
volumes:
|
volumes:
|
||||||
- log_volume:/var/log/data-mate
|
- frontend_log_volume:/var/log/datamate/frontend
|
||||||
networks: [ edatamate ]
|
networks: [ datamate ]
|
||||||
depends_on:
|
depends_on:
|
||||||
- backend
|
- datamate-backend
|
||||||
|
|
||||||
# 3) mysql
|
# 3) database
|
||||||
mysql:
|
datamate-database:
|
||||||
container_name: mysql
|
container_name: datamate-database
|
||||||
image: mysql:8
|
image: mysql:8
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
MYSQL_ROOT_PASSWORD: Huawei@123
|
MYSQL_ROOT_PASSWORD: Huawei@123
|
||||||
ports:
|
ports:
|
||||||
- "3306:3306"
|
- "3306"
|
||||||
|
command: |
|
||||||
|
sh -c "
|
||||||
|
chown mysql:mysql /var/log/datamate/database &&
|
||||||
|
chmod 755 /var/log/datamate/database &&
|
||||||
|
exec docker-entrypoint.sh mysqld
|
||||||
|
"
|
||||||
volumes:
|
volumes:
|
||||||
- mysql_volume:/var/lib/mysql
|
- mysql_volume:/var/lib/mysql
|
||||||
- ../../../scripts/db:/docker-entrypoint-initdb.d
|
- ../../../scripts/db:/docker-entrypoint-initdb.d
|
||||||
- ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf
|
- ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf:ro
|
||||||
- log_volume:/var/log/data-mate
|
- database_log_volume:/var/log/datamate/database
|
||||||
networks: [ edatamate ]
|
networks: [ datamate ]
|
||||||
|
|
||||||
runtime:
|
# 3) runtime
|
||||||
container_name: runtime
|
datamate-runtime:
|
||||||
image: runtime
|
container_name: datamate-runtime
|
||||||
|
image: datamate-runtime
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
RAY_DEDUP_LOGS: "0"
|
RAY_DEDUP_LOGS: "0"
|
||||||
RAY_TQDM_PATCH_PRINT: "0"
|
RAY_TQDM_PATCH_PRINT: "0"
|
||||||
MYSQL_HOST: "mysql"
|
MYSQL_HOST: "datamate-database"
|
||||||
MYSQL_PORT: "3306"
|
MYSQL_PORT: "3306"
|
||||||
MYSQL_USER: "root"
|
MYSQL_USER: "root"
|
||||||
MYSQL_PASSWORD: "Huawei@123"
|
MYSQL_PASSWORD: "Huawei@123"
|
||||||
MYSQL_DATABASE: "datamate"
|
MYSQL_DATABASE: "datamate"
|
||||||
ports:
|
ports:
|
||||||
- "8081:8081"
|
- "8081"
|
||||||
command:
|
command:
|
||||||
- python
|
- python
|
||||||
- /opt/runtime/datamate/operator_runtime.py
|
- /opt/runtime/datamate/operator_runtime.py
|
||||||
@@ -65,22 +72,27 @@ services:
|
|||||||
- "8081"
|
- "8081"
|
||||||
volumes:
|
volumes:
|
||||||
- ray_log_volume:/tmp/ray
|
- ray_log_volume:/tmp/ray
|
||||||
- log_volume:/var/log/data-mate
|
- log_volume:/var/log/datamate
|
||||||
- dataset_volume:/dataset
|
- dataset_volume:/dataset
|
||||||
- flow_volume:/flow
|
- flow_volume:/flow
|
||||||
|
networks: [ datamate ]
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
dataset_volume:
|
dataset_volume:
|
||||||
name: data-mate-dataset-volume
|
name: datamate-dataset-volume
|
||||||
flow_volume:
|
flow_volume:
|
||||||
name: data-mate-flow-volume
|
name: datamate-flow-volume
|
||||||
log_volume:
|
log_volume:
|
||||||
name: data-mate-log-volume
|
name: datamate-log-volume
|
||||||
mysql_volume:
|
mysql_volume:
|
||||||
name: data-mate-mysql-volume
|
name: datamate-mysql-volume
|
||||||
ray_log_volume:
|
ray_log_volume:
|
||||||
name: data-mate-ray-log-volume
|
name: datamate-ray-log-volume
|
||||||
|
frontend_log_volume:
|
||||||
|
name: datamate-frontend-log-volume
|
||||||
|
database_log_volume:
|
||||||
|
name: datamate-database-log-volume
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
edatamate:
|
datamate:
|
||||||
driver: bridge
|
driver: bridge
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ character-set-server = utf8mb4
|
|||||||
collation-server = utf8mb4_unicode_ci
|
collation-server = utf8mb4_unicode_ci
|
||||||
# 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松)
|
# 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松)
|
||||||
default-time-zone = 'Asia/Shanghai'
|
default-time-zone = 'Asia/Shanghai'
|
||||||
|
log_error=/var/log/datamate/database/error.log
|
||||||
|
|
||||||
[client]
|
[client]
|
||||||
# 设置客户端连接默认字符集
|
# 设置客户端连接默认字符集
|
||||||
@@ -12,4 +13,4 @@ default-character-set = utf8mb4
|
|||||||
|
|
||||||
[mysql]
|
[mysql]
|
||||||
# 设置 mysql 命令行客户端默认字符集
|
# 设置 mysql 命令行客户端默认字符集
|
||||||
default-character-set = utf8mb4
|
default-character-set = utf8mb4
|
||||||
|
|||||||
@@ -3,13 +3,13 @@
|
|||||||
# Declare variables to be passed into your templates.
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
# -- String to partially override release name.
|
# -- String to partially override release name.
|
||||||
nameOverride: kuberay-operator
|
nameOverride: datamate-kuberay-operator
|
||||||
|
|
||||||
# -- String to fully override release name.
|
# -- String to fully override release name.
|
||||||
fullnameOverride: kuberay-operator
|
fullnameOverride: datamate-kuberay-operator
|
||||||
|
|
||||||
# -- String to override component name.
|
# -- String to override component name.
|
||||||
componentOverride: kuberay-operator
|
componentOverride: datamate-kuberay-operator
|
||||||
|
|
||||||
image:
|
image:
|
||||||
# -- Image repository.
|
# -- Image repository.
|
||||||
@@ -32,7 +32,7 @@ serviceAccount:
|
|||||||
create: true
|
create: true
|
||||||
# -- The name of the service account to use.
|
# -- The name of the service account to use.
|
||||||
# If not set and create is true, a name is generated using the fullname template.
|
# If not set and create is true, a name is generated using the fullname template.
|
||||||
name: kuberay-operator
|
name: datamate-kuberay-operator
|
||||||
|
|
||||||
logging:
|
logging:
|
||||||
# -- Log encoder to use for stdout (one of `json` or `console`).
|
# -- Log encoder to use for stdout (one of `json` or `console`).
|
||||||
@@ -88,7 +88,7 @@ featureGates:
|
|||||||
# Configurations for KubeRay operator metrics.
|
# Configurations for KubeRay operator metrics.
|
||||||
metrics:
|
metrics:
|
||||||
# -- Whether KubeRay operator should emit control plane metrics.
|
# -- Whether KubeRay operator should emit control plane metrics.
|
||||||
enabled: true
|
enabled: false
|
||||||
serviceMonitor:
|
serviceMonitor:
|
||||||
# -- Enable a prometheus ServiceMonitor
|
# -- Enable a prometheus ServiceMonitor
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|||||||
@@ -6,12 +6,12 @@
|
|||||||
# in this Helm chart.
|
# in this Helm chart.
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: runtime
|
repository: datamate-runtime
|
||||||
tag: latest
|
tag: latest
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
nameOverride: "kuberay"
|
nameOverride: "kuberay"
|
||||||
fullnameOverride: ""
|
fullnameOverride: "datamate-raycluster"
|
||||||
|
|
||||||
imagePullSecrets: []
|
imagePullSecrets: []
|
||||||
# - name: an-existing-secret
|
# - name: an-existing-secret
|
||||||
@@ -68,7 +68,7 @@ head:
|
|||||||
- name: RAY_TQDM_PATCH_PRINT
|
- name: RAY_TQDM_PATCH_PRINT
|
||||||
value: "0"
|
value: "0"
|
||||||
- name: MYSQL_HOST
|
- name: MYSQL_HOST
|
||||||
value: "mysql"
|
value: "datamate-database"
|
||||||
- name: MYSQL_PORT
|
- name: MYSQL_PORT
|
||||||
value: "3306"
|
value: "3306"
|
||||||
- name: MYSQL_USER
|
- name: MYSQL_USER
|
||||||
@@ -114,15 +114,15 @@ head:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: log-volume
|
- name: log-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/log
|
path: /opt/datamate/data/log
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: dataset-volume
|
- name: dataset-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/dataset
|
path: /opt/datamate/data/dataset
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: flow-volume
|
- name: flow-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/flow
|
path: /opt/datamate/data/flow
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /tmp/ray
|
- mountPath: /tmp/ray
|
||||||
@@ -136,7 +136,7 @@ head:
|
|||||||
# Follows standard K8s container spec.
|
# Follows standard K8s container spec.
|
||||||
sidecarContainers:
|
sidecarContainers:
|
||||||
- name: runtime
|
- name: runtime
|
||||||
image: runtime
|
image: datamate-runtime
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command:
|
command:
|
||||||
- python
|
- python
|
||||||
@@ -145,7 +145,7 @@ head:
|
|||||||
- "8081"
|
- "8081"
|
||||||
env:
|
env:
|
||||||
- name: MYSQL_HOST
|
- name: MYSQL_HOST
|
||||||
value: "mysql"
|
value: "datamate-database"
|
||||||
- name: MYSQL_PORT
|
- name: MYSQL_PORT
|
||||||
value: "3306"
|
value: "3306"
|
||||||
- name: MYSQL_USER
|
- name: MYSQL_USER
|
||||||
@@ -160,7 +160,7 @@ head:
|
|||||||
- mountPath: /tmp/ray
|
- mountPath: /tmp/ray
|
||||||
name: log-volume
|
name: log-volume
|
||||||
subPath: ray/head
|
subPath: ray/head
|
||||||
- mountPath: /var/log/data-mate
|
- mountPath: /var/log/datamate
|
||||||
name: log-volume
|
name: log-volume
|
||||||
- mountPath: /dataset
|
- mountPath: /dataset
|
||||||
name: dataset-volume
|
name: dataset-volume
|
||||||
@@ -212,7 +212,7 @@ worker:
|
|||||||
- name: RAY_TQDM_PATCH_PRINT
|
- name: RAY_TQDM_PATCH_PRINT
|
||||||
value: "0"
|
value: "0"
|
||||||
- name: MYSQL_HOST
|
- name: MYSQL_HOST
|
||||||
value: "mysql"
|
value: "datamate-database"
|
||||||
- name: MYSQL_PORT
|
- name: MYSQL_PORT
|
||||||
value: "3306"
|
value: "3306"
|
||||||
- name: MYSQL_USER
|
- name: MYSQL_USER
|
||||||
@@ -257,15 +257,15 @@ worker:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: log-volume
|
- name: log-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/log
|
path: /opt/datamate/data/log
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: dataset-volume
|
- name: dataset-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/dataset
|
path: /opt/datamate/data/dataset
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: flow-volume
|
- name: flow-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/flow
|
path: /opt/datamate/data/flow
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /tmp/ray
|
- mountPath: /tmp/ray
|
||||||
@@ -350,15 +350,15 @@ additionalWorkerGroups:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: log-volume
|
- name: log-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/log
|
path: /opt/datamate/data/log
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: dataset-volume
|
- name: dataset-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/dataset
|
path: /opt/datamate/data/dataset
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: flow-volume
|
- name: flow-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/flow
|
path: /opt/datamate/data/flow
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /tmp/ray
|
- mountPath: /tmp/ray
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: runtime
|
name: datamate-runtime
|
||||||
labels:
|
labels:
|
||||||
ray.io/node-type: head
|
ray.io/node-type: head
|
||||||
spec:
|
spec:
|
||||||
|
|||||||
@@ -2,8 +2,9 @@ apiVersion: rbac.authorization.k8s.io/v1
|
|||||||
kind: Role
|
kind: Role
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: backend
|
app: datamate
|
||||||
name: backend
|
tier: backend
|
||||||
|
name: datamate-backend
|
||||||
rules:
|
rules:
|
||||||
- verbs:
|
- verbs:
|
||||||
- create
|
- create
|
||||||
@@ -33,83 +34,89 @@ apiVersion: v1
|
|||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: backend
|
app: datamate
|
||||||
name: backend
|
tier: backend
|
||||||
|
name: datamate-backend
|
||||||
|
|
||||||
---
|
---
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: backend
|
app: datamate
|
||||||
name: backend
|
tier: backend
|
||||||
|
name: datamate-backend
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
kind: Role
|
kind: Role
|
||||||
name: backend
|
name: datamate-backend
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: backend
|
name: datamate-backend
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: backend
|
name: datamate-backend
|
||||||
labels:
|
labels:
|
||||||
app: backend
|
app: datamate
|
||||||
|
tier: backend
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: backend
|
app: datamate
|
||||||
|
tier: backend
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: backend
|
app: datamate
|
||||||
|
tier: backend
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: backend
|
serviceAccountName: datamate-backend
|
||||||
containers:
|
containers:
|
||||||
- name: backend
|
- name: backend
|
||||||
image: backend
|
image: datamate-backend
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
env:
|
env:
|
||||||
- name: namespace
|
- name: namespace
|
||||||
valueFrom:
|
valueFrom:
|
||||||
fieldRef:
|
fieldRef:
|
||||||
fieldPath: metadata.namespace
|
fieldPath: metadata.namespace
|
||||||
- name: SPRING_CONFIG_LOCATION
|
- name: SPRING_CONFIG_LOCATION
|
||||||
value: file:/opt/backend/application.yml
|
value: file:/opt/backend/application.yml
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: dataset-volume
|
- name: dataset-volume
|
||||||
mountPath: /dataset
|
mountPath: /dataset
|
||||||
- name: flow-volume
|
- name: flow-volume
|
||||||
mountPath: /flow
|
mountPath: /flow
|
||||||
- name: log-volume
|
- name: log-volume
|
||||||
mountPath: /var/log/data-mate
|
mountPath: /var/log/datamate
|
||||||
volumes:
|
volumes:
|
||||||
- name: dataset-volume
|
- name: dataset-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/dataset
|
path: /opt/datamate/data/dataset
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: flow-volume
|
- name: flow-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/flow
|
path: /opt/datamate/data/flow
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: log-volume
|
- name: log-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/log
|
path: /opt/datamate/data/log
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: backend
|
name: datamate-backend
|
||||||
labels:
|
labels:
|
||||||
app: backend
|
app: datamate
|
||||||
|
tier: backend
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
ports:
|
ports:
|
||||||
@@ -117,4 +124,5 @@ spec:
|
|||||||
targetPort: 8080
|
targetPort: 8080
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
selector:
|
selector:
|
||||||
app: backend
|
app: datamate
|
||||||
|
tier: backend
|
||||||
|
|||||||
@@ -1,54 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: datax
|
|
||||||
labels:
|
|
||||||
app: datax
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: datax
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: datax
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: datax
|
|
||||||
image: datax
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
securityContext:
|
|
||||||
capabilities:
|
|
||||||
add:
|
|
||||||
- SYS_ADMIN
|
|
||||||
command:
|
|
||||||
- bash
|
|
||||||
- -c
|
|
||||||
- rpcbind && python3 /opt/datax/bin/app.py
|
|
||||||
ports:
|
|
||||||
- containerPort: 8000
|
|
||||||
volumeMounts:
|
|
||||||
- name: dataset
|
|
||||||
mountPath: /dataset
|
|
||||||
subPath: dataset
|
|
||||||
volumes:
|
|
||||||
- name: dataset
|
|
||||||
hostPath:
|
|
||||||
path: /tmp/data-mate
|
|
||||||
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: datax
|
|
||||||
labels:
|
|
||||||
app: datax
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 8000
|
|
||||||
targetPort: 8000
|
|
||||||
protocol: TCP
|
|
||||||
selector:
|
|
||||||
app: datax
|
|
||||||
@@ -1,32 +1,45 @@
|
|||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: frontend
|
name: datamate-frontend
|
||||||
labels:
|
labels:
|
||||||
app: frontend
|
app: datamate
|
||||||
|
tier: frontend
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: frontend
|
app: datamate
|
||||||
|
tier: frontend
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: frontend
|
app: datamate
|
||||||
|
tier: frontend
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: frontend
|
- name: frontend
|
||||||
image: frontend
|
image: datamate-frontend
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 80
|
- containerPort: 80
|
||||||
|
volumeMounts:
|
||||||
|
- name: log-volume
|
||||||
|
mountPath: /var/log/datamate/frontend
|
||||||
|
subPath: frontend
|
||||||
|
volumes:
|
||||||
|
- name: log-volume
|
||||||
|
hostPath:
|
||||||
|
path: /opt/datamate/data/log
|
||||||
|
type: DirectoryOrCreate
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: frontend
|
name: datamate-frontend
|
||||||
labels:
|
labels:
|
||||||
app: frontend
|
app: datamate
|
||||||
|
tier: frontend
|
||||||
spec:
|
spec:
|
||||||
type: NodePort
|
type: NodePort
|
||||||
ports:
|
ports:
|
||||||
@@ -35,4 +48,5 @@ spec:
|
|||||||
nodePort: 30000
|
nodePort: 30000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
selector:
|
selector:
|
||||||
app: frontend
|
app: datamate
|
||||||
|
tier: frontend
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: mineru
|
|
||||||
labels:
|
|
||||||
app: mineru
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: mineru
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: mineru
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: mineru
|
|
||||||
image: mineru
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
command:
|
|
||||||
- mineru-api
|
|
||||||
args:
|
|
||||||
- --host
|
|
||||||
- "0.0.0.0"
|
|
||||||
- --port
|
|
||||||
- "8000"
|
|
||||||
ports:
|
|
||||||
- containerPort: 8000
|
|
||||||
volumeMounts:
|
|
||||||
- name: tmp
|
|
||||||
mountPath: /tmp/data-mate
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
hostPath:
|
|
||||||
path: /tmp/data-mate
|
|
||||||
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: mineru
|
|
||||||
labels:
|
|
||||||
app: mineru
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 8000
|
|
||||||
targetPort: 8000
|
|
||||||
protocol: TCP
|
|
||||||
selector:
|
|
||||||
app: mineru
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
name: mysql-utf8-config
|
name: datamate-mysql-utf8-config
|
||||||
data:
|
data:
|
||||||
utf8.cnf: |
|
utf8.cnf: |
|
||||||
[mysqld]
|
[mysqld]
|
||||||
@@ -11,6 +11,7 @@ data:
|
|||||||
collation-server = utf8mb4_unicode_ci
|
collation-server = utf8mb4_unicode_ci
|
||||||
# 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松)
|
# 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松)
|
||||||
default-time-zone = 'Asia/Shanghai'
|
default-time-zone = 'Asia/Shanghai'
|
||||||
|
log_error=/var/log/datamate/database/error.log
|
||||||
|
|
||||||
[client]
|
[client]
|
||||||
# 设置客户端连接默认字符集
|
# 设置客户端连接默认字符集
|
||||||
@@ -18,4 +19,4 @@ data:
|
|||||||
|
|
||||||
[mysql]
|
[mysql]
|
||||||
# 设置 mysql 命令行客户端默认字符集
|
# 设置 mysql 命令行客户端默认字符集
|
||||||
default-character-set = utf8mb4
|
default-character-set = utf8mb4
|
||||||
|
|||||||
@@ -1,21 +1,39 @@
|
|||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: mysql
|
name: datamate-database
|
||||||
labels:
|
labels:
|
||||||
app: mysql
|
app: datamate
|
||||||
|
tier: database
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: mysql
|
app: datamate
|
||||||
|
tier: database
|
||||||
strategy:
|
strategy:
|
||||||
type: Recreate
|
type: Recreate
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: mysql
|
app: datamate
|
||||||
|
tier: database
|
||||||
spec:
|
spec:
|
||||||
|
initContainers:
|
||||||
|
- name: init-log
|
||||||
|
image: mysql:8
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
args:
|
||||||
|
- |
|
||||||
|
chown mysql:mysql /var/log/datamate/database
|
||||||
|
chmod 755 /var/log/datamate/database
|
||||||
|
volumeMounts:
|
||||||
|
- name: log-volume
|
||||||
|
mountPath: /var/log/datamate/database
|
||||||
|
subPath: database
|
||||||
containers:
|
containers:
|
||||||
- name: mysql
|
- name: mysql
|
||||||
image: mysql:8
|
image: mysql:8
|
||||||
@@ -28,6 +46,9 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: data-volume
|
- name: data-volume
|
||||||
mountPath: /var/lib/mysql
|
mountPath: /var/lib/mysql
|
||||||
|
- name: log-volume
|
||||||
|
mountPath: /var/log/datamate/database
|
||||||
|
subPath: database
|
||||||
- name: init-sql
|
- name: init-sql
|
||||||
mountPath: /docker-entrypoint-initdb.d
|
mountPath: /docker-entrypoint-initdb.d
|
||||||
- name: mysql-utf8-config
|
- name: mysql-utf8-config
|
||||||
@@ -35,22 +56,27 @@ spec:
|
|||||||
volumes:
|
volumes:
|
||||||
- name: data-volume
|
- name: data-volume
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /opt/data-mate/data/mysql
|
path: /opt/datamate/data/mysql
|
||||||
|
type: DirectoryOrCreate
|
||||||
|
- name: log-volume
|
||||||
|
hostPath:
|
||||||
|
path: /opt/datamate/data/log
|
||||||
type: DirectoryOrCreate
|
type: DirectoryOrCreate
|
||||||
- name: init-sql
|
- name: init-sql
|
||||||
configMap:
|
configMap:
|
||||||
name: init-sql
|
name: datamate-init-sql
|
||||||
- name: mysql-utf8-config
|
- name: mysql-utf8-config
|
||||||
configMap:
|
configMap:
|
||||||
name: mysql-utf8-config
|
name: datamate-mysql-utf8-config
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: mysql
|
name: datamate-database
|
||||||
labels:
|
labels:
|
||||||
app: mysql
|
app: datamate
|
||||||
|
tier: database
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
ports:
|
ports:
|
||||||
@@ -58,4 +84,5 @@ spec:
|
|||||||
targetPort: 3306
|
targetPort: 3306
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
selector:
|
selector:
|
||||||
app: mysql
|
app: datamate
|
||||||
|
tier: database
|
||||||
|
|||||||
@@ -1,49 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: unstructured
|
|
||||||
labels:
|
|
||||||
app: unstructured
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: unstructured
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: unstructured
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: unstructured
|
|
||||||
image: unstructured
|
|
||||||
imagePullPolicy: IfNotPresent
|
|
||||||
command:
|
|
||||||
- python
|
|
||||||
args:
|
|
||||||
- app.py
|
|
||||||
ports:
|
|
||||||
- containerPort: 8000
|
|
||||||
volumeMounts:
|
|
||||||
- name: tmp
|
|
||||||
mountPath: /tmp/data-mate
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
hostPath:
|
|
||||||
path: /tmp/data-mate
|
|
||||||
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: unstructured
|
|
||||||
labels:
|
|
||||||
app: unstructured
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
ports:
|
|
||||||
- port: 8000
|
|
||||||
targetPort: 8000
|
|
||||||
protocol: TCP
|
|
||||||
selector:
|
|
||||||
app: unstructured
|
|
||||||
@@ -12,7 +12,7 @@ spring:
|
|||||||
# 数据源配置
|
# 数据源配置
|
||||||
datasource:
|
datasource:
|
||||||
driver-class-name: com.mysql.cj.jdbc.Driver
|
driver-class-name: com.mysql.cj.jdbc.Driver
|
||||||
url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
|
url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
|
||||||
username: ${DB_USERNAME:root}
|
username: ${DB_USERNAME:root}
|
||||||
password: ${DB_PASSWORD:Huawei@123}
|
password: ${DB_PASSWORD:Huawei@123}
|
||||||
hikari:
|
hikari:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<Configuration status="WARN" monitorInterval="30">
|
<Configuration status="WARN" monitorInterval="30">
|
||||||
<Properties>
|
<Properties>
|
||||||
<Property name="LOG_PATH">/var/log/data-mate/backend</Property>
|
<Property name="LOG_PATH">/var/log/datamate/backend</Property>
|
||||||
<Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
|
<Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
|
||||||
<Property name="MAX_FILE_SIZE">100MB</Property>
|
<Property name="MAX_FILE_SIZE">100MB</Property>
|
||||||
<Property name="MAX_HISTORY">30</Property>
|
<Property name="MAX_HISTORY">30</Property>
|
||||||
|
|||||||
@@ -19,4 +19,3 @@ xmltodict==1.0.2
|
|||||||
zhconv==1.4.3
|
zhconv==1.4.3
|
||||||
sqlalchemy==2.0.40
|
sqlalchemy==2.0.40
|
||||||
pymysql==1.1.1
|
pymysql==1.1.1
|
||||||
unstructured[pdf,docx]==0.18.15
|
|
||||||
@@ -15,7 +15,7 @@ from datamate.scheduler import func_scheduler
|
|||||||
from datamate.wrappers import WRAPPERS
|
from datamate.wrappers import WRAPPERS
|
||||||
|
|
||||||
# 日志配置
|
# 日志配置
|
||||||
LOG_DIR = "/var/log/data-mate/runtime"
|
LOG_DIR = "/var/log/datamate/runtime"
|
||||||
os.makedirs(LOG_DIR, exist_ok=True)
|
os.makedirs(LOG_DIR, exist_ok=True)
|
||||||
logger.add(
|
logger.add(
|
||||||
f"{LOG_DIR}/runtime.log",
|
f"{LOG_DIR}/runtime.log",
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ classifiers = [
|
|||||||
|
|
||||||
# Core dependencies
|
# Core dependencies
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"uvicorn",
|
"uvicorn[standard]",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"loguru",
|
"loguru",
|
||||||
"jsonargparse",
|
"jsonargparse",
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ CREATE TABLE IF NOT EXISTS t_clean_task
|
|||||||
dest_dataset_name varchar(64),
|
dest_dataset_name varchar(64),
|
||||||
before_size bigint,
|
before_size bigint,
|
||||||
after_size bigint,
|
after_size bigint,
|
||||||
|
file_count int,
|
||||||
created_at timestamp default current_timestamp,
|
created_at timestamp default current_timestamp,
|
||||||
started_at timestamp,
|
started_at timestamp,
|
||||||
finished_at timestamp,
|
finished_at timestamp,
|
||||||
|
|||||||
@@ -111,113 +111,30 @@ VALUES ('TextFormatter', 'TXT文本抽取', '抽取TXT中的文本。', '1.0.0',
|
|||||||
|
|
||||||
|
|
||||||
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
||||||
VALUES (3, 'TextFormatter'),
|
SELECT c.id, o.id
|
||||||
(7, 'FileExporter'),
|
FROM t_operator_category c
|
||||||
(8, 'TextFormatter'),
|
CROSS JOIN t_operator o
|
||||||
(8, 'FileExporter'),
|
WHERE c.id IN (3, 8, 11)
|
||||||
(3, 'FileWithShortOrLongLengthFilter'),
|
AND o.id IN ('TextFormatter', 'FileWithShortOrLongLengthFilter', 'FileWithHighRepeatPhraseRateFilter',
|
||||||
(3, 'FileWithHighRepeatPhraseRateFilter'),
|
'FileWithHighRepeatWordRateFilter', 'FileWithHighSpecialCharRateFilter', 'FileWithManySensitiveWordsFilter',
|
||||||
(3, 'FileWithHighRepeatWordRateFilter'),
|
'DuplicateFilesFilter', 'DuplicateSentencesFilter', 'AnonymizedCreditCardNumber', 'AnonymizedIdNumber',
|
||||||
(3, 'FileWithHighSpecialCharRateFilter'),
|
'AnonymizedIpAddress', 'AnonymizedPhoneNumber', 'AnonymizedUrlCleaner', 'HtmlTagCleaner', 'XMLTagCleaner',
|
||||||
(3, 'FileWithManySensitiveWordsFilter'),
|
'ContentCleaner', 'EmailNumberCleaner', 'EmojiCleaner', 'ExtraSpaceCleaner', 'FullWidthCharacterCleaner',
|
||||||
(3, 'DuplicateFilesFilter'),
|
'GrableCharactersCleaner', 'InvisibleCharactersCleaner', 'LegendCleaner', 'PoliticalWordCleaner',
|
||||||
(3, 'DuplicateSentencesFilter'),
|
'SexualAndViolentWordCleaner', 'TraditionalChineseCleaner', 'UnicodeSpaceCleaner');
|
||||||
(3, 'AnonymizedCreditCardNumber'),
|
|
||||||
(3, 'AnonymizedIdNumber'),
|
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
||||||
(3, 'AnonymizedIpAddress'),
|
SELECT c.id, o.id
|
||||||
(3, 'AnonymizedPhoneNumber'),
|
FROM t_operator_category c
|
||||||
(3, 'AnonymizedUrlCleaner'),
|
CROSS JOIN t_operator o
|
||||||
(3, 'HtmlTagCleaner'),
|
WHERE c.id IN (4, 8, 11)
|
||||||
(3, 'XMLTagCleaner'),
|
AND o.id IN ('ImgFormatter', 'ImgBlurredImagesCleaner', 'ImgBrightness', 'ImgContrast', 'ImgDenoise',
|
||||||
(3, 'ContentCleaner'),
|
'ImgDuplicatedImagesCleaner', 'ImgPerspectiveTransformation', 'ImgResize', 'ImgSaturation',
|
||||||
(3, 'EmailNumberCleaner'),
|
'ImgShadowRemove', 'ImgSharpness', 'ImgSimilarImagesCleaner', 'ImgTypeUnify');
|
||||||
(3, 'EmojiCleaner'),
|
|
||||||
(3, 'ExtraSpaceCleaner'),
|
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
|
||||||
(3, 'FullWidthCharacterCleaner'),
|
SELECT c.id, o.id
|
||||||
(3, 'GrableCharactersCleaner'),
|
FROM t_operator_category c
|
||||||
(3, 'InvisibleCharactersCleaner'),
|
CROSS JOIN t_operator o
|
||||||
(3, 'LegendCleaner'),
|
WHERE c.id IN (7, 8, 11)
|
||||||
(3, 'PoliticalWordCleaner'),
|
AND o.id IN ('FileExporter');
|
||||||
(3, 'SexualAndViolentWordCleaner'),
|
|
||||||
(3, 'TraditionalChineseCleaner'),
|
|
||||||
(3, 'UnicodeSpaceCleaner'),
|
|
||||||
(4, 'ImgFormatter'),
|
|
||||||
(4, 'ImgBlurredImagesCleaner'),
|
|
||||||
(4, 'ImgBrightness'),
|
|
||||||
(4, 'ImgContrast'),
|
|
||||||
(4, 'ImgDenoise'),
|
|
||||||
(4, 'ImgDuplicatedImagesCleaner'),
|
|
||||||
(4, 'ImgPerspectiveTransformation'),
|
|
||||||
(4, 'ImgResize'),
|
|
||||||
(4, 'ImgSaturation'),
|
|
||||||
(4, 'ImgShadowRemove'),
|
|
||||||
(4, 'ImgSharpness'),
|
|
||||||
(4, 'ImgSimilarImagesCleaner'),
|
|
||||||
(4, 'ImgTypeUnify'),
|
|
||||||
(8, 'FileWithShortOrLongLengthFilter'),
|
|
||||||
(8, 'FileWithHighRepeatPhraseRateFilter'),
|
|
||||||
(8, 'FileWithHighRepeatWordRateFilter'),
|
|
||||||
(8, 'FileWithHighSpecialCharRateFilter'),
|
|
||||||
(8, 'FileWithManySensitiveWordsFilter'),
|
|
||||||
(8, 'DuplicateFilesFilter'),
|
|
||||||
(8, 'DuplicateSentencesFilter'),
|
|
||||||
(8, 'AnonymizedCreditCardNumber'),
|
|
||||||
(8, 'AnonymizedIdNumber'),
|
|
||||||
(8, 'AnonymizedIpAddress'),
|
|
||||||
(8, 'AnonymizedPhoneNumber'),
|
|
||||||
(8, 'AnonymizedUrlCleaner'),
|
|
||||||
(8, 'HtmlTagCleaner'),
|
|
||||||
(8, 'XMLTagCleaner'),
|
|
||||||
(8, 'ContentCleaner'),
|
|
||||||
(8, 'EmailNumberCleaner'),
|
|
||||||
(8, 'EmojiCleaner'),
|
|
||||||
(8, 'ExtraSpaceCleaner'),
|
|
||||||
(8, 'FullWidthCharacterCleaner'),
|
|
||||||
(8, 'GrableCharactersCleaner'),
|
|
||||||
(8, 'InvisibleCharactersCleaner'),
|
|
||||||
(8, 'LegendCleaner'),
|
|
||||||
(8, 'PoliticalWordCleaner'),
|
|
||||||
(8, 'SexualAndViolentWordCleaner'),
|
|
||||||
(8, 'TraditionalChineseCleaner'),
|
|
||||||
(8, 'UnicodeSpaceCleaner'),
|
|
||||||
(11, 'TextFormatter'),
|
|
||||||
(11, 'FileExporter'),
|
|
||||||
(11, 'FileWithShortOrLongLengthFilter'),
|
|
||||||
(11, 'FileWithHighRepeatPhraseRateFilter'),
|
|
||||||
(11, 'FileWithHighRepeatWordRateFilter'),
|
|
||||||
(11, 'FileWithHighSpecialCharRateFilter'),
|
|
||||||
(11, 'FileWithManySensitiveWordsFilter'),
|
|
||||||
(11, 'DuplicateFilesFilter'),
|
|
||||||
(11, 'DuplicateSentencesFilter'),
|
|
||||||
(11, 'AnonymizedCreditCardNumber'),
|
|
||||||
(11, 'AnonymizedIdNumber'),
|
|
||||||
(11, 'AnonymizedIpAddress'),
|
|
||||||
(11, 'AnonymizedPhoneNumber'),
|
|
||||||
(11, 'AnonymizedUrlCleaner'),
|
|
||||||
(11, 'HtmlTagCleaner'),
|
|
||||||
(11, 'XMLTagCleaner'),
|
|
||||||
(11, 'ContentCleaner'),
|
|
||||||
(11, 'EmailNumberCleaner'),
|
|
||||||
(11, 'EmojiCleaner'),
|
|
||||||
(11, 'ExtraSpaceCleaner'),
|
|
||||||
(11, 'FullWidthCharacterCleaner'),
|
|
||||||
(11, 'GrableCharactersCleaner'),
|
|
||||||
(11, 'InvisibleCharactersCleaner'),
|
|
||||||
(11, 'LegendCleaner'),
|
|
||||||
(11, 'PoliticalWordCleaner'),
|
|
||||||
(11, 'SexualAndViolentWordCleaner'),
|
|
||||||
(11, 'TraditionalChineseCleaner'),
|
|
||||||
(11, 'UnicodeSpaceCleaner'),
|
|
||||||
(11, 'ImgFormatter'),
|
|
||||||
(11, 'ImgBlurredImagesCleaner'),
|
|
||||||
(11, 'ImgBrightness'),
|
|
||||||
(11, 'ImgContrast'),
|
|
||||||
(11, 'ImgDenoise'),
|
|
||||||
(11, 'ImgDuplicatedImagesCleaner'),
|
|
||||||
(11, 'ImgPerspectiveTransformation'),
|
|
||||||
(11, 'ImgResize'),
|
|
||||||
(11, 'ImgSaturation'),
|
|
||||||
(11, 'ImgShadowRemove'),
|
|
||||||
(11, 'ImgSharpness'),
|
|
||||||
(11, 'ImgSimilarImagesCleaner'),
|
|
||||||
(11, 'ImgTypeUnify');
|
|
||||||
|
|||||||
@@ -1,33 +0,0 @@
|
|||||||
FROM maven:3-openjdk-8-slim AS builder
|
|
||||||
|
|
||||||
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
|
|
||||||
sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y git && \
|
|
||||||
git clone https://github.com/alibaba/DataX.git
|
|
||||||
|
|
||||||
COPY runtime/datax/ DataX/
|
|
||||||
|
|
||||||
RUN cd DataX && \
|
|
||||||
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
|
|
||||||
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
|
|
||||||
mvn -U clean package assembly:assembly -Dmaven.test.skip=true
|
|
||||||
|
|
||||||
|
|
||||||
FROM openjdk:8-jdk-slim
|
|
||||||
|
|
||||||
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
|
|
||||||
sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y python3 python3-pip python-is-python3 vim wget curl nfs-common rsync && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apy/lists/*
|
|
||||||
|
|
||||||
RUN pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple && \
|
|
||||||
pip config --user set global.trusted-host mirrors.aliyun.com && \
|
|
||||||
pip install fastapi uvicorn[standard] && \
|
|
||||||
pip cache purge
|
|
||||||
|
|
||||||
COPY --from=builder /DataX/target/datax/datax /opt/datax
|
|
||||||
|
|
||||||
COPY scripts/images/datax/app.py /opt/datax/bin/app.py
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
import subprocess
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
from fastapi import FastAPI
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
app = FastAPI(title="datax")
|
|
||||||
|
|
||||||
|
|
||||||
class CreateJobParam(BaseModel):
|
|
||||||
content: str
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/process", tags=["run datax.py"])
|
|
||||||
async def process(job: CreateJobParam):
|
|
||||||
output = {
|
|
||||||
"status": "failed",
|
|
||||||
}
|
|
||||||
try:
|
|
||||||
# 创建临时文件存储Python脚本
|
|
||||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=True) as f:
|
|
||||||
f.write(job.content)
|
|
||||||
f.seek(0)
|
|
||||||
|
|
||||||
cmd_args = ["python3", "/opt/datax/bin/datax.py", f.name]
|
|
||||||
result = subprocess.run(
|
|
||||||
cmd_args,
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
check=True
|
|
||||||
)
|
|
||||||
|
|
||||||
output["status"] = result.returncode
|
|
||||||
if result.returncode != 0:
|
|
||||||
output["stdout"] = result.stdout
|
|
||||||
output["stderr"] = result.stderr
|
|
||||||
except subprocess.TimeoutExpired as e:
|
|
||||||
output["status"] = 408
|
|
||||||
output["stderr"] = f"The script execution timed out: {e.stderr}"
|
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
output["status"] = 500
|
|
||||||
output["stderr"] = f"Script execution failed: {e.stdout}"
|
|
||||||
except Exception as e:
|
|
||||||
output["status"] = 500
|
|
||||||
output["stderr"] = f"Server error: {str(e)}"
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
||||||
@@ -10,7 +10,7 @@ RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \
|
|||||||
FROM nginx:1.29 AS runner
|
FROM nginx:1.29 AS runner
|
||||||
|
|
||||||
COPY --from=builder /app/dist /opt/frontend
|
COPY --from=builder /app/dist /opt/frontend
|
||||||
COPY scripts/images/frontend/edm.conf /etc/nginx/conf.d/default.conf
|
COPY scripts/images/frontend/backend.conf /etc/nginx/conf.d/default.conf
|
||||||
|
|
||||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
|
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,13 @@ server {
|
|||||||
listen 80;
|
listen 80;
|
||||||
server_name 0.0.0.0;
|
server_name 0.0.0.0;
|
||||||
|
|
||||||
|
access_log /var/log/datamate/frontend/access.log main;
|
||||||
|
error_log /var/log/datamate/frontend/error.log notice;
|
||||||
|
|
||||||
|
client_max_body_size 1024M;
|
||||||
|
|
||||||
location /api/ {
|
location /api/ {
|
||||||
proxy_pass http://backend:8080/api/;
|
proxy_pass http://datamate-backend:8080/api/;
|
||||||
proxy_set_header Host $host;
|
proxy_set_header Host $host;
|
||||||
proxy_set_header X-Real-IP $remote_addr;
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
FROM python:3.10-slim
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y curl vim libgl1 libgl1-mesa-glx libglib2.0-0 procps && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
RUN pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple && \
|
|
||||||
pip config --user set global.trusted-host mirrors.aliyun.com && \
|
|
||||||
pip install --upgrade setuptools && \
|
|
||||||
pip install -U 'mineru[core]==2.1.0' --break-system-packages && \
|
|
||||||
pip install torch==2.7.1+cpu -f https://download.pytorch.org/whl/torch/ && \
|
|
||||||
pip install torchvision==0.22.1+cpu -f https://download.pytorch.org/whl/torchvision && \
|
|
||||||
pip install requests==2.27.1 torch_npu==2.7.1rc1 numpy==1.26.0 decorator==5.2.1 einops==0.8.1 attrs==25.3.0 && \
|
|
||||||
pip cache purge
|
|
||||||
|
|
||||||
ENV CURL_CA_BUNDLE=""
|
|
||||||
ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
|
|
||||||
|
|
||||||
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
|
|
||||||
|
|
||||||
ENV MINERU_MODEL_SOURCE=local
|
|
||||||
@@ -17,8 +17,6 @@ ENV HF_HUB_DISABLE_XET=1
|
|||||||
|
|
||||||
RUN pip install -e . -i https://mirrors.huaweicloud.com/repository/pypi/simple \
|
RUN pip install -e . -i https://mirrors.huaweicloud.com/repository/pypi/simple \
|
||||||
&& pip install -r /opt/runtime/datamate/ops/requirements.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple \
|
&& pip install -r /opt/runtime/datamate/ops/requirements.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple \
|
||||||
&& pip cache purge \
|
&& pip cache purge
|
||||||
&& python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" \
|
|
||||||
&& python -c "from unstructured_inference.models.base import get_model; get_model()"
|
|
||||||
|
|
||||||
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
|
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
FROM downloads.unstructured.io/unstructured-io/unstructured
|
|
||||||
|
|
||||||
RUN pip config --user set global.index https://mirrors.huaweicloud.com/repository/pypi && \
|
|
||||||
pip config --user set global.index-url https://mirrors.huaweicloud.com/repository/pypi/simple && \
|
|
||||||
pip config --user set global.trusted-host mirrors.huaweicloud.com && \
|
|
||||||
pip install fastapi uvicorn && \
|
|
||||||
pip cache purge
|
|
||||||
|
|
||||||
COPY scripts/images/unstructured/app.py /app/app.py
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
import asyncio
import os
from typing import Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from unstructured.partition.auto import partition

app = FastAPI(title="unstructured")


class FileProcessingRequest(BaseModel):
    """Request body for the /process endpoint."""
    # Absolute path of the file to parse; must exist inside the container.
    file_path: Optional[str] = None


@app.post("/process", tags=["文件处理"])
async def process_file(request_data: FileProcessingRequest):
    """Parse the file at ``file_path`` and return the extracted text.

    Returns a dict with ``filePath``, ``text`` and ``status`` keys.

    Raises:
        HTTPException: 400 when ``file_path`` is missing, 404 when the
            path does not exist, 500 on any parsing failure.
    """
    try:
        file_path = request_data.file_path

        if not file_path:
            raise HTTPException(status_code=400, detail="缺少必要参数: filePath")

        if not os.path.exists(file_path):
            raise HTTPException(status_code=404, detail=f"文件不存在: {file_path}")

        # Off-load the potentially slow, CPU-bound parse to a worker
        # thread so the event loop stays responsive.
        text_content = await process_file_async(file_path)

        return {
            "filePath": file_path,
            "text": text_content,
            "status": "success",
        }

    except HTTPException:
        # Bare re-raise preserves the original status code and traceback
        # (``raise e`` would rewrite the traceback origin).
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"process failed: {str(e)}")


async def process_file_async(file_path: str) -> str:
    """Run the synchronous partition step in the default executor."""
    # get_running_loop() is the non-deprecated replacement for
    # get_event_loop() when called from inside a coroutine.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, partition_file_sync, file_path)


def partition_file_sync(file_path: str) -> str:
    """Synchronously partition the file and join element texts."""
    elements = partition(filename=file_path)
    return "\n\n".join(str(el) for el in elements)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -1,103 +0,0 @@
|
|||||||
#!/bin/bash
# 将预定义的 Docker 镜像列表保存为 .tar 文件。
set -euo pipefail

# ==========================================================
# 步骤 1: 定义帮助函数
# ==========================================================

# 脚本名称
SCRIPT_NAME=$(basename "$0")

help_message() {
  cat << EOF
Usage: $SCRIPT_NAME [-d TARGET_DIR] [-h|--help]

描述:
  将预定义的 Docker 镜像列表保存为 .tar 文件。

选项:
  -d TARGET_DIR   指定保存镜像的目标目录。
                  (绝对路径或相对路径)
                  如果未指定,将使用默认路径: $TARGET_DIR_DEFAULT
  -h, --help      显示此帮助信息并退出。

示例:
  # 使用默认目录 (./dist)
  $SCRIPT_NAME

  # 指定保存到 /tmp/my-archive 目录
  $SCRIPT_NAME -d /tmp/my-archive
EOF
}

# ==========================================================
# 步骤 2: 定义默认值和处理参数
# ==========================================================

# 默认目标目录
TARGET_DIR_DEFAULT="./dist"
TARGET_DIR="$TARGET_DIR_DEFAULT"

# getopts 只支持短选项;帮助文本承诺了 --help,所以在解析前单独处理它。
for arg in "$@"; do
  if [[ "$arg" == "--help" ]]; then
    help_message
    exit 0
  fi
done

# 使用 getopts 处理命令行选项。
# d: 表示 -d 选项后需要一个参数(目标目录)。
while getopts "d:h" opt; do
  case ${opt} in
    d )
      # 如果 -d 选项被指定,使用传入的参数作为目标目录
      TARGET_DIR="$OPTARG"
      ;;
    h )
      # 如果是 -h 选项,显示帮助并退出
      help_message
      exit 0
      ;;
    \? )
      # 处理无效的选项
      echo "错误:无效选项 -$OPTARG" >&2
      help_message
      exit 1
      ;;
  esac
done

# 移动到下一个非选项参数
shift $((OPTIND - 1))

# ==========================================================
# 步骤 3: 脚本核心逻辑
# ==========================================================

# 检查/创建目标文件夹
if ! mkdir -p "$TARGET_DIR"; then
  echo "❌ 致命错误:无法创建目标目录: $TARGET_DIR" >&2
  exit 1
fi
echo "目标目录已确认/创建: $TARGET_DIR"
echo "----------------------------------------"

# Image list
# NOTE(review): these names predate the datamate-* image renaming in this
# commit — confirm whether they should now be datamate-frontend, etc.
images=("frontend:latest" "backend:latest" "runtime:latest" "mysql:8")

for image in "${images[@]}"; do

  # 清理镜像名称,用 '_' 替换 ':',以创建安全的文件名。
  safe_name="${image//[:]/_}"

  # 构造完整的输出文件路径。
  output_path="$TARGET_DIR/$safe_name.tar"

  echo "正在保存镜像 $image"
  echo " -> 到文件 $output_path"

  # 直接以 if 检查 docker save 的退出状态,而不是事后检查 $?
  # (事后检查在 set -e 下会提前终止脚本,且易被中间命令覆盖)。
  if docker save -o "$output_path" "$image"; then
    echo "✅ 保存成功。"
  else
    echo "❌ 保存失败!"
  fi
  echo ""

done
|
|
||||||
Reference in New Issue
Block a user