[Feature] Refactor project to use 'datamate' naming convention for services and configurations (#14)

* Enhance CleaningTaskService to track cleaning process progress and update ExecutorType to DATAMATE

* Refactor project to use 'datamate' naming convention for services and configurations
This commit is contained in:
hhhhsc701
2025-10-22 17:53:16 +08:00
committed by GitHub
parent 175d9ded93
commit 31ef8bc265
39 changed files with 312 additions and 737 deletions

View File

@@ -33,7 +33,7 @@ else
endif
.PHONY: install
install: install-data-mate
install: install-datamate
.PHONY: uninstall-%
uninstall-%:
@@ -54,32 +54,20 @@ else
endif
.PHONY: uninstall
uninstall: uninstall-data-mate
uninstall: uninstall-datamate
# build
.PHONY: mineru-docker-build
mineru-docker-build:
docker build -t mineru:$(VERSION) . -f scripts/images/mineru/Dockerfile
.PHONY: datax-docker-build
datax-docker-build:
docker build -t datax:$(VERSION) . -f scripts/images/datax/Dockerfile
.PHONY: unstructured-docker-build
unstructured-docker-build:
docker build -t unstructured:$(VERSION) . -f scripts/images/unstructured/Dockerfile
.PHONY: backend-docker-build
backend-docker-build:
docker build -t backend:$(VERSION) . -f scripts/images/backend/Dockerfile
docker build -t datamate-backend:$(VERSION) . -f scripts/images/backend/Dockerfile
.PHONY: frontend-docker-build
frontend-docker-build:
docker build -t frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
docker build -t datamate-frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
.PHONY: runtime-docker-build
runtime-docker-build:
docker build -t runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
docker build -t datamate-runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
.PHONY: label-studio-adapter-docker-build
label-studio-adapter-docker-build:
@@ -87,55 +75,57 @@ label-studio-adapter-docker-build:
.PHONY: backend-docker-install
backend-docker-install:
cd deployment/docker/data-mate && docker-compose up -d backend
cd deployment/docker/datamate && docker-compose up -d backend
.PHONY: backend-docker-uninstall
backend-docker-uninstall:
cd deployment/docker/data-mate && docker-compose down backend
cd deployment/docker/datamate && docker-compose down backend
.PHONY: frontend-docker-install
frontend-docker-install:
cd deployment/docker/data-mate && docker-compose up -d frontend
cd deployment/docker/datamate && docker-compose up -d frontend
.PHONY: frontend-docker-uninstall
frontend-docker-uninstall:
cd deployment/docker/data-mate && docker-compose down frontend
cd deployment/docker/datamate && docker-compose down frontend
.PHONY: runtime-docker-install
runtime-docker-install:
cd deployment/docker/data-mate && docker-compose up -d runtime
cd deployment/docker/datamate && docker-compose up -d runtime
.PHONY: runtime-docker-uninstall
runtime-docker-uninstall:
cd deployment/docker/data-mate && docker-compose down runtime
cd deployment/docker/datamate && docker-compose down runtime
.PHONY: runtime-k8s-install
runtime-k8s-install: create-namespace
helm upgrade kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
helm upgrade raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
helm upgrade datamate-kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
helm upgrade datamate-raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
.PHONY: runtime-k8s-uninstall
runtime-k8s-uninstall:
helm uninstall raycluster -n $(NAMESPACE)
helm uninstall kuberay-operator -n $(NAMESPACE)
helm uninstall datamate-raycluster -n $(NAMESPACE)
helm uninstall datamate-kuberay-operator -n $(NAMESPACE)
kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
.PHONY: unstructured-k8s-install
unstructured-k8s-install: create-namespace
kubectl apply -f deployment/kubernetes/unstructured/deploy.yaml -n $(NAMESPACE)
.PHONY: mysql-k8s-install
mysql-k8s-install: create-namespace
kubectl create configmap init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
kubectl create configmap datamate-init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
.PHONY: mysql-k8s-uninstall
mysql-k8s-uninstall:
kubectl delete configmap init-sql -n $(NAMESPACE)
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
kubectl delete configmap datamate-init-sql -n $(NAMESPACE) --ignore-not-found
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) --ignore-not-found
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) --ignore-not-found
.PHONY: database-k8s-install
database-k8s-install: mysql-k8s-install
.PHONY: database-k8s-uninstall
database-k8s-uninstall: mysql-k8s-uninstall
.PHONY: backend-k8s-install
backend-k8s-install: create-namespace
@@ -143,7 +133,7 @@ backend-k8s-install: create-namespace
.PHONY: backend-k8s-uninstall
backend-k8s-uninstall:
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
.PHONY: frontend-k8s-install
frontend-k8s-install: create-namespace
@@ -151,18 +141,18 @@ frontend-k8s-install: create-namespace
.PHONY: frontend-k8s-uninstall
frontend-k8s-uninstall:
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
.PHONY: data-mate-docker-install
data-mate-docker-install:
.PHONY: datamate-docker-install
datamate-docker-install:
cd deployment/docker/datamate && docker-compose up -d
.PHONY: data-mate-docker-uninstall
data-mate-docker-uninstall:
.PHONY: datamate-docker-uninstall
datamate-docker-uninstall:
cd deployment/docker/datamate && docker-compose down
.PHONY: data-mate-k8s-install
data-mate-k8s-install: create-namespace mysql-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
.PHONY: datamate-k8s-install
datamate-k8s-install: create-namespace database-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
.PHONY: data-mate-k8s-uninstall
data-mate-k8s-uninstall: mysql-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall
.PHONY: datamate-k8s-uninstall
datamate-k8s-uninstall: database-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall

View File

@@ -14,7 +14,7 @@ import java.time.Duration;
@Slf4j
public class RuntimeClient {
private static final String BASE_URL = "http://runtime:8081/api";
private static final String BASE_URL = "http://datamate-runtime:8081/api";
private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";

View File

@@ -12,6 +12,7 @@ import com.datamate.cleaning.domain.model.TaskProcess;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
import com.datamate.cleaning.interfaces.dto.CleaningProcess;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
@@ -55,7 +56,14 @@ public class CleaningTaskService {
public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
Integer offset = page * size;
return cleaningTaskMapper.findTasks(status, keywords, size, offset);
List<CleaningTask> tasks = cleaningTaskMapper.findTasks(status, keywords, size, offset);
tasks.forEach(this::setProcess);
return tasks;
}
private void setProcess(CleaningTask task) {
int count = cleaningResultMapper.countByInstanceId(task.getId());
task.setProgress(CleaningProcess.of(task.getFileCount(), count));
}
public int countTasks(String status, String keywords) {
@@ -80,6 +88,7 @@ public class CleaningTaskService {
task.setDestDatasetId(destDataset.getId());
task.setDestDatasetName(destDataset.getName());
task.setBeforeSize(srcDataset.getTotalSize());
task.setFileCount(srcDataset.getFileCount());
cleaningTaskMapper.insertTask(task);
List<OperatorInstancePo> instancePos = request.getInstance().stream()
@@ -93,7 +102,9 @@ public class CleaningTaskService {
}
public CleaningTask getTask(String taskId) {
return cleaningTaskMapper.findTaskById(taskId);
CleaningTask task = cleaningTaskMapper.findTaskById(taskId);
setProcess(task);
return task;
}
@Transactional
@@ -113,7 +124,7 @@ public class CleaningTaskService {
process.setDatasetId(task.getDestDatasetId());
process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
process.setExecutorType(ExecutorType.DATAMATE.getValue());
process.setProcess(instances.stream()
.map(instance -> Map.of(instance.getId(), instance.getOverrides()))
.toList());

View File

@@ -4,7 +4,7 @@ import lombok.Getter;
@Getter
public enum ExecutorType {
DATA_PLATFORM("data_platform"),
DATAMATE("datamate"),
DATA_JUICER_RAY("ray"),
DATA_JUICER_DEFAULT("default");

View File

@@ -6,4 +6,6 @@ import org.apache.ibatis.annotations.Param;
@Mapper
public interface CleaningResultMapper {
void deleteByInstanceId(@Param("instanceId") String instanceId);
int countByInstanceId(@Param("instanceId") String instanceId);
}

View File

@@ -4,6 +4,9 @@ package com.datamate.cleaning.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
import java.math.BigDecimal;
import java.math.RoundingMode;
/**
* CleaningProcess
*/
@@ -16,5 +19,20 @@ public class CleaningProcess {
private Integer totalFileNum;
private Integer finishedFileNum;
public CleaningProcess(int totalFileNum, int finishedFileNum) {
this.totalFileNum = totalFileNum;
this.finishedFileNum = finishedFileNum;
if (totalFileNum == 0) {
this.process = 0.0f;
} else {
this.process = BigDecimal.valueOf(finishedFileNum * 100L)
.divide(BigDecimal.valueOf(totalFileNum), 2, RoundingMode.HALF_UP).floatValue();
}
}
public static CleaningProcess of(int totalFileNum, int finishedFileNum) {
return new CleaningProcess(totalFileNum, finishedFileNum);
}
}

View File

@@ -36,6 +36,8 @@ public class CleaningTask {
private long afterSize;
private int fileCount;
/**
* 任务当前状态
*/

View File

@@ -5,4 +5,8 @@
DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
</delete>
<select id="countByInstanceId" resultType="java.lang.Integer">
SELECT COUNT(1) FROM t_clean_result WHERE instance_id = #{instanceId}
</select>
</mapper>

View File

@@ -3,7 +3,7 @@
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
<sql id="Base_Column_List">
id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
after_size, status, created_at, started_at, finished_at
after_size, file_count, status, created_at, started_at, finished_at
</sql>
<select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
@@ -28,9 +28,9 @@
<insert id="insertTask">
INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
dest_dataset_name, before_size, after_size, created_at)
dest_dataset_name, before_size, after_size, file_count, created_at)
VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
#{destDatasetName}, ${beforeSize}, ${afterSize}, NOW())
#{destDatasetName}, #{beforeSize}, #{afterSize}, #{fileCount}, NOW())
</insert>
<update id="updateTask">

View File

@@ -12,7 +12,7 @@ spring:
# 数据源配置
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
username: ${DB_USERNAME:root}
password: ${DB_PASSWORD:Huawei@123}
hikari:

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN" monitorInterval="30">
<Properties>
<Property name="LOG_PATH">/var/log/data-mate/backend</Property>
<Property name="LOG_PATH">/var/log/datamate/backend</Property>
<Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
<Property name="MAX_FILE_SIZE">100MB</Property>
<Property name="MAX_HISTORY">30</Property>

View File

@@ -1,63 +1,70 @@
services:
# 1) backend
backend:
container_name: backend
image: backend
datamate-backend:
container_name: datamate-backend
image: datamate-backend
restart: on-failure
privileged: true
ports:
- "8080:8080"
- "8080"
volumes:
- dataset_volume:/dataset
- flow_volume:/flow
- log_volume:/var/log/data-mate
networks: [ edatamate ]
- log_volume:/var/log/datamate
networks: [ datamate ]
depends_on:
- mysql
- datamate-database
# 2) frontend(NodePort 30000)
frontend:
container_name: frontend
image: frontend
datamate-frontend:
container_name: datamate-frontend
image: datamate-frontend
restart: on-failure
ports:
- "30000:80" # nodePort → hostPort
volumes:
- log_volume:/var/log/data-mate
networks: [ edatamate ]
- frontend_log_volume:/var/log/datamate/frontend
networks: [ datamate ]
depends_on:
- backend
- datamate-backend
# 3) mysql
mysql:
container_name: mysql
# 3) database
datamate-database:
container_name: datamate-database
image: mysql:8
restart: on-failure
environment:
MYSQL_ROOT_PASSWORD: Huawei@123
ports:
- "3306:3306"
- "3306"
command: |
sh -c "
chown mysql:mysql /var/log/datamate/database &&
chmod 755 /var/log/datamate/database &&
exec docker-entrypoint.sh mysqld
"
volumes:
- mysql_volume:/var/lib/mysql
- ../../../scripts/db:/docker-entrypoint-initdb.d
- ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf
- log_volume:/var/log/data-mate
networks: [ edatamate ]
- ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf:ro
- database_log_volume:/var/log/datamate/database
networks: [ datamate ]
runtime:
container_name: runtime
image: runtime
# 3) runtime
datamate-runtime:
container_name: datamate-runtime
image: datamate-runtime
restart: on-failure
environment:
RAY_DEDUP_LOGS: "0"
RAY_TQDM_PATCH_PRINT: "0"
MYSQL_HOST: "mysql"
MYSQL_HOST: "datamate-database"
MYSQL_PORT: "3306"
MYSQL_USER: "root"
MYSQL_PASSWORD: "Huawei@123"
MYSQL_DATABASE: "datamate"
ports:
- "8081:8081"
- "8081"
command:
- python
- /opt/runtime/datamate/operator_runtime.py
@@ -65,22 +72,27 @@ services:
- "8081"
volumes:
- ray_log_volume:/tmp/ray
- log_volume:/var/log/data-mate
- log_volume:/var/log/datamate
- dataset_volume:/dataset
- flow_volume:/flow
networks: [ datamate ]
volumes:
dataset_volume:
name: data-mate-dataset-volume
name: datamate-dataset-volume
flow_volume:
name: data-mate-flow-volume
name: datamate-flow-volume
log_volume:
name: data-mate-log-volume
name: datamate-log-volume
mysql_volume:
name: data-mate-mysql-volume
name: datamate-mysql-volume
ray_log_volume:
name: data-mate-ray-log-volume
name: datamate-ray-log-volume
frontend_log_volume:
name: datamate-frontend-log-volume
database_log_volume:
name: datamate-database-log-volume
networks:
edatamate:
datamate:
driver: bridge

View File

@@ -5,6 +5,7 @@ character-set-server = utf8mb4
collation-server = utf8mb4_unicode_ci
# 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松)
default-time-zone = 'Asia/Shanghai'
log_error=/var/log/datamate/database/error.log
[client]
# 设置客户端连接默认字符集
@@ -12,4 +13,4 @@ default-character-set = utf8mb4
[mysql]
# 设置 mysql 命令行客户端默认字符集
default-character-set = utf8mb4
default-character-set = utf8mb4

View File

@@ -3,13 +3,13 @@
# Declare variables to be passed into your templates.
# -- String to partially override release name.
nameOverride: kuberay-operator
nameOverride: datamate-kuberay-operator
# -- String to fully override release name.
fullnameOverride: kuberay-operator
fullnameOverride: datamate-kuberay-operator
# -- String to override component name.
componentOverride: kuberay-operator
componentOverride: datamate-kuberay-operator
image:
# -- Image repository.
@@ -32,7 +32,7 @@ serviceAccount:
create: true
# -- The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template.
name: kuberay-operator
name: datamate-kuberay-operator
logging:
# -- Log encoder to use for stdout (one of `json` or `console`).
@@ -88,7 +88,7 @@ featureGates:
# Configurations for KubeRay operator metrics.
metrics:
# -- Whether KubeRay operator should emit control plane metrics.
enabled: true
enabled: false
serviceMonitor:
# -- Enable a prometheus ServiceMonitor
enabled: false

View File

@@ -6,12 +6,12 @@
# in this Helm chart.
image:
repository: runtime
repository: datamate-runtime
tag: latest
pullPolicy: IfNotPresent
nameOverride: "kuberay"
fullnameOverride: ""
fullnameOverride: "datamate-raycluster"
imagePullSecrets: []
# - name: an-existing-secret
@@ -68,7 +68,7 @@ head:
- name: RAY_TQDM_PATCH_PRINT
value: "0"
- name: MYSQL_HOST
value: "mysql"
value: "datamate-database"
- name: MYSQL_PORT
value: "3306"
- name: MYSQL_USER
@@ -114,15 +114,15 @@ head:
volumes:
- name: log-volume
hostPath:
path: /opt/data-mate/data/log
path: /opt/datamate/data/log
type: DirectoryOrCreate
- name: dataset-volume
hostPath:
path: /opt/data-mate/data/dataset
path: /opt/datamate/data/dataset
type: DirectoryOrCreate
- name: flow-volume
hostPath:
path: /opt/data-mate/data/flow
path: /opt/datamate/data/flow
type: DirectoryOrCreate
volumeMounts:
- mountPath: /tmp/ray
@@ -136,7 +136,7 @@ head:
# Follows standard K8s container spec.
sidecarContainers:
- name: runtime
image: runtime
image: datamate-runtime
imagePullPolicy: IfNotPresent
command:
- python
@@ -145,7 +145,7 @@ head:
- "8081"
env:
- name: MYSQL_HOST
value: "mysql"
value: "datamate-database"
- name: MYSQL_PORT
value: "3306"
- name: MYSQL_USER
@@ -160,7 +160,7 @@ head:
- mountPath: /tmp/ray
name: log-volume
subPath: ray/head
- mountPath: /var/log/data-mate
- mountPath: /var/log/datamate
name: log-volume
- mountPath: /dataset
name: dataset-volume
@@ -212,7 +212,7 @@ worker:
- name: RAY_TQDM_PATCH_PRINT
value: "0"
- name: MYSQL_HOST
value: "mysql"
value: "datamate-database"
- name: MYSQL_PORT
value: "3306"
- name: MYSQL_USER
@@ -257,15 +257,15 @@ worker:
volumes:
- name: log-volume
hostPath:
path: /opt/data-mate/data/log
path: /opt/datamate/data/log
type: DirectoryOrCreate
- name: dataset-volume
hostPath:
path: /opt/data-mate/data/dataset
path: /opt/datamate/data/dataset
type: DirectoryOrCreate
- name: flow-volume
hostPath:
path: /opt/data-mate/data/flow
path: /opt/datamate/data/flow
type: DirectoryOrCreate
volumeMounts:
- mountPath: /tmp/ray
@@ -350,15 +350,15 @@ additionalWorkerGroups:
volumes:
- name: log-volume
hostPath:
path: /opt/data-mate/data/log
path: /opt/datamate/data/log
type: DirectoryOrCreate
- name: dataset-volume
hostPath:
path: /opt/data-mate/data/dataset
path: /opt/datamate/data/dataset
type: DirectoryOrCreate
- name: flow-volume
hostPath:
path: /opt/data-mate/data/flow
path: /opt/datamate/data/flow
type: DirectoryOrCreate
volumeMounts:
- mountPath: /tmp/ray

View File

@@ -1,7 +1,7 @@
apiVersion: v1
kind: Service
metadata:
name: runtime
name: datamate-runtime
labels:
ray.io/node-type: head
spec:

View File

@@ -2,8 +2,9 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app: backend
name: backend
app: datamate
tier: backend
name: datamate-backend
rules:
- verbs:
- create
@@ -33,83 +34,89 @@ apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: backend
name: backend
app: datamate
tier: backend
name: datamate-backend
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
app: backend
name: backend
app: datamate
tier: backend
name: datamate-backend
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: backend
name: datamate-backend
subjects:
- kind: ServiceAccount
name: backend
name: datamate-backend
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: backend
name: datamate-backend
labels:
app: backend
app: datamate
tier: backend
spec:
replicas: 1
selector:
matchLabels:
app: backend
app: datamate
tier: backend
template:
metadata:
labels:
app: backend
app: datamate
tier: backend
spec:
serviceAccountName: backend
serviceAccountName: datamate-backend
containers:
- name: backend
image: backend
imagePullPolicy: IfNotPresent
env:
- name: namespace
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPRING_CONFIG_LOCATION
value: file:/opt/backend/application.yml
ports:
- containerPort: 8080
volumeMounts:
- name: dataset-volume
mountPath: /dataset
- name: flow-volume
mountPath: /flow
- name: log-volume
mountPath: /var/log/data-mate
- name: backend
image: datamate-backend
imagePullPolicy: IfNotPresent
env:
- name: namespace
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPRING_CONFIG_LOCATION
value: file:/opt/backend/application.yml
ports:
- containerPort: 8080
volumeMounts:
- name: dataset-volume
mountPath: /dataset
- name: flow-volume
mountPath: /flow
- name: log-volume
mountPath: /var/log/datamate
volumes:
- name: dataset-volume
hostPath:
path: /opt/data-mate/data/dataset
type: DirectoryOrCreate
- name: flow-volume
hostPath:
path: /opt/data-mate/data/flow
type: DirectoryOrCreate
- name: log-volume
hostPath:
path: /opt/data-mate/data/log
type: DirectoryOrCreate
- name: dataset-volume
hostPath:
path: /opt/datamate/data/dataset
type: DirectoryOrCreate
- name: flow-volume
hostPath:
path: /opt/datamate/data/flow
type: DirectoryOrCreate
- name: log-volume
hostPath:
path: /opt/datamate/data/log
type: DirectoryOrCreate
---
apiVersion: v1
kind: Service
metadata:
name: backend
name: datamate-backend
labels:
app: backend
app: datamate
tier: backend
spec:
type: ClusterIP
ports:
@@ -117,4 +124,5 @@ spec:
targetPort: 8080
protocol: TCP
selector:
app: backend
app: datamate
tier: backend

View File

@@ -1,54 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: datax
labels:
app: datax
spec:
replicas: 1
selector:
matchLabels:
app: datax
template:
metadata:
labels:
app: datax
spec:
containers:
- name: datax
image: datax
imagePullPolicy: IfNotPresent
securityContext:
capabilities:
add:
- SYS_ADMIN
command:
- bash
- -c
- rpcbind && python3 /opt/datax/bin/app.py
ports:
- containerPort: 8000
volumeMounts:
- name: dataset
mountPath: /dataset
subPath: dataset
volumes:
- name: dataset
hostPath:
path: /tmp/data-mate
---
apiVersion: v1
kind: Service
metadata:
name: datax
labels:
app: datax
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
selector:
app: datax

View File

@@ -1,32 +1,45 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: frontend
name: datamate-frontend
labels:
app: frontend
app: datamate
tier: frontend
spec:
replicas: 1
selector:
matchLabels:
app: frontend
app: datamate
tier: frontend
template:
metadata:
labels:
app: frontend
app: datamate
tier: frontend
spec:
containers:
- name: frontend
image: frontend
imagePullPolicy: IfNotPresent
ports:
- containerPort: 80
- name: frontend
image: datamate-frontend
imagePullPolicy: IfNotPresent
ports:
- containerPort: 80
volumeMounts:
- name: log-volume
mountPath: /var/log/datamate/frontend
subPath: frontend
volumes:
- name: log-volume
hostPath:
path: /opt/datamate/data/log
type: DirectoryOrCreate
---
apiVersion: v1
kind: Service
metadata:
name: frontend
name: datamate-frontend
labels:
app: frontend
app: datamate
tier: frontend
spec:
type: NodePort
ports:
@@ -35,4 +48,5 @@ spec:
nodePort: 30000
protocol: TCP
selector:
app: frontend
app: datamate
tier: frontend

View File

@@ -1,52 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: mineru
labels:
app: mineru
spec:
replicas: 1
selector:
matchLabels:
app: mineru
template:
metadata:
labels:
app: mineru
spec:
containers:
- name: mineru
image: mineru
imagePullPolicy: IfNotPresent
command:
- mineru-api
args:
- --host
- "0.0.0.0"
- --port
- "8000"
ports:
- containerPort: 8000
volumeMounts:
- name: tmp
mountPath: /tmp/data-mate
volumes:
- name: tmp
hostPath:
path: /tmp/data-mate
---
apiVersion: v1
kind: Service
metadata:
name: mineru
labels:
app: mineru
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
selector:
app: mineru

View File

@@ -1,7 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: mysql-utf8-config
name: datamate-mysql-utf8-config
data:
utf8.cnf: |
[mysqld]
@@ -11,6 +11,7 @@ data:
collation-server = utf8mb4_unicode_ci
# 或者使用 utf8_general_ci (性能稍好,但排序规则稍宽松)
default-time-zone = 'Asia/Shanghai'
log_error=/var/log/datamate/database/error.log
[client]
# 设置客户端连接默认字符集
@@ -18,4 +19,4 @@ data:
[mysql]
# 设置 mysql 命令行客户端默认字符集
default-character-set = utf8mb4
default-character-set = utf8mb4

View File

@@ -1,21 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: mysql
name: datamate-database
labels:
app: mysql
app: datamate
tier: database
spec:
replicas: 1
selector:
matchLabels:
app: mysql
app: datamate
tier: database
strategy:
type: Recreate
template:
metadata:
labels:
app: mysql
app: datamate
tier: database
spec:
initContainers:
- name: init-log
image: mysql:8
imagePullPolicy: IfNotPresent
command:
- sh
- -c
args:
- |
chown mysql:mysql /var/log/datamate/database
chmod 755 /var/log/datamate/database
volumeMounts:
- name: log-volume
mountPath: /var/log/datamate/database
subPath: database
containers:
- name: mysql
image: mysql:8
@@ -28,6 +46,9 @@ spec:
volumeMounts:
- name: data-volume
mountPath: /var/lib/mysql
- name: log-volume
mountPath: /var/log/datamate/database
subPath: database
- name: init-sql
mountPath: /docker-entrypoint-initdb.d
- name: mysql-utf8-config
@@ -35,22 +56,27 @@ spec:
volumes:
- name: data-volume
hostPath:
path: /opt/data-mate/data/mysql
path: /opt/datamate/data/mysql
type: DirectoryOrCreate
- name: log-volume
hostPath:
path: /opt/datamate/data/log
type: DirectoryOrCreate
- name: init-sql
configMap:
name: init-sql
name: datamate-init-sql
- name: mysql-utf8-config
configMap:
name: mysql-utf8-config
name: datamate-mysql-utf8-config
---
apiVersion: v1
kind: Service
metadata:
name: mysql
name: datamate-database
labels:
app: mysql
app: datamate
tier: database
spec:
type: ClusterIP
ports:
@@ -58,4 +84,5 @@ spec:
targetPort: 3306
protocol: TCP
selector:
app: mysql
app: datamate
tier: database

View File

@@ -1,49 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: unstructured
labels:
app: unstructured
spec:
replicas: 1
selector:
matchLabels:
app: unstructured
template:
metadata:
labels:
app: unstructured
spec:
containers:
- name: unstructured
image: unstructured
imagePullPolicy: IfNotPresent
command:
- python
args:
- app.py
ports:
- containerPort: 8000
volumeMounts:
- name: tmp
mountPath: /tmp/data-mate
volumes:
- name: tmp
hostPath:
path: /tmp/data-mate
---
apiVersion: v1
kind: Service
metadata:
name: unstructured
labels:
app: unstructured
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
selector:
app: unstructured

View File

@@ -12,7 +12,7 @@ spring:
# 数据源配置
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://mysql:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
url: jdbc:mysql://datamate-database:3306/datamate?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=Asia/Shanghai&allowPublicKeyRetrieval=true
username: ${DB_USERNAME:root}
password: ${DB_PASSWORD:Huawei@123}
hikari:

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN" monitorInterval="30">
<Properties>
<Property name="LOG_PATH">/var/log/data-mate/backend</Property>
<Property name="LOG_PATH">/var/log/datamate/backend</Property>
<Property name="LOG_PATTERN">%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</Property>
<Property name="MAX_FILE_SIZE">100MB</Property>
<Property name="MAX_HISTORY">30</Property>

View File

@@ -19,4 +19,3 @@ xmltodict==1.0.2
zhconv==1.4.3
sqlalchemy==2.0.40
pymysql==1.1.1
unstructured[pdf,docx]==0.18.15

View File

@@ -15,7 +15,7 @@ from datamate.scheduler import func_scheduler
from datamate.wrappers import WRAPPERS
# 日志配置
LOG_DIR = "/var/log/data-mate/runtime"
LOG_DIR = "/var/log/datamate/runtime"
os.makedirs(LOG_DIR, exist_ok=True)
logger.add(
f"{LOG_DIR}/runtime.log",

View File

@@ -17,7 +17,7 @@ classifiers = [
# Core dependencies
dependencies = [
"uvicorn",
"uvicorn[standard]",
"fastapi",
"loguru",
"jsonargparse",

View File

@@ -22,6 +22,7 @@ CREATE TABLE IF NOT EXISTS t_clean_task
dest_dataset_name varchar(64),
before_size bigint,
after_size bigint,
file_count int,
created_at timestamp default current_timestamp,
started_at timestamp,
finished_at timestamp,

View File

@@ -111,113 +111,30 @@ VALUES ('TextFormatter', 'TXT文本抽取', '抽取TXT中的文本。', '1.0.0',
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
VALUES (3, 'TextFormatter'),
(7, 'FileExporter'),
(8, 'TextFormatter'),
(8, 'FileExporter'),
(3, 'FileWithShortOrLongLengthFilter'),
(3, 'FileWithHighRepeatPhraseRateFilter'),
(3, 'FileWithHighRepeatWordRateFilter'),
(3, 'FileWithHighSpecialCharRateFilter'),
(3, 'FileWithManySensitiveWordsFilter'),
(3, 'DuplicateFilesFilter'),
(3, 'DuplicateSentencesFilter'),
(3, 'AnonymizedCreditCardNumber'),
(3, 'AnonymizedIdNumber'),
(3, 'AnonymizedIpAddress'),
(3, 'AnonymizedPhoneNumber'),
(3, 'AnonymizedUrlCleaner'),
(3, 'HtmlTagCleaner'),
(3, 'XMLTagCleaner'),
(3, 'ContentCleaner'),
(3, 'EmailNumberCleaner'),
(3, 'EmojiCleaner'),
(3, 'ExtraSpaceCleaner'),
(3, 'FullWidthCharacterCleaner'),
(3, 'GrableCharactersCleaner'),
(3, 'InvisibleCharactersCleaner'),
(3, 'LegendCleaner'),
(3, 'PoliticalWordCleaner'),
(3, 'SexualAndViolentWordCleaner'),
(3, 'TraditionalChineseCleaner'),
(3, 'UnicodeSpaceCleaner'),
(4, 'ImgFormatter'),
(4, 'ImgBlurredImagesCleaner'),
(4, 'ImgBrightness'),
(4, 'ImgContrast'),
(4, 'ImgDenoise'),
(4, 'ImgDuplicatedImagesCleaner'),
(4, 'ImgPerspectiveTransformation'),
(4, 'ImgResize'),
(4, 'ImgSaturation'),
(4, 'ImgShadowRemove'),
(4, 'ImgSharpness'),
(4, 'ImgSimilarImagesCleaner'),
(4, 'ImgTypeUnify'),
(8, 'FileWithShortOrLongLengthFilter'),
(8, 'FileWithHighRepeatPhraseRateFilter'),
(8, 'FileWithHighRepeatWordRateFilter'),
(8, 'FileWithHighSpecialCharRateFilter'),
(8, 'FileWithManySensitiveWordsFilter'),
(8, 'DuplicateFilesFilter'),
(8, 'DuplicateSentencesFilter'),
(8, 'AnonymizedCreditCardNumber'),
(8, 'AnonymizedIdNumber'),
(8, 'AnonymizedIpAddress'),
(8, 'AnonymizedPhoneNumber'),
(8, 'AnonymizedUrlCleaner'),
(8, 'HtmlTagCleaner'),
(8, 'XMLTagCleaner'),
(8, 'ContentCleaner'),
(8, 'EmailNumberCleaner'),
(8, 'EmojiCleaner'),
(8, 'ExtraSpaceCleaner'),
(8, 'FullWidthCharacterCleaner'),
(8, 'GrableCharactersCleaner'),
(8, 'InvisibleCharactersCleaner'),
(8, 'LegendCleaner'),
(8, 'PoliticalWordCleaner'),
(8, 'SexualAndViolentWordCleaner'),
(8, 'TraditionalChineseCleaner'),
(8, 'UnicodeSpaceCleaner'),
(11, 'TextFormatter'),
(11, 'FileExporter'),
(11, 'FileWithShortOrLongLengthFilter'),
(11, 'FileWithHighRepeatPhraseRateFilter'),
(11, 'FileWithHighRepeatWordRateFilter'),
(11, 'FileWithHighSpecialCharRateFilter'),
(11, 'FileWithManySensitiveWordsFilter'),
(11, 'DuplicateFilesFilter'),
(11, 'DuplicateSentencesFilter'),
(11, 'AnonymizedCreditCardNumber'),
(11, 'AnonymizedIdNumber'),
(11, 'AnonymizedIpAddress'),
(11, 'AnonymizedPhoneNumber'),
(11, 'AnonymizedUrlCleaner'),
(11, 'HtmlTagCleaner'),
(11, 'XMLTagCleaner'),
(11, 'ContentCleaner'),
(11, 'EmailNumberCleaner'),
(11, 'EmojiCleaner'),
(11, 'ExtraSpaceCleaner'),
(11, 'FullWidthCharacterCleaner'),
(11, 'GrableCharactersCleaner'),
(11, 'InvisibleCharactersCleaner'),
(11, 'LegendCleaner'),
(11, 'PoliticalWordCleaner'),
(11, 'SexualAndViolentWordCleaner'),
(11, 'TraditionalChineseCleaner'),
(11, 'UnicodeSpaceCleaner'),
(11, 'ImgFormatter'),
(11, 'ImgBlurredImagesCleaner'),
(11, 'ImgBrightness'),
(11, 'ImgContrast'),
(11, 'ImgDenoise'),
(11, 'ImgDuplicatedImagesCleaner'),
(11, 'ImgPerspectiveTransformation'),
(11, 'ImgResize'),
(11, 'ImgSaturation'),
(11, 'ImgShadowRemove'),
(11, 'ImgSharpness'),
(11, 'ImgSimilarImagesCleaner'),
(11, 'ImgTypeUnify');
SELECT c.id, o.id
FROM t_operator_category c
CROSS JOIN t_operator o
WHERE c.id IN (3, 8, 11)
AND o.id IN ('TextFormatter', 'FileWithShortOrLongLengthFilter', 'FileWithHighRepeatPhraseRateFilter',
'FileWithHighRepeatWordRateFilter', 'FileWithHighSpecialCharRateFilter', 'FileWithManySensitiveWordsFilter',
'DuplicateFilesFilter', 'DuplicateSentencesFilter', 'AnonymizedCreditCardNumber', 'AnonymizedIdNumber',
'AnonymizedIpAddress', 'AnonymizedPhoneNumber', 'AnonymizedUrlCleaner', 'HtmlTagCleaner', 'XMLTagCleaner',
'ContentCleaner', 'EmailNumberCleaner', 'EmojiCleaner', 'ExtraSpaceCleaner', 'FullWidthCharacterCleaner',
'GrableCharactersCleaner', 'InvisibleCharactersCleaner', 'LegendCleaner', 'PoliticalWordCleaner',
'SexualAndViolentWordCleaner', 'TraditionalChineseCleaner', 'UnicodeSpaceCleaner');
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
SELECT c.id, o.id
FROM t_operator_category c
CROSS JOIN t_operator o
WHERE c.id IN (4, 8, 11)
AND o.id IN ('ImgFormatter', 'ImgBlurredImagesCleaner', 'ImgBrightness', 'ImgContrast', 'ImgDenoise',
'ImgDuplicatedImagesCleaner', 'ImgPerspectiveTransformation', 'ImgResize', 'ImgSaturation',
'ImgShadowRemove', 'ImgSharpness', 'ImgSimilarImagesCleaner', 'ImgTypeUnify');
INSERT IGNORE INTO t_operator_category_relation(category_id, operator_id)
SELECT c.id, o.id
FROM t_operator_category c
CROSS JOIN t_operator o
WHERE c.id IN (7, 8, 11)
AND o.id IN ('FileExporter');

View File

@@ -1,33 +0,0 @@
# Stage 1: build Alibaba DataX from source with Maven on JDK 8.
FROM maven:3-openjdk-8-slim AS builder
# Point apt at the Aliyun mirrors (faster in-region access) before installing git.
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y git && \
    git clone https://github.com/alibaba/DataX.git
# Overlay local customizations on top of the cloned DataX tree.
COPY runtime/datax/ DataX/
# Swap the legacy MySQL driver class for the Connector/J 8 class name,
# then build the DataX distribution (tests skipped).
RUN cd DataX && \
    sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
    plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
    mvn -U clean package assembly:assembly -Dmaven.test.skip=true

# Stage 2: slim runtime image with Python 3 and the FastAPI wrapper for datax.py.
FROM openjdk:8-jdk-slim
# Fix: apt's list cache lives in /var/lib/apt — the original removed
# "/var/lib/apy/lists/*" (typo), which deleted nothing and left the cache
# in the image.
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y python3 python3-pip python-is-python3 vim wget curl nfs-common rsync && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install the HTTP-serving dependencies from the Aliyun PyPI mirror.
RUN pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple && \
    pip config --user set global.trusted-host mirrors.aliyun.com && \
    pip install fastapi uvicorn[standard] && \
    pip cache purge
# Built DataX distribution plus the HTTP entry point used by the platform.
COPY --from=builder /DataX/target/datax/datax /opt/datax
COPY scripts/images/datax/app.py /opt/datax/bin/app.py

View File

@@ -1,52 +0,0 @@
import subprocess
import tempfile
from fastapi import FastAPI
from pydantic import BaseModel
# FastAPI application that exposes the local DataX CLI over HTTP.
app = FastAPI(title="datax")
class CreateJobParam(BaseModel):
    """Request body for /process: raw DataX job configuration JSON."""
    # Written verbatim to a temp .json file and handed to datax.py.
    content: str
@app.post("/process", tags=["run datax.py"])
async def process(job: CreateJobParam):
    """Write the job JSON to a temp file and execute it with datax.py.

    Returns a dict with "status" (0 on success, otherwise the datax.py
    exit code or an HTTP-style error code) plus stdout/stderr on failure.
    """
    output = {
        "status": "failed",  # overwritten below; survives only early failures
    }
    try:
        # Persist the job config for the duration of the run; delete=True
        # removes the temp file automatically when the with-block exits.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=True) as f:
            f.write(job.content)
            f.seek(0)
            cmd_args = ["python3", "/opt/datax/bin/datax.py", f.name]
            # Fix: the original passed check=True, which raised
            # CalledProcessError on any non-zero exit — making the
            # "returncode != 0" branch below unreachable. Inspect the
            # return code explicitly instead.
            result = subprocess.run(
                cmd_args,
                capture_output=True,
                text=True
            )
            output["status"] = result.returncode
            if result.returncode != 0:
                output["stdout"] = result.stdout
                output["stderr"] = result.stderr
    except subprocess.TimeoutExpired as e:
        # NOTE(review): only reachable if a timeout= is ever passed to
        # subprocess.run(); kept so adding a timeout later "just works".
        output["status"] = 408
        output["stderr"] = f"The script execution timed out: {e.stderr}"
    except Exception as e:
        output["status"] = 500
        output["stderr"] = f"Server error: {str(e)}"
    return output
# Run a local development server when executed directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -10,7 +10,7 @@ RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && \
FROM nginx:1.29 AS runner
COPY --from=builder /app/dist /opt/frontend
COPY scripts/images/frontend/edm.conf /etc/nginx/conf.d/default.conf
# NOTE(review): this second COPY targets the same destination and silently
# overwrites the edm.conf copied just above. This looks like a stripped
# -/+ diff pair; if backend.conf is the intended config, the edm.conf
# line is dead and should be removed — confirm.
COPY scripts/images/frontend/backend.conf /etc/nginx/conf.d/default.conf
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

View File

@@ -2,8 +2,13 @@ server {
listen 80;
server_name 0.0.0.0;
# Access/error logs under the datamate log root.
access_log /var/log/datamate/frontend/access.log main;
error_log /var/log/datamate/frontend/error.log notice;
# Allow large uploads (dataset files) through the proxy.
client_max_body_size 1024M;
location /api/ {
proxy_pass http://backend:8080/api/;
# NOTE(review): two proxy_pass directives in one location — nginx rejects
# this with a '"proxy_pass" directive is duplicate' error. This looks like
# a stripped -/+ diff pair; keep only the datamate-backend line.
proxy_pass http://datamate-backend:8080/api/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

View File

@@ -1,22 +0,0 @@
# Runtime image for MinerU document parsing (CPU-only PyTorch + torch_npu stack).
FROM python:3.10-slim
# OpenGL/GLib libraries are required by MinerU's OpenCV dependency.
RUN apt-get update && \
    apt-get install -y curl vim libgl1 libgl1-mesa-glx libglib2.0-0 procps && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install MinerU plus pinned CPU builds of torch/torchvision and the
# Ascend (torch_npu) packages from the Aliyun PyPI mirror.
RUN pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple && \
    pip config --user set global.trusted-host mirrors.aliyun.com && \
    pip install --upgrade setuptools && \
    pip install -U 'mineru[core]==2.1.0' --break-system-packages && \
    pip install torch==2.7.1+cpu -f https://download.pytorch.org/whl/torch/ && \
    pip install torchvision==0.22.1+cpu -f https://download.pytorch.org/whl/torchvision && \
    pip install requests==2.27.1 torch_npu==2.7.1rc1 numpy==1.26.0 decorator==5.2.1 einops==0.8.1 attrs==25.3.0 && \
    pip cache purge
# NOTE(review): an empty CURL_CA_BUNDLE disables TLS certificate verification
# for tools that honor it — confirm this is intentional.
ENV CURL_CA_BUNDLE=""
ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
# Pre-download all models from ModelScope at build time, then serve them
# from the local cache at runtime.
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
ENV MINERU_MODEL_SOURCE=local

View File

@@ -17,8 +17,6 @@ ENV HF_HUB_DISABLE_XET=1
# NOTE(review): this RUN chain is malformed — the line ending in get_model()"
# has no trailing backslash, yet "&& pip cache purge" follows on its own line,
# and "pip cache purge" appears twice. This looks like a diff with the +/-
# markers stripped; restore a single coherent chain before building.
RUN pip install -e . -i https://mirrors.huaweicloud.com/repository/pypi/simple \
&& pip install -r /opt/runtime/datamate/ops/requirements.txt -i https://mirrors.huaweicloud.com/repository/pypi/simple \
&& pip cache purge \
&& python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()" \
&& python -c "from unstructured_inference.models.base import get_model; get_model()"
&& pip cache purge
# NOTE(review): the timezone symlink is created twice — the second RUN below
# is redundant (likely the same stripped-diff artifact).
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

View File

@@ -1,9 +0,0 @@
# HTTP wrapper around the upstream unstructured image: adds FastAPI + uvicorn
# and ships the /process endpoint implemented in app.py.
FROM downloads.unstructured.io/unstructured-io/unstructured
# Use the Huawei Cloud PyPI mirror for region-local installs.
RUN pip config --user set global.index https://mirrors.huaweicloud.com/repository/pypi && \
    pip config --user set global.index-url https://mirrors.huaweicloud.com/repository/pypi/simple && \
    pip config --user set global.trusted-host mirrors.huaweicloud.com && \
    pip install fastapi uvicorn && \
    pip cache purge
COPY scripts/images/unstructured/app.py /app/app.py

View File

@@ -1,61 +0,0 @@
import asyncio
import os
from typing import Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from unstructured.partition.auto import partition
# FastAPI application wrapping unstructured's partition() for text extraction.
app = FastAPI(title="unstructured")
class FileProcessingRequest(BaseModel):
    """File-processing request model."""
    # Absolute path of the file to process; validated in the endpoint.
    file_path: Optional[str] = None
    # Additional optional fields may be added here.
@app.post("/process", tags=["文件处理"])
async def process_file(request_data: FileProcessingRequest):
    """Process a file and return the extracted text content.

    Raises:
        HTTPException: 400 if file_path is missing, 404 if the file does
        not exist, 500 for any unexpected processing error.
    """
    try:
        file_path = request_data.file_path
        if not file_path:
            raise HTTPException(status_code=400, detail="缺少必要参数: filePath")
        if not os.path.exists(file_path):
            raise HTTPException(status_code=404, detail=f"文件不存在: {file_path}")
        # Offload the potentially slow partitioning work to a worker thread.
        text_content = await process_file_async(file_path)
        return {
            "filePath": file_path,
            "text": text_content,
            "status": "success"
        }
    except HTTPException:
        # Bare re-raise keeps the original traceback intact
        # (fix: was "raise e", which adds a redundant frame).
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"process failed: {str(e)}")
async def process_file_async(file_path: str) -> str:
    """Run the blocking partition step in the default thread-pool executor."""
    # Fix: asyncio.get_event_loop() is deprecated inside a running coroutine
    # (Python 3.10+); get_running_loop() is the supported call here.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, partition_file_sync, file_path)
def partition_file_sync(file_path: str) -> str:
    """Synchronously extract text from a file (called via the executor)."""
    fragments = [str(element) for element in partition(filename=file_path)]
    return "\n\n".join(fragments)
# Run a local development server when executed directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -1,103 +0,0 @@
#!/bin/bash
# ==========================================================
# Step 1: define the help function
# ==========================================================
# Script name (basename of the invoked path), shown in usage text.
SCRIPT_NAME=$(basename "$0")
# Print usage information. $TARGET_DIR_DEFAULT is expanded at call time,
# after the defaults in step 2 have been set.
help_message() {
cat << EOF
Usage: $SCRIPT_NAME [-d TARGET_DIR] [-h|--help]
描述:
将预定义的 Docker 镜像列表保存为 .tar 文件。
选项:
-d TARGET_DIR 指定保存镜像的目标目录。
(绝对路径或相对路径)
如果未指定,将使用默认路径: $TARGET_DIR_DEFAULT
-h, --help 显示此帮助信息并退出。
示例:
# 使用默认目录 (./dist)
$SCRIPT_NAME
# 指定保存到 /tmp/my-archive 目录
$SCRIPT_NAME -d /tmp/my-archive
EOF
}
# ==========================================================
# Step 2: defaults and argument parsing
# ==========================================================
# Default directory for the saved image archives.
TARGET_DIR_DEFAULT="./dist"
TARGET_DIR="$TARGET_DIR_DEFAULT"
# Parse command-line options with getopts.
# "d:" means -d requires an argument (the target directory).
while getopts "d:h" opt; do
case ${opt} in
d )
# -d given: use its argument as the target directory.
TARGET_DIR="$OPTARG"
;;
h )
# -h given: print help and exit successfully.
help_message
exit 0
;;
\? )
# Invalid option: report it, show help, and fail.
echo "错误:无效选项 -$OPTARG" >&2
help_message
exit 1
;;
esac
done
# Advance past the parsed options (best practice, even though this
# script takes no positional arguments).
shift $((OPTIND -1))
# ==========================================================
# Step 3: core logic
# ==========================================================

# Ensure the target directory exists (create it if necessary).
if ! mkdir -p "$TARGET_DIR"; then
    echo "❌ 致命错误:无法创建目标目录: $TARGET_DIR" >&2
    exit 1
fi

echo "目标目录已确认/创建: $TARGET_DIR"
echo "----------------------------------------"

# Image list. Fix: renamed to the datamate-* convention so the names match
# the tags produced by the build targets (datamate-frontend, datamate-backend,
# datamate-runtime); the old bare names no longer exist after the rename.
images=("datamate-frontend:latest" "datamate-backend:latest" "datamate-runtime:latest" "mysql:8")

for image in "${images[@]}"; do
    # Replace ':' with '_' so the archive filename is filesystem-safe.
    safe_name="${image//[:]/_}"
    # Full output path for this image's tarball.
    output_path="$TARGET_DIR/$safe_name.tar"

    echo "正在保存镜像 $image"
    echo " -> 到文件 $output_path"

    # Test the command's exit status directly instead of inspecting $?
    # afterwards (avoids the status being clobbered by an intervening command).
    if docker save -o "$output_path" "$image"; then
        echo "✅ 保存成功。"
    else
        echo "❌ 保存失败!"
    fi
    echo ""
done