diff --git a/.github/workflows/docker-image-backend.yml b/.github/workflows/docker-image-backend.yml index 9548050..8aabf0e 100644 --- a/.github/workflows/docker-image-backend.yml +++ b/.github/workflows/docker-image-backend.yml @@ -2,13 +2,13 @@ name: Backend Docker Image CI on: push: - branches: [ "develop_930" ] + branches: [ "main" ] paths: - 'backend/**' - 'scripts/images/backend/**' - '.github/workflows/docker-image-backend.yml' pull_request: - branches: [ "develop_930" ] + branches: [ "main" ] paths: - 'backend/**' - 'scripts/images/backend/**' @@ -16,12 +16,12 @@ on: workflow_dispatch: jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Build the Backend Docker image - run: make build-backend + call-docker-build: + name: Build and Push Backend Docker Image + uses: ./.github/workflows/docker-images-reusable.yml + permissions: + contents: read + packages: write + with: + service_name: backend + build_dir: . diff --git a/.github/workflows/docker-image-frontend.yml b/.github/workflows/docker-image-frontend.yml index 2e0fb3f..8964964 100644 --- a/.github/workflows/docker-image-frontend.yml +++ b/.github/workflows/docker-image-frontend.yml @@ -2,13 +2,13 @@ name: Frontend Docker Image CI on: push: - branches: [ "develop_930" ] + branches: [ "main" ] paths: - 'frontend/**' - 'scripts/images/frontend/**' - '.github/workflows/docker-image-frontend.yml' pull_request: - branches: [ "develop_930" ] + branches: [ "main" ] paths: - 'frontend/**' - 'scripts/images/frontend/**' @@ -16,12 +16,12 @@ on: workflow_dispatch: jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Build the Frontend Docker image - run: make build-frontend + call-docker-build: + name: Build and Push Frontend Docker Image + uses: ./.github/workflows/docker-images-reusable.yml + permissions: + contents: read + packages: write + with: + service_name: frontend + build_dir: . diff --git a/.github/workflows/docker-image-runtime.yml b/.github/workflows/docker-image-runtime.yml new file mode 100644 index 0000000..8660f88 --- /dev/null +++ b/.github/workflows/docker-image-runtime.yml @@ -0,0 +1,27 @@ +name: Runtime Docker Image CI + +on: + push: + branches: [ "main" ] + paths: + - 'runtime/**' + - 'scripts/images/runtime/**' + - '.github/workflows/docker-image-runtime.yml' + pull_request: + branches: [ "main" ] + paths: + - 'runtime/**' + - 'scripts/images/runtime/**' + - '.github/workflows/docker-image-runtime.yml' + workflow_dispatch: + +jobs: + call-docker-build: + name: Build and Push Runtime Docker Image + uses: ./.github/workflows/docker-images-reusable.yml + permissions: + contents: read + packages: write + with: + service_name: runtime + build_dir: . diff --git a/.github/workflows/docker-images-reusable.yml b/.github/workflows/docker-images-reusable.yml new file mode 100644 index 0000000..e7ce8e9 --- /dev/null +++ b/.github/workflows/docker-images-reusable.yml @@ -0,0 +1,57 @@ +name: Docker Image Build & Push + +on: + workflow_call: + inputs: + service_name: + required: true + type: string + build_dir: + required: true + type: string + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v4 + + - name: Login to GitHub Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set Docker Image Tag + id: set-tag + run: | + LOWERCASE_REPO=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + BASE_IMAGE=ghcr.io/$LOWERCASE_REPO/datamate-${{ inputs.service_name }} + if [[ $GITHUB_REF == refs/tags/v* ]]; then + TAG=${GITHUB_REF#refs/tags/v} + echo "TAGS=$BASE_IMAGE:$TAG" >> $GITHUB_OUTPUT + elif [[ $GITHUB_REF == refs/heads/main ]]; then + echo "TAGS=$BASE_IMAGE:latest" >> $GITHUB_OUTPUT + else + echo "TAGS=$BASE_IMAGE:temp" >> $GITHUB_OUTPUT + fi + + - name: Build Docker Image + run: | + make build-${{ inputs.service_name }} VERSION=latest + + - name: Tag Docker Image + run: | + docker tag datamate-${{ inputs.service_name }}:latest ${{ steps.set-tag.outputs.TAGS }} + + - name: Push Docker Image + if: github.event_name != 'pull_request' + run: | + docker push ${{ steps.set-tag.outputs.TAGS }} + diff --git a/Makefile b/Makefile index 6c453f1..c89d6b1 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ else endif .PHONY: install -install: install-data-mate +install: install-datamate .PHONY: uninstall-% uninstall-%: @@ -54,84 +54,78 @@ else endif .PHONY: uninstall -uninstall: uninstall-data-mate +uninstall: uninstall-datamate # build -.PHONY: mineru-docker-build -mineru-docker-build: - docker build -t mineru:$(VERSION) . -f scripts/images/mineru/Dockerfile - -.PHONY: datax-docker-build -datax-docker-build: - docker build -t datax:$(VERSION) . -f scripts/images/datax/Dockerfile - -.PHONY: unstructured-docker-build -unstructured-docker-build: - docker build -t unstructured:$(VERSION) . -f scripts/images/unstructured/Dockerfile - .PHONY: backend-docker-build backend-docker-build: - docker build -t backend:$(VERSION) . -f scripts/images/backend/Dockerfile + docker build -t datamate-backend:$(VERSION) . -f scripts/images/backend/Dockerfile .PHONY: frontend-docker-build frontend-docker-build: - docker build -t frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile + docker build -t datamate-frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile .PHONY: runtime-docker-build runtime-docker-build: - docker build -t runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile + docker build -t datamate-runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile + +.PHONY: label-studio-adapter-docker-build +label-studio-adapter-docker-build: + docker build -t label-studio-adapter:$(VERSION) . -f scripts/images/label-studio-adapter/Dockerfile .PHONY: backend-docker-install backend-docker-install: - cd deployment/docker/data-mate && docker-compose up -d backend + cd deployment/docker/datamate && docker-compose up -d backend .PHONY: backend-docker-uninstall backend-docker-uninstall: - cd deployment/docker/data-mate && docker-compose down backend + cd deployment/docker/datamate && docker-compose down backend .PHONY: frontend-docker-install frontend-docker-install: - cd deployment/docker/data-mate && docker-compose up -d frontend + cd deployment/docker/datamate && docker-compose up -d frontend .PHONY: frontend-docker-uninstall frontend-docker-uninstall: - cd deployment/docker/data-mate && docker-compose down frontend + cd deployment/docker/datamate && docker-compose down frontend .PHONY: runtime-docker-install runtime-docker-install: - cd deployment/docker/data-mate && docker-compose up -d runtime + cd deployment/docker/datamate && docker-compose up -d runtime .PHONY: runtime-docker-uninstall runtime-docker-uninstall: - cd deployment/docker/data-mate && docker-compose down runtime + cd deployment/docker/datamate && docker-compose down runtime .PHONY: runtime-k8s-install runtime-k8s-install: create-namespace - helm upgrade kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE) - helm upgrade raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE) + helm upgrade datamate-kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE) + helm upgrade datamate-raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE) kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE) .PHONY: runtime-k8s-uninstall runtime-k8s-uninstall: - helm uninstall raycluster -n $(NAMESPACE) - helm uninstall kuberay-operator -n $(NAMESPACE) + helm uninstall datamate-raycluster -n $(NAMESPACE) + helm uninstall datamate-kuberay-operator -n $(NAMESPACE) kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE) -.PHONY: unstructured-k8s-install -unstructured-k8s-install: create-namespace - kubectl apply -f deployment/kubernetes/unstructured/deploy.yaml -n $(NAMESPACE) - .PHONY: mysql-k8s-install mysql-k8s-install: create-namespace - kubectl create configmap init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE) + kubectl create configmap datamate-init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE) kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) .PHONY: mysql-k8s-uninstall mysql-k8s-uninstall: - kubectl delete configmap init-sql -n $(NAMESPACE) - kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) - kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) + kubectl delete configmap datamate-init-sql -n $(NAMESPACE) --ignore-not-found + kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) --ignore-not-found + kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) --ignore-not-found + +.PHONY: database-k8s-install +database-k8s-install: mysql-k8s-install + +.PHONY: database-k8s-uninstall +database-k8s-uninstall: mysql-k8s-uninstall .PHONY: backend-k8s-install backend-k8s-install: create-namespace @@ -139,7 +133,7 @@ backend-k8s-install: create-namespace .PHONY: backend-k8s-uninstall backend-k8s-uninstall: - kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE) + kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE) --ignore-not-found .PHONY: frontend-k8s-install frontend-k8s-install: create-namespace @@ -147,18 +141,18 @@ frontend-k8s-install: create-namespace .PHONY: frontend-k8s-uninstall frontend-k8s-uninstall: - kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE) + kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE) --ignore-not-found -.PHONY: data-mate-docker-install -data-mate-docker-install: +.PHONY: datamate-docker-install +datamate-docker-install: cd deployment/docker/datamate && docker-compose up -d -.PHONY: data-mate-docker-uninstall -data-mate-docker-uninstall: +.PHONY: datamate-docker-uninstall +datamate-docker-uninstall: cd deployment/docker/datamate && docker-compose down -.PHONY: data-mate-k8s-install -data-mate-k8s-install: create-namespace mysql-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install +.PHONY: datamate-k8s-install +datamate-k8s-install: create-namespace database-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install -.PHONY: data-mate-k8s-uninstall -data-mate-k8s-uninstall: mysql-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall +.PHONY: datamate-k8s-uninstall +datamate-k8s-uninstall: database-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall diff --git a/README-zh.md b/README-zh.md index b135be8..4e492a0 100644 --- a/README-zh.md +++ b/README-zh.md @@ -9,8 +9,7 @@ ![GitHub Issues](https://img.shields.io/github/issues/ModelEngine-Group/DataMate) ![GitHub License](https://img.shields.io/github/license/ModelEngine-Group/DataMate) -**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。 -** +**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。** [简体中文](./README-zh.md) | [English](./README.md) diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/httpclient/RuntimeClient.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/httpclient/RuntimeClient.java index 6c47cbf..a526efd 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/httpclient/RuntimeClient.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/httpclient/RuntimeClient.java @@ -14,7 +14,7 @@ import java.time.Duration; @Slf4j public class RuntimeClient { - private static final String BASE_URL = "http://runtime:8081/api"; + private static final String BASE_URL = "http://datamate-runtime:8081/api"; private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit"; diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/service/CleaningTaskService.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/service/CleaningTaskService.java index 46beab3..e367cf9 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/service/CleaningTaskService.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/service/CleaningTaskService.java @@ -12,6 +12,7 @@ import com.datamate.cleaning.domain.model.TaskProcess; import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper; import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper; import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper; +import com.datamate.cleaning.interfaces.dto.CleaningProcess; import com.datamate.cleaning.interfaces.dto.CleaningTask; import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest; import com.datamate.cleaning.interfaces.dto.OperatorInstance; @@ -55,7 +56,14 @@ public class CleaningTaskService { public List getTasks(String status, String keywords, Integer page, Integer size) { Integer offset = page * size; - return cleaningTaskMapper.findTasks(status, keywords, size, offset); + List tasks = cleaningTaskMapper.findTasks(status, keywords, size, offset); + tasks.forEach(this::setProcess); + return tasks; + } + + private void setProcess(CleaningTask task) { + int count = cleaningResultMapper.countByInstanceId(task.getId()); + task.setProgress(CleaningProcess.of(task.getFileCount(), count)); } public int countTasks(String status, String keywords) { @@ -80,6 +88,7 @@ public class CleaningTaskService { task.setDestDatasetId(destDataset.getId()); task.setDestDatasetName(destDataset.getName()); task.setBeforeSize(srcDataset.getTotalSize()); + task.setFileCount(srcDataset.getFileCount()); cleaningTaskMapper.insertTask(task); List instancePos = request.getInstance().stream() @@ -93,7 +102,9 @@ public class CleaningTaskService { } public CleaningTask getTask(String taskId) { - return cleaningTaskMapper.findTaskById(taskId); + CleaningTask task = cleaningTaskMapper.findTaskById(taskId); + setProcess(task); + return task; } @Transactional @@ -113,7 +124,7 @@ public class CleaningTaskService { process.setDatasetId(task.getDestDatasetId()); process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl"); process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId()); - process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue()); + process.setExecutorType(ExecutorType.DATAMATE.getValue()); process.setProcess(instances.stream() .map(instance -> Map.of(instance.getId(), instance.getOverrides())) .toList()); diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/ExecutorType.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/ExecutorType.java index 8ec2043..70b6c90 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/ExecutorType.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/ExecutorType.java @@ -4,7 +4,7 @@ import lombok.Getter; @Getter public enum ExecutorType { - DATA_PLATFORM("data_platform"), + DATAMATE("datamate"), DATA_JUICER_RAY("ray"), DATA_JUICER_DEFAULT("default"); diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java index 24548d8..b6e1da6 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java @@ -6,4 +6,6 @@ import org.apache.ibatis.annotations.Param; @Mapper public interface CleaningResultMapper { void deleteByInstanceId(@Param("instanceId") String instanceId); + + int countByInstanceId(@Param("instanceId") String instanceId); } diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java index 760c2bf..1decd56 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java @@ -4,6 +4,9 @@ package com.datamate.cleaning.interfaces.dto; import lombok.Getter; import lombok.Setter; +import java.math.BigDecimal; +import java.math.RoundingMode; + /** * CleaningProcess */ @@ -16,5 +19,20 @@ public class CleaningProcess { private Integer totalFileNum; private Integer finishedFileNum; + + public CleaningProcess(int totalFileNum, int finishedFileNum) { + this.totalFileNum = totalFileNum; + this.finishedFileNum = finishedFileNum; + if (totalFileNum == 0) { + this.process = 0.0f; + } else { + this.process = BigDecimal.valueOf(finishedFileNum * 100L) + .divide(BigDecimal.valueOf(totalFileNum), 2, RoundingMode.HALF_UP).floatValue(); + } + } + + public static CleaningProcess of(int totalFileNum, int finishedFileNum) { + return new CleaningProcess(totalFileNum, finishedFileNum); + } } diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTask.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTask.java index 2ee603c..43f2881 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTask.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTask.java @@ -36,6 +36,8 @@ public class CleaningTask { private long afterSize; + private int fileCount; + /** * 任务当前状态 */ diff --git a/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningResultMapper.xml b/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningResultMapper.xml index b3d6446..e8ab7df 100644 --- a/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningResultMapper.xml +++ b/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningResultMapper.xml @@ -5,4 +5,8 @@ DELETE FROM t_clean_result WHERE instance_id = #{instanceId} + + diff --git a/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningTaskMapper.xml b/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningTaskMapper.xml index 7a16f73..6404c5d 100644 --- a/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningTaskMapper.xml +++ b/backend/services/data-cleaning-service/src/main/resources/mappers/CleaningTaskMapper.xml @@ -3,7 +3,7 @@ id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size, - after_size, status, created_at, started_at, finished_at + after_size, file_count, status, created_at, started_at, finished_at