init datamate
21
.editorconfig
Normal file
@@ -0,0 +1,21 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.{java,kt}]
|
||||
indent_size = 4
|
||||
|
||||
[*.{py}]
|
||||
indent_size = 4
|
||||
|
||||
[*.{md}]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
27
.github/workflows/docker-image-backend.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
name: Backend Docker Image CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "develop_930" ]
|
||||
paths:
|
||||
- 'backend/**'
|
||||
- 'scripts/images/backend/**'
|
||||
- '.github/workflows/docker-image-backend.yml'
|
||||
pull_request:
|
||||
branches: [ "develop_930" ]
|
||||
paths:
|
||||
- 'backend/**'
|
||||
- 'scripts/images/backend/**'
|
||||
- '.github/workflows/docker-image-backend.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build the Backend Docker image
|
||||
run: make build-backend
|
||||
27
.github/workflows/docker-image-frontend.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
name: Frontend Docker Image CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "develop_930" ]
|
||||
paths:
|
||||
- 'frontend/**'
|
||||
- 'scripts/images/frontend/**'
|
||||
- '.github/workflows/docker-image-frontend.yml'
|
||||
pull_request:
|
||||
branches: [ "develop_930" ]
|
||||
paths:
|
||||
- 'frontend/**'
|
||||
- 'scripts/images/frontend/**'
|
||||
- '.github/workflows/docker-image-frontend.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build the Frontend Docker image
|
||||
run: make build-frontend
|
||||
189
.gitignore
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# TypeScript v1 declaration files
|
||||
typings/
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variables file
|
||||
.env
|
||||
|
||||
# next.js build output
|
||||
.next
|
||||
|
||||
# Java
|
||||
*.class
|
||||
*.jar
|
||||
*.war
|
||||
*.ear
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.rar
|
||||
|
||||
# Maven
|
||||
target/
|
||||
pom.xml.tag
|
||||
pom.xml.releaseBackup
|
||||
pom.xml.versionsBackup
|
||||
pom.xml.next
|
||||
release.properties
|
||||
dependency-reduced-pom.xml
|
||||
buildNumber.properties
|
||||
.mvn/timing.properties
|
||||
|
||||
# Gradle
|
||||
.gradle
|
||||
build/
|
||||
!gradle-wrapper.jar
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
# IntelliJ IDEA
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
|
||||
# Eclipse
|
||||
.project
|
||||
.classpath
|
||||
.c9/
|
||||
*.launch
|
||||
.settings/
|
||||
.metadata
|
||||
bin/
|
||||
tmp/
|
||||
*.tmp
|
||||
*.bak
|
||||
*.swp
|
||||
*~.nib
|
||||
local.properties
|
||||
.settings/
|
||||
.loadpath
|
||||
.recommenders
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyEnv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
Pipfile.lock
|
||||
|
||||
# Celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Docker
|
||||
*.dockerignore
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
*.sublime-project
|
||||
*.sublime-workspace
|
||||
20
LICENSE
Normal file
@@ -0,0 +1,20 @@
|
||||
# DataMate Open Source License
|
||||
|
||||
DataMate is licensed under the MIT License, with the following additional conditions:
|
||||
|
||||
DataMate is permitted to be used commercially, including as a backend service for other applications or as an application development platform for enterprises. However, when the following conditions are met, you must contact the producer to obtain a commercial license:
|
||||
|
||||
a. Multi-tenant SaaS service: Unless explicitly authorized by DataMate in writing, you may not use the DataMate source code to operate a multi-tenant SaaS service.
|
||||
b. LOGO and copyright information: In the process of using DataMate's frontend, you may not remove or modify the LOGO or copyright information in the DataMate console or applications. This restriction is inapplicable to uses of DataMate that do not involve its frontend.
|
||||
|
||||
Please contact zhangyafeng2@huawei.com by email to inquire about licensing matters.
|
||||
|
||||
As a contributor, you should agree that:
|
||||
|
||||
a. The producer can adjust the open-source agreement to be more strict or relaxed as deemed necessary.
|
||||
b. Your contributed code may be used for commercial purposes, such as DataMate's cloud business.
|
||||
|
||||
Apart from the specific conditions mentioned above, all other rights and restrictions follow the MIT License.
|
||||
Detailed information about the MIT License can be found at: https://opensource.org/licenses/MIT
|
||||
|
||||
Copyright © 2025 Huawei Technologies Co., Ltd.
|
||||
164
Makefile
Normal file
@@ -0,0 +1,164 @@
|
||||
MAKEFLAGS += --no-print-directory
|
||||
|
||||
VERSION ?= latest
|
||||
NAMESPACE ?= datamate
|
||||
|
||||
.PHONY: build-%
|
||||
build-%:
|
||||
$(MAKE) $*-docker-build
|
||||
|
||||
.PHONY: build
|
||||
build: backend-docker-build frontend-docker-build runtime-docker-build
|
||||
|
||||
.PHONY: create-namespace
|
||||
create-namespace:
|
||||
@kubectl get namespace $(NAMESPACE) > /dev/null 2>&1 || kubectl create namespace $(NAMESPACE)
|
||||
|
||||
.PHONY: install-%
|
||||
install-%:
|
||||
ifeq ($(origin INSTALLER), undefined)
|
||||
@echo "Choose a deployment method:"
|
||||
@echo "1. Docker"
|
||||
@echo "2. Kubernetes/Helm"
|
||||
@echo -n "Enter choice: "
|
||||
@read choice; \
|
||||
case $$choice in \
|
||||
1) INSTALLER=docker ;; \
|
||||
2) INSTALLER=k8s ;; \
|
||||
*) echo "Invalid choice" && exit 1 ;; \
|
||||
esac; \
|
||||
$(MAKE) $*-$$INSTALLER-install
|
||||
else
|
||||
$(MAKE) $*-$(INSTALLER)-install
|
||||
endif
|
||||
|
||||
.PHONY: install
|
||||
install: install-data-mate
|
||||
|
||||
.PHONY: uninstall-%
|
||||
uninstall-%:
|
||||
ifeq ($(origin INSTALLER), undefined)
|
||||
@echo "Choose a deployment method:"
|
||||
@echo "1. Docker"
|
||||
@echo "2. Kubernetes/Helm"
|
||||
@echo -n "Enter choice: "
|
||||
@read choice; \
|
||||
case $$choice in \
|
||||
1) INSTALLER=docker ;; \
|
||||
2) INSTALLER=k8s ;; \
|
||||
*) echo "Invalid choice" && exit 1 ;; \
|
||||
esac; \
|
||||
$(MAKE) $*-$$INSTALLER-uninstall
|
||||
else
|
||||
$(MAKE) $*-$(INSTALLER)-uninstall
|
||||
endif
|
||||
|
||||
.PHONY: uninstall
|
||||
uninstall: uninstall-data-mate
|
||||
|
||||
# build
|
||||
.PHONY: mineru-docker-build
|
||||
mineru-docker-build:
|
||||
docker build -t mineru:$(VERSION) . -f scripts/images/mineru/Dockerfile
|
||||
|
||||
.PHONY: datax-docker-build
|
||||
datax-docker-build:
|
||||
docker build -t datax:$(VERSION) . -f scripts/images/datax/Dockerfile
|
||||
|
||||
.PHONY: unstructured-docker-build
|
||||
unstructured-docker-build:
|
||||
docker build -t unstructured:$(VERSION) . -f scripts/images/unstructured/Dockerfile
|
||||
|
||||
.PHONY: backend-docker-build
|
||||
backend-docker-build:
|
||||
docker build -t backend:$(VERSION) . -f scripts/images/backend/Dockerfile
|
||||
|
||||
.PHONY: frontend-docker-build
|
||||
frontend-docker-build:
|
||||
docker build -t frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
|
||||
|
||||
.PHONY: runtime-docker-build
|
||||
runtime-docker-build:
|
||||
docker build -t runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
|
||||
|
||||
.PHONY: backend-docker-install
|
||||
backend-docker-install:
|
||||
cd deployment/docker/data-mate && docker-compose up -d backend
|
||||
|
||||
# Stop and remove only the backend service container.
# NOTE: the original used `docker-compose down backend`, but docker-compose v1's
# `down` takes no service argument and fails; `rm -sf` stops and removes just
# the named service, which is the intended per-service teardown.
.PHONY: backend-docker-uninstall
backend-docker-uninstall:
	cd deployment/docker/data-mate && docker-compose rm -sf backend
|
||||
|
||||
.PHONY: frontend-docker-install
|
||||
frontend-docker-install:
|
||||
cd deployment/docker/data-mate && docker-compose up -d frontend
|
||||
|
||||
# Stop and remove only the frontend service container.
# NOTE: the original used `docker-compose down frontend`, but docker-compose v1's
# `down` takes no service argument and fails; `rm -sf` stops and removes just
# the named service, which is the intended per-service teardown.
.PHONY: frontend-docker-uninstall
frontend-docker-uninstall:
	cd deployment/docker/data-mate && docker-compose rm -sf frontend
|
||||
|
||||
.PHONY: runtime-docker-install
|
||||
runtime-docker-install:
|
||||
cd deployment/docker/data-mate && docker-compose up -d runtime
|
||||
|
||||
# Stop and remove only the runtime service container.
# NOTE: the original used `docker-compose down runtime`, but docker-compose v1's
# `down` takes no service argument and fails; `rm -sf` stops and removes just
# the named service, which is the intended per-service teardown.
.PHONY: runtime-docker-uninstall
runtime-docker-uninstall:
	cd deployment/docker/data-mate && docker-compose rm -sf runtime
|
||||
|
||||
.PHONY: runtime-k8s-install
|
||||
runtime-k8s-install: create-namespace
|
||||
helm upgrade kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
|
||||
helm upgrade raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
|
||||
kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: runtime-k8s-uninstall
|
||||
runtime-k8s-uninstall:
|
||||
helm uninstall raycluster -n $(NAMESPACE)
|
||||
helm uninstall kuberay-operator -n $(NAMESPACE)
|
||||
kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: unstructured-k8s-install
|
||||
unstructured-k8s-install: create-namespace
|
||||
kubectl apply -f deployment/kubernetes/unstructured/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: mysql-k8s-install
|
||||
mysql-k8s-install: create-namespace
|
||||
kubectl create configmap init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
|
||||
kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
|
||||
kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: mysql-k8s-uninstall
|
||||
mysql-k8s-uninstall:
|
||||
kubectl delete configmap init-sql -n $(NAMESPACE)
|
||||
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
|
||||
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: backend-k8s-install
|
||||
backend-k8s-install: create-namespace
|
||||
kubectl apply -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: backend-k8s-uninstall
|
||||
backend-k8s-uninstall:
|
||||
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: frontend-k8s-install
|
||||
frontend-k8s-install: create-namespace
|
||||
kubectl apply -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
.PHONY: frontend-k8s-uninstall
|
||||
frontend-k8s-uninstall:
|
||||
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
|
||||
|
||||
# Bring the whole DataMate stack up/down via docker-compose.
# NOTE(review): the original used deployment/docker/datamate here, while every
# per-service target (backend/frontend/runtime) uses deployment/docker/data-mate.
# Aligned to data-mate for consistency — confirm against the actual directory
# name under deployment/docker/.
.PHONY: data-mate-docker-install
data-mate-docker-install:
	cd deployment/docker/data-mate && docker-compose up -d

.PHONY: data-mate-docker-uninstall
data-mate-docker-uninstall:
	cd deployment/docker/data-mate && docker-compose down
|
||||
|
||||
.PHONY: data-mate-k8s-install
|
||||
data-mate-k8s-install: create-namespace mysql-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
|
||||
|
||||
.PHONY: data-mate-k8s-uninstall
|
||||
data-mate-k8s-uninstall: mysql-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall
|
||||
72
README-zh.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# DataMate 一站式数据工作平台
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://github.com/ModelEngine-Group/DataMate/actions/workflows/docker-image-backend.yml)
|
||||
[](https://github.com/ModelEngine-Group/DataMate/actions/workflows/docker-image-frontend.yml)
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。**
|
||||
|
||||
[简体中文](./README-zh.md) | [English](./README.md)
|
||||
|
||||
如果您喜欢这个项目,希望您能给我们一个Star⭐️!
|
||||
|
||||
</div>
|
||||
|
||||
## 🌟 核心特性
|
||||
|
||||
- **核心模块**:数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成
|
||||
- **可视化编排**:拖拽式数据处理流程设计
|
||||
- **算子生态**:丰富的内置算子和自定义算子支持
|
||||
|
||||
## 🚀 快速开始
|
||||
|
||||
### 前置条件
|
||||
|
||||
- Git (用于拉取源码)
|
||||
- Make (用于构建和安装)
|
||||
- Docker (用于构建镜像和部署服务)
|
||||
- Docker-Compose (用于部署服务-docker方式)
|
||||
- kubernetes (用于部署服务-k8s方式)
|
||||
- Helm (用于部署服务-k8s方式)
|
||||
|
||||
### 拉取代码
|
||||
|
||||
```bash
|
||||
git clone git@github.com:ModelEngine-Group/DataMate.git
|
||||
```
|
||||
|
||||
### 镜像构建
|
||||
|
||||
```bash
|
||||
make build
|
||||
```
|
||||
|
||||
### Docker安装
|
||||
|
||||
```bash
|
||||
make install INSTALLER=docker
|
||||
```
|
||||
|
||||
### kubernetes安装
|
||||
|
||||
```bash
|
||||
make install INSTALLER=k8s
|
||||
```
|
||||
|
||||
## 🤝 贡献指南
|
||||
|
||||
感谢您对本项目的关注!我们非常欢迎社区的贡献,无论是提交 Bug 报告、提出功能建议,还是直接参与代码开发,都能帮助项目变得更好。
|
||||
|
||||
• 📮 [GitHub Issues](../../issues):提交 Bug 或功能建议。
|
||||
|
||||
• 🔧 [GitHub Pull Requests](../../pulls):贡献代码改进。
|
||||
|
||||
## 📄 许可证
|
||||
|
||||
DataMate 基于 [MIT](LICENSE) 开源,您可以在遵守许可证条款的前提下自由使用、修改和分发本项目的代码。
|
||||
77
README.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# DataMate All-in-One Data Work Platform
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://github.com/ModelEngine-Group/DataMate/actions/workflows/docker-image-backend.yml)
|
||||
[](https://github.com/ModelEngine-Group/DataMate/actions/workflows/docker-image-frontend.yml)
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
**DataMate is an enterprise-level data processing platform for model fine-tuning and RAG retrieval, supporting core
|
||||
functions such as data collection, data management, operator marketplace, data cleaning, data synthesis, data
|
||||
annotation, data evaluation, and knowledge generation.**
|
||||
|
||||
[简体中文](./README-zh.md) | [English](./README.md)
|
||||
|
||||
If you like this project, please give it a Star⭐️!
|
||||
|
||||
</div>
|
||||
|
||||
## 🌟 Core Features
|
||||
|
||||
- **Core Modules**: Data Collection, Data Management, Operator Marketplace, Data Cleaning, Data Synthesis, Data
|
||||
Annotation, Data Evaluation, Knowledge Generation.
|
||||
- **Visual Orchestration**: Drag-and-drop data processing workflow design.
|
||||
- **Operator Ecosystem**: Rich built-in operators and support for custom operators.
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Git (for pulling source code)
|
||||
- Make (for building and installing)
|
||||
- Docker (for building images and deploying services)
|
||||
- Docker-Compose (for service deployment - Docker method)
|
||||
- Kubernetes (for service deployment - k8s method)
|
||||
- Helm (for service deployment - k8s method)
|
||||
|
||||
### Clone the Code
|
||||
|
||||
```bash
|
||||
git clone git@github.com:ModelEngine-Group/DataMate.git
|
||||
```
|
||||
|
||||
### Build Images
|
||||
|
||||
```bash
|
||||
make build
|
||||
```
|
||||
|
||||
### Docker Installation
|
||||
|
||||
```bash
|
||||
make install INSTALLER=docker
|
||||
```
|
||||
|
||||
### Kubernetes Installation
|
||||
|
||||
```bash
|
||||
make install INSTALLER=k8s
|
||||
```
|
||||
|
||||
## 🤝 Contribution Guidelines
|
||||
|
||||
Thank you for your interest in this project! We warmly welcome contributions from the community. Whether it's submitting
|
||||
bug reports, suggesting new features, or directly participating in code development, all forms of help make the project
|
||||
better.
|
||||
|
||||
• 📮 [GitHub Issues](../../issues): Submit bugs or feature suggestions.
|
||||
|
||||
• 🔧 [GitHub Pull Requests](../../pulls): Contribute code improvements.
|
||||
|
||||
## 📄 License
|
||||
|
||||
DataMate is open source under the [MIT](LICENSE) license. You are free to use, modify, and distribute the code of this
|
||||
project in compliance with the license terms.
|
||||
55
backend/api-gateway/pom.xml
Normal file
@@ -0,0 +1,55 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-mate-platform</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>api-gateway</artifactId>
|
||||
<name>API Gateway</name>
|
||||
<description>API网关服务</description>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-gateway</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-data-redis-reactive</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-security</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-actuator</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-loadbalancer</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,77 @@
|
||||
package com.datamate.gateway;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.cloud.gateway.route.RouteLocator;
|
||||
import org.springframework.cloud.gateway.route.builder.RouteLocatorBuilder;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
|
||||
/**
|
||||
* API Gateway & Auth Service Application
|
||||
* 统一的API网关和认证授权微服务
|
||||
* 提供路由、鉴权、限流等功能
|
||||
*/
|
||||
@SpringBootApplication
|
||||
@ComponentScan(basePackages = {
|
||||
"com.datamate.gateway",
|
||||
"com.datamate.shared"
|
||||
})
|
||||
public class ApiGatewayApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(ApiGatewayApplication.class, args);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public RouteLocator customRouteLocator(RouteLocatorBuilder builder) {
|
||||
return builder.routes()
|
||||
// 数据归集服务路由
|
||||
.route("data-collection", r -> r.path("/api/data-collection/**")
|
||||
.uri("lb://data-collection-service"))
|
||||
|
||||
// 数据管理服务路由
|
||||
.route("data-management", r -> r.path("/api/data-management/**")
|
||||
.uri("lb://data-management-service"))
|
||||
|
||||
// 算子市场服务路由
|
||||
.route("operator-market", r -> r.path("/api/operators/**")
|
||||
.uri("lb://operator-market-service"))
|
||||
|
||||
// 数据清洗服务路由
|
||||
.route("data-cleaning", r -> r.path("/api/cleaning/**")
|
||||
.uri("lb://data-cleaning-service"))
|
||||
|
||||
// 数据合成服务路由
|
||||
.route("data-synthesis", r -> r.path("/api/synthesis/**")
|
||||
.uri("lb://data-synthesis-service"))
|
||||
|
||||
// 数据标注服务路由
|
||||
.route("data-annotation", r -> r.path("/api/annotation/**")
|
||||
.uri("lb://data-annotation-service"))
|
||||
|
||||
// 数据评估服务路由
|
||||
.route("data-evaluation", r -> r.path("/api/evaluation/**")
|
||||
.uri("lb://data-evaluation-service"))
|
||||
|
||||
// 流程编排服务路由
|
||||
.route("pipeline-orchestration", r -> r.path("/api/pipelines/**")
|
||||
.uri("lb://pipeline-orchestration-service"))
|
||||
|
||||
// 执行引擎服务路由
|
||||
.route("execution-engine", r -> r.path("/api/execution/**")
|
||||
.uri("lb://execution-engine-service"))
|
||||
|
||||
// 认证服务路由
|
||||
.route("auth-service", r -> r.path("/api/auth/**")
|
||||
.uri("lb://auth-service"))
|
||||
|
||||
// RAG服务路由
|
||||
.route("rag-indexer", r -> r.path("/api/rag/indexer/**")
|
||||
.uri("lb://rag-indexer-service"))
|
||||
.route("rag-query", r -> r.path("/api/rag/query/**")
|
||||
.uri("lb://rag-query-service"))
|
||||
|
||||
.build();
|
||||
}
|
||||
}
|
||||
147
backend/openapi/README.md
Normal file
@@ -0,0 +1,147 @@
|
||||
# OpenAPI Code Generation Configuration
|
||||
# 基于YAML生成API代码的配置文件
|
||||
|
||||
## Maven Plugin Configuration for Spring Boot
|
||||
# 在各个服务的pom.xml中添加以下插件配置:
|
||||
|
||||
```xml
|
||||
|
||||
<plugin>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>openapi-generator-maven-plugin</artifactId>
|
||||
<version>6.6.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>generate</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<inputSpec>${project.basedir}/../../openapi/specs/${project.artifactId}.yaml</inputSpec>
|
||||
<generatorName>spring</generatorName>
|
||||
<output>${project.build.directory}/generated-sources/openapi</output>
|
||||
<apiPackage>com.datamate.${project.name}.interfaces.api</apiPackage>
|
||||
<modelPackage>com.datamate.${project.name}.interfaces.dto</modelPackage>
|
||||
<configOptions>
|
||||
<interfaceOnly>true</interfaceOnly>
|
||||
<useTags>true</useTags>
|
||||
<skipDefaultInterface>true</skipDefaultInterface>
|
||||
<hideGenerationTimestamp>true</hideGenerationTimestamp>
|
||||
<java8>true</java8>
|
||||
<dateLibrary>java8</dateLibrary>
|
||||
<useBeanValidation>true</useBeanValidation>
|
||||
<performBeanValidation>true</performBeanValidation>
|
||||
<useSpringBoot3>true</useSpringBoot3>
|
||||
<documentationProvider>springdoc</documentationProvider>
|
||||
</configOptions>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
```
|
||||
|
||||
## Gradle Plugin Configuration (Alternative)
|
||||
# 如果使用Gradle,可以使用以下配置:
|
||||
|
||||
```gradle
|
||||
plugins {
|
||||
id 'org.openapi.generator' version '6.6.0'
|
||||
}
|
||||
|
||||
openApiGenerate {
|
||||
generatorName = "spring"
|
||||
inputSpec = "$rootDir/openapi/specs/${project.name}.yaml"
|
||||
outputDir = "$buildDir/generated-sources/openapi"
|
||||
apiPackage = "com.datamate.${project.name}.interfaces.api"
|
||||
modelPackage = "com.datamate.${project.name}.interfaces.dto"
|
||||
configOptions = [
|
||||
interfaceOnly: "true",
|
||||
useTags: "true",
|
||||
skipDefaultInterface: "true",
|
||||
hideGenerationTimestamp: "true",
|
||||
java8: "true",
|
||||
dateLibrary: "java8",
|
||||
useBeanValidation: "true",
|
||||
performBeanValidation: "true",
|
||||
useSpringBoot3: "true",
|
||||
documentationProvider: "springdoc"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Frontend TypeScript Client Generation
|
||||
# 为前端生成TypeScript客户端:
|
||||
|
||||
```bash
|
||||
# 安装 OpenAPI Generator CLI
|
||||
npm install -g @openapitools/openapi-generator-cli
|
||||
|
||||
# 生成TypeScript客户端
|
||||
openapi-generator-cli generate \
|
||||
-i openapi/specs/data-annotation-service.yaml \
|
||||
-g typescript-axios \
|
||||
-o frontend/packages/api-client/src/generated/annotation \
|
||||
--additional-properties=supportsES6=true,npmName=@datamate/annotation-api,npmVersion=1.0.0
|
||||
```
|
||||
|
||||
## Usage in Services
|
||||
# 在各个服务中使用生成的代码:
|
||||
|
||||
1. **在 interfaces 层实现生成的API接口**:
|
||||
```java
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/annotation")
|
||||
public class AnnotationTaskController implements AnnotationTasksApi {
|
||||
|
||||
private final AnnotationTaskApplicationService annotationTaskService;
|
||||
|
||||
@Override
|
||||
public ResponseEntity<AnnotationTaskPageResponse> getAnnotationTasks(
|
||||
Integer page, Integer size, String status) {
|
||||
// 实现业务逻辑
|
||||
return ResponseEntity.ok(annotationTaskService.getTasks(page, size, status));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **在 application 层使用生成的DTO**:
|
||||
```java
|
||||
@Service
|
||||
public class AnnotationTaskApplicationService {
|
||||
|
||||
public AnnotationTaskPageResponse getTasks(Integer page, Integer size, String status) {
|
||||
// 业务逻辑实现
|
||||
// 使用生成的DTO类型
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Build Integration
|
||||
# 构建集成脚本位置:scripts/build/generate-api.sh
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# 生成所有服务的API代码
|
||||
|
||||
OPENAPI_DIR="openapi/specs"
|
||||
SERVICES=(
|
||||
"data-annotation-service"
|
||||
"data-management-service"
|
||||
"operator-market-service"
|
||||
"data-cleaning-service"
|
||||
"data-synthesis-service"
|
||||
"data-evaluation-service"
|
||||
"pipeline-orchestration-service"
|
||||
"execution-engine-service"
|
||||
"rag-indexer-service"
|
||||
"rag-query-service"
|
||||
"api-gateway"
|
||||
"auth-service"
|
||||
)
|
||||
|
||||
for service in "${SERVICES[@]}"; do
|
||||
echo "Generating API for $service..."
|
||||
mvn -f backend/services/$service/pom.xml openapi-generator:generate
|
||||
done
|
||||
|
||||
echo "All APIs generated successfully!"
|
||||
```
|
||||
298
backend/openapi/specs/data-annotation.yaml
Normal file
@@ -0,0 +1,298 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Data Annotation Service API
|
||||
description: 数据标注服务API - 智能预标注、人工平台、主动学习
|
||||
version: 1.0.0
|
||||
contact:
|
||||
name: Data Mate Platform Team
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8080
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: annotation-tasks
|
||||
description: 标注任务管理
|
||||
- name: annotation-data
|
||||
description: 标注数据管理
|
||||
- name: pre-annotation
|
||||
description: 智能预标注
|
||||
- name: active-learning
|
||||
description: 主动学习
|
||||
|
||||
paths:
|
||||
/api/v1/annotation/tasks:
|
||||
get:
|
||||
tags:
|
||||
- annotation-tasks
|
||||
summary: 获取标注任务列表
|
||||
description: 分页获取标注任务列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
description: 页码
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
description: 每页大小
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: status
|
||||
in: query
|
||||
description: 任务状态
|
||||
schema:
|
||||
type: string
|
||||
enum: [PENDING, IN_PROGRESS, COMPLETED, PAUSED]
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/AnnotationTaskPageResponse'
|
||||
'400':
|
||||
description: 请求参数错误
|
||||
'500':
|
||||
description: 服务器内部错误
|
||||
|
||||
post:
|
||||
tags:
|
||||
- annotation-tasks
|
||||
summary: 创建标注任务
|
||||
description: 创建新的标注任务
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateAnnotationTaskRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/AnnotationTaskResponse'
|
||||
'400':
|
||||
description: 请求参数错误
|
||||
'500':
|
||||
description: 服务器内部错误
|
||||
|
||||
/api/v1/annotation/tasks/{taskId}:
|
||||
get:
|
||||
tags:
|
||||
- annotation-tasks
|
||||
summary: 获取标注任务详情
|
||||
parameters:
|
||||
- name: taskId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/AnnotationTaskResponse'
|
||||
'404':
|
||||
description: 任务不存在
|
||||
|
||||
put:
|
||||
tags:
|
||||
- annotation-tasks
|
||||
summary: 更新标注任务
|
||||
parameters:
|
||||
- name: taskId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateAnnotationTaskRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 更新成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/AnnotationTaskResponse'
|
||||
|
||||
/api/v1/annotation/pre-annotate:
|
||||
post:
|
||||
tags:
|
||||
- pre-annotation
|
||||
summary: 智能预标注
|
||||
description: 使用AI模型进行智能预标注
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PreAnnotationRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 预标注成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PreAnnotationResponse'
|
||||
|
||||
components:
|
||||
schemas:
|
||||
AnnotationTaskResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 任务ID
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
type:
|
||||
type: string
|
||||
enum: [TEXT_CLASSIFICATION, NAMED_ENTITY_RECOGNITION, OBJECT_DETECTION, SEMANTIC_SEGMENTATION]
|
||||
description: 标注类型
|
||||
status:
|
||||
type: string
|
||||
enum: [PENDING, IN_PROGRESS, COMPLETED, PAUSED]
|
||||
description: 任务状态
|
||||
datasetId:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
progress:
|
||||
type: number
|
||||
format: double
|
||||
description: 进度百分比
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 创建时间
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 更新时间
|
||||
|
||||
CreateAnnotationTaskRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- type
|
||||
- datasetId
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
type:
|
||||
type: string
|
||||
enum: [TEXT_CLASSIFICATION, NAMED_ENTITY_RECOGNITION, OBJECT_DETECTION, SEMANTIC_SEGMENTATION]
|
||||
description: 标注类型
|
||||
datasetId:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
configuration:
|
||||
type: object
|
||||
description: 标注配置
|
||||
|
||||
UpdateAnnotationTaskRequest:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
status:
|
||||
type: string
|
||||
enum: [PENDING, IN_PROGRESS, COMPLETED, PAUSED]
|
||||
description: 任务状态
|
||||
|
||||
AnnotationTaskPageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AnnotationTaskResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
PreAnnotationRequest:
|
||||
type: object
|
||||
required:
|
||||
- taskId
|
||||
- dataIds
|
||||
properties:
|
||||
taskId:
|
||||
type: string
|
||||
description: 标注任务ID
|
||||
dataIds:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: 待预标注的数据ID列表
|
||||
modelId:
|
||||
type: string
|
||||
description: 预标注模型ID
|
||||
confidence:
|
||||
type: number
|
||||
format: double
|
||||
description: 置信度阈值
|
||||
|
||||
PreAnnotationResponse:
|
||||
type: object
|
||||
properties:
|
||||
taskId:
|
||||
type: string
|
||||
description: 任务ID
|
||||
processedCount:
|
||||
type: integer
|
||||
description: 已处理数据数量
|
||||
successCount:
|
||||
type: integer
|
||||
description: 成功预标注数量
|
||||
results:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
dataId:
|
||||
type: string
|
||||
annotations:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
confidence:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
securitySchemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
security:
|
||||
- BearerAuth: []
|
||||
491
backend/openapi/specs/data-cleaning.yaml
Normal file
@@ -0,0 +1,491 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Data Cleaning Service API
|
||||
description: 数据清洗服务API - 策略/规则、流程编排对接
|
||||
version: 1.0.0
|
||||
contact:
|
||||
name: Data Mate Platform Team
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8084
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: CleaningTask
|
||||
description: 数据清洗任务管理
|
||||
- name: CleaningTemplate
|
||||
description: 数据清洗模板管理
|
||||
|
||||
paths:
|
||||
/ray/log:
|
||||
get:
|
||||
summary: 获取ray日志文件
|
||||
deprecated: false
|
||||
description: ''
|
||||
tags: [ ]
|
||||
parameters: [ ]
|
||||
responses:
|
||||
'200':
|
||||
description: ''
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties: { }
|
||||
headers: { }
|
||||
security: [ ]
|
||||
/cleaning/tasks:
|
||||
get:
|
||||
summary: 查询数据清洗任务列表
|
||||
deprecated: false
|
||||
description: 获取所有数据清洗任务或根据查询参数筛选任务。
|
||||
tags:
|
||||
- CleaningTask
|
||||
parameters:
|
||||
- name: status
|
||||
in: query
|
||||
description: 根据任务状态筛选 (e.g., pending, running, completed, failed)
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: keywords
|
||||
in: query
|
||||
description: 关键字
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: page
|
||||
in: query
|
||||
description: 分页数
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
- name: size
|
||||
in: query
|
||||
description: 分页单页数
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
responses:
|
||||
'200':
|
||||
description: 成功获取任务列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items: &ref_1
|
||||
$ref: '#/components/schemas/CleaningTask'
|
||||
headers: { }
|
||||
security: [ ]
|
||||
post:
|
||||
summary: 创建新的数据清洗任务
|
||||
deprecated: false
|
||||
description: 可以直接创建任务或基于现有模板创建任务。
|
||||
tags:
|
||||
- CleaningTask
|
||||
parameters: [ ]
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateCleaningTaskRequest'
|
||||
examples: { }
|
||||
responses:
|
||||
'201':
|
||||
description: 任务创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_1
|
||||
headers: { }
|
||||
security: [ ]
|
||||
/cleaning/tasks/{taskId}:
|
||||
get:
|
||||
summary: 获取单个数据清洗任务详情
|
||||
deprecated: false
|
||||
description: 根据任务ID获取任务的详细信息。
|
||||
tags:
|
||||
- CleaningTask
|
||||
parameters:
|
||||
- name: taskId
|
||||
in: path
|
||||
description: 任务的唯一标识符
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 成功获取任务详情
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_1
|
||||
headers: { }
|
||||
security: [ ]
|
||||
delete:
|
||||
summary: 删除数据清洗任务
|
||||
deprecated: false
|
||||
description: 根据任务ID删除指定的任务。
|
||||
tags:
|
||||
- CleaningTask
|
||||
parameters:
|
||||
- name: taskId
|
||||
in: path
|
||||
description: 任务的唯一标识符
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'204':
|
||||
description: 任务删除成功
|
||||
headers: { }
|
||||
security: [ ]
|
||||
/cleaning/templates:
|
||||
get:
|
||||
summary: 查询数据清洗模板列表
|
||||
deprecated: false
|
||||
description: 获取所有可用的数据清洗模板。
|
||||
tags:
|
||||
- CleaningTemplate
|
||||
parameters: [ ]
|
||||
responses:
|
||||
'200':
|
||||
description: 成功获取模板列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items: &ref_2
|
||||
$ref: '#/components/schemas/CleaningTemplate'
|
||||
headers: { }
|
||||
security: [ ]
|
||||
post:
|
||||
summary: 创建新的数据清洗模板
|
||||
deprecated: false
|
||||
description: 定义一个新的数据清洗模板。
|
||||
tags:
|
||||
- CleaningTemplate
|
||||
parameters: [ ]
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateCleaningTemplateRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 模板创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_2
|
||||
headers: { }
|
||||
security: [ ]
|
||||
/cleaning/templates/{templateId}:
|
||||
get:
|
||||
summary: 获取单个数据清洗模板详情
|
||||
deprecated: false
|
||||
description: 根据模板ID获取模板的详细信息。
|
||||
tags:
|
||||
- CleaningTemplate
|
||||
parameters:
|
||||
- name: templateId
|
||||
in: path
|
||||
description: 模板的唯一标识符
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 成功获取模板详情
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_2
|
||||
headers: { }
|
||||
security: [ ]
|
||||
put:
|
||||
summary: 更新数据清洗模板
|
||||
deprecated: false
|
||||
description: 根据模板ID更新模板的全部信息。
|
||||
tags:
|
||||
- CleaningTemplate
|
||||
parameters:
|
||||
- name: templateId
|
||||
in: path
|
||||
description: 模板的唯一标识符
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateCleaningTemplateRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 模板更新成功
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_2
|
||||
headers: { }
|
||||
security: [ ]
|
||||
delete:
|
||||
summary: 删除数据清洗模板
|
||||
deprecated: false
|
||||
description: 根据模板ID删除指定的模板。
|
||||
tags:
|
||||
- CleaningTemplate
|
||||
parameters:
|
||||
- name: templateId
|
||||
in: path
|
||||
description: 模板的唯一标识符
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'204':
|
||||
description: 模板删除成功
|
||||
headers: { }
|
||||
security: [ ]
|
||||
|
||||
components:
|
||||
schemas:
|
||||
OperatorInstance:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
overrides:
|
||||
type: object
|
||||
properties: { }
|
||||
additionalProperties:
|
||||
type: object
|
||||
properties: { }
|
||||
required:
|
||||
- id
|
||||
- overrides
|
||||
CleaningProcess:
|
||||
type: object
|
||||
properties:
|
||||
process:
|
||||
type: number
|
||||
format: float
|
||||
description: 进度百分比
|
||||
totalFileNum:
|
||||
type: integer
|
||||
description: 总文件数量
|
||||
finishedFileNum:
|
||||
type: integer
|
||||
description: 已完成文件数量
|
||||
required:
|
||||
- process
|
||||
- totalFileNum
|
||||
- finishedFileNum
|
||||
OperatorResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 算子ID
|
||||
name:
|
||||
type: string
|
||||
description: 算子名称
|
||||
description:
|
||||
type: string
|
||||
description: 算子描述
|
||||
version:
|
||||
type: string
|
||||
description: 算子版本
|
||||
inputs:
|
||||
type: string
|
||||
description: 输入类型
|
||||
outputs:
|
||||
type: string
|
||||
description: 输出类型
|
||||
runtime:
|
||||
type: string
|
||||
description: 运行时设置
|
||||
settings:
|
||||
type: string
|
||||
description: 算子参数
|
||||
isStar:
|
||||
type: boolean
|
||||
description: 是否收藏
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 创建时间
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 更新时间
|
||||
required:
|
||||
- inputs
|
||||
- outputs
|
||||
- runtime
|
||||
- settings
|
||||
- isStar
|
||||
UpdateCleaningTemplateRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- instance
|
||||
- id
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description: 模板名称
|
||||
description:
|
||||
type: string
|
||||
description: 模板描述
|
||||
instance:
|
||||
type: array
|
||||
items: &ref_3
|
||||
$ref: '#/components/schemas/OperatorInstance'
|
||||
description: 模板定义的清洗规则和配置
|
||||
CreateCleaningTemplateRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- instance
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 模板名称
|
||||
description:
|
||||
type: string
|
||||
description: 模板描述
|
||||
instance:
|
||||
type: array
|
||||
items: *ref_3
|
||||
description: 模板的具体配置(算子实例列表)
|
||||
CleaningTemplate:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
- instance
|
||||
- createdAt
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 模板唯一标识符
|
||||
name:
|
||||
type: string
|
||||
description: 模板名称
|
||||
description:
|
||||
type: string
|
||||
description: 模板描述
|
||||
instance:
|
||||
type: array
|
||||
items: &ref_4
|
||||
$ref: '#/components/schemas/OperatorResponse'
|
||||
description: 模板定义的清洗规则和配置
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 模板创建时间
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 模板最后更新时间
|
||||
CreateCleaningTaskRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- instance
|
||||
- srcDatasetId
|
||||
- srcDatasetName
|
||||
- destDatasetName
|
||||
- destDatasetType
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
srcDatasetId:
|
||||
type: string
|
||||
srcDatasetName:
|
||||
type: string
|
||||
destDatasetName:
|
||||
type: string
|
||||
destDatasetType:
|
||||
type: string
|
||||
instance:
|
||||
type: array
|
||||
items: *ref_3
|
||||
description: 任务的具体配置(如果非模板创建,则直接定义)
|
||||
ErrorResponse:
|
||||
type: object
|
||||
properties:
|
||||
error:
|
||||
type: string
|
||||
description: 错误类型
|
||||
message:
|
||||
type: string
|
||||
description: 错误详细信息
|
||||
CleaningTask:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
- status
|
||||
- createdAt
|
||||
- startedAt
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 任务唯一标识符
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
srcDatasetId:
|
||||
type: string
|
||||
description: 源数据集id
|
||||
srcDatasetName:
|
||||
type: string
|
||||
description: 源数据集名称
|
||||
destDatasetId:
|
||||
type: string
|
||||
description: 目标数据集id
|
||||
destDatasetName:
|
||||
type: string
|
||||
description: 目标数据集名称
|
||||
status:
|
||||
type: string
|
||||
description: 任务当前状态
|
||||
enum:
|
||||
- pending
|
||||
- running
|
||||
- completed
|
||||
- failed
|
||||
templateId:
|
||||
type: string
|
||||
description: 关联的模板ID(如果基于模板创建)
|
||||
instance:
|
||||
type: array
|
||||
items: *ref_4
|
||||
description: 任务的具体配置(如果非模板创建,则直接定义)
|
||||
progress:
|
||||
$ref: '#/components/schemas/CleaningProcess'
|
||||
createdAt:
|
||||
type: string
|
||||
description: 任务创建时间
|
||||
format: date-time
|
||||
startedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 任务开始时间
|
||||
finishedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 任务完成时间
|
||||
securitySchemes: { }
|
||||
517
backend/openapi/specs/data-collection.yaml
Normal file
@@ -0,0 +1,517 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Data Collection Service API
|
||||
description: |
|
||||
数据归集服务API,基于数据归集实现数据采集和归集功能。
|
||||
|
||||
主要功能:
|
||||
- 数据归集任务创建和管理
|
||||
- 数据同步任务执行
|
||||
- 任务监控和状态查询
|
||||
- 执行日志查看
|
||||
|
||||
version: 1.0.0
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8090/api/v1/collection
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: CollectionTask
|
||||
description: 数据归集任务管理(包括模板查询)
|
||||
- name: TaskExecution
|
||||
description: 任务执行管理
|
||||
|
||||
paths:
|
||||
/data-collection/tasks:
|
||||
get:
|
||||
operationId: getTasks
|
||||
tags: [CollectionTask]
|
||||
summary: 获取归集任务列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/TaskStatus'
|
||||
- name: name
|
||||
in: query
|
||||
description: 任务名称关键字搜索
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 归集任务列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PagedCollectionTaskSummary'
|
||||
|
||||
post:
|
||||
operationId: createTask
|
||||
tags: [CollectionTask]
|
||||
summary: 创建归集任务
|
||||
description: 创建新的数据归集任务
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateCollectionTaskRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 归集任务创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CollectionTaskResponse'
|
||||
|
||||
/data-collection/tasks/{id}:
|
||||
get:
|
||||
operationId: getTaskDetail
|
||||
tags: [CollectionTask]
|
||||
summary: 获取归集任务详情
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 归集任务详情
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CollectionTaskResponse'
|
||||
'404':
|
||||
description: 归集任务不存在
|
||||
|
||||
put:
|
||||
operationId: updateTask
|
||||
tags: [CollectionTask]
|
||||
summary: 更新归集任务
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateCollectionTaskRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 归集任务更新成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CollectionTaskResponse'
|
||||
|
||||
delete:
|
||||
operationId: deleteTask
|
||||
tags: [CollectionTask]
|
||||
summary: 删除归集任务
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'204':
|
||||
description: 归集任务删除成功
|
||||
|
||||
/tasks/{id}/execute:
|
||||
post:
|
||||
tags: [TaskExecution]
|
||||
summary: 执行归集任务
|
||||
description: 立即执行指定的归集任务
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'201':
|
||||
description: 任务执行已启动
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/TaskExecutionResponse'
|
||||
|
||||
/tasks/{id}/executions:
|
||||
get:
|
||||
tags: [TaskExecution]
|
||||
summary: 获取任务执行记录
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
responses:
|
||||
'200':
|
||||
description: 任务执行记录列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PagedTaskExecutions'
|
||||
|
||||
/executions/{id}:
|
||||
get:
|
||||
tags: [TaskExecution]
|
||||
summary: 获取执行详情
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 执行详情
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/TaskExecutionDetail'
|
||||
|
||||
delete:
|
||||
tags: [TaskExecution]
|
||||
summary: 停止任务执行
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'204':
|
||||
description: 任务执行已停止
|
||||
|
||||
/templates:
|
||||
get:
|
||||
tags: [CollectionTask]
|
||||
summary: 获取DataX模板列表
|
||||
description: 获取可用的DataX任务模板列表,用于创建任务时选择
|
||||
parameters:
|
||||
- name: sourceType
|
||||
in: query
|
||||
description: 源数据源类型过滤
|
||||
schema:
|
||||
type: string
|
||||
- name: targetType
|
||||
in: query
|
||||
description: 目标数据源类型过滤
|
||||
schema:
|
||||
type: string
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
responses:
|
||||
'200':
|
||||
description: 归集模板列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PagedDataxTemplates'
|
||||
|
||||
components:
|
||||
schemas:
|
||||
TaskStatus:
|
||||
type: string
|
||||
enum:
|
||||
- DRAFT
|
||||
- READY
|
||||
- RUNNING
|
||||
- SUCCESS
|
||||
- FAILED
|
||||
- STOPPED
|
||||
description: |
|
||||
任务和执行状态枚举:
|
||||
- DRAFT: 草稿状态
|
||||
- READY: 就绪状态
|
||||
- RUNNING: 运行中
|
||||
- SUCCESS: 执行成功 (对应原来的COMPLETED/SUCCESS)
|
||||
- FAILED: 执行失败
|
||||
- STOPPED: 已停止
|
||||
|
||||
SyncMode:
|
||||
type: string
|
||||
enum: [ONCE, SCHEDULED]
|
||||
description: 同步方式:一次性(ONCE) 或 定时(SCHEDULED)
|
||||
|
||||
CollectionTaskSummary:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/TaskStatus'
|
||||
syncMode:
|
||||
$ref: '#/components/schemas/SyncMode'
|
||||
lastExecutionId:
|
||||
type: string
|
||||
description: 最后执行ID
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 任务列表摘要信息(不包含详细配置与调度表达式)
|
||||
|
||||
CollectionTaskResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
config:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
description: 归集配置,包含源端和目标端配置信息
|
||||
status:
|
||||
$ref: '#/components/schemas/TaskStatus'
|
||||
syncMode:
|
||||
$ref: '#/components/schemas/SyncMode'
|
||||
scheduleExpression:
|
||||
type: string
|
||||
description: Cron调度表达式 (仅当 syncMode = SCHEDULED 时有效)
|
||||
lastExecutionId:
|
||||
type: string
|
||||
description: 最后执行ID
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
CreateCollectionTaskRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- config
|
||||
- syncMode
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
minLength: 1
|
||||
maxLength: 100
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
maxLength: 500
|
||||
config:
|
||||
type: object
|
||||
description: 归集配置,包含源端和目标端配置信息
|
||||
additionalProperties: true
|
||||
syncMode:
|
||||
$ref: '#/components/schemas/SyncMode'
|
||||
scheduleExpression:
|
||||
type: string
|
||||
description: Cron调度表达式 (syncMode=SCHEDULED 时必填)
|
||||
|
||||
UpdateCollectionTaskRequest:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 任务名称
|
||||
minLength: 1
|
||||
maxLength: 100
|
||||
description:
|
||||
type: string
|
||||
description: 任务描述
|
||||
maxLength: 500
|
||||
config:
|
||||
type: object
|
||||
description: 归集配置,包含源端和目标端配置信息
|
||||
additionalProperties: true
|
||||
syncMode:
|
||||
$ref: '#/components/schemas/SyncMode'
|
||||
scheduleExpression:
|
||||
type: string
|
||||
description: Cron调度表达式 (syncMode=SCHEDULED 时必填)
|
||||
|
||||
PagedCollectionTaskSummary:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CollectionTaskSummary'
|
||||
totalElements:
|
||||
type: integer
|
||||
totalPages:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
|
||||
PagedCollectionTasks:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CollectionTaskResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
totalPages:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
|
||||
TaskExecutionResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
taskId:
|
||||
type: string
|
||||
taskName:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/TaskStatus'
|
||||
startedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
TaskExecutionDetail:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
taskId:
|
||||
type: string
|
||||
taskName:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/TaskStatus'
|
||||
progress:
|
||||
type: number
|
||||
format: double
|
||||
minimum: 0
|
||||
maximum: 100
|
||||
recordsTotal:
|
||||
type: integer
|
||||
recordsProcessed:
|
||||
type: integer
|
||||
recordsSuccess:
|
||||
type: integer
|
||||
recordsFailed:
|
||||
type: integer
|
||||
throughput:
|
||||
type: number
|
||||
format: double
|
||||
dataSizeBytes:
|
||||
type: integer
|
||||
startedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
completedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
durationSeconds:
|
||||
type: integer
|
||||
errorMessage:
|
||||
type: string
|
||||
|
||||
PagedTaskExecutions:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TaskExecutionDetail'
|
||||
totalElements:
|
||||
type: integer
|
||||
totalPages:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
|
||||
DataxTemplateSummary:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
sourceType:
|
||||
type: string
|
||||
description: 源数据源类型
|
||||
targetType:
|
||||
type: string
|
||||
description: 目标数据源类型
|
||||
description:
|
||||
type: string
|
||||
version:
|
||||
type: string
|
||||
isSystem:
|
||||
type: boolean
|
||||
description: 是否为系统模板
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
PagedDataxTemplates:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/DataxTemplateSummary'
|
||||
totalElements:
|
||||
type: integer
|
||||
totalPages:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
630
backend/openapi/specs/data-evaluation.yaml
Normal file
@@ -0,0 +1,630 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Data Evaluation Service API
|
||||
description: 数据评估服务API - 质量、适配性、价值评估
|
||||
version: 1.0.0
|
||||
contact:
|
||||
name: Data Mate Platform Team
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8086
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: quality-evaluation
|
||||
description: 数据质量评估
|
||||
- name: compatibility-evaluation
|
||||
description: 适配性评估
|
||||
- name: value-evaluation
|
||||
description: 价值评估
|
||||
- name: evaluation-reports
|
||||
description: 评估报告
|
||||
|
||||
paths:
|
||||
/api/v1/evaluation/quality:
|
||||
post:
|
||||
tags:
|
||||
- quality-evaluation
|
||||
summary: 数据质量评估
|
||||
description: 对数据集进行质量评估,包括完整性、准确性、一致性等
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QualityEvaluationRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 评估成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QualityEvaluationResponse'
|
||||
|
||||
/api/v1/evaluation/quality/{evaluationId}:
|
||||
get:
|
||||
tags:
|
||||
- quality-evaluation
|
||||
summary: 获取质量评估结果
|
||||
parameters:
|
||||
- name: evaluationId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/QualityEvaluationDetailResponse'
|
||||
|
||||
/api/v1/evaluation/compatibility:
|
||||
post:
|
||||
tags:
|
||||
- compatibility-evaluation
|
||||
summary: 适配性评估
|
||||
description: 评估数据集与目标模型或任务的适配性
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompatibilityEvaluationRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 评估成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompatibilityEvaluationResponse'
|
||||
|
||||
/api/v1/evaluation/value:
|
||||
post:
|
||||
tags:
|
||||
- value-evaluation
|
||||
summary: 价值评估
|
||||
description: 评估数据集的商业价值和使用价值
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ValueEvaluationRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 评估成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ValueEvaluationResponse'
|
||||
|
||||
/api/v1/evaluation/reports:
|
||||
get:
|
||||
tags:
|
||||
- evaluation-reports
|
||||
summary: 获取评估报告列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: type
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluationType'
|
||||
- name: datasetId
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluationReportPageResponse'
|
||||
|
||||
/api/v1/evaluation/reports/{reportId}:
|
||||
get:
|
||||
tags:
|
||||
- evaluation-reports
|
||||
summary: 获取评估报告详情
|
||||
parameters:
|
||||
- name: reportId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluationReportDetailResponse'
|
||||
|
||||
/api/v1/evaluation/reports/{reportId}/export:
|
||||
get:
|
||||
tags:
|
||||
- evaluation-reports
|
||||
summary: 导出评估报告
|
||||
parameters:
|
||||
- name: reportId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: format
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
enum: [PDF, EXCEL, JSON]
|
||||
default: PDF
|
||||
responses:
|
||||
'200':
|
||||
description: 导出成功
|
||||
content:
|
||||
application/octet-stream:
|
||||
schema:
|
||||
type: string
|
||||
format: binary
|
||||
|
||||
/api/v1/evaluation/batch:
|
||||
post:
|
||||
tags:
|
||||
- evaluation-reports
|
||||
summary: 批量评估
|
||||
description: 对多个数据集进行批量评估
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchEvaluationRequest'
|
||||
responses:
|
||||
'202':
|
||||
description: 批量评估任务已提交
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchEvaluationResponse'
|
||||
|
||||
components:
|
||||
schemas:
|
||||
QualityEvaluationRequest:
|
||||
type: object
|
||||
required:
|
||||
- datasetId
|
||||
- metrics
|
||||
properties:
|
||||
datasetId:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QualityMetric'
|
||||
description: 评估指标
|
||||
sampleSize:
|
||||
type: integer
|
||||
description: 采样大小
|
||||
parameters:
|
||||
type: object
|
||||
description: 评估参数
|
||||
|
||||
QualityEvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
evaluationId:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/EvaluationStatus'
|
||||
overallScore:
|
||||
type: number
|
||||
format: double
|
||||
description: 总体质量分数
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QualityMetricResult'
|
||||
recommendations:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
QualityEvaluationDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/QualityEvaluationResponse'
|
||||
- type: object
|
||||
properties:
|
||||
detailedResults:
|
||||
$ref: '#/components/schemas/DetailedQualityResults'
|
||||
visualizations:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/VisualizationData'
|
||||
|
||||
CompatibilityEvaluationRequest:
|
||||
type: object
|
||||
required:
|
||||
- datasetId
|
||||
- targetType
|
||||
properties:
|
||||
datasetId:
|
||||
type: string
|
||||
targetType:
|
||||
$ref: '#/components/schemas/TargetType'
|
||||
targetConfig:
|
||||
type: object
|
||||
description: 目标配置(模型、任务等)
|
||||
evaluationCriteria:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CompatibilityCriterion'
|
||||
|
||||
CompatibilityEvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
evaluationId:
|
||||
type: string
|
||||
compatibilityScore:
|
||||
type: number
|
||||
format: double
|
||||
results:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/CompatibilityResult'
|
||||
suggestions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
ValueEvaluationRequest:
|
||||
type: object
|
||||
required:
|
||||
- datasetId
|
||||
- valueCriteria
|
||||
properties:
|
||||
datasetId:
|
||||
type: string
|
||||
valueCriteria:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ValueCriterion'
|
||||
marketContext:
|
||||
type: object
|
||||
description: 市场环境信息
|
||||
businessContext:
|
||||
type: object
|
||||
description: 业务环境信息
|
||||
|
||||
ValueEvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
evaluationId:
|
||||
type: string
|
||||
valueScore:
|
||||
type: number
|
||||
format: double
|
||||
monetaryValue:
|
||||
type: number
|
||||
format: double
|
||||
description: 货币价值估算
|
||||
strategicValue:
|
||||
type: number
|
||||
format: double
|
||||
description: 战略价值评分
|
||||
results:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ValueResult'
|
||||
insights:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
|
||||
EvaluationReportResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
datasetId:
|
||||
type: string
|
||||
type:
|
||||
$ref: '#/components/schemas/EvaluationType'
|
||||
status:
|
||||
$ref: '#/components/schemas/EvaluationStatus'
|
||||
overallScore:
|
||||
type: number
|
||||
format: double
|
||||
summary:
|
||||
type: string
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
completedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
EvaluationReportPageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/EvaluationReportResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
EvaluationReportDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/EvaluationReportResponse'
|
||||
- type: object
|
||||
properties:
|
||||
qualityResults:
|
||||
$ref: '#/components/schemas/QualityEvaluationResponse'
|
||||
compatibilityResults:
|
||||
$ref: '#/components/schemas/CompatibilityEvaluationResponse'
|
||||
valueResults:
|
||||
$ref: '#/components/schemas/ValueEvaluationResponse'
|
||||
attachments:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ReportAttachment'
|
||||
|
||||
BatchEvaluationRequest:
|
||||
type: object
|
||||
required:
|
||||
- datasetIds
|
||||
- evaluationTypes
|
||||
properties:
|
||||
datasetIds:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
evaluationTypes:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/EvaluationType'
|
||||
parameters:
|
||||
type: object
|
||||
|
||||
BatchEvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
batchId:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
totalTasks:
|
||||
type: integer
|
||||
submittedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
QualityMetric:
|
||||
type: string
|
||||
enum:
|
||||
- COMPLETENESS
|
||||
- ACCURACY
|
||||
- CONSISTENCY
|
||||
- VALIDITY
|
||||
- UNIQUENESS
|
||||
- TIMELINESS
|
||||
|
||||
QualityMetricResult:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
$ref: '#/components/schemas/QualityMetric'
|
||||
score:
|
||||
type: number
|
||||
format: double
|
||||
details:
|
||||
type: object
|
||||
issues:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/QualityIssue'
|
||||
|
||||
DetailedQualityResults:
|
||||
type: object
|
||||
properties:
|
||||
fieldAnalysis:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/FieldAnalysis'
|
||||
distributionAnalysis:
|
||||
$ref: '#/components/schemas/DistributionAnalysis'
|
||||
correlationAnalysis:
|
||||
$ref: '#/components/schemas/CorrelationAnalysis'
|
||||
|
||||
TargetType:
|
||||
type: string
|
||||
enum:
|
||||
- LANGUAGE_MODEL
|
||||
- CLASSIFICATION_MODEL
|
||||
- RECOMMENDATION_SYSTEM
|
||||
- CUSTOM_TASK
|
||||
|
||||
CompatibilityCriterion:
|
||||
type: string
|
||||
enum:
|
||||
- FORMAT_COMPATIBILITY
|
||||
- SCHEMA_COMPATIBILITY
|
||||
- SIZE_ADEQUACY
|
||||
- DISTRIBUTION_MATCH
|
||||
- FEATURE_COVERAGE
|
||||
|
||||
CompatibilityResult:
|
||||
type: object
|
||||
properties:
|
||||
criterion:
|
||||
$ref: '#/components/schemas/CompatibilityCriterion'
|
||||
score:
|
||||
type: number
|
||||
format: double
|
||||
status:
|
||||
type: string
|
||||
enum: [PASS, WARN, FAIL]
|
||||
details:
|
||||
type: string
|
||||
|
||||
ValueCriterion:
|
||||
type: string
|
||||
enum:
|
||||
- RARITY
|
||||
- DEMAND
|
||||
- QUALITY
|
||||
- COMPLETENESS
|
||||
- TIMELINESS
|
||||
- STRATEGIC_IMPORTANCE
|
||||
|
||||
ValueResult:
|
||||
type: object
|
||||
properties:
|
||||
criterion:
|
||||
$ref: '#/components/schemas/ValueCriterion'
|
||||
score:
|
||||
type: number
|
||||
format: double
|
||||
impact:
|
||||
type: string
|
||||
enum: [LOW, MEDIUM, HIGH]
|
||||
explanation:
|
||||
type: string
|
||||
|
||||
EvaluationType:
|
||||
type: string
|
||||
enum:
|
||||
- QUALITY
|
||||
- COMPATIBILITY
|
||||
- VALUE
|
||||
- COMPREHENSIVE
|
||||
|
||||
EvaluationStatus:
|
||||
type: string
|
||||
enum:
|
||||
- PENDING
|
||||
- RUNNING
|
||||
- COMPLETED
|
||||
- FAILED
|
||||
|
||||
QualityIssue:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
severity:
|
||||
type: string
|
||||
enum: [LOW, MEDIUM, HIGH, CRITICAL]
|
||||
description:
|
||||
type: string
|
||||
affectedRecords:
|
||||
type: integer
|
||||
suggestions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
|
||||
FieldAnalysis:
|
||||
type: object
|
||||
properties:
|
||||
fieldName:
|
||||
type: string
|
||||
dataType:
|
||||
type: string
|
||||
nullCount:
|
||||
type: integer
|
||||
uniqueCount:
|
||||
type: integer
|
||||
statistics:
|
||||
type: object
|
||||
|
||||
DistributionAnalysis:
|
||||
type: object
|
||||
properties:
|
||||
distributions:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
outliers:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
patterns:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
|
||||
CorrelationAnalysis:
|
||||
type: object
|
||||
properties:
|
||||
correlationMatrix:
|
||||
type: array
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
significantCorrelations:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
|
||||
VisualizationData:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
enum: [CHART, GRAPH, HISTOGRAM, HEATMAP]
|
||||
title:
|
||||
type: string
|
||||
data:
|
||||
type: object
|
||||
config:
|
||||
type: object
|
||||
|
||||
ReportAttachment:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
size:
|
||||
type: integer
|
||||
format: int64
|
||||
downloadUrl:
|
||||
type: string
|
||||
|
||||
securitySchemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
security:
|
||||
- BearerAuth: []
|
||||
719
backend/openapi/specs/data-management.yaml
Normal file
@@ -0,0 +1,719 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Data Management Service API
|
||||
description: |
|
||||
数据管理服务API,提供数据集的创建、管理和文件操作功能。
|
||||
|
||||
主要功能:
|
||||
- 数据集的创建和管理
|
||||
- 多种数据集类型支持(图像、文本、音频、视频、多模态等)
|
||||
- 数据集文件管理
|
||||
- 数据集标签和元数据管理
|
||||
- 数据集统计信息
|
||||
version: 1.0.0
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8092/api/v1/data-management
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: Dataset
|
||||
description: 数据集管理
|
||||
- name: DatasetFile
|
||||
description: 数据集文件管理
|
||||
- name: DatasetType
|
||||
description: 数据集类型管理
|
||||
- name: Tag
|
||||
description: 标签管理
|
||||
|
||||
paths:
|
||||
/data-management/datasets:
|
||||
get:
|
||||
tags: [Dataset]
|
||||
operationId: getDatasets
|
||||
summary: 获取数据集列表
|
||||
description: 分页查询数据集列表,支持按类型、标签等条件筛选
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
description: 页码,从0开始
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
description: 每页大小
|
||||
- name: type
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集类型过滤
|
||||
- name: tags
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: 标签过滤,多个标签用逗号分隔
|
||||
- name: keyword
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: 关键词搜索(名称、描述)
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
enum: [ACTIVE, INACTIVE, PROCESSING]
|
||||
description: 数据集状态过滤
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PagedDatasetResponse'
|
||||
'400':
|
||||
description: 请求参数错误
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
|
||||
post:
|
||||
tags: [Dataset]
|
||||
operationId: createDataset
|
||||
summary: 创建数据集
|
||||
description: 创建新的数据集
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateDatasetRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DatasetResponse'
|
||||
'400':
|
||||
description: 请求参数错误
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
|
||||
/data-management/datasets/{datasetId}:
|
||||
get:
|
||||
tags: [Dataset]
|
||||
operationId: getDatasetById
|
||||
summary: 获取数据集详情
|
||||
description: 根据ID获取数据集详细信息
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DatasetResponse'
|
||||
'404':
|
||||
description: 数据集不存在
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
|
||||
put:
|
||||
tags: [Dataset]
|
||||
summary: 更新数据集
|
||||
operationId: updateDataset
|
||||
description: 更新数据集信息
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateDatasetRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 更新成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DatasetResponse'
|
||||
'404':
|
||||
description: 数据集不存在
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
|
||||
delete:
|
||||
tags: [Dataset]
|
||||
operationId: deleteDataset
|
||||
summary: 删除数据集
|
||||
description: 删除指定的数据集
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
responses:
|
||||
'204':
|
||||
description: 删除成功
|
||||
'404':
|
||||
description: 数据集不存在
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
|
||||
/data-management/datasets/{datasetId}/files:
|
||||
get:
|
||||
tags: [DatasetFile]
|
||||
summary: 获取数据集文件列表
|
||||
operationId: getDatasetFiles
|
||||
description: 分页获取数据集中的文件列表
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
description: 页码,从0开始
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
description: 每页大小
|
||||
- name: fileType
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: 文件类型过滤
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
|
||||
description: 文件状态过滤
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PagedDatasetFileResponse'
|
||||
|
||||
post:
|
||||
tags: [DatasetFile]
|
||||
summary: 上传文件到数据集
|
||||
operationId: uploadDatasetFile
|
||||
description: 向指定数据集上传文件
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
multipart/form-data:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
file:
|
||||
type: string
|
||||
format: binary
|
||||
description: 要上传的文件
|
||||
description:
|
||||
type: string
|
||||
description: 文件描述
|
||||
responses:
|
||||
'201':
|
||||
description: 上传成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DatasetFileResponse'
|
||||
|
||||
/data-management/datasets/{datasetId}/files/{fileId}:
|
||||
get:
|
||||
tags: [DatasetFile]
|
||||
summary: 获取文件详情
|
||||
description: 获取数据集中指定文件的详细信息
|
||||
operationId: getDatasetFileById
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
- name: fileId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 文件ID
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DatasetFileResponse'
|
||||
|
||||
delete:
|
||||
tags: [DatasetFile]
|
||||
summary: 删除文件
|
||||
operationId: deleteDatasetFile
|
||||
description: 从数据集中删除指定文件
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
- name: fileId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 文件ID
|
||||
responses:
|
||||
'204':
|
||||
description: 删除成功
|
||||
|
||||
/data-management/datasets/{datasetId}/files/{fileId}/download:
|
||||
get:
|
||||
tags: [DatasetFile]
|
||||
operationId: downloadDatasetFile
|
||||
summary: 下载文件
|
||||
description: 下载数据集中的指定文件
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
- name: fileId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 文件ID
|
||||
responses:
|
||||
'200':
|
||||
description: 文件内容
|
||||
content:
|
||||
application/octet-stream:
|
||||
schema:
|
||||
type: string
|
||||
format: binary
|
||||
|
||||
/data-management/dataset-types:
|
||||
get:
|
||||
operationId: getDatasetTypes
|
||||
tags: [DatasetType]
|
||||
summary: 获取数据集类型列表
|
||||
description: 获取所有支持的数据集类型
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/DatasetTypeResponse'
|
||||
|
||||
/data-management/tags:
|
||||
get:
|
||||
tags: [Tag]
|
||||
operationId: getTags
|
||||
summary: 获取标签列表
|
||||
description: 获取所有可用的标签
|
||||
parameters:
|
||||
- name: keyword
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: 标签名称关键词搜索
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TagResponse'
|
||||
|
||||
post:
|
||||
tags: [Tag]
|
||||
operationId: createTag
|
||||
summary: 创建标签
|
||||
description: 创建新的标签
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateTagRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/TagResponse'
|
||||
|
||||
/data-management/datasets/{datasetId}/statistics:
|
||||
get:
|
||||
tags: [Dataset]
|
||||
operationId: getDatasetStatistics
|
||||
summary: 获取数据集统计信息
|
||||
description: 获取数据集的统计信息(文件数量、大小、完成度等)
|
||||
parameters:
|
||||
- name: datasetId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DatasetStatisticsResponse'
|
||||
|
||||
components:
|
||||
schemas:
|
||||
PagedDatasetResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/DatasetResponse'
|
||||
page:
|
||||
type: integer
|
||||
description: 当前页码
|
||||
size:
|
||||
type: integer
|
||||
description: 每页大小
|
||||
totalElements:
|
||||
type: integer
|
||||
description: 总元素数
|
||||
totalPages:
|
||||
type: integer
|
||||
description: 总页数
|
||||
first:
|
||||
type: boolean
|
||||
description: 是否为第一页
|
||||
last:
|
||||
type: boolean
|
||||
description: 是否为最后一页
|
||||
|
||||
DatasetResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 数据集ID
|
||||
name:
|
||||
type: string
|
||||
description: 数据集名称
|
||||
description:
|
||||
type: string
|
||||
description: 数据集描述
|
||||
type:
|
||||
$ref: '#/components/schemas/DatasetTypeResponse'
|
||||
status:
|
||||
type: string
|
||||
enum: [ACTIVE, INACTIVE, PROCESSING]
|
||||
description: 数据集状态
|
||||
tags:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/TagResponse'
|
||||
description: 标签列表
|
||||
dataSource:
|
||||
type: string
|
||||
description: 数据源
|
||||
targetLocation:
|
||||
type: string
|
||||
description: 目标位置
|
||||
fileCount:
|
||||
type: integer
|
||||
description: 文件数量
|
||||
totalSize:
|
||||
type: integer
|
||||
format: int64
|
||||
description: 总大小(字节)
|
||||
completionRate:
|
||||
type: number
|
||||
format: float
|
||||
description: 完成率(0-100)
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 创建时间
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 更新时间
|
||||
createdBy:
|
||||
type: string
|
||||
description: 创建者
|
||||
|
||||
CreateDatasetRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- type
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 数据集名称
|
||||
minLength: 1
|
||||
maxLength: 100
|
||||
description:
|
||||
type: string
|
||||
description: 数据集描述
|
||||
maxLength: 500
|
||||
type:
|
||||
type: string
|
||||
description: 数据集类型
|
||||
tags:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: 标签列表
|
||||
dataSource:
|
||||
type: string
|
||||
description: 数据源
|
||||
targetLocation:
|
||||
type: string
|
||||
description: 目标位置
|
||||
|
||||
UpdateDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 数据集名称
|
||||
maxLength: 100
|
||||
description:
|
||||
type: string
|
||||
description: 数据集描述
|
||||
maxLength: 500
|
||||
tags:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: 标签列表
|
||||
status:
|
||||
type: string
|
||||
enum: [ACTIVE, INACTIVE]
|
||||
description: 数据集状态
|
||||
|
||||
DatasetTypeResponse:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
description: 类型编码
|
||||
name:
|
||||
type: string
|
||||
description: 类型名称
|
||||
description:
|
||||
type: string
|
||||
description: 类型描述
|
||||
supportedFormats:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: 支持的文件格式
|
||||
icon:
|
||||
type: string
|
||||
description: 图标
|
||||
|
||||
PagedDatasetFileResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/DatasetFileResponse'
|
||||
page:
|
||||
type: integer
|
||||
description: 当前页码
|
||||
size:
|
||||
type: integer
|
||||
description: 每页大小
|
||||
totalElements:
|
||||
type: integer
|
||||
description: 总元素数
|
||||
totalPages:
|
||||
type: integer
|
||||
description: 总页数
|
||||
first:
|
||||
type: boolean
|
||||
description: 是否为第一页
|
||||
last:
|
||||
type: boolean
|
||||
description: 是否为最后一页
|
||||
|
||||
DatasetFileResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 文件ID
|
||||
fileName:
|
||||
type: string
|
||||
description: 文件名
|
||||
originalName:
|
||||
type: string
|
||||
description: 原始文件名
|
||||
fileType:
|
||||
type: string
|
||||
description: 文件类型
|
||||
fileSize:
|
||||
type: integer
|
||||
format: int64
|
||||
description: 文件大小(字节)
|
||||
status:
|
||||
type: string
|
||||
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
|
||||
description: 文件状态
|
||||
description:
|
||||
type: string
|
||||
description: 文件描述
|
||||
filePath:
|
||||
type: string
|
||||
description: 文件路径
|
||||
uploadTime:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 上传时间
|
||||
uploadedBy:
|
||||
type: string
|
||||
description: 上传者
|
||||
|
||||
TagResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 标签ID
|
||||
name:
|
||||
type: string
|
||||
description: 标签名称
|
||||
color:
|
||||
type: string
|
||||
description: 标签颜色
|
||||
description:
|
||||
type: string
|
||||
description: 标签描述
|
||||
usageCount:
|
||||
type: integer
|
||||
description: 使用次数
|
||||
|
||||
CreateTagRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 标签名称
|
||||
minLength: 1
|
||||
maxLength: 50
|
||||
color:
|
||||
type: string
|
||||
description: 标签颜色
|
||||
pattern: '^#[0-9A-Fa-f]{6}$'
|
||||
description:
|
||||
type: string
|
||||
description: 标签描述
|
||||
maxLength: 200
|
||||
|
||||
DatasetStatisticsResponse:
|
||||
type: object
|
||||
properties:
|
||||
totalFiles:
|
||||
type: integer
|
||||
description: 总文件数
|
||||
completedFiles:
|
||||
type: integer
|
||||
description: 已完成文件数
|
||||
totalSize:
|
||||
type: integer
|
||||
format: int64
|
||||
description: 总大小(字节)
|
||||
completionRate:
|
||||
type: number
|
||||
format: float
|
||||
description: 完成率(0-100)
|
||||
fileTypeDistribution:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: integer
|
||||
description: 文件类型分布
|
||||
statusDistribution:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: integer
|
||||
description: 状态分布
|
||||
|
||||
ErrorResponse:
|
||||
type: object
|
||||
properties:
|
||||
error:
|
||||
type: string
|
||||
description: 错误代码
|
||||
message:
|
||||
type: string
|
||||
description: 错误消息
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 错误时间
|
||||
path:
|
||||
type: string
|
||||
description: 请求路径
|
||||
620
backend/openapi/specs/data-synthesis.yaml
Normal file
@@ -0,0 +1,620 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Data Synthesis Service API
|
||||
description: 数据合成服务API - 指令、COT蒸馏、多模态合成
|
||||
version: 1.0.0
|
||||
contact:
|
||||
name: Data Mate Platform Team
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8085
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: synthesis-templates
|
||||
description: 合成模板管理
|
||||
- name: synthesis-jobs
|
||||
description: 合成任务管理
|
||||
- name: instruction-tuning
|
||||
description: 指令调优
|
||||
- name: cot-distillation
|
||||
description: COT蒸馏
|
||||
|
||||
paths:
|
||||
/api/v1/synthesis/templates:
|
||||
get:
|
||||
tags:
|
||||
- synthesis-templates
|
||||
summary: 获取合成模板列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: type
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisType'
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisTemplatePageResponse'
|
||||
|
||||
post:
|
||||
tags:
|
||||
- synthesis-templates
|
||||
summary: 创建合成模板
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateSynthesisTemplateRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisTemplateResponse'
|
||||
|
||||
/api/v1/synthesis/templates/{templateId}:
|
||||
get:
|
||||
tags:
|
||||
- synthesis-templates
|
||||
summary: 获取合成模板详情
|
||||
parameters:
|
||||
- name: templateId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisTemplateDetailResponse'
|
||||
|
||||
put:
|
||||
tags:
|
||||
- synthesis-templates
|
||||
summary: 更新合成模板
|
||||
parameters:
|
||||
- name: templateId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateSynthesisTemplateRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 更新成功
|
||||
|
||||
/api/v1/synthesis/jobs:
|
||||
get:
|
||||
tags:
|
||||
- synthesis-jobs
|
||||
summary: 获取合成任务列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobStatus'
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisJobPageResponse'
|
||||
|
||||
post:
|
||||
tags:
|
||||
- synthesis-jobs
|
||||
summary: 创建合成任务
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateSynthesisJobRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 任务创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisJobResponse'
|
||||
|
||||
/api/v1/synthesis/jobs/{jobId}:
|
||||
get:
|
||||
tags:
|
||||
- synthesis-jobs
|
||||
summary: 获取合成任务详情
|
||||
parameters:
|
||||
- name: jobId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SynthesisJobDetailResponse'
|
||||
|
||||
/api/v1/synthesis/jobs/{jobId}/execute:
|
||||
post:
|
||||
tags:
|
||||
- synthesis-jobs
|
||||
summary: 执行合成任务
|
||||
parameters:
|
||||
- name: jobId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 任务开始执行
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobExecutionResponse'
|
||||
|
||||
/api/v1/synthesis/instruction-tuning:
|
||||
post:
|
||||
tags:
|
||||
- instruction-tuning
|
||||
summary: 指令调优数据合成
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InstructionTuningRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 合成成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InstructionTuningResponse'
|
||||
|
||||
/api/v1/synthesis/cot-distillation:
|
||||
post:
|
||||
tags:
|
||||
- cot-distillation
|
||||
summary: COT蒸馏数据合成
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/COTDistillationRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 蒸馏成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/COTDistillationResponse'
|
||||
|
||||
components:
|
||||
schemas:
|
||||
SynthesisTemplateResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
type:
|
||||
$ref: '#/components/schemas/SynthesisType'
|
||||
category:
|
||||
type: string
|
||||
modelConfig:
|
||||
$ref: '#/components/schemas/ModelConfig'
|
||||
enabled:
|
||||
type: boolean
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
SynthesisTemplateDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/SynthesisTemplateResponse'
|
||||
- type: object
|
||||
properties:
|
||||
promptTemplate:
|
||||
type: string
|
||||
parameters:
|
||||
type: object
|
||||
examples:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/SynthesisExample'
|
||||
|
||||
SynthesisTemplatePageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/SynthesisTemplateResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
CreateSynthesisTemplateRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- type
|
||||
- promptTemplate
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
type:
|
||||
$ref: '#/components/schemas/SynthesisType'
|
||||
category:
|
||||
type: string
|
||||
promptTemplate:
|
||||
type: string
|
||||
modelConfig:
|
||||
$ref: '#/components/schemas/ModelConfig'
|
||||
parameters:
|
||||
type: object
|
||||
|
||||
UpdateSynthesisTemplateRequest:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
promptTemplate:
|
||||
type: string
|
||||
enabled:
|
||||
type: boolean
|
||||
parameters:
|
||||
type: object
|
||||
|
||||
SynthesisJobResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
templateId:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/JobStatus'
|
||||
progress:
|
||||
type: number
|
||||
format: double
|
||||
targetCount:
|
||||
type: integer
|
||||
generatedCount:
|
||||
type: integer
|
||||
startTime:
|
||||
type: string
|
||||
format: date-time
|
||||
endTime:
|
||||
type: string
|
||||
format: date-time
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
SynthesisJobDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/SynthesisJobResponse'
|
||||
- type: object
|
||||
properties:
|
||||
template:
|
||||
$ref: '#/components/schemas/SynthesisTemplateResponse'
|
||||
statistics:
|
||||
$ref: '#/components/schemas/SynthesisStatistics'
|
||||
samples:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/GeneratedSample'
|
||||
|
||||
SynthesisJobPageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/SynthesisJobResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
CreateSynthesisJobRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- templateId
|
||||
- targetCount
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
templateId:
|
||||
type: string
|
||||
targetCount:
|
||||
type: integer
|
||||
parameters:
|
||||
type: object
|
||||
seedData:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
|
||||
JobExecutionResponse:
|
||||
type: object
|
||||
properties:
|
||||
executionId:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
|
||||
InstructionTuningRequest:
|
||||
type: object
|
||||
required:
|
||||
- baseInstructions
|
||||
- targetDomain
|
||||
- count
|
||||
properties:
|
||||
baseInstructions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
targetDomain:
|
||||
type: string
|
||||
count:
|
||||
type: integer
|
||||
modelConfig:
|
||||
$ref: '#/components/schemas/ModelConfig'
|
||||
parameters:
|
||||
type: object
|
||||
|
||||
InstructionTuningResponse:
|
||||
type: object
|
||||
properties:
|
||||
jobId:
|
||||
type: string
|
||||
generatedInstructions:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/GeneratedInstruction'
|
||||
statistics:
|
||||
$ref: '#/components/schemas/GenerationStatistics'
|
||||
|
||||
COTDistillationRequest:
|
||||
type: object
|
||||
required:
|
||||
- sourceModel
|
||||
- targetFormat
|
||||
- examples
|
||||
properties:
|
||||
sourceModel:
|
||||
type: string
|
||||
targetFormat:
|
||||
type: string
|
||||
enum: [QA, INSTRUCTION, REASONING]
|
||||
examples:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/COTExample'
|
||||
parameters:
|
||||
type: object
|
||||
|
||||
COTDistillationResponse:
|
||||
type: object
|
||||
properties:
|
||||
jobId:
|
||||
type: string
|
||||
distilledData:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/DistilledCOTData'
|
||||
statistics:
|
||||
$ref: '#/components/schemas/DistillationStatistics'
|
||||
|
||||
SynthesisType:
|
||||
type: string
|
||||
enum:
|
||||
- INSTRUCTION_TUNING
|
||||
- COT_DISTILLATION
|
||||
- DIALOGUE_GENERATION
|
||||
- TEXT_AUGMENTATION
|
||||
- MULTIMODAL_SYNTHESIS
|
||||
- CUSTOM
|
||||
|
||||
JobStatus:
|
||||
type: string
|
||||
enum:
|
||||
- PENDING
|
||||
- RUNNING
|
||||
- COMPLETED
|
||||
- FAILED
|
||||
- CANCELLED
|
||||
|
||||
ModelConfig:
|
||||
type: object
|
||||
properties:
|
||||
modelName:
|
||||
type: string
|
||||
temperature:
|
||||
type: number
|
||||
format: double
|
||||
maxTokens:
|
||||
type: integer
|
||||
topP:
|
||||
type: number
|
||||
format: double
|
||||
frequencyPenalty:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
SynthesisExample:
|
||||
type: object
|
||||
properties:
|
||||
input:
|
||||
type: string
|
||||
output:
|
||||
type: string
|
||||
explanation:
|
||||
type: string
|
||||
|
||||
SynthesisStatistics:
|
||||
type: object
|
||||
properties:
|
||||
totalGenerated:
|
||||
type: integer
|
||||
successfulGenerated:
|
||||
type: integer
|
||||
failedGenerated:
|
||||
type: integer
|
||||
averageLength:
|
||||
type: number
|
||||
format: double
|
||||
uniqueCount:
|
||||
type: integer
|
||||
|
||||
GeneratedSample:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
content:
|
||||
type: string
|
||||
score:
|
||||
type: number
|
||||
format: double
|
||||
metadata:
|
||||
type: object
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
GeneratedInstruction:
|
||||
type: object
|
||||
properties:
|
||||
instruction:
|
||||
type: string
|
||||
input:
|
||||
type: string
|
||||
output:
|
||||
type: string
|
||||
quality:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
GenerationStatistics:
|
||||
type: object
|
||||
properties:
|
||||
totalGenerated:
|
||||
type: integer
|
||||
averageQuality:
|
||||
type: number
|
||||
format: double
|
||||
diversityScore:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
COTExample:
|
||||
type: object
|
||||
properties:
|
||||
question:
|
||||
type: string
|
||||
reasoning:
|
||||
type: string
|
||||
answer:
|
||||
type: string
|
||||
|
||||
DistilledCOTData:
|
||||
type: object
|
||||
properties:
|
||||
question:
|
||||
type: string
|
||||
reasoning:
|
||||
type: string
|
||||
answer:
|
||||
type: string
|
||||
confidence:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
DistillationStatistics:
|
||||
type: object
|
||||
properties:
|
||||
totalProcessed:
|
||||
type: integer
|
||||
successfulDistilled:
|
||||
type: integer
|
||||
averageConfidence:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
securitySchemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
security:
|
||||
- BearerAuth: []
|
||||
712
backend/openapi/specs/execution-engine.yaml
Normal file
@@ -0,0 +1,712 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Execution Engine Service API
|
||||
description: 执行引擎服务API - 与Ray/DataX/Python执行器对接
|
||||
version: 1.0.0
|
||||
contact:
|
||||
name: Data Mate Platform Team
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8088
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: jobs
|
||||
description: 作业管理
|
||||
- name: executors
|
||||
description: 执行器管理
|
||||
- name: resources
|
||||
description: 资源管理
|
||||
- name: monitoring
|
||||
description: 监控管理
|
||||
|
||||
paths:
|
||||
/api/v1/jobs:
|
||||
get:
|
||||
tags:
|
||||
- jobs
|
||||
summary: 获取作业列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobStatus'
|
||||
- name: executor
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/ExecutorType'
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobPageResponse'
|
||||
|
||||
post:
|
||||
tags:
|
||||
- jobs
|
||||
summary: 提交作业
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SubmitJobRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 作业提交成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobResponse'
|
||||
|
||||
/api/v1/jobs/{jobId}:
|
||||
get:
|
||||
tags:
|
||||
- jobs
|
||||
summary: 获取作业详情
|
||||
parameters:
|
||||
- name: jobId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobDetailResponse'
|
||||
|
||||
delete:
|
||||
tags:
|
||||
- jobs
|
||||
summary: 取消作业
|
||||
parameters:
|
||||
- name: jobId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 取消成功
|
||||
|
||||
/api/v1/jobs/{jobId}/logs:
|
||||
get:
|
||||
tags:
|
||||
- jobs
|
||||
summary: 获取作业日志
|
||||
parameters:
|
||||
- name: jobId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: follow
|
||||
in: query
|
||||
description: 是否实时跟踪日志
|
||||
schema:
|
||||
type: boolean
|
||||
default: false
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/JobLog'
|
||||
|
||||
/api/v1/jobs/{jobId}/retry:
|
||||
post:
|
||||
tags:
|
||||
- jobs
|
||||
summary: 重试作业
|
||||
parameters:
|
||||
- name: jobId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 重试成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/JobResponse'
|
||||
|
||||
/api/v1/executors:
|
||||
get:
|
||||
tags:
|
||||
- executors
|
||||
summary: 获取执行器列表
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ExecutorResponse'
|
||||
|
||||
post:
|
||||
tags:
|
||||
- executors
|
||||
summary: 注册执行器
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RegisterExecutorRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 注册成功
|
||||
|
||||
/api/v1/executors/{executorId}:
|
||||
get:
|
||||
tags:
|
||||
- executors
|
||||
summary: 获取执行器详情
|
||||
parameters:
|
||||
- name: executorId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ExecutorDetailResponse'
|
||||
|
||||
put:
|
||||
tags:
|
||||
- executors
|
||||
summary: 更新执行器
|
||||
parameters:
|
||||
- name: executorId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateExecutorRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 更新成功
|
||||
|
||||
/api/v1/resources/clusters:
|
||||
get:
|
||||
tags:
|
||||
- resources
|
||||
summary: 获取集群信息
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ClusterInfo'
|
||||
|
||||
/api/v1/resources/nodes:
|
||||
get:
|
||||
tags:
|
||||
- resources
|
||||
summary: 获取节点信息
|
||||
parameters:
|
||||
- name: clusterId
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/NodeInfo'
|
||||
|
||||
/api/v1/monitoring/metrics:
|
||||
get:
|
||||
tags:
|
||||
- monitoring
|
||||
summary: 获取监控指标
|
||||
parameters:
|
||||
- name: metric
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
- name: start
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
format: date-time
|
||||
- name: end
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
format: date-time
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/MetricsResponse'
|
||||
|
||||
components:
|
||||
schemas:
|
||||
JobResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/JobStatus'
|
||||
executorType:
|
||||
$ref: '#/components/schemas/ExecutorType'
|
||||
priority:
|
||||
type: integer
|
||||
progress:
|
||||
type: number
|
||||
format: double
|
||||
submittedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
startedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
completedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
submittedBy:
|
||||
type: string
|
||||
|
||||
JobDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/JobResponse'
|
||||
- type: object
|
||||
properties:
|
||||
configuration:
|
||||
$ref: '#/components/schemas/JobConfiguration'
|
||||
resources:
|
||||
$ref: '#/components/schemas/ResourceRequirement'
|
||||
metrics:
|
||||
$ref: '#/components/schemas/JobMetrics'
|
||||
artifacts:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/JobArtifact'
|
||||
dependencies:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
|
||||
JobPageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/JobResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
SubmitJobRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- executorType
|
||||
- configuration
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
executorType:
|
||||
$ref: '#/components/schemas/ExecutorType'
|
||||
priority:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 10
|
||||
default: 5
|
||||
configuration:
|
||||
$ref: '#/components/schemas/JobConfiguration'
|
||||
resources:
|
||||
$ref: '#/components/schemas/ResourceRequirement'
|
||||
dependencies:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
timeoutSeconds:
|
||||
type: integer
|
||||
|
||||
JobConfiguration:
|
||||
type: object
|
||||
properties:
|
||||
script:
|
||||
type: string
|
||||
description: 执行脚本或代码
|
||||
arguments:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: 执行参数
|
||||
environment:
|
||||
type: object
|
||||
description: 环境变量
|
||||
files:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/FileReference'
|
||||
packages:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: 依赖包列表
|
||||
|
||||
ResourceRequirement:
|
||||
type: object
|
||||
properties:
|
||||
cpuCores:
|
||||
type: number
|
||||
format: double
|
||||
memoryGB:
|
||||
type: number
|
||||
format: double
|
||||
gpuCount:
|
||||
type: integer
|
||||
diskGB:
|
||||
type: number
|
||||
format: double
|
||||
nodeSelector:
|
||||
type: object
|
||||
description: 节点选择器
|
||||
|
||||
ExecutorResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
$ref: '#/components/schemas/ExecutorType'
|
||||
status:
|
||||
$ref: '#/components/schemas/ExecutorStatus'
|
||||
version:
|
||||
type: string
|
||||
capabilities:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
registeredAt:
|
||||
type: string
|
||||
format: date-time
|
||||
lastHeartbeat:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
ExecutorDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/ExecutorResponse'
|
||||
- type: object
|
||||
properties:
|
||||
configuration:
|
||||
type: object
|
||||
resources:
|
||||
$ref: '#/components/schemas/ExecutorResources'
|
||||
currentJobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/JobResponse'
|
||||
statistics:
|
||||
$ref: '#/components/schemas/ExecutorStatistics'
|
||||
|
||||
RegisterExecutorRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- type
|
||||
- endpoint
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
$ref: '#/components/schemas/ExecutorType'
|
||||
endpoint:
|
||||
type: string
|
||||
capabilities:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
configuration:
|
||||
type: object
|
||||
|
||||
UpdateExecutorRequest:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
$ref: '#/components/schemas/ExecutorStatus'
|
||||
configuration:
|
||||
type: object
|
||||
|
||||
ClusterInfo:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
enum: [RAY, KUBERNETES, YARN, STANDALONE]
|
||||
status:
|
||||
type: string
|
||||
enum: [ACTIVE, INACTIVE, ERROR]
|
||||
nodeCount:
|
||||
type: integer
|
||||
totalCpuCores:
|
||||
type: integer
|
||||
totalMemoryGB:
|
||||
type: number
|
||||
format: double
|
||||
totalGpuCount:
|
||||
type: integer
|
||||
availableResources:
|
||||
$ref: '#/components/schemas/ResourceInfo'
|
||||
|
||||
NodeInfo:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
clusterId:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
enum: [ACTIVE, INACTIVE, BUSY, ERROR]
|
||||
resources:
|
||||
$ref: '#/components/schemas/ResourceInfo'
|
||||
usage:
|
||||
$ref: '#/components/schemas/ResourceUsage'
|
||||
lastUpdate:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
MetricsResponse:
|
||||
type: object
|
||||
properties:
|
||||
metric:
|
||||
type: string
|
||||
dataPoints:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricDataPoint'
|
||||
aggregation:
|
||||
type: object
|
||||
|
||||
JobLog:
|
||||
type: object
|
||||
properties:
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
level:
|
||||
type: string
|
||||
enum: [DEBUG, INFO, WARN, ERROR]
|
||||
source:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
|
||||
JobMetrics:
|
||||
type: object
|
||||
properties:
|
||||
cpuUsage:
|
||||
type: number
|
||||
format: double
|
||||
memoryUsage:
|
||||
type: number
|
||||
format: double
|
||||
diskUsage:
|
||||
type: number
|
||||
format: double
|
||||
networkIO:
|
||||
type: object
|
||||
duration:
|
||||
type: integer
|
||||
format: int64
|
||||
|
||||
JobArtifact:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
enum: [LOG, OUTPUT, CHECKPOINT, MODEL]
|
||||
size:
|
||||
type: integer
|
||||
format: int64
|
||||
path:
|
||||
type: string
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
FileReference:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
path:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
enum: [LOCAL, HDFS, S3, HTTP]
|
||||
|
||||
ExecutorResources:
|
||||
type: object
|
||||
properties:
|
||||
total:
|
||||
$ref: '#/components/schemas/ResourceInfo'
|
||||
available:
|
||||
$ref: '#/components/schemas/ResourceInfo'
|
||||
allocated:
|
||||
$ref: '#/components/schemas/ResourceInfo'
|
||||
|
||||
ExecutorStatistics:
|
||||
type: object
|
||||
properties:
|
||||
totalJobs:
|
||||
type: integer
|
||||
successfulJobs:
|
||||
type: integer
|
||||
failedJobs:
|
||||
type: integer
|
||||
averageExecutionTime:
|
||||
type: number
|
||||
format: double
|
||||
uptime:
|
||||
type: integer
|
||||
format: int64
|
||||
|
||||
ResourceInfo:
|
||||
type: object
|
||||
properties:
|
||||
cpuCores:
|
||||
type: number
|
||||
format: double
|
||||
memoryGB:
|
||||
type: number
|
||||
format: double
|
||||
gpuCount:
|
||||
type: integer
|
||||
diskGB:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
ResourceUsage:
|
||||
type: object
|
||||
properties:
|
||||
cpuUsagePercent:
|
||||
type: number
|
||||
format: double
|
||||
memoryUsagePercent:
|
||||
type: number
|
||||
format: double
|
||||
diskUsagePercent:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
MetricDataPoint:
|
||||
type: object
|
||||
properties:
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
value:
|
||||
type: number
|
||||
format: double
|
||||
tags:
|
||||
type: object
|
||||
|
||||
JobStatus:
|
||||
type: string
|
||||
enum:
|
||||
- SUBMITTED
|
||||
- PENDING
|
||||
- RUNNING
|
||||
- COMPLETED
|
||||
- FAILED
|
||||
- CANCELLED
|
||||
- TIMEOUT
|
||||
|
||||
ExecutorType:
|
||||
type: string
|
||||
enum:
|
||||
- RAY
|
||||
- DATAX
|
||||
- PYTHON
|
||||
- SPARK
|
||||
- FLINK
|
||||
- CUSTOM
|
||||
|
||||
ExecutorStatus:
|
||||
type: string
|
||||
enum:
|
||||
- ACTIVE
|
||||
- INACTIVE
|
||||
- BUSY
|
||||
- ERROR
|
||||
- MAINTENANCE
|
||||
|
||||
securitySchemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
security:
|
||||
- BearerAuth: []
|
||||
547
backend/openapi/specs/operator-market.yaml
Normal file
@@ -0,0 +1,547 @@
|
||||
openapi: 3.0.1
|
||||
info:
|
||||
title: Operator Market Service API
|
||||
description: |
|
||||
算子市场服务API,提供算子的发布、管理和订阅功能。
|
||||
|
||||
主要功能:
|
||||
- 算子发布和管理
|
||||
- 算子版本控制
|
||||
- 算子评分和评论
|
||||
- 算子分类和标签
|
||||
- 算子下载和安装
|
||||
version: 1.0.0
|
||||
tags:
|
||||
- name: Operator
|
||||
- name: Category
|
||||
- name: Label
|
||||
paths:
|
||||
/operators/list:
|
||||
post:
|
||||
summary: 获取算子列表
|
||||
deprecated: false
|
||||
description: 分页查询算子列表,支持按分类、标签等条件筛选
|
||||
tags:
|
||||
- Operator
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
page:
|
||||
type: integer
|
||||
description: 页数
|
||||
size:
|
||||
type: integer
|
||||
description: 单页数量
|
||||
categories:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
description: 分类id列表
|
||||
operatorName:
|
||||
type: string
|
||||
description: 算子名称
|
||||
labelName:
|
||||
type: string
|
||||
description: 标签名称
|
||||
isStar:
|
||||
type: boolean
|
||||
description: 是否收藏
|
||||
required:
|
||||
- page
|
||||
- size
|
||||
- categories
|
||||
examples: {}
|
||||
responses:
|
||||
'200':
|
||||
description: 成功返回算子列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OperatorResponse'
|
||||
headers: {}
|
||||
security: []
|
||||
/operators/create:
|
||||
post:
|
||||
summary: 创建新算子
|
||||
deprecated: false
|
||||
description: 创建并发布一个新的算子
|
||||
tags:
|
||||
- Operator
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateOperatorRequest'
|
||||
example: null
|
||||
responses:
|
||||
'201':
|
||||
description: 算子创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema: &ref_0
|
||||
$ref: '#/components/schemas/OperatorResponse'
|
||||
headers: {}
|
||||
security: []
|
||||
/operators/upload:
|
||||
post:
|
||||
summary: 上传新算子
|
||||
deprecated: false
|
||||
      description: 上传并发布一个新的算子
|
||||
tags:
|
||||
- Operator
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
multipart/form-data:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
file:
|
||||
type: string
|
||||
format: binary
|
||||
example: ''
|
||||
description:
|
||||
type: string
|
||||
example: ''
|
||||
examples: {}
|
||||
responses:
|
||||
'201':
|
||||
description: 算子创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_0
|
||||
headers: {}
|
||||
security: []
|
||||
/operators/{id}:
|
||||
get:
|
||||
summary: 获取算子详情
|
||||
deprecated: false
|
||||
description: 根据ID获取算子的详细信息
|
||||
tags:
|
||||
- Operator
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
description: 算子ID
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 成功返回算子详情
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_0
|
||||
headers: {}
|
||||
'404':
|
||||
description: 算子不存在
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ErrorResponse'
|
||||
headers: {}
|
||||
security: []
|
||||
put:
|
||||
summary: 更新算子信息
|
||||
deprecated: false
|
||||
description: 根据ID更新算子信息
|
||||
tags:
|
||||
- Operator
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
description: 算子ID
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdateOperatorRequest'
|
||||
example: null
|
||||
responses:
|
||||
'200':
|
||||
description: 算子更新成功
|
||||
content:
|
||||
application/json:
|
||||
schema: *ref_0
|
||||
headers: {}
|
||||
security: []
|
||||
/category:
|
||||
post:
|
||||
summary: 创建算子分类
|
||||
deprecated: false
|
||||
description: ''
|
||||
tags:
|
||||
- Category
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 名称
|
||||
parentId:
|
||||
type: integer
|
||||
description: 父分类id
|
||||
required:
|
||||
- name
|
||||
- parentId
|
||||
responses:
|
||||
'201':
|
||||
description: ''
|
||||
headers: {}
|
||||
security: []
|
||||
delete:
|
||||
summary: 删除算子分类
|
||||
deprecated: false
|
||||
description: ''
|
||||
tags:
|
||||
- Category
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
description: ID 编号
|
||||
required:
|
||||
- id
|
||||
responses:
|
||||
'204':
|
||||
description: ''
|
||||
headers: {}
|
||||
security: []
|
||||
/categories/tree:
|
||||
get:
|
||||
summary: 获取算子分类列表
|
||||
deprecated: false
|
||||
description: 获取所有可用的算子分类
|
||||
tags:
|
||||
- Category
|
||||
parameters: []
|
||||
responses:
|
||||
'200':
|
||||
description: 成功返回分类列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
name:
|
||||
type: string
|
||||
count:
|
||||
type: integer
|
||||
categories:
|
||||
$ref: '#/components/schemas/CategoryResponse'
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
- count
|
||||
- categories
|
||||
headers: {}
|
||||
security: []
|
||||
/labels:
|
||||
get:
|
||||
summary: 获取算子标签列表
|
||||
deprecated: false
|
||||
description: 获取所有算子的标签
|
||||
tags:
|
||||
- Label
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
description: 页码,从0开始
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
description: 每页大小
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: keyword
|
||||
in: query
|
||||
description: 关键词搜索
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 成功返回标签列表
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/LabelResponse'
|
||||
headers: {}
|
||||
security: []
|
||||
post:
|
||||
summary: 创建标签
|
||||
deprecated: false
|
||||
description: 批量创建标签
|
||||
tags:
|
||||
- Label
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 名称
|
||||
required:
|
||||
- name
|
||||
example: veniam
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
headers: {}
|
||||
security: []
|
||||
delete:
|
||||
summary: 删除标签
|
||||
deprecated: false
|
||||
description: 批量删除标签
|
||||
tags:
|
||||
- Label
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
format: int64
|
||||
description: 标签id列表
|
||||
example: null
|
||||
responses:
|
||||
'204':
|
||||
description: 删除成功
|
||||
headers: {}
|
||||
security: []
|
||||
/labels/{id}:
|
||||
put:
|
||||
summary: 更新标签
|
||||
deprecated: false
|
||||
description: 更新标签
|
||||
tags:
|
||||
- Label
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
description: 标签ID
|
||||
required: true
|
||||
example: ''
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/UpdateLabelRequest'
|
||||
example: null
|
||||
responses:
|
||||
'200':
|
||||
description: 更新成功
|
||||
headers: {}
|
||||
security: []
|
||||
components:
|
||||
schemas:
|
||||
UpdateLabelRequest:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
description: 标签id
|
||||
name:
|
||||
type: string
|
||||
description: 标签名称
|
||||
Response:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
data:
|
||||
type: object
|
||||
properties: {}
|
||||
required:
|
||||
- code
|
||||
- message
|
||||
- data
|
||||
LabelResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 标签ID
|
||||
name:
|
||||
type: string
|
||||
description: 标签名称
|
||||
SubCategory:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
description: 分类id
|
||||
name:
|
||||
type: string
|
||||
description: 分类名称
|
||||
count:
|
||||
type: integer
|
||||
type:
|
||||
type: string
|
||||
description: 分类类型(0:预置,1:自定义)
|
||||
parentId:
|
||||
type: integer
|
||||
description: 父分类id
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
- type
|
||||
- parentId
|
||||
- count
|
||||
CategoryResponse:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/SubCategory'
|
||||
UpdateOperatorRequest:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 算子名称
|
||||
description:
|
||||
type: string
|
||||
description: 算子描述
|
||||
version:
|
||||
type: string
|
||||
description: 算子版本
|
||||
category:
|
||||
type: string
|
||||
description: 算子分类
|
||||
documentation:
|
||||
type: string
|
||||
description: 文档内容
|
||||
ErrorResponse:
|
||||
type: object
|
||||
properties:
|
||||
error:
|
||||
type: string
|
||||
description: 错误代码
|
||||
message:
|
||||
type: string
|
||||
description: 错误信息
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 错误时间
|
||||
OperatorResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 算子ID
|
||||
name:
|
||||
type: string
|
||||
description: 算子名称
|
||||
description:
|
||||
type: string
|
||||
description: 算子描述
|
||||
version:
|
||||
type: string
|
||||
description: 算子版本
|
||||
inputs:
|
||||
type: string
|
||||
description: 输入类型
|
||||
outputs:
|
||||
type: string
|
||||
          description: 输出类型
|
||||
categories:
|
||||
type: array
|
||||
description: 算子分类列表
|
||||
items:
|
||||
type: integer
|
||||
runtime:
|
||||
type: string
|
||||
description: 运行时设置
|
||||
settings:
|
||||
type: string
|
||||
description: 算子参数
|
||||
isStar:
|
||||
type: boolean
|
||||
description: 是否收藏
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 创建时间
|
||||
updatedAt:
|
||||
type: string
|
||||
format: date-time
|
||||
description: 更新时间
|
||||
required:
|
||||
- language
|
||||
- modal
|
||||
- inputs
|
||||
- outputs
|
||||
- runtime
|
||||
- settings
|
||||
- isStar
|
||||
CreateOperatorRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- description
|
||||
- version
|
||||
- category
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description: 算子名称
|
||||
description:
|
||||
type: string
|
||||
description: 算子描述
|
||||
version:
|
||||
type: string
|
||||
description: 算子版本
|
||||
category:
|
||||
type: string
|
||||
description: 算子分类
|
||||
documentation:
|
||||
type: string
|
||||
description: 文档内容
|
||||
securitySchemes: {}
|
||||
servers: []
|
||||
639
backend/openapi/specs/pipeline-orchestration.yaml
Normal file
@@ -0,0 +1,639 @@
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: Pipeline Orchestration Service API
|
||||
description: 流程编排服务API - 可视化、模板、执行计划
|
||||
version: 1.0.0
|
||||
contact:
|
||||
name: Data Mate Platform Team
|
||||
|
||||
servers:
|
||||
- url: http://localhost:8087
|
||||
description: Development server
|
||||
|
||||
tags:
|
||||
- name: pipelines
|
||||
description: 流水线管理
|
||||
- name: pipeline-templates
|
||||
description: 流水线模板
|
||||
- name: executions
|
||||
description: 执行管理
|
||||
- name: workflows
|
||||
description: 工作流编排
|
||||
|
||||
paths:
|
||||
/api/v1/pipelines:
|
||||
get:
|
||||
tags:
|
||||
- pipelines
|
||||
summary: 获取流水线列表
|
||||
parameters:
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
- name: size
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 20
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/PipelineStatus'
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PipelinePageResponse'
|
||||
|
||||
post:
|
||||
tags:
|
||||
- pipelines
|
||||
summary: 创建流水线
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreatePipelineRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PipelineResponse'
|
||||
|
||||
/api/v1/pipelines/{pipelineId}:
|
||||
get:
|
||||
tags:
|
||||
- pipelines
|
||||
summary: 获取流水线详情
|
||||
parameters:
|
||||
- name: pipelineId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PipelineDetailResponse'
|
||||
|
||||
put:
|
||||
tags:
|
||||
- pipelines
|
||||
summary: 更新流水线
|
||||
parameters:
|
||||
- name: pipelineId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/UpdatePipelineRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 更新成功
|
||||
|
||||
/api/v1/pipelines/{pipelineId}/execute:
|
||||
post:
|
||||
tags:
|
||||
- executions
|
||||
summary: 执行流水线
|
||||
parameters:
|
||||
- name: pipelineId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
required: false
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ExecutePipelineRequest'
|
||||
responses:
|
||||
'200':
|
||||
description: 执行开始
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PipelineExecutionResponse'
|
||||
|
||||
/api/v1/executions:
|
||||
get:
|
||||
tags:
|
||||
- executions
|
||||
summary: 获取执行历史
|
||||
parameters:
|
||||
- name: pipelineId
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
$ref: '#/components/schemas/ExecutionStatus'
|
||||
- name: page
|
||||
in: query
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ExecutionPageResponse'
|
||||
|
||||
/api/v1/executions/{executionId}:
|
||||
get:
|
||||
tags:
|
||||
- executions
|
||||
summary: 获取执行详情
|
||||
parameters:
|
||||
- name: executionId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ExecutionDetailResponse'
|
||||
|
||||
/api/v1/executions/{executionId}/stop:
|
||||
post:
|
||||
tags:
|
||||
- executions
|
||||
summary: 停止执行
|
||||
parameters:
|
||||
- name: executionId
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 停止成功
|
||||
|
||||
/api/v1/templates:
|
||||
get:
|
||||
tags:
|
||||
- pipeline-templates
|
||||
summary: 获取模板列表
|
||||
parameters:
|
||||
- name: category
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: 获取成功
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/PipelineTemplateResponse'
|
||||
|
||||
post:
|
||||
tags:
|
||||
- pipeline-templates
|
||||
summary: 创建模板
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreatePipelineTemplateRequest'
|
||||
responses:
|
||||
'201':
|
||||
description: 创建成功
|
||||
|
||||
components:
|
||||
schemas:
|
||||
PipelineResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/PipelineStatus'
|
||||
version:
|
||||
type: string
|
||||
category:
|
||||
type: string
|
||||
tags:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
createdBy:
|
||||
type: string
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
lastModified:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
PipelineDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/PipelineResponse'
|
||||
- type: object
|
||||
properties:
|
||||
definition:
|
||||
$ref: '#/components/schemas/PipelineDefinition'
|
||||
parameters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/PipelineParameter'
|
||||
dependencies:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
statistics:
|
||||
$ref: '#/components/schemas/PipelineStatistics'
|
||||
|
||||
PipelinePageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/PipelineResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
CreatePipelineRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- definition
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
category:
|
||||
type: string
|
||||
definition:
|
||||
$ref: '#/components/schemas/PipelineDefinition'
|
||||
parameters:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/PipelineParameter'
|
||||
tags:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
|
||||
UpdatePipelineRequest:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
definition:
|
||||
$ref: '#/components/schemas/PipelineDefinition'
|
||||
status:
|
||||
$ref: '#/components/schemas/PipelineStatus'
|
||||
|
||||
ExecutePipelineRequest:
|
||||
type: object
|
||||
properties:
|
||||
parameters:
|
||||
type: object
|
||||
description: 执行参数
|
||||
environment:
|
||||
type: string
|
||||
description: 执行环境
|
||||
priority:
|
||||
type: integer
|
||||
description: 优先级
|
||||
|
||||
PipelineExecutionResponse:
|
||||
type: object
|
||||
properties:
|
||||
executionId:
|
||||
type: string
|
||||
pipelineId:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/ExecutionStatus'
|
||||
startTime:
|
||||
type: string
|
||||
format: date-time
|
||||
message:
|
||||
type: string
|
||||
|
||||
ExecutionResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
pipelineId:
|
||||
type: string
|
||||
pipelineName:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/ExecutionStatus'
|
||||
progress:
|
||||
type: number
|
||||
format: double
|
||||
startTime:
|
||||
type: string
|
||||
format: date-time
|
||||
endTime:
|
||||
type: string
|
||||
format: date-time
|
||||
duration:
|
||||
type: integer
|
||||
format: int64
|
||||
description: 执行时长(毫秒)
|
||||
|
||||
ExecutionDetailResponse:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/ExecutionResponse'
|
||||
- type: object
|
||||
properties:
|
||||
steps:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ExecutionStep'
|
||||
logs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ExecutionLog'
|
||||
metrics:
|
||||
$ref: '#/components/schemas/ExecutionMetrics'
|
||||
artifacts:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ExecutionArtifact'
|
||||
|
||||
ExecutionPageResponse:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ExecutionResponse'
|
||||
totalElements:
|
||||
type: integer
|
||||
format: int64
|
||||
totalPages:
|
||||
type: integer
|
||||
size:
|
||||
type: integer
|
||||
number:
|
||||
type: integer
|
||||
|
||||
PipelineTemplateResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
category:
|
||||
type: string
|
||||
version:
|
||||
type: string
|
||||
definition:
|
||||
$ref: '#/components/schemas/PipelineDefinition'
|
||||
usageCount:
|
||||
type: integer
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
CreatePipelineTemplateRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
- definition
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
category:
|
||||
type: string
|
||||
definition:
|
||||
$ref: '#/components/schemas/PipelineDefinition'
|
||||
|
||||
PipelineDefinition:
|
||||
type: object
|
||||
properties:
|
||||
nodes:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/PipelineNode'
|
||||
edges:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/PipelineEdge'
|
||||
settings:
|
||||
type: object
|
||||
description: 流水线设置
|
||||
|
||||
PipelineNode:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
enum: [OPERATOR, CONDITION, LOOP, PARALLEL]
|
||||
name:
|
||||
type: string
|
||||
operatorId:
|
||||
type: string
|
||||
configuration:
|
||||
type: object
|
||||
position:
|
||||
$ref: '#/components/schemas/NodePosition'
|
||||
|
||||
PipelineEdge:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
source:
|
||||
type: string
|
||||
target:
|
||||
type: string
|
||||
condition:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
enum: [SUCCESS, FAILURE, ALWAYS]
|
||||
|
||||
NodePosition:
|
||||
type: object
|
||||
properties:
|
||||
x:
|
||||
type: number
|
||||
y:
|
||||
type: number
|
||||
|
||||
PipelineParameter:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
required:
|
||||
type: boolean
|
||||
defaultValue:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
|
||||
PipelineStatistics:
|
||||
type: object
|
||||
properties:
|
||||
totalExecutions:
|
||||
type: integer
|
||||
successfulExecutions:
|
||||
type: integer
|
||||
failedExecutions:
|
||||
type: integer
|
||||
averageDuration:
|
||||
type: number
|
||||
format: double
|
||||
lastExecutionTime:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
ExecutionStep:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
nodeId:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
status:
|
||||
$ref: '#/components/schemas/ExecutionStatus'
|
||||
startTime:
|
||||
type: string
|
||||
format: date-time
|
||||
endTime:
|
||||
type: string
|
||||
format: date-time
|
||||
duration:
|
||||
type: integer
|
||||
format: int64
|
||||
message:
|
||||
type: string
|
||||
|
||||
ExecutionLog:
|
||||
type: object
|
||||
properties:
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
level:
|
||||
type: string
|
||||
enum: [DEBUG, INFO, WARN, ERROR]
|
||||
nodeId:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
|
||||
ExecutionMetrics:
|
||||
type: object
|
||||
properties:
|
||||
totalNodes:
|
||||
type: integer
|
||||
completedNodes:
|
||||
type: integer
|
||||
failedNodes:
|
||||
type: integer
|
||||
cpuUsage:
|
||||
type: number
|
||||
format: double
|
||||
memoryUsage:
|
||||
type: number
|
||||
format: double
|
||||
throughput:
|
||||
type: number
|
||||
format: double
|
||||
|
||||
ExecutionArtifact:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
size:
|
||||
type: integer
|
||||
format: int64
|
||||
path:
|
||||
type: string
|
||||
createdAt:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
PipelineStatus:
|
||||
type: string
|
||||
enum:
|
||||
- DRAFT
|
||||
- ACTIVE
|
||||
- INACTIVE
|
||||
- DEPRECATED
|
||||
|
||||
ExecutionStatus:
|
||||
type: string
|
||||
enum:
|
||||
- PENDING
|
||||
- RUNNING
|
||||
- SUCCESS
|
||||
- FAILED
|
||||
- CANCELLED
|
||||
- SKIPPED
|
||||
|
||||
securitySchemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
security:
|
||||
- BearerAuth: []
|
||||
212
backend/pom.xml
Normal file
@@ -0,0 +1,212 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-mate-platform</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>DataMatePlatform</name>
|
||||
<description>一站式数据工作平台,面向模型微调与RAG检索</description>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
|
||||
<spring-boot.version>3.5.6</spring-boot.version>
|
||||
<spring-cloud.version>2025.0.0</spring-cloud.version>
|
||||
<mysql.version>8.0.33</mysql.version>
|
||||
<postgresql.version>42.6.0</postgresql.version>
|
||||
<redis.version>3.2.0</redis.version>
|
||||
<elasticsearch.version>8.11.0</elasticsearch.version>
|
||||
<junit.version>5.10.0</junit.version>
|
||||
<springdoc.version>2.2.0</springdoc.version>
|
||||
<jackson-databind-nullable.version>0.2.6</jackson-databind-nullable.version>
|
||||
<jakarta-validation.version>3.0.2</jakarta-validation.version>
|
||||
<jakarta.persistence.version>3.1.0</jakarta.persistence.version>
|
||||
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version>
|
||||
<mybatis-plus.version>3.5.14</mybatis-plus.version>
|
||||
<mapstruct.version>1.6.3</mapstruct.version>
|
||||
<lombok.version>1.18.32</lombok.version>
|
||||
<lombok-mapstruct-binding.version>0.2.0</lombok-mapstruct-binding.version>
|
||||
<poi.version>5.4.0</poi.version>
|
||||
<log4j2.version>2.21.1</log4j2.version>
|
||||
</properties>
|
||||
|
||||
<modules>
|
||||
<!-- 共享库 -->
|
||||
<module>shared/domain-common</module>
|
||||
<module>shared/security-common</module>
|
||||
|
||||
<!-- 核心服务 -->
|
||||
<module>services/data-management-service</module>
|
||||
<module>services/data-collection-service</module>
|
||||
<module>services/operator-market-service</module>
|
||||
<module>services/data-cleaning-service</module>
|
||||
<module>services/data-synthesis-service</module>
|
||||
<module>services/data-annotation-service</module>
|
||||
<module>services/data-evaluation-service</module>
|
||||
<module>services/pipeline-orchestration-service</module>
|
||||
<module>services/execution-engine-service</module>
|
||||
|
||||
<!-- RAG服务 -->
|
||||
<module>services/rag-indexer-service</module>
|
||||
<module>services/rag-query-service</module>
|
||||
|
||||
<!-- 主启动模块 -->
|
||||
<module>services/main-application</module>
|
||||
|
||||
<!-- API Gateway微服务 -->
|
||||
<module>api-gateway</module>
|
||||
</modules>
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-dependencies</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-dependencies</artifactId>
|
||||
<version>${spring-cloud.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- OpenAPI相关依赖版本管理 -->
|
||||
<dependency>
|
||||
<groupId>org.springdoc</groupId>
|
||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
<version>${springdoc.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>jackson-databind-nullable</artifactId>
|
||||
<version>${jackson-databind-nullable.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>jakarta.validation</groupId>
|
||||
<artifactId>jakarta.validation-api</artifactId>
|
||||
<version>${jakarta-validation.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>jakarta.persistence</groupId>
|
||||
<artifactId>jakarta.persistence-api</artifactId>
|
||||
<version>${jakarta.persistence.version}</version>
|
||||
</dependency>
|
||||
<!-- MyBatis version alignment -->
|
||||
<dependency>
|
||||
<groupId>com.baomidou</groupId>
|
||||
<artifactId>mybatis-plus-bom</artifactId>
|
||||
<version>${mybatis-plus.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mapstruct</groupId>
|
||||
<artifactId>mapstruct</artifactId>
|
||||
<version>${mapstruct.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>${lombok.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<version>${mysql.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>${poi.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-logging</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-logging</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.baomidou</groupId>
|
||||
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
|
||||
<version>${mybatis-plus.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.baomidou</groupId>
|
||||
<artifactId>mybatis-plus-jsqlparser</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Log4j2 API -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-log4j2</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>${lombok.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mapstruct</groupId>
|
||||
<artifactId>mapstruct</artifactId>
|
||||
<version>${mapstruct.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>${poi.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<version>${spring-boot.version}</version>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
101
backend/services/data-annotation-service/pom.xml
Normal file
@@ -0,0 +1,101 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-mate-platform</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>data-annotation-service</artifactId>
|
||||
<name>Data Annotation Service</name>
|
||||
<description>数据标注服务</description>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>domain-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-websocket</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<version>${mysql.version}</version>
|
||||
</dependency>
|
||||
<!-- OpenAPI Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springdoc</groupId>
|
||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
<version>2.0.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>jackson-databind-nullable</artifactId>
|
||||
<version>0.2.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>jakarta.validation</groupId>
|
||||
<artifactId>jakarta.validation-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<!-- OpenAPI Generator Plugin -->
|
||||
<plugin>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>openapi-generator-maven-plugin</artifactId>
|
||||
<version>6.6.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>generate</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<inputSpec>${project.basedir}/../../openapi/specs/data-annotation.yaml</inputSpec>
|
||||
<generatorName>spring</generatorName>
|
||||
<output>${project.build.directory}/generated-sources/openapi</output>
|
||||
<apiPackage>com.datamate.annotation.interfaces.api</apiPackage>
|
||||
<modelPackage>com.datamate.annotation.interfaces.dto</modelPackage>
|
||||
<configOptions>
|
||||
<interfaceOnly>true</interfaceOnly>
|
||||
<useTags>true</useTags>
|
||||
<skipDefaultInterface>true</skipDefaultInterface>
|
||||
<hideGenerationTimestamp>true</hideGenerationTimestamp>
|
||||
<java8>true</java8>
|
||||
<dateLibrary>java8</dateLibrary>
|
||||
<useBeanValidation>true</useBeanValidation>
|
||||
<performBeanValidation>true</performBeanValidation>
|
||||
<useSpringBoot3>true</useSpringBoot3>
|
||||
<documentationProvider>springdoc</documentationProvider>
|
||||
</configOptions>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
||||
BIN
backend/services/data-cleaning-service/img.png
Normal file
|
After Width: | Height: | Size: 134 KiB |
BIN
backend/services/data-cleaning-service/img1.png
Normal file
|
After Width: | Height: | Size: 48 KiB |
BIN
backend/services/data-cleaning-service/img2.png
Normal file
|
After Width: | Height: | Size: 91 KiB |
87
backend/services/data-cleaning-service/pom.xml
Normal file
@@ -0,0 +1,87 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-mate-platform</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>data-cleaning-service</artifactId>
|
||||
<name>Data Cleaning Service</name>
|
||||
<description>数据清洗服务</description>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>domain-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springdoc</groupId>
|
||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>jackson-databind-nullable</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.baomidou</groupId>
|
||||
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
<version>1.26.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mapstruct</groupId>
|
||||
<artifactId>mapstruct</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mapstruct</groupId>
|
||||
<artifactId>mapstruct-processor</artifactId>
|
||||
<version>${mapstruct.version}</version>
|
||||
            <scope>provided</scope> <!-- needed at compile time only, not at runtime -->
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.data</groupId>
|
||||
<artifactId>spring-data-commons</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.datamate.cleaning;
|
||||
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.scheduling.annotation.EnableAsync;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
|
||||
/**
 * Spring Boot configuration entry point for the data-cleaning service module.
 *
 * <p>Enables asynchronous execution and scheduling, and component-scans both this
 * module's packages ("com.datamate.cleaning") and the shared library packages
 * ("com.datamate.shared").
 *
 * <p>NOTE(review): the original Javadoc described this class as the "data
 * collection/aggregation service based on DataX" — that looks like a copy-paste
 * from another module, since this class lives in the data-cleaning service.
 * Confirm and keep the description in sync with the module's actual purpose.
 */
@SpringBootApplication
@EnableAsync
@EnableScheduling
@ComponentScan(basePackages = {
    "com.datamate.cleaning",
    "com.datamate.shared"
})
public class DataCleaningServiceConfiguration {
    // Configuration class for JAR packaging - no main method needed
    // (presumably bootstrapped by an aggregator application module - verify).
}
|
||||
@@ -0,0 +1,120 @@
|
||||
package com.datamate.cleaning.application.httpclient;
|
||||
|
||||
import com.datamate.cleaning.domain.model.CreateDatasetRequest;
|
||||
import com.datamate.cleaning.domain.model.DatasetResponse;
|
||||
import com.datamate.cleaning.domain.model.PagedDatasetFileResponse;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.ErrorCodeImpl;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.text.MessageFormat;
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class DatasetClient {
|
||||
private static final String BASE_URL = "http://localhost:8080/api";
|
||||
|
||||
private static final String CREATE_DATASET_URL = BASE_URL + "/data-management/datasets";
|
||||
|
||||
private static final String GET_DATASET_URL = BASE_URL + "/data-management/datasets/{0}";
|
||||
|
||||
private static final String GET_DATASET_FILE_URL = BASE_URL + "/data-management/datasets/{0}/files";
|
||||
|
||||
private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
static {
|
||||
OBJECT_MAPPER.registerModule(new JavaTimeModule());
|
||||
}
|
||||
|
||||
public static DatasetResponse createDataset(String name, String type) {
|
||||
CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
|
||||
createDatasetRequest.setName(name);
|
||||
createDatasetRequest.setDatasetType(type);
|
||||
|
||||
String jsonPayload;
|
||||
try {
|
||||
jsonPayload = OBJECT_MAPPER.writeValueAsString(createDatasetRequest);
|
||||
} catch (IOException e) {
|
||||
log.error("Error occurred while converting the object.", e);
|
||||
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
|
||||
}
|
||||
|
||||
HttpRequest request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(CREATE_DATASET_URL))
|
||||
.timeout(Duration.ofSeconds(30))
|
||||
.header("Content-Type", "application/json")
|
||||
.POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
|
||||
.build();
|
||||
|
||||
return sendAndReturn(request, DatasetResponse.class);
|
||||
}
|
||||
|
||||
public static DatasetResponse getDataset(String datasetId) {
|
||||
HttpRequest request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(MessageFormat.format(GET_DATASET_URL, datasetId)))
|
||||
.timeout(Duration.ofSeconds(30))
|
||||
.header("Content-Type", "application/json")
|
||||
.GET()
|
||||
.build();
|
||||
|
||||
return sendAndReturn(request, DatasetResponse.class);
|
||||
}
|
||||
|
||||
public static PagedDatasetFileResponse getDatasetFile(String datasetId, PageRequest page) {
|
||||
String url = buildQueryParams(MessageFormat.format(GET_DATASET_FILE_URL, datasetId),
|
||||
Map.of("page", page.getPageNumber(), "size", page.getPageSize()));
|
||||
HttpRequest request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(url))
|
||||
.timeout(Duration.ofSeconds(30))
|
||||
.header("Content-Type", "application/json")
|
||||
.GET()
|
||||
.build();
|
||||
|
||||
return sendAndReturn(request, PagedDatasetFileResponse.class);
|
||||
}
|
||||
|
||||
private static <T> T sendAndReturn(HttpRequest request, Class<T> clazz) {
|
||||
try {
|
||||
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
int statusCode = response.statusCode();
|
||||
String responseBody = response.body();
|
||||
JsonNode jsonNode = OBJECT_MAPPER.readTree(responseBody);
|
||||
|
||||
if (statusCode < 200 || statusCode >= 300) {
|
||||
String code = jsonNode.get("code").asText();
|
||||
String message = jsonNode.get("message").asText();
|
||||
throw BusinessException.of(ErrorCodeImpl.of(code, message));
|
||||
}
|
||||
return OBJECT_MAPPER.treeToValue(jsonNode.get("data"), clazz);
|
||||
} catch (IOException | InterruptedException e) {
|
||||
log.error("Error occurred while making the request.", e);
|
||||
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
private static String buildQueryParams(String baseUrl, Map<String, Object> params) {
|
||||
if (params == null || params.isEmpty()) {
|
||||
return baseUrl;
|
||||
}
|
||||
|
||||
String queryString = params.entrySet().stream()
|
||||
.map(entry -> entry.getKey() + entry.getValue().toString())
|
||||
.collect(Collectors.joining("&"));
|
||||
|
||||
return baseUrl + (baseUrl.contains("?") ? "&" : "?") + queryString;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
package com.datamate.cleaning.application.httpclient;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.text.MessageFormat;
|
||||
import java.time.Duration;
|
||||
|
||||
@Slf4j
|
||||
public class RuntimeClient {
|
||||
private static final String BASE_URL = "http://runtime:8081/api";
|
||||
|
||||
private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";
|
||||
|
||||
private static final String STOP_TASK_URL = BASE_URL + "/task/{0}/stop";
|
||||
|
||||
private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
|
||||
|
||||
public static void submitTask(String taskId) {
|
||||
send(MessageFormat.format(CREATE_TASK_URL, taskId));
|
||||
}
|
||||
|
||||
public static void stopTask(String taskId) {
|
||||
send(MessageFormat.format(STOP_TASK_URL, taskId));
|
||||
}
|
||||
|
||||
private static void send(String url) {
|
||||
HttpRequest request = HttpRequest.newBuilder()
|
||||
.uri(URI.create(url))
|
||||
.timeout(Duration.ofSeconds(30))
|
||||
.header("Content-Type", "application/json")
|
||||
.POST(HttpRequest.BodyPublishers.noBody())
|
||||
.build();
|
||||
|
||||
try {
|
||||
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
int statusCode = response.statusCode();
|
||||
|
||||
if (statusCode < 200 || statusCode >= 300) {
|
||||
log.error("Request failed with status code: {}", statusCode);
|
||||
throw BusinessException.of(SystemErrorCode.SYSTEM_BUSY);
|
||||
}
|
||||
} catch (IOException | InterruptedException e) {
|
||||
log.error("Error occurred while making the request.", e);
|
||||
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.datamate.cleaning.application.scheduler;
|
||||
|
||||
import com.datamate.cleaning.application.httpclient.RuntimeClient;
|
||||
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTask;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class CleaningTaskScheduler {
|
||||
private final CleaningTaskMapper cleaningTaskMapper;
|
||||
|
||||
private final ExecutorService taskExecutor = Executors.newFixedThreadPool(5);
|
||||
|
||||
public void executeTask(String taskId) {
|
||||
taskExecutor.submit(() -> submitTask(taskId));
|
||||
}
|
||||
|
||||
private void submitTask(String taskId) {
|
||||
CleaningTask task = new CleaningTask();
|
||||
task.setId(taskId);
|
||||
task.setStatus(CleaningTask.StatusEnum.RUNNING);
|
||||
task.setStartedAt(LocalDateTime.now());
|
||||
cleaningTaskMapper.updateTask(task);
|
||||
RuntimeClient.submitTask(taskId);
|
||||
}
|
||||
|
||||
public void stopTask(String taskId) {
|
||||
RuntimeClient.stopTask(taskId);
|
||||
CleaningTask task = new CleaningTask();
|
||||
task.setId(taskId);
|
||||
task.setStatus(CleaningTask.StatusEnum.STOPPED);
|
||||
cleaningTaskMapper.updateTask(task);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,186 @@
|
||||
package com.datamate.cleaning.application.service;
|
||||
|
||||
|
||||
import com.datamate.cleaning.application.httpclient.DatasetClient;
|
||||
import com.datamate.cleaning.application.scheduler.CleaningTaskScheduler;
|
||||
import com.datamate.cleaning.domain.converter.OperatorInstanceConverter;
|
||||
import com.datamate.cleaning.domain.model.DatasetResponse;
|
||||
import com.datamate.cleaning.domain.model.ExecutorType;
|
||||
import com.datamate.cleaning.domain.model.OperatorInstancePo;
|
||||
import com.datamate.cleaning.domain.model.PagedDatasetFileResponse;
|
||||
import com.datamate.cleaning.domain.model.TaskProcess;
|
||||
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
|
||||
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
|
||||
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTask;
|
||||
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
|
||||
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
|
||||
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.yaml.snakeyaml.DumperOptions;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class CleaningTaskService {
|
||||
private final CleaningTaskMapper cleaningTaskMapper;
|
||||
|
||||
private final OperatorInstanceMapper operatorInstanceMapper;
|
||||
|
||||
private final CleaningResultMapper cleaningResultMapper;
|
||||
|
||||
private final CleaningTaskScheduler taskScheduler;
|
||||
|
||||
private final String DATASET_PATH = "/dataset";
|
||||
|
||||
private final String FLOW_PATH = "/flow";
|
||||
|
||||
public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
|
||||
Integer offset = page * size;
|
||||
return cleaningTaskMapper.findTasks(status, keywords, size, offset);
|
||||
}
|
||||
|
||||
public int countTasks(String status, String keywords) {
|
||||
return cleaningTaskMapper.findTasks(status, keywords, null, null).size();
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public CleaningTask createTask(CreateCleaningTaskRequest request) {
|
||||
DatasetResponse destDataset = DatasetClient.createDataset(request.getDestDatasetName(),
|
||||
request.getDestDatasetType());
|
||||
|
||||
DatasetResponse srcDataset = DatasetClient.getDataset(request.getSrcDatasetId());
|
||||
|
||||
CleaningTask task = new CleaningTask();
|
||||
task.setName(request.getName());
|
||||
task.setDescription(request.getDescription());
|
||||
task.setStatus(CleaningTask.StatusEnum.PENDING);
|
||||
String taskId = UUID.randomUUID().toString();
|
||||
task.setId(taskId);
|
||||
task.setSrcDatasetId(request.getSrcDatasetId());
|
||||
task.setSrcDatasetName(request.getSrcDatasetName());
|
||||
task.setDestDatasetId(destDataset.getId());
|
||||
task.setDestDatasetName(destDataset.getName());
|
||||
task.setBeforeSize(srcDataset.getTotalSize());
|
||||
cleaningTaskMapper.insertTask(task);
|
||||
|
||||
List<OperatorInstancePo> instancePos = request.getInstance().stream()
|
||||
.map(OperatorInstanceConverter.INSTANCE::operatorToDo).toList();
|
||||
operatorInstanceMapper.insertInstance(taskId, instancePos);
|
||||
|
||||
prepareTask(task, request.getInstance());
|
||||
scanDataset(taskId, request.getSrcDatasetId());
|
||||
executeTask(taskId);
|
||||
return task;
|
||||
}
|
||||
|
||||
public CleaningTask getTask(String taskId) {
|
||||
return cleaningTaskMapper.findTaskById(taskId);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void deleteTask(String taskId) {
|
||||
cleaningTaskMapper.deleteTask(taskId);
|
||||
operatorInstanceMapper.deleteByInstanceId(taskId);
|
||||
cleaningResultMapper.deleteByInstanceId(taskId);
|
||||
}
|
||||
|
||||
public void executeTask(String taskId) {
|
||||
taskScheduler.executeTask(taskId);
|
||||
}
|
||||
|
||||
private void prepareTask(CleaningTask task, List<OperatorInstance> instances) {
|
||||
TaskProcess process = new TaskProcess();
|
||||
process.setInstanceId(task.getId());
|
||||
process.setDatasetId(task.getDestDatasetId());
|
||||
process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
|
||||
process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
|
||||
process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
|
||||
process.setProcess(instances.stream()
|
||||
.map(instance -> Map.of(instance.getId(), instance.getOverrides()))
|
||||
.toList());
|
||||
|
||||
ObjectMapper jsonMapper = new ObjectMapper(new YAMLFactory());
|
||||
jsonMapper.setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE);
|
||||
JsonNode jsonNode = jsonMapper.valueToTree(process);
|
||||
|
||||
DumperOptions options = new DumperOptions();
|
||||
options.setIndent(2);
|
||||
options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
|
||||
Yaml yaml = new Yaml(options);
|
||||
|
||||
File file = new File(FLOW_PATH + "/" + process.getInstanceId() + "/process.yaml");
|
||||
file.getParentFile().mkdirs();
|
||||
|
||||
try (FileWriter writer = new FileWriter(file)) {
|
||||
yaml.dump(jsonMapper.treeToValue(jsonNode, Map.class), writer);
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to prepare process.yaml.", e);
|
||||
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
private void scanDataset(String taskId, String srcDatasetId) {
|
||||
int pageNumber = 0;
|
||||
int pageSize = 500;
|
||||
PageRequest pageRequest = PageRequest.of(pageNumber, pageSize);
|
||||
PagedDatasetFileResponse datasetFile;
|
||||
do {
|
||||
datasetFile = DatasetClient.getDatasetFile(srcDatasetId, pageRequest);
|
||||
if (datasetFile.getContent() != null && datasetFile.getContent().isEmpty()) {
|
||||
break;
|
||||
}
|
||||
List<Map<String, Object>> files = datasetFile.getContent().stream()
|
||||
.map(content -> Map.of("fileName", (Object) content.getFileName(),
|
||||
"fileSize", content.getFileSize(),
|
||||
"filePath", content.getFilePath(),
|
||||
"fileType", content.getFileType(),
|
||||
"fileId", content.getId()))
|
||||
.toList();
|
||||
writeListMapToJsonlFile(files, FLOW_PATH + "/" + taskId + "/dataset.jsonl");
|
||||
pageNumber += 1;
|
||||
} while (pageNumber < datasetFile.getTotalPages());
|
||||
}
|
||||
|
||||
private void writeListMapToJsonlFile(List<Map<String, Object>> mapList, String fileName) {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
|
||||
if (!mapList.isEmpty()) { // 检查列表是否为空,避免异常
|
||||
String jsonString = objectMapper.writeValueAsString(mapList.get(0));
|
||||
writer.write(jsonString);
|
||||
|
||||
for (int i = 1; i < mapList.size(); i++) {
|
||||
writer.newLine();
|
||||
jsonString = objectMapper.writeValueAsString(mapList.get(i));
|
||||
writer.write(jsonString);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to prepare dataset.jsonl.", e);
|
||||
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
public void stopTask(String taskId) {
|
||||
taskScheduler.stopTask(taskId);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
package com.datamate.cleaning.application.service;
|
||||
|
||||
|
||||
import com.datamate.cleaning.domain.converter.OperatorInstanceConverter;
|
||||
import com.datamate.cleaning.domain.model.OperatorInstancePo;
|
||||
import com.datamate.cleaning.domain.model.TemplateWithInstance;
|
||||
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper;
|
||||
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
|
||||
import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
|
||||
import com.datamate.cleaning.interfaces.dto.OperatorResponse;
|
||||
import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Application service for CRUD over cleaning templates and their ordered
 * operator instances.
 */
@Service
@RequiredArgsConstructor
public class CleaningTemplateService {
    private final CleaningTemplateMapper cleaningTemplateMapper;

    private final OperatorInstanceMapper operatorInstanceMapper;

    /**
     * Returns all templates matching the keyword filter, each with its operator
     * list resolved and ordered by opIndex.
     *
     * The mapper returns one joined row per (template, operator-instance) pair;
     * rows are grouped by template id and re-assembled here. Rows with a blank
     * operatorId represent templates with no instances and are filtered out.
     *
     * NOTE(review): operator.setSettings(...) mutates the shared OperatorResponse
     * held in operatorsMap — if two templates reference the same operator, the
     * second override clobbers the first template's view. A defensive copy of the
     * operator before overriding would fix this; needs a copy API on
     * OperatorResponse to do safely. TODO confirm and fix.
     */
    public List<CleaningTemplate> getTemplates(String keywords) {
        List<OperatorResponse> allOperators = cleaningTemplateMapper.findAllOperators();
        Map<String, OperatorResponse> operatorsMap = allOperators.stream()
                .collect(Collectors.toMap(OperatorResponse::getId, Function.identity()));
        List<TemplateWithInstance> allTemplates = cleaningTemplateMapper.findAllTemplates(keywords);
        Map<String, List<TemplateWithInstance>> templatesMap = allTemplates.stream()
                .collect(Collectors.groupingBy(TemplateWithInstance::getId));
        return templatesMap.entrySet().stream().map(twi -> {
            List<TemplateWithInstance> value = twi.getValue();
            CleaningTemplate template = new CleaningTemplate();
            template.setId(twi.getKey());
            // All rows of one group share the template columns; read them from row 0.
            template.setName(value.get(0).getName());
            template.setDescription(value.get(0).getDescription());
            template.setInstance(value.stream().filter(v -> StringUtils.isNotBlank(v.getOperatorId()))
                    .sorted(Comparator.comparingInt(TemplateWithInstance::getOpIndex))
                    .map(v -> {
                        OperatorResponse operator = operatorsMap.get(v.getOperatorId());
                        if (StringUtils.isNotBlank(v.getSettingsOverride())) {
                            operator.setSettings(v.getSettingsOverride());
                        }
                        return operator;
                    }).toList());
            template.setCreatedAt(value.get(0).getCreatedAt());
            template.setUpdatedAt(value.get(0).getUpdatedAt());
            return template;
        }).toList();
    }

    /**
     * Creates a template with a fresh UUID and persists its operator instances.
     *
     * @return the created template (without instances populated)
     */
    @Transactional
    public CleaningTemplate createTemplate(CreateCleaningTemplateRequest request) {
        CleaningTemplate template = new CleaningTemplate();
        String templateId = UUID.randomUUID().toString();
        template.setId(templateId);
        template.setName(request.getName());
        template.setDescription(request.getDescription());
        cleaningTemplateMapper.insertTemplate(template);

        List<OperatorInstancePo> instancePos = request.getInstance().stream()
                .map(OperatorInstanceConverter.INSTANCE::operatorToDo).toList();
        operatorInstanceMapper.insertInstance(templateId, instancePos);
        return template;
    }

    /** Looks up a single template by id; null when absent. */
    public CleaningTemplate getTemplate(String templateId) {
        return cleaningTemplateMapper.findTemplateById(templateId);
    }

    /**
     * Updates name/description of an existing template.
     * Returns null when the template does not exist (no error is raised).
     */
    @Transactional
    public CleaningTemplate updateTemplate(String templateId, UpdateCleaningTemplateRequest request) {
        CleaningTemplate template = cleaningTemplateMapper.findTemplateById(templateId);
        if (template != null) {
            template.setName(request.getName());
            template.setDescription(request.getDescription());
            cleaningTemplateMapper.updateTemplate(template);
        }
        return template;
    }

    /** Deletes the template and its operator instances. */
    @Transactional
    public void deleteTemplate(String templateId) {
        cleaningTemplateMapper.deleteTemplate(templateId);
        operatorInstanceMapper.deleteByInstanceId(templateId);
    }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package com.datamate.cleaning.domain.converter;
|
||||
|
||||
|
||||
import com.datamate.cleaning.domain.model.OperatorInstancePo;
|
||||
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.mapstruct.Mapper;
|
||||
import org.mapstruct.Mapping;
|
||||
import org.mapstruct.Named;
|
||||
import org.mapstruct.factory.Mappers;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@Mapper
|
||||
public interface OperatorInstanceConverter {
|
||||
OperatorInstanceConverter INSTANCE = Mappers.getMapper(OperatorInstanceConverter.class);
|
||||
|
||||
@Mapping(target = "overrides", source = "overrides", qualifiedByName = "mapToJson")
|
||||
OperatorInstancePo operatorToDo(OperatorInstance instance);
|
||||
|
||||
@Named("mapToJson")
|
||||
static String mapToJson(Map<String, Object> objects) {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
try {
|
||||
return objectMapper.writeValueAsString(objects);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/** Request body used when creating a dataset via the dataset service. */
@Getter
@Setter
@NoArgsConstructor
public class CreateDatasetRequest {
    /** Dataset name */
    private String name;
    /** Dataset description */
    private String description;
    /** Dataset type */
    private String datasetType;
    /** Tag list */
    private List<String> tags;
    /** Data source */
    private String dataSource;
    /** Target location */
    private String targetLocation;
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
|
||||
/** One file entry as returned by the dataset service's file listing. */
@Getter
@Setter
@NoArgsConstructor
public class DatasetFileResponse {
    /** File ID */
    private String id;
    /** File name */
    private String fileName;
    /** Original file name */
    private String originalName;
    /** File type */
    private String fileType;
    /** File size in bytes */
    private Long fileSize;
    /** File status */
    private String status;
    /** File description */
    private String description;
    /** File path */
    private String filePath;
    /** Upload time */
    private LocalDateTime uploadTime;
    /** Last update time */
    private LocalDateTime lastAccessTime;
    /** Uploader */
    private String uploadedBy;
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
 * Dataset entity (aligned with database table t_dm_datasets).
 * Unknown JSON properties are ignored so the remote schema can evolve.
 */
@Getter
@Setter
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class DatasetResponse {
    /** Dataset ID */
    private String id;
    /** Dataset name */
    private String name;
    /** Dataset description */
    private String description;
    /** Dataset type */
    private String datasetType;
    /** Dataset status */
    private String status;
    /** Data source */
    private String dataSource;
    /** Target location */
    private String targetLocation;
    /** File count */
    private Integer fileCount;
    /** Total size in bytes */
    private Long totalSize;
    /** Completion rate (0-100) */
    private Float completionRate;
    /** Creation time */
    private LocalDateTime createdAt;
    /** Update time */
    private LocalDateTime updatedAt;
    /** Creator */
    private String createdBy;
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Dataset type response DTO.
 */
@Getter
@Setter
public class DatasetTypeResponse {
    /** Type code */
    private String code;
    /** Type name */
    private String name;
    /** Type description */
    private String description;
    /** Supported file formats */
    private List<String> supportedFormats;
    /** Icon */
    private String icon;
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
 * Execution backend selector written into a task's process.yaml
 * (see {@link #getValue()} for the serialized form).
 */
public enum ExecutorType {
    DATA_PLATFORM("data_platform"),
    DATA_JUICER_RAY("ray"),
    DATA_JUICER_DEFAULT("default");

    private final String value;

    ExecutorType(String serializedForm) {
        this.value = serializedForm;
    }

    /** Serialized (wire/config) representation of this executor type. */
    public String getValue() {
        return value;
    }

    /**
     * Resolves a serialized value back to its enum constant.
     *
     * @throws IllegalArgumentException when no constant matches
     */
    public static ExecutorType fromValue(String value) {
        for (ExecutorType candidate : values()) {
            if (candidate.value.equals(value)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Unexpected value '" + value + "'");
    }
}
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
|
||||
/** Persistence object for an operator instance bound to a task or template. */
@Getter
@Setter
public class OperatorInstancePo {
    /** Operator id. */
    private String id;

    /** Operator settings overrides, JSON-encoded (see OperatorInstanceConverter). */
    private String overrides;
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/** One page of dataset files as returned by the dataset service. */
@Getter
@Setter
@NoArgsConstructor
public class PagedDatasetFileResponse {
    /** File content list */
    private List<DatasetFileResponse> content;
    /** Current page number */
    private Integer page;
    /** Page size */
    private Integer size;
    /** Total element count */
    private Integer totalElements;
    /** Total page count */
    private Integer totalPages;
    /** Whether this is the first page */
    private Boolean first;
    /** Whether this is the last page */
    private Boolean last;
}
|
||||
@@ -0,0 +1,24 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
 * Pipeline configuration serialized to a task's process.yaml
 * (field names are converted to snake_case on serialization).
 */
@Getter
@Setter
public class TaskProcess {
    /** Task (instance) id. */
    private String instanceId;

    /** Destination dataset id. */
    private String datasetId;

    /** Path to the input file listing (dataset.jsonl). */
    private String datasetPath;

    /** Directory the cleaned output is exported to. */
    private String exportPath;

    /** Executor backend, one of ExecutorType's serialized values. */
    private String executorType;

    /** Ordered operator steps: each entry maps operator id -> override settings. */
    private List<Map<String, Map<String, Object>>> process;
}
|
||||
@@ -0,0 +1,30 @@
|
||||
package com.datamate.cleaning.domain.model;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
|
||||
/**
 * Flat row produced by the template/operator-instance join query; one row per
 * (template, operator-instance) pair. Grouped back into CleaningTemplate
 * objects by CleaningTemplateService.
 */
@Getter
@Setter
public class TemplateWithInstance {
    /** Template id (grouping key). */
    private String id;

    /** Template name (repeated on every row of a group). */
    private String name;

    /** Template description (repeated on every row of a group). */
    private String description;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime createdAt;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime updatedAt;

    /** Operator id of this row's instance; blank when the template has none. */
    private String operatorId;

    /** Position of the operator within the template's pipeline. */
    private Integer opIndex;

    /** Per-template operator settings override (JSON), may be blank. */
    private String settingsOverride;
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.datamate.cleaning.infrastructure.exception;
|
||||
|
||||
import com.datamate.common.infrastructure.exception.ErrorCode;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
/** Error codes specific to the cleaning module. */
@Getter
@AllArgsConstructor
public enum CleanErrorCode implements ErrorCode {
    /**
     * Duplicate cleaning task name.
     */
    DUPLICATE_TASK_NAME("clean.0001", "清洗任务名称重复"),

    /** Failed to create the destination dataset. */
    CREATE_DATASET_FAILED("clean.0002", "创建数据集失败");

    /** Machine-readable error code. */
    private final String code;
    /** Human-readable message (user-facing, kept in original locale). */
    private final String message;
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.datamate.cleaning.infrastructure.persistence.mapper;
|
||||
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
|
||||
/** MyBatis mapper for cleaning results. */
@Mapper
public interface CleaningResultMapper {
    /** Deletes all results belonging to the given task/template instance. */
    void deleteByInstanceId(@Param("instanceId") String instanceId);
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package com.datamate.cleaning.infrastructure.persistence.mapper;
|
||||
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTask;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/** MyBatis mapper for cleaning tasks. */
@Mapper
public interface CleaningTaskMapper {
    /**
     * Finds tasks matching the optional status/keyword filter.
     * Passing null size/offset returns all matching rows (used for counting).
     */
    List<CleaningTask> findTasks(@Param("status") String status, @Param("keywords") String keywords,
                                 @Param("size") Integer size, @Param("offset") Integer offset);

    /** Returns the task with the given id, or null. */
    CleaningTask findTaskById(@Param("taskId") String taskId);

    /** Inserts a new task row. */
    void insertTask(CleaningTask task);

    /** Updates non-null columns of an existing task row. */
    void updateTask(CleaningTask task);

    /** Deletes the task row. */
    void deleteTask(@Param("taskId") String taskId);
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package com.datamate.cleaning.infrastructure.persistence.mapper;
|
||||
|
||||
import com.datamate.cleaning.domain.model.TemplateWithInstance;
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
|
||||
import com.datamate.cleaning.interfaces.dto.OperatorResponse;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/** MyBatis mapper for cleaning templates and the operator catalogue. */
@Mapper
public interface CleaningTemplateMapper {

    /**
     * Returns template rows joined with their operator instances, one row per
     * (template, instance) pair; optionally filtered by keyword.
     */
    List<TemplateWithInstance> findAllTemplates(@Param("keywords") String keywords);

    /** Returns the full operator catalogue. */
    List<OperatorResponse> findAllOperators();

    /** Returns the template with the given id, or null. */
    CleaningTemplate findTemplateById(@Param("templateId") String templateId);

    /** Inserts a new template row. */
    void insertTemplate(CleaningTemplate template);

    /** Updates an existing template row. */
    void updateTemplate(CleaningTemplate template);

    /** Deletes the template row. */
    void deleteTemplate(@Param("templateId") String templateId);
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.datamate.cleaning.infrastructure.persistence.mapper;
|
||||
|
||||
import com.datamate.cleaning.domain.model.OperatorInstancePo;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/** MyBatis mapper for operator instances attached to tasks or templates. */
@Mapper
public interface OperatorInstanceMapper {

    /** Inserts the given instances under one owning task/template id. */
    void insertInstance(@Param("instanceId") String instanceId,
                        @Param("instances") List<OperatorInstancePo> instances);

    /** Deletes all instances belonging to the owning task/template id. */
    void deleteByInstanceId(@Param("instanceId") String instanceId);
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package com.datamate.cleaning.interfaces.api;
|
||||
|
||||
import com.datamate.cleaning.application.service.CleaningTaskService;
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTask;
|
||||
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
|
||||
import com.datamate.common.infrastructure.common.Response;
|
||||
import com.datamate.common.interfaces.PagedResponse;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/cleaning/tasks")
|
||||
@RequiredArgsConstructor
|
||||
public class CleaningTaskController {
|
||||
private final CleaningTaskService cleaningTaskService;
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<Response<PagedResponse<CleaningTask>>> cleaningTasksGet(
|
||||
@RequestParam("page") Integer page,
|
||||
@RequestParam("size") Integer size, @RequestParam(value = "status", required = false) String status,
|
||||
@RequestParam(value = "keywords", required = false) String keywords) {
|
||||
List<CleaningTask> tasks = cleaningTaskService.getTasks(status, keywords, page, size);
|
||||
int count = cleaningTaskService.countTasks(status, keywords);
|
||||
int totalPages = (count + size + 1) / size;
|
||||
return ResponseEntity.ok(Response.ok(PagedResponse.of(tasks, page, count, totalPages)));
|
||||
}
|
||||
|
||||
@PostMapping
|
||||
public ResponseEntity<Response<CleaningTask>> cleaningTasksPost(@RequestBody CreateCleaningTaskRequest request) {
|
||||
return ResponseEntity.ok(Response.ok(cleaningTaskService.createTask(request)));
|
||||
}
|
||||
|
||||
@PostMapping("/{taskId}/stop")
|
||||
public ResponseEntity<Response<Object>> cleaningTasksStop(@PathVariable("taskId") String taskId) {
|
||||
cleaningTaskService.stopTask(taskId);
|
||||
return ResponseEntity.ok(Response.ok(null));
|
||||
}
|
||||
|
||||
@PostMapping("/{taskId}/execute")
|
||||
public ResponseEntity<Response<Object>> cleaningTasksStart(@PathVariable("taskId") String taskId) {
|
||||
cleaningTaskService.executeTask(taskId);
|
||||
return ResponseEntity.ok(Response.ok(null));
|
||||
}
|
||||
|
||||
@GetMapping("/{taskId}")
|
||||
public ResponseEntity<Response<CleaningTask>> cleaningTasksTaskIdGet(@PathVariable("taskId") String taskId) {
|
||||
return ResponseEntity.ok(Response.ok(cleaningTaskService.getTask(taskId)));
|
||||
}
|
||||
|
||||
@DeleteMapping("/{taskId}")
|
||||
public ResponseEntity<Response<Object>> cleaningTasksTaskIdDelete(@PathVariable("taskId") String taskId) {
|
||||
cleaningTaskService.deleteTask(taskId);
|
||||
return ResponseEntity.ok(Response.ok(null));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
package com.datamate.cleaning.interfaces.api;
|
||||
|
||||
import com.datamate.cleaning.application.service.CleaningTemplateService;
|
||||
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
|
||||
import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
|
||||
import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
|
||||
import com.datamate.common.infrastructure.common.Response;
|
||||
import com.datamate.common.interfaces.PagedResponse;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.DeleteMapping;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.PutMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/cleaning/templates")
|
||||
@RequiredArgsConstructor
|
||||
public class CleaningTemplateController {
|
||||
private final CleaningTemplateService cleaningTemplateService;
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<Response<PagedResponse<CleaningTemplate>>> cleaningTemplatesGet(
|
||||
@RequestParam(value = "page", required = false) Integer page,
|
||||
@RequestParam(value = "size", required = false) Integer size,
|
||||
@RequestParam(value = "keywords", required = false) String keyword) {
|
||||
List<CleaningTemplate> templates = cleaningTemplateService.getTemplates(keyword);
|
||||
if (page == null || size == null) {
|
||||
return ResponseEntity.ok(Response.ok(PagedResponse.of(templates.stream()
|
||||
.sorted(Comparator.comparing(CleaningTemplate::getCreatedAt).reversed()).toList())));
|
||||
}
|
||||
int count = templates.size();
|
||||
int totalPages = (count + size + 1) / size;
|
||||
List<CleaningTemplate> limitTemplates = templates.stream()
|
||||
.sorted(Comparator.comparing(CleaningTemplate::getCreatedAt).reversed())
|
||||
.skip((long) page * size)
|
||||
.limit(size).toList();
|
||||
return ResponseEntity.ok(Response.ok(PagedResponse.of(limitTemplates, page, count, totalPages)));
|
||||
}
|
||||
|
||||
@PostMapping
|
||||
public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesPost(
|
||||
@RequestBody CreateCleaningTemplateRequest request) {
|
||||
return ResponseEntity.ok(Response.ok(cleaningTemplateService.createTemplate(request)));
|
||||
}
|
||||
|
||||
@GetMapping("/{templateId}")
|
||||
public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesTemplateIdGet(
|
||||
@PathVariable("templateId") String templateId) {
|
||||
return ResponseEntity.ok(Response.ok(cleaningTemplateService.getTemplate(templateId)));
|
||||
}
|
||||
|
||||
@PutMapping("/{templateId}")
|
||||
public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesTemplateIdPut(
|
||||
@PathVariable("templateId") String templateId, @RequestBody UpdateCleaningTemplateRequest request) {
|
||||
return ResponseEntity.ok(Response.ok(cleaningTemplateService.updateTemplate(templateId, request)));
|
||||
}
|
||||
|
||||
@DeleteMapping("/{templateId}")
|
||||
public ResponseEntity<Response<Object>> cleaningTemplatesTemplateIdDelete(
|
||||
@PathVariable("templateId") String templateId) {
|
||||
cleaningTemplateService.deleteTemplate(templateId);
|
||||
return ResponseEntity.noContent().build();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* CleaningProcess
|
||||
*/
|
||||
|
||||
/**
 * CleaningProcess — progress snapshot of a running cleaning task.
 */
@Getter
@Setter
public class CleaningProcess {
    /** Overall progress ratio/percentage (exact scale set by the producer). */
    private Float process;

    /** Total number of files in the task. */
    private Integer totalFileNum;

    /** Number of files processed so far. */
    private Integer finishedFileNum;
}
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
|
||||
/**
|
||||
* CleaningTask
|
||||
*/
|
||||
|
||||
/**
 * CleaningTask — API/DB representation of a data-cleaning task, including its
 * source/destination datasets, operator pipeline, status, and timestamps.
 */
@Getter
@Setter
public class CleaningTask {

    private String id;

    private String name;

    private String description;

    /** Source dataset id/name the task reads from. */
    private String srcDatasetId;

    private String srcDatasetName;

    /** Destination dataset id/name the cleaned output is written to. */
    private String destDatasetId;

    private String destDatasetName;

    /** Source dataset size (bytes) before cleaning. */
    private long beforeSize;

    /** Output dataset size (bytes) after cleaning. */
    private long afterSize;

    /**
     * Current task status; serialized as its string value via @JsonValue.
     */
    public enum StatusEnum {
        PENDING("PENDING"),

        RUNNING("RUNNING"),

        COMPLETED("COMPLETED"),

        STOPPED("STOPPED"),

        FAILED("FAILED");

        private final String value;

        StatusEnum(String value) {
            this.value = value;
        }

        @JsonValue
        public String getValue() {
            return value;
        }

        /** Deserializes a status string; throws when no constant matches. */
        @JsonCreator
        public static StatusEnum fromValue(String value) {
            for (StatusEnum b : StatusEnum.values()) {
                if (b.value.equals(value)) {
                    return b;
                }
            }
            throw new IllegalArgumentException("Unexpected value '" + value + "'");
        }
    }

    private StatusEnum status;

    /** Optional template this task was instantiated from. */
    private String templateId;

    /** Resolved operator pipeline of this task. */
    private List<OperatorResponse> instance;

    /** Live progress snapshot. */
    private CleaningProcess progress;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime createdAt;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime startedAt;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime finishedAt;
}
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
|
||||
/**
 * CleaningTemplate — DTO for a reusable cleaning pipeline definition.
 */
@Getter
@Setter
public class CleaningTemplate {

    private String id;

    private String name;

    private String description;

    // Operator pipeline making up this template
    private List<OperatorResponse> instance = new ArrayList<>();

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime createdAt;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime updatedAt;
}
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
 * CreateCleaningTaskRequest — request payload for creating a cleaning task.
 */
@Getter
@Setter
public class CreateCleaningTaskRequest {

    private String name;

    private String description;

    // Source dataset to clean
    private String srcDatasetId;

    private String srcDatasetName;

    // Destination dataset to be created/written to
    private String destDatasetName;

    // Type of the destination dataset; valid values not visible here — TODO confirm
    private String destDatasetType;

    // Ordered operator instances to apply
    private List<OperatorInstance> instance = new ArrayList<>();
}
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
 * CreateCleaningTemplateRequest — request payload for creating a cleaning template.
 */
@Getter
@Setter
public class CreateCleaningTemplateRequest {

    private String name;

    private String description;

    // Ordered operator instances making up the template pipeline
    private List<OperatorInstance> instance = new ArrayList<>();
}
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
 * OperatorInstance — one operator in a cleaning pipeline together with its
 * per-instance setting overrides.
 */
@Getter
@Setter
public class OperatorInstance {

    // Id of the operator being instantiated
    private String id;

    // Setting values overriding the operator's defaults
    private Map<String, Object> overrides = new HashMap<>();
}
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
|
||||
/**
 * OperatorResponse — DTO describing a cleaning operator as returned to clients.
 */
@Getter
@Setter
public class OperatorResponse {

    private String id;

    private String name;

    private String description;

    private String version;

    // Input declaration of the operator; presumably JSON-encoded — TODO confirm
    private String inputs;

    // Output declaration of the operator; presumably JSON-encoded — TODO confirm
    private String outputs;

    // Runtime requirements of the operator
    private String runtime;

    // Default settings of the operator; presumably JSON-encoded — TODO confirm
    private String settings;

    // Whether the operator has been starred (favourited)
    private Boolean isStar;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime createdAt;

    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime updatedAt;
}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.datamate.cleaning.interfaces.dto;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
 * UpdateCleaningTemplateRequest — request payload for updating a cleaning template.
 */
@Getter
@Setter
public class UpdateCleaningTemplateRequest {

    // Id of the template to update
    private String id;

    private String name;

    private String description;

    // Replacement operator pipeline for the template
    private List<OperatorInstance> instance = new ArrayList<>();
}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper">
    <!-- Removes all cleaning results produced by the given pipeline instance -->
    <delete id="deleteByInstanceId">
        DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
    </delete>

</mapper>
|
||||
@@ -0,0 +1,56 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
    <sql id="Base_Column_List">
        id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
        after_size, status, created_at, started_at, finished_at
    </sql>

    <!-- Lists tasks, optionally filtered by status and a name keyword, newest first;
         pages when both size and offset are supplied -->
    <select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
        SELECT <include refid="Base_Column_List"/> FROM t_clean_task
        <where>
            <if test="status != null and status != ''">
                AND status = #{status}
            </if>
            <!-- Fixed: guard previously tested "status != ''" instead of "keywords != ''" -->
            <if test="keywords != null and keywords != ''">
                AND name LIKE CONCAT('%', #{keywords}, '%')
            </if>
        </where>
        ORDER BY created_at DESC
        <if test="size != null and offset != null">
            <!-- Fixed: use #{} prepared-statement parameters instead of ${} string
                 splicing to prevent SQL injection via paging parameters -->
            LIMIT #{size} OFFSET #{offset}
        </if>
    </select>

    <select id="findTaskById" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
        SELECT <include refid="Base_Column_List"/> FROM t_clean_task WHERE id = #{taskId}
    </select>

    <insert id="insertTask">
        INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
                                  dest_dataset_name, before_size, after_size, created_at)
        <!-- Fixed: beforeSize/afterSize previously used ${} splicing -->
        VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
                #{destDatasetName}, #{beforeSize}, #{afterSize}, NOW())
    </insert>

    <!-- Partial update: only non-null status/startedAt/finishedAt are written -->
    <update id="updateTask">
        UPDATE t_clean_task
        <set>
            <if test="status != null">
                status = #{status.value},
            </if>
            <if test="startedAt != null">
                started_at = #{startedAt},
            </if>
            <if test="finishedAt != null">
                finished_at = #{finishedAt},
            </if>
        </set>
        WHERE id = #{id}
    </update>

    <delete id="deleteTask">
        DELETE FROM t_clean_task WHERE id = #{taskId}
    </delete>

</mapper>
|
||||
@@ -0,0 +1,38 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper">

    <!-- Lists templates joined with their operator instances, optionally
         filtered by a name keyword, newest first -->
    <select id="findAllTemplates" resultType="com.datamate.cleaning.domain.model.TemplateWithInstance">
        SELECT t.id AS id, name, description, created_at, updated_at, created_by, operator_id, op_index, settings_override
        FROM t_clean_template t LEFT JOIN t_operator_instance o ON t.id = o.instance_id
        <where>
            <!-- Fixed: guard previously tested "status != ''" (not a parameter of
                 this statement) instead of "keywords != ''" -->
            <if test="keywords != null and keywords != ''">
                AND name LIKE CONCAT('%', #{keywords}, '%')
            </if>
        </where>
        ORDER BY created_at DESC
    </select>

    <select id="findAllOperators" resultType="com.datamate.cleaning.interfaces.dto.OperatorResponse">
        SELECT id, name, description, version, inputs, outputs, runtime, settings, is_star, created_at, updated_at
        FROM t_operator
    </select>

    <select id="findTemplateById" resultType="com.datamate.cleaning.interfaces.dto.CleaningTemplate">
        SELECT * FROM t_clean_template WHERE id = #{templateId}
    </select>

    <insert id="insertTemplate">
        INSERT INTO t_clean_template (id, name, description, created_at)
        VALUES (#{id}, #{name}, #{description}, NOW())
    </insert>

    <update id="updateTemplate">
        UPDATE t_clean_template SET name = #{name}, description = #{description}, updated_at = NOW() WHERE id = #{id}
    </update>

    <delete id="deleteTemplate">
        DELETE FROM t_clean_template WHERE id = #{templateId}
    </delete>

</mapper>
|
||||
@@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper">
    <!-- Batch-inserts the operators of a pipeline instance; op_index is the
         1-based position derived from the foreach index -->
    <insert id="insertInstance">
        INSERT INTO t_operator_instance(instance_id, operator_id, op_index, settings_override)
        VALUES
        <foreach collection="instances" item="operator" separator="," index="index">
            (#{instanceId}, #{operator.id}, #{index} + 1, #{operator.overrides})
        </foreach>
    </insert>

    <delete id="deleteByInstanceId">
        <!-- Fixed: removed trailing ';' inside the mapped statement — JDBC
             drivers for some databases reject statements containing it -->
        DELETE FROM t_operator_instance
        WHERE instance_id = #{instanceId}
    </delete>
</mapper>
|
||||
229
backend/services/data-collection-service/README.md
Normal file
@@ -0,0 +1,229 @@
|
||||
# 数据归集服务 (Data Collection Service)
|
||||
|
||||
基于DataX的数据归集和同步服务,提供多数据源之间的数据同步功能。
|
||||
|
||||
## 功能特性
|
||||
|
||||
- 🔗 **多数据源支持**: 支持MySQL、PostgreSQL、Oracle、SQL Server等主流数据库
|
||||
- 📊 **任务管理**: 创建、配置、执行和监控数据同步任务
|
||||
- ⏰ **定时调度**: 支持Cron表达式的定时任务
|
||||
- 📈 **实时监控**: 任务执行进度、状态和性能指标监控
|
||||
- 📝 **执行日志**: 详细的任务执行日志记录
|
||||
- 🔌 **插件化**: DataX Reader/Writer插件化集成
|
||||
|
||||
## 技术架构
|
||||
|
||||
- **框架**: Spring Boot 3.x
|
||||
- **数据库**: MySQL + MyBatis
|
||||
- **同步引擎**: DataX
|
||||
- **API**: OpenAPI 3.0 自动生成
|
||||
- **架构模式**: DDD (领域驱动设计)
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
src/main/java/com/datamate/collection/
|
||||
├── DataCollectionApplication.java # 应用启动类
|
||||
├── domain/ # 领域层
|
||||
│ ├── model/ # 领域模型
|
||||
│ │ ├── DataSource.java # 数据源实体
|
||||
│ │ ├── CollectionTask.java # 归集任务实体
|
||||
│ │ ├── TaskExecution.java # 任务执行记录
|
||||
│ │ └── ExecutionLog.java # 执行日志
|
||||
│ └── service/ # 领域服务
|
||||
│ ├── DataSourceService.java
|
||||
│ ├── CollectionTaskService.java
|
||||
│ ├── TaskExecutionService.java
|
||||
│ └── impl/ # 服务实现
|
||||
├── infrastructure/ # 基础设施层
|
||||
│ ├── config/ # 配置类
|
||||
│ ├── datax/ # DataX执行引擎
|
||||
│ │ └── DataXExecutionEngine.java
|
||||
│ └── persistence/ # 持久化
|
||||
│ ├── mapper/ # MyBatis Mapper
|
||||
│ └── typehandler/ # 类型处理器
|
||||
└── interfaces/ # 接口层
|
||||
├── api/ # OpenAPI生成的接口
|
||||
├── dto/ # OpenAPI生成的DTO
|
||||
└── rest/ # REST控制器
|
||||
├── DataSourceController.java
|
||||
├── CollectionTaskController.java
|
||||
├── TaskExecutionController.java
|
||||
└── exception/ # 异常处理
|
||||
|
||||
src/main/resources/
|
||||
├── mappers/ # MyBatis XML映射文件
|
||||
├── application.properties # 应用配置
|
||||
└── ...
|
||||
```
|
||||
|
||||
## 环境要求
|
||||
|
||||
- Java 17+
|
||||
- Maven 3.6+
|
||||
- MySQL 8.0+
|
||||
- DataX 3.0+
|
||||
- Redis (可选,用于缓存)
|
||||
|
||||
## 配置说明
|
||||
|
||||
### 应用配置 (application.properties)
|
||||
|
||||
```properties
|
||||
# 服务端口
|
||||
server.port=8090
|
||||
|
||||
# 数据库配置
|
||||
spring.datasource.url=jdbc:mysql://localhost:3306/knowledge_base
|
||||
spring.datasource.username=root
|
||||
spring.datasource.password=123456
|
||||
|
||||
# DataX配置
|
||||
datax.home=/runtime/datax
|
||||
datax.python.path=/runtime/datax/bin/datax.py
|
||||
datax.job.timeout=7200
|
||||
datax.job.memory=2g
|
||||
```
|
||||
|
||||
### DataX配置
|
||||
|
||||
确保DataX已正确安装并配置:
|
||||
|
||||
1. 下载DataX到 `/runtime/datax` 目录
|
||||
2. 配置相关Reader/Writer插件
|
||||
3. 确保Python环境可用
|
||||
|
||||
## 数据库初始化
|
||||
|
||||
执行数据库初始化脚本:
|
||||
|
||||
```bash
|
||||
mysql -u root -p knowledge_base < scripts/db/data-collection-init.sql
|
||||
```
|
||||
|
||||
## 构建和运行
|
||||
|
||||
### 1. 编译项目
|
||||
|
||||
```bash
|
||||
cd backend/services/data-collection-service
|
||||
mvn clean compile
|
||||
```
|
||||
|
||||
这将触发OpenAPI代码生成。
|
||||
|
||||
### 2. 打包
|
||||
|
||||
```bash
|
||||
mvn clean package -DskipTests
|
||||
```
|
||||
|
||||
### 3. 运行
|
||||
|
||||
作为独立服务运行:
|
||||
```bash
|
||||
java -jar target/data-collection-service-1.0.0-SNAPSHOT.jar
|
||||
```
|
||||
|
||||
或通过main-application统一启动:
|
||||
```bash
|
||||
cd backend/services/main-application
|
||||
mvn spring-boot:run
|
||||
```
|
||||
|
||||
## API文档
|
||||
|
||||
服务启动后,可通过以下地址访问API文档:
|
||||
|
||||
- Swagger UI: http://localhost:8090/swagger-ui.html
|
||||
- OpenAPI JSON: http://localhost:8090/v3/api-docs
|
||||
|
||||
## 主要API端点
|
||||
|
||||
### 数据源管理
|
||||
|
||||
- `GET /api/v1/collection/datasources` - 获取数据源列表
|
||||
- `POST /api/v1/collection/datasources` - 创建数据源
|
||||
- `GET /api/v1/collection/datasources/{id}` - 获取数据源详情
|
||||
- `PUT /api/v1/collection/datasources/{id}` - 更新数据源
|
||||
- `DELETE /api/v1/collection/datasources/{id}` - 删除数据源
|
||||
- `POST /api/v1/collection/datasources/{id}/test` - 测试连接
|
||||
|
||||
### 归集任务管理
|
||||
|
||||
- `GET /api/v1/collection/tasks` - 获取任务列表
|
||||
- `POST /api/v1/collection/tasks` - 创建任务
|
||||
- `GET /api/v1/collection/tasks/{id}` - 获取任务详情
|
||||
- `PUT /api/v1/collection/tasks/{id}` - 更新任务
|
||||
- `DELETE /api/v1/collection/tasks/{id}` - 删除任务
|
||||
|
||||
### 任务执行管理
|
||||
|
||||
- `POST /api/v1/collection/tasks/{id}/execute` - 执行任务
|
||||
- `POST /api/v1/collection/tasks/{id}/stop` - 停止任务
|
||||
- `GET /api/v1/collection/executions` - 获取执行历史
|
||||
- `GET /api/v1/collection/executions/{executionId}` - 获取执行详情
|
||||
- `GET /api/v1/collection/executions/{executionId}/logs` - 获取执行日志
|
||||
|
||||
### 监控统计
|
||||
|
||||
- `GET /api/v1/collection/monitor/statistics` - 获取统计信息
|
||||
|
||||
## 开发指南
|
||||
|
||||
### 添加新的数据源类型
|
||||
|
||||
1. 在 `DataSource.DataSourceType` 枚举中添加新类型
|
||||
2. 在 `DataXExecutionEngine` 中添加对应的Reader/Writer映射
|
||||
3. 更新数据库表结构和初始化数据
|
||||
|
||||
### 自定义DataX插件
|
||||
|
||||
1. 将插件放置在 `/runtime/datax/plugin` 目录下
|
||||
2. 在 `DataXExecutionEngine` 中配置插件映射关系
|
||||
3. 根据插件要求调整配置模板
|
||||
|
||||
### 扩展监控指标
|
||||
|
||||
1. 在 `StatisticsService` 中添加新的统计逻辑
|
||||
2. 更新 `CollectionStatistics` DTO
|
||||
3. 在数据库中添加相应的统计表或字段
|
||||
|
||||
## 故障排查
|
||||
|
||||
### 常见问题
|
||||
|
||||
1. **DataX执行失败**
|
||||
- 检查DataX安装路径和Python环境
|
||||
- 确认数据源连接配置正确
|
||||
- 查看执行日志获取详细错误信息
|
||||
|
||||
2. **数据库连接失败**
|
||||
- 检查数据库配置和网络连通性
|
||||
- 确认数据库用户权限
|
||||
|
||||
3. **API调用失败**
|
||||
- 检查请求参数格式
|
||||
- 查看应用日志获取详细错误信息
|
||||
|
||||
### 日志查看
|
||||
|
||||
```bash
|
||||
# 应用日志
|
||||
tail -f logs/data-collection-service.log
|
||||
|
||||
# 任务执行日志
|
||||
curl http://localhost:8090/api/v1/collection/executions/{executionId}/logs
|
||||
```
|
||||
|
||||
## 贡献指南
|
||||
|
||||
1. Fork项目
|
||||
2. 创建特性分支: `git checkout -b feature/new-feature`
|
||||
3. 提交更改: `git commit -am 'Add new feature'`
|
||||
4. 推送分支: `git push origin feature/new-feature`
|
||||
5. 提交Pull Request
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT License
|
||||
BIN
backend/services/data-collection-service/image.png
Normal file
|
After Width: | Height: | Size: 79 KiB |
BIN
backend/services/data-collection-service/image1.png
Normal file
|
After Width: | Height: | Size: 52 KiB |
BIN
backend/services/data-collection-service/image2.png
Normal file
|
After Width: | Height: | Size: 67 KiB |
BIN
backend/services/data-collection-service/image3.png
Normal file
|
After Width: | Height: | Size: 107 KiB |
200
backend/services/data-collection-service/pom.xml
Normal file
@@ -0,0 +1,200 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-mate-platform</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>data-collection-service</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Data Collection Service</name>
|
||||
<description>DataX-based data collection and aggregation service</description>
|
||||
|
||||
<dependencies>
|
||||
<!-- Spring Boot Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-validation</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-actuator</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- MyBatis Dependencies -->
|
||||
<dependency>
|
||||
<groupId>com.baomidou</groupId>
|
||||
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Database -->
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<version>8.0.33</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Redis -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-data-redis</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- DataX Dependencies (集成DataX插件) -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-exec</artifactId>
|
||||
<version>1.3</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Connection Pool -->
|
||||
<dependency>
|
||||
<groupId>com.zaxxer</groupId>
|
||||
<artifactId>HikariCP</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Oracle JDBC Driver -->
|
||||
<dependency>
|
||||
<groupId>com.oracle.database.jdbc</groupId>
|
||||
<artifactId>ojdbc8</artifactId>
|
||||
<version>21.5.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- PostgreSQL JDBC Driver -->
|
||||
<dependency>
|
||||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- JSON Processing -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Shared Domain -->
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>domain-common</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<!-- OpenAPI Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springdoc</groupId>
|
||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>jackson-databind-nullable</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>jakarta.validation</groupId>
|
||||
<artifactId>jakarta.validation-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Lombok -->
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<!-- Test Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.16.1</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- OpenAPI Generator Plugin -->
|
||||
<plugin>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>openapi-generator-maven-plugin</artifactId>
|
||||
<version>6.6.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>generate</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<inputSpec>${project.basedir}/../../openapi/specs/data-collection.yaml</inputSpec>
|
||||
<generatorName>spring</generatorName>
|
||||
<output>${project.build.directory}/generated-sources/openapi</output>
|
||||
<apiPackage>com.datamate.collection.interfaces.api</apiPackage>
|
||||
<modelPackage>com.datamate.collection.interfaces.dto</modelPackage>
|
||||
<configOptions>
|
||||
<interfaceOnly>true</interfaceOnly>
|
||||
<useTags>true</useTags>
|
||||
<useSpringBoot3>true</useSpringBoot3>
|
||||
<documentationProvider>springdoc</documentationProvider>
|
||||
<dateLibrary>java8-localdatetime</dateLibrary>
|
||||
<java8>true</java8>
|
||||
</configOptions>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
<classifier>exec</classifier>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<configuration>
|
||||
<source>${maven.compiler.source}</source>
|
||||
<target>${maven.compiler.target}</target>
|
||||
<annotationProcessorPaths>
|
||||
<!-- 顺序很重要 -->
|
||||
<path>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>${lombok.version}</version>
|
||||
</path>
|
||||
<path>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok-mapstruct-binding</artifactId>
|
||||
<version>${lombok-mapstruct-binding.version}</version>
|
||||
</path>
|
||||
<path>
|
||||
<groupId>org.mapstruct</groupId>
|
||||
<artifactId>mapstruct-processor</artifactId>
|
||||
<version>${mapstruct.version}</version>
|
||||
</path>
|
||||
</annotationProcessorPaths>
|
||||
<compilerArgs>
|
||||
<arg>-parameters</arg>
|
||||
<arg>-Amapstruct.defaultComponentModel=spring</arg>
|
||||
</compilerArgs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,24 @@
|
||||
package com.datamate.collection;
|
||||
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.scheduling.annotation.EnableAsync;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
import org.springframework.transaction.annotation.EnableTransactionManagement;
|
||||
|
||||
/**
 * Data-collection service configuration class.
 *
 * <p>DataX-based data collection and synchronization service supporting
 * ingestion from multiple kinds of data sources.</p>
 */
@SpringBootApplication
@EnableAsync
@EnableScheduling
@EnableTransactionManagement
@ComponentScan(basePackages = {
    "com.datamate.collection",
    "com.datamate.shared"
})
public class DataCollectionServiceConfiguration {
    // Configuration class for JAR packaging - no main method needed
}
|
||||
@@ -0,0 +1,66 @@
|
||||
package com.datamate.collection.application.scheduler;
|
||||
|
||||
import com.datamate.collection.application.service.DataxExecutionService;
|
||||
import com.datamate.collection.domain.model.CollectionTask;
|
||||
import com.datamate.collection.domain.model.TaskStatus;
|
||||
import com.datamate.collection.domain.model.TaskExecution;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.scheduling.support.CronExpression;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
@Component
@RequiredArgsConstructor
public class TaskSchedulerInitializer {

    private final CollectionTaskMapper taskMapper;
    private final TaskExecutionMapper executionMapper;
    private final DataxExecutionService dataxExecutionService;

    /**
     * Periodically scans active collection tasks and, for each task carrying a
     * cron expression, triggers a DataX execution once the schedule is due.
     * The scan interval is configurable and defaults to 10 seconds.
     */
    @Scheduled(fixedDelayString = "${datamate.data-collection.scheduler.scan-interval-ms:10000}")
    public void scanAndTrigger() {
        List<CollectionTask> tasks = taskMapper.selectActiveTasks();
        if (tasks == null || tasks.isEmpty()) {
            return;
        }
        LocalDateTime now = LocalDateTime.now();
        for (CollectionTask task : tasks) {
            String cronExpr = task.getScheduleExpression();
            if (!StringUtils.hasText(cronExpr)) {
                // Tasks without a cron expression are not scheduler-managed
                continue;
            }
            try {
                // Skip if the most recent execution is still running
                TaskExecution latest = executionMapper.selectLatestByTaskId(task.getId());
                if (latest != null && latest.getStatus() == TaskStatus.RUNNING) {
                    continue;
                }

                CronExpression cron = CronExpression.parse(cronExpr);
                // Base the next-fire computation on the last start time; with no
                // history, push the base a year back so the first due time has
                // already passed (a newly activated task fires on the first scan)
                LocalDateTime base = latest != null && latest.getStartedAt() != null
                        ? latest.getStartedAt()
                        : now.minusYears(1);
                LocalDateTime nextTime = cron.next(base);

                if (nextTime != null && !nextTime.isAfter(now)) {
                    // Due: trigger one execution (default timeout 1h when unset)
                    TaskExecution exec = dataxExecutionService.createExecution(task);
                    int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
                    dataxExecutionService.runAsync(task, exec.getId(), timeout);
                    log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), now, exec.getId());
                }
            } catch (Exception ex) {
                // A malformed cron or a scheduling failure on one task must not
                // break the scan loop for the remaining tasks
                log.warn("Skip task {} due to invalid cron or scheduling error: {}", task.getId(), ex.getMessage());
            }
        }
    }
}
|
||||
@@ -0,0 +1,85 @@
|
||||
package com.datamate.collection.application.service;
|
||||
|
||||
import com.datamate.collection.domain.model.CollectionTask;
|
||||
import com.datamate.collection.domain.model.TaskExecution;
|
||||
import com.datamate.collection.domain.model.TaskStatus;
|
||||
import com.datamate.collection.domain.model.DataxTemplate;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
|
||||
import com.datamate.collection.interfaces.dto.SyncMode;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
@Slf4j
@Service
@RequiredArgsConstructor
public class CollectionTaskService {
    private final CollectionTaskMapper taskMapper;
    private final TaskExecutionMapper executionMapper;
    private final DataxExecutionService dataxExecutionService;

    /**
     * Creates a collection task in READY state and, for one-shot (ONCE) tasks,
     * immediately triggers a DataX execution.
     *
     * NOTE(review): executeTaskNow runs inside this transaction, so the async
     * runner may start before the insert is committed — confirm this is intended.
     */
    @Transactional
    public CollectionTask create(CollectionTask task) {
        task.setStatus(TaskStatus.READY);
        task.setCreatedAt(LocalDateTime.now());
        task.setUpdatedAt(LocalDateTime.now());
        taskMapper.insert(task);
        executeTaskNow(task);
        return task;
    }

    // Triggers an immediate execution for ONCE-mode tasks; cron-scheduled tasks
    // are picked up elsewhere (presumably by the task scheduler — TODO confirm)
    private void executeTaskNow(CollectionTask task) {
        if (Objects.equals(task.getSyncMode(), SyncMode.ONCE.getValue())) {
            TaskExecution exec = dataxExecutionService.createExecution(task);
            int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
            dataxExecutionService.runAsync(task, exec.getId(), timeout);
            log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), LocalDateTime.now(), exec.getId());
        }
    }

    /** Updates mutable task fields and refreshes the updatedAt timestamp. */
    @Transactional
    public CollectionTask update(CollectionTask task) {
        task.setUpdatedAt(LocalDateTime.now());
        taskMapper.update(task);
        return task;
    }

    /** Deletes a task by id. */
    @Transactional
    public void delete(String id) { taskMapper.deleteById(id); }

    /** Fetches a task by id. */
    public CollectionTask get(String id) { return taskMapper.selectById(id); }

    /**
     * Lists tasks filtered by status/name; when both page and size are given,
     * applies offset/limit paging (page is zero-based).
     */
    public List<CollectionTask> list(Integer page, Integer size, String status, String name) {
        Map<String, Object> p = new HashMap<>();
        p.put("status", status);
        p.put("name", name);
        if (page != null && size != null) {
            p.put("offset", page * size);
            p.put("limit", size);
        }
        return taskMapper.selectAll(p);
    }

    /** Creates (but does not run) a new execution record for the task. */
    @Transactional
    public TaskExecution startExecution(CollectionTask task) {
        return dataxExecutionService.createExecution(task);
    }

    // ---- Template related merged methods ----

    /** Pages DataX job templates by source/target type (page is zero-based). */
    public List<DataxTemplate> listTemplates(String sourceType, String targetType, int page, int size) {
        int offset = page * size;
        return taskMapper.selectList(sourceType, targetType, offset, size);
    }

    /** Counts templates matching the source/target type filter. */
    public int countTemplates(String sourceType, String targetType) {
        return taskMapper.countTemplates(sourceType, targetType);
    }
}
|
||||
@@ -0,0 +1,60 @@
|
||||
package com.datamate.collection.application.service;
|
||||
|
||||
import com.datamate.collection.domain.model.CollectionTask;
|
||||
import com.datamate.collection.domain.model.TaskExecution;
|
||||
import com.datamate.collection.domain.model.TaskStatus;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
|
||||
import com.datamate.collection.infrastructure.runtime.datax.DataxJobBuilder;
|
||||
import com.datamate.collection.infrastructure.runtime.datax.DataxProcessRunner;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.time.Duration;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DataxExecutionService {
|
||||
|
||||
private final DataxJobBuilder jobBuilder;
|
||||
private final DataxProcessRunner processRunner;
|
||||
private final TaskExecutionMapper executionMapper;
|
||||
private final CollectionTaskMapper taskMapper;
|
||||
|
||||
|
||||
@Transactional
|
||||
public TaskExecution createExecution(CollectionTask task) {
|
||||
TaskExecution exec = TaskExecution.initTaskExecution();
|
||||
exec.setTaskId(task.getId());
|
||||
exec.setTaskName(task.getName());
|
||||
executionMapper.insert(exec);
|
||||
taskMapper.updateLastExecution(task.getId(), exec.getId());
|
||||
taskMapper.updateStatus(task.getId(), TaskStatus.RUNNING.name());
|
||||
return exec;
|
||||
}
|
||||
|
||||
@Async
|
||||
public void runAsync(CollectionTask task, String executionId, int timeoutSeconds) {
|
||||
try {
|
||||
Path job = jobBuilder.buildJobFile(task);
|
||||
|
||||
int code = processRunner.runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
|
||||
log.info("DataX finished with code {} for execution {}", code, executionId);
|
||||
// 简化:成功即完成
|
||||
executionMapper.completeExecution(executionId, TaskStatus.SUCCESS.name(), LocalDateTime.now(),
|
||||
0, 0L, 0L, 0L, null, null);
|
||||
taskMapper.updateStatus(task.getId(), TaskStatus.SUCCESS.name());
|
||||
} catch (Exception e) {
|
||||
log.error("DataX execution failed", e);
|
||||
executionMapper.completeExecution(executionId, TaskStatus.FAILED.name(), LocalDateTime.now(),
|
||||
0, 0L, 0L, 0L, e.getMessage(), null);
|
||||
taskMapper.updateStatus(task.getId(), TaskStatus.FAILED.name());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
package com.datamate.collection.application.service;
|
||||
|
||||
import com.datamate.collection.domain.model.CollectionTask;
|
||||
import com.datamate.collection.domain.model.TaskExecution;
|
||||
import com.datamate.collection.domain.model.TaskStatus;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
|
||||
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class TaskExecutionService {
|
||||
private final TaskExecutionMapper executionMapper;
|
||||
private final CollectionTaskMapper taskMapper;
|
||||
|
||||
public List<TaskExecution> list(String taskId, String status, LocalDateTime startDate,
|
||||
LocalDateTime endDate, Integer page, Integer size) {
|
||||
Map<String, Object> p = new HashMap<>();
|
||||
p.put("taskId", taskId);
|
||||
p.put("status", status);
|
||||
p.put("startDate", startDate);
|
||||
p.put("endDate", endDate);
|
||||
if (page != null && size != null) {
|
||||
p.put("offset", page * size);
|
||||
p.put("limit", size);
|
||||
}
|
||||
return executionMapper.selectAll(p);
|
||||
}
|
||||
|
||||
public long count(String taskId, String status, LocalDateTime startDate, LocalDateTime endDate) {
|
||||
Map<String, Object> p = new HashMap<>();
|
||||
p.put("taskId", taskId);
|
||||
p.put("status", status);
|
||||
p.put("startDate", startDate);
|
||||
p.put("endDate", endDate);
|
||||
return executionMapper.count(p);
|
||||
}
|
||||
|
||||
// --- Added convenience methods ---
|
||||
public TaskExecution get(String id) { return executionMapper.selectById(id); }
|
||||
public TaskExecution getLatestByTaskId(String taskId) { return executionMapper.selectLatestByTaskId(taskId); }
|
||||
|
||||
@Transactional
|
||||
public void complete(String executionId, boolean success, long successCount, long failedCount,
|
||||
long dataSizeBytes, String errorMessage, String resultJson) {
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
TaskExecution exec = executionMapper.selectById(executionId);
|
||||
if (exec == null) { return; }
|
||||
int duration = (int) Duration.between(exec.getStartedAt(), now).getSeconds();
|
||||
executionMapper.completeExecution(executionId, success ? TaskStatus.SUCCESS.name() : TaskStatus.FAILED.name(),
|
||||
now, duration, successCount, failedCount, dataSizeBytes, errorMessage, resultJson);
|
||||
CollectionTask task = taskMapper.selectById(exec.getTaskId());
|
||||
if (task != null) {
|
||||
taskMapper.updateStatus(task.getId(), success ? TaskStatus.SUCCESS.name() : TaskStatus.FAILED.name());
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void stop(String executionId) {
|
||||
TaskExecution exec = executionMapper.selectById(executionId);
|
||||
if (exec == null || exec.getStatus() != TaskStatus.RUNNING) { return; }
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
int duration = (int) Duration.between(exec.getStartedAt(), now).getSeconds();
|
||||
// Reuse completeExecution to persist STOPPED status and timing info
|
||||
executionMapper.completeExecution(exec.getId(), TaskStatus.STOPPED.name(), now, duration,
|
||||
exec.getRecordsSuccess(), exec.getRecordsFailed(), exec.getDataSizeBytes(), null, exec.getResult());
|
||||
taskMapper.updateStatus(exec.getTaskId(), TaskStatus.STOPPED.name());
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void stopLatestByTaskId(String taskId) {
|
||||
TaskExecution latest = executionMapper.selectLatestByTaskId(taskId);
|
||||
if (latest != null) { stop(latest.getId()); }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.datamate.collection.domain.model;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.Data;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
public class CollectionTask {
|
||||
private String id;
|
||||
private String name;
|
||||
private String description;
|
||||
private String config; // DataX JSON 配置,包含源端和目标端配置信息
|
||||
private TaskStatus status;
|
||||
private String syncMode; // ONCE / SCHEDULED
|
||||
private String scheduleExpression;
|
||||
private Integer retryCount;
|
||||
private Integer timeoutSeconds;
|
||||
private Long maxRecords;
|
||||
private String sortField;
|
||||
private String lastExecutionId;
|
||||
private LocalDateTime createdAt;
|
||||
private LocalDateTime updatedAt;
|
||||
private String createdBy;
|
||||
private String updatedBy;
|
||||
|
||||
public void addPath() {
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
Map<String, Object> parameter = objectMapper.readValue(
|
||||
config,
|
||||
new TypeReference<>() {}
|
||||
);
|
||||
parameter.put("destPath", "/dataset/local/" + id);
|
||||
parameter.put("filePaths", Collections.singletonList(parameter.get("destPath")));
|
||||
config = objectMapper.writeValueAsString(parameter);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
package com.datamate.collection.domain.model;

import lombok.Data;
import lombok.EqualsAndHashCode;

import java.time.LocalDateTime;

/**
 * DataX job template: a reusable, versioned JSON snippet for a given
 * source/target datasource type pair.
 */
@Data
@EqualsAndHashCode(callSuper = false)
public class DataxTemplate {

    /**
     * Template id (UUID).
     */
    private String id;

    /**
     * Template name.
     */
    private String name;

    /**
     * Source datasource type.
     */
    private String sourceType;

    /**
     * Target datasource type.
     */
    private String targetType;

    /**
     * Template content (JSON).
     */
    private String templateContent;

    /**
     * Template description.
     */
    private String description;

    /**
     * Version number.
     */
    private String version;

    /**
     * Whether this is a built-in system template.
     */
    private Boolean isSystem;

    /**
     * Creation time.
     */
    private LocalDateTime createdAt;

    /**
     * Last update time.
     */
    private LocalDateTime updatedAt;

    /**
     * Creator.
     */
    private String createdBy;

    /**
     * Last updater.
     */
    private String updatedBy;
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package com.datamate.collection.domain.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.UUID;
|
||||
|
||||
@Data
|
||||
public class TaskExecution {
|
||||
private String id;
|
||||
private String taskId;
|
||||
private String taskName;
|
||||
private TaskStatus status;
|
||||
private Double progress;
|
||||
private Long recordsTotal;
|
||||
private Long recordsProcessed;
|
||||
private Long recordsSuccess;
|
||||
private Long recordsFailed;
|
||||
private Double throughput;
|
||||
private Long dataSizeBytes;
|
||||
private LocalDateTime startedAt;
|
||||
private LocalDateTime completedAt;
|
||||
private Integer durationSeconds;
|
||||
private String errorMessage;
|
||||
private String dataxJobId;
|
||||
private String config;
|
||||
private String result;
|
||||
private LocalDateTime createdAt;
|
||||
|
||||
public static TaskExecution initTaskExecution() {
|
||||
TaskExecution exec = new TaskExecution();
|
||||
exec.setId(UUID.randomUUID().toString());
|
||||
exec.setStatus(TaskStatus.RUNNING);
|
||||
exec.setProgress(0.0);
|
||||
exec.setStartedAt(LocalDateTime.now());
|
||||
exec.setCreatedAt(LocalDateTime.now());
|
||||
return exec;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
package com.datamate.collection.domain.model;

/**
 * Unified status enum shared by tasks and executions.
 *
 * @author Data Mate Platform Team
 */
public enum TaskStatus {
    /** Draft. */
    DRAFT,
    /** Ready to run. */
    READY,
    /** Currently running. */
    RUNNING,
    /** Finished successfully (formerly COMPLETED). */
    SUCCESS,
    /** Failed. */
    FAILED,
    /** Stopped by request. */
    STOPPED
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package com.datamate.collection.infrastructure.persistence.mapper;

import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.DataxTemplate;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;

import java.util.List;
import java.util.Map;

/**
 * MyBatis mapper for collection tasks and (for now) DataX template queries.
 * SQL lives in the corresponding mapper XML.
 */
@Mapper
public interface CollectionTaskMapper {
    int insert(CollectionTask entity);
    int update(CollectionTask entity);
    int deleteById(@Param("id") String id);
    CollectionTask selectById(@Param("id") String id);
    CollectionTask selectByName(@Param("name") String name);
    List<CollectionTask> selectByStatus(@Param("status") String status);
    // params: optional filter keys; presumably also "offset"/"limit" paging keys — verify against the XML
    List<CollectionTask> selectAll(Map<String, Object> params);
    int updateStatus(@Param("id") String id, @Param("status") String status);
    // links the task to its most recent execution record
    int updateLastExecution(@Param("id") String id, @Param("lastExecutionId") String lastExecutionId);
    List<CollectionTask> selectActiveTasks();

    /**
     * Queries the template list.
     *
     * @param sourceType source datasource type (optional)
     * @param targetType target datasource type (optional)
     * @param offset row offset
     * @param limit max rows to return
     * @return matching templates
     */
    List<DataxTemplate> selectList(@Param("sourceType") String sourceType,
                                   @Param("targetType") String targetType,
                                   @Param("offset") int offset,
                                   @Param("limit") int limit);

    /**
     * Counts templates.
     *
     * @param sourceType source datasource type (optional)
     * @param targetType target datasource type (optional)
     * @return total number of templates
     */
    int countTemplates(@Param("sourceType") String sourceType,
                       @Param("targetType") String targetType);
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.datamate.collection.infrastructure.persistence.mapper;

import com.datamate.collection.domain.model.TaskExecution;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;

import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;

/**
 * MyBatis mapper for task execution records. SQL lives in the corresponding
 * mapper XML.
 */
@Mapper
public interface TaskExecutionMapper {
    int insert(TaskExecution entity);
    int update(TaskExecution entity);
    int deleteById(@Param("id") String id);
    TaskExecution selectById(@Param("id") String id);
    List<TaskExecution> selectByTaskId(@Param("taskId") String taskId, @Param("limit") Integer limit);
    List<TaskExecution> selectByStatus(@Param("status") String status);
    // params: optional filters (taskId/status/startDate/endDate) plus "offset"/"limit" paging keys
    List<TaskExecution> selectAll(Map<String, Object> params);
    // same filter keys as selectAll, minus paging
    long count(Map<String, Object> params);
    /** Updates live progress counters while an execution is running. */
    int updateProgress(@Param("id") String id,
                       @Param("status") String status,
                       @Param("progress") Double progress,
                       @Param("recordsProcessed") Long recordsProcessed,
                       @Param("throughput") Double throughput);
    /** Persists the terminal state (status, timing, counters, error/result payloads). */
    int completeExecution(@Param("id") String id,
                          @Param("status") String status,
                          @Param("completedAt") LocalDateTime completedAt,
                          @Param("durationSeconds") Integer durationSeconds,
                          @Param("recordsSuccess") Long recordsSuccess,
                          @Param("recordsFailed") Long recordsFailed,
                          @Param("dataSizeBytes") Long dataSizeBytes,
                          @Param("errorMessage") String errorMessage,
                          @Param("result") String result);
    List<TaskExecution> selectRunningExecutions();
    TaskExecution selectLatestByTaskId(@Param("taskId") String taskId);
    // retention cleanup; presumably compared against the created/start time — verify against the XML
    int deleteOldExecutions(@Param("beforeDate") LocalDateTime beforeDate);
}
|
||||
@@ -0,0 +1,83 @@
|
||||
package com.datamate.collection.infrastructure.runtime.datax;
|
||||
|
||||
import com.datamate.collection.domain.model.CollectionTask;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* 根据任务配置拼装 DataX 作业 JSON 文件
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class DataxJobBuilder {
|
||||
|
||||
private final DataxProperties props;
|
||||
|
||||
public Path buildJobFile(CollectionTask task) throws IOException {
|
||||
Files.createDirectories(Paths.get(props.getJobConfigPath()));
|
||||
String fileName = String.format("datax-job-%s.json", task.getId());
|
||||
Path path = Paths.get(props.getJobConfigPath(), fileName);
|
||||
// 简化:直接将任务中的 config 字段作为 DataX 作业 JSON
|
||||
try (FileWriter fw = new FileWriter(path.toFile())) {
|
||||
String json = task.getConfig() == null || task.getConfig().isEmpty() ?
|
||||
defaultJobJson() : task.getConfig();
|
||||
if (StringUtils.isNotBlank(task.getConfig())) {
|
||||
json = getJobConfig(task);
|
||||
}
|
||||
log.info("Job config: {}", json);
|
||||
fw.write(json);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
private String getJobConfig(CollectionTask task) {
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
Map<String, Object> parameter = objectMapper.readValue(
|
||||
task.getConfig(),
|
||||
new TypeReference<>() {}
|
||||
);
|
||||
Map<String, Object> job = new HashMap<>();
|
||||
Map<String, Object> content = new HashMap<>();
|
||||
Map<String, Object> reader = new HashMap<>();
|
||||
reader.put("name", "nfsreader");
|
||||
reader.put("parameter", parameter);
|
||||
content.put("reader", reader);
|
||||
Map<String, Object> writer = new HashMap<>();
|
||||
writer.put("name", "nfswriter");
|
||||
writer.put("parameter", parameter);
|
||||
content.put("writer", writer);
|
||||
job.put("content", List.of(content));
|
||||
Map<String, Object> setting = new HashMap<>();
|
||||
Map<String, Object> channel = new HashMap<>();
|
||||
channel.put("channel", 2);
|
||||
setting.put("speed", channel);
|
||||
job.put("setting", setting);
|
||||
Map<String, Object> jobConfig = new HashMap<>();
|
||||
jobConfig.put("job", job);
|
||||
return objectMapper.writeValueAsString(jobConfig);
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to parse task config", e);
|
||||
throw new RuntimeException("Failed to parse task config", e);
|
||||
}
|
||||
}
|
||||
|
||||
private String defaultJobJson() {
|
||||
// 提供一个最小可运行的空 job,实际会被具体任务覆盖
|
||||
return "{\n \"job\": {\n \"setting\": {\n \"speed\": {\n \"channel\": 1\n }\n },\n \"content\": []\n }\n}";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
package com.datamate.collection.infrastructure.runtime.datax;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.exec.*;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.Duration;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class DataxProcessRunner {
|
||||
|
||||
private final DataxProperties props;
|
||||
|
||||
public int runJob(File jobFile, String executionId, Duration timeout) throws Exception {
|
||||
File logFile = new File(props.getLogPath(), String.format("datax-%s.log", executionId));
|
||||
String python = props.getPythonPath();
|
||||
String dataxPy = props.getHomePath() + File.separator + "bin" + File.separator + "datax.py";
|
||||
String cmd = String.format("%s %s %s", python, dataxPy, jobFile.getAbsolutePath());
|
||||
|
||||
log.info("Execute DataX: {}", cmd);
|
||||
|
||||
CommandLine cl = CommandLine.parse(cmd);
|
||||
DefaultExecutor executor = new DefaultExecutor();
|
||||
|
||||
// 将日志追加输出到文件
|
||||
File parent = logFile.getParentFile();
|
||||
if (!parent.exists()) parent.mkdirs();
|
||||
|
||||
ExecuteStreamHandler streamHandler = new PumpStreamHandler(
|
||||
new org.apache.commons.io.output.TeeOutputStream(
|
||||
new java.io.FileOutputStream(logFile, true), System.out),
|
||||
new org.apache.commons.io.output.TeeOutputStream(
|
||||
new java.io.FileOutputStream(logFile, true), System.err)
|
||||
);
|
||||
executor.setStreamHandler(streamHandler);
|
||||
|
||||
ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout.toMillis());
|
||||
executor.setWatchdog(watchdog);
|
||||
|
||||
return executor.execute(cl);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.datamate.collection.infrastructure.runtime.datax;

import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

/**
 * Externalized DataX runtime settings, bound from the
 * {@code datamate.data-collection.datax} configuration prefix.
 */
@Data
@Configuration
@ConfigurationProperties(prefix = "datamate.data-collection.datax")
public class DataxProperties {
    private String homePath; // DATAX_HOME installation directory
    private String pythonPath; // python executable used to launch datax.py
    private String jobConfigPath; // directory for generated job JSON files
    private String logPath; // directory for per-execution run logs
    private Integer maxMemory = 2048; // max memory for DataX — presumably MB; TODO confirm unit
    private Integer channelCount = 5; // default DataX channel (parallelism) count
}
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.datamate.collection.interfaces.converter;
|
||||
|
||||
import com.datamate.collection.domain.model.CollectionTask;
|
||||
import com.datamate.collection.domain.model.DataxTemplate;
|
||||
import com.datamate.collection.interfaces.dto.*;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.mapstruct.Mapper;
|
||||
import org.mapstruct.Mapping;
|
||||
import org.mapstruct.Named;
|
||||
import org.mapstruct.factory.Mappers;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@Mapper
|
||||
public interface CollectionTaskConverter {
|
||||
CollectionTaskConverter INSTANCE = Mappers.getMapper(CollectionTaskConverter.class);
|
||||
|
||||
@Mapping(source = "config", target = "config", qualifiedByName = "parseJsonToMap")
|
||||
CollectionTaskResponse toResponse(CollectionTask task);
|
||||
|
||||
CollectionTaskSummary toSummary(CollectionTask task);
|
||||
|
||||
DataxTemplateSummary toTemplateSummary(DataxTemplate template);
|
||||
|
||||
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
|
||||
CollectionTask toCollectionTask(CreateCollectionTaskRequest request);
|
||||
|
||||
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
|
||||
CollectionTask toCollectionTask(UpdateCollectionTaskRequest request);
|
||||
|
||||
@Named("parseJsonToMap")
|
||||
default Map<String, Object> parseJsonToMap(String json) {
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
return objectMapper.readValue(json, Map.class);
|
||||
} catch (Exception e) {
|
||||
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
|
||||
}
|
||||
}
|
||||
|
||||
@Named("mapToJsonString")
|
||||
default String mapToJsonString(Map<String, Object> map) {
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
return objectMapper.writeValueAsString(map != null ? map : Map.of());
|
||||
} catch (Exception e) {
|
||||
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
package com.datamate.collection.interfaces.rest;

import com.datamate.collection.application.service.CollectionTaskService;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.DataxTemplate;
import com.datamate.collection.interfaces.api.CollectionTaskApi;
import com.datamate.collection.interfaces.converter.CollectionTaskConverter;
import com.datamate.collection.interfaces.dto.*;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.ResponseEntity;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.RestController;

import java.util.*;
import java.util.stream.Collectors;

/**
 * REST adapter for collection-task CRUD and DataX template browsing; route
 * mappings are declared on the implemented {@link CollectionTaskApi}.
 */
@Slf4j
@RestController
@RequiredArgsConstructor
@Validated
public class CollectionTaskController implements CollectionTaskApi {

    private final CollectionTaskService taskService;

    /** Creates a task: assigns a fresh UUID and injects the dataset destination path into its config. */
    @Override
    public ResponseEntity<CollectionTaskResponse> createTask(CreateCollectionTaskRequest request) {
        CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
        task.setId(UUID.randomUUID().toString());
        task.addPath();
        return ResponseEntity.ok().body(CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task)));
    }

    /** Updates an existing task; 404 when the id is unknown. */
    @Override
    public ResponseEntity<CollectionTaskResponse> updateTask(String id, UpdateCollectionTaskRequest request) {
        if (taskService.get(id) == null) {
            return ResponseEntity.notFound().build();
        }
        CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
        task.setId(id);
        return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.update(task)));
    }

    /** Deletes a task by id; always answers 200, even for unknown ids. */
    @Override
    public ResponseEntity<Void> deleteTask(String id) {
        taskService.delete(id);
        return ResponseEntity.ok().build();
    }

    /** Returns full task detail, or 404 when unknown. */
    @Override
    public ResponseEntity<CollectionTaskResponse> getTaskDetail(String id) {
        CollectionTask task = taskService.get(id);
        return task == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(task));
    }

    /** Paged task listing filtered by status/name. */
    @Override
    public ResponseEntity<PagedCollectionTaskSummary> getTasks(Integer page, Integer size, TaskStatus status, String name) {
        var list = taskService.list(page, size, status == null ? null : status.getValue(), name);
        PagedCollectionTaskSummary response = new PagedCollectionTaskSummary();
        response.setContent(list.stream().map(CollectionTaskConverter.INSTANCE::toSummary).collect(Collectors.toList()));
        response.setNumber(page);
        response.setSize(size);
        response.setTotalElements(list.size()); // simplification: totals reflect only the current page; a real implementation needs a separate count query
        response.setTotalPages(size == null || size == 0 ? 1 : (int) Math.ceil(list.size() * 1.0 / size));
        return ResponseEntity.ok(response);
    }

    /** Paged DataX template listing filtered by source/target type (defaults: page 0, size 20). */
    @Override
    public ResponseEntity<PagedDataxTemplates> templatesGet(String sourceType, String targetType,
                                                            Integer page, Integer size) {
        int pageNum = page != null ? page : 0;
        int pageSize = size != null ? size : 20;
        List<DataxTemplate> templates = taskService.listTemplates(sourceType, targetType, pageNum, pageSize);
        int totalElements = taskService.countTemplates(sourceType, targetType);
        PagedDataxTemplates response = new PagedDataxTemplates();
        response.setContent(templates.stream().map(CollectionTaskConverter.INSTANCE::toTemplateSummary).collect(Collectors.toList()));
        response.setNumber(pageNum);
        response.setSize(pageSize);
        response.setTotalElements(totalElements);
        response.setTotalPages(pageSize > 0 ? (int) Math.ceil(totalElements * 1.0 / pageSize) : 1);
        return ResponseEntity.ok(response);
    }
}
|
||||
@@ -0,0 +1,101 @@
|
||||
package com.datamate.collection.interfaces.rest;
|
||||
|
||||
import com.datamate.collection.application.service.CollectionTaskService;
|
||||
import com.datamate.collection.application.service.TaskExecutionService;
|
||||
import com.datamate.collection.domain.model.TaskExecution;
|
||||
import com.datamate.collection.interfaces.api.TaskExecutionApi;
|
||||
import com.datamate.collection.interfaces.dto.PagedTaskExecutions;
|
||||
import com.datamate.collection.interfaces.dto.TaskExecutionDetail;
|
||||
import com.datamate.collection.interfaces.dto.TaskExecutionResponse;
|
||||
import com.datamate.collection.interfaces.dto.TaskStatus; // DTO enum
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@RestController
|
||||
@RequiredArgsConstructor
|
||||
@Validated
|
||||
public class TaskExecutionController implements TaskExecutionApi {
|
||||
|
||||
private final TaskExecutionService executionService;
|
||||
private final CollectionTaskService taskService;
|
||||
|
||||
private TaskExecutionDetail toDetail(TaskExecution e) {
|
||||
TaskExecutionDetail d = new TaskExecutionDetail();
|
||||
d.setId(e.getId());
|
||||
d.setTaskId(e.getTaskId());
|
||||
d.setTaskName(e.getTaskName());
|
||||
if (e.getStatus() != null) { d.setStatus(TaskStatus.fromValue(e.getStatus().name())); }
|
||||
d.setProgress(e.getProgress());
|
||||
d.setRecordsTotal(e.getRecordsTotal() != null ? e.getRecordsTotal().intValue() : null);
|
||||
d.setRecordsProcessed(e.getRecordsProcessed() != null ? e.getRecordsProcessed().intValue() : null);
|
||||
d.setRecordsSuccess(e.getRecordsSuccess() != null ? e.getRecordsSuccess().intValue() : null);
|
||||
d.setRecordsFailed(e.getRecordsFailed() != null ? e.getRecordsFailed().intValue() : null);
|
||||
d.setThroughput(e.getThroughput());
|
||||
d.setDataSizeBytes(e.getDataSizeBytes() != null ? e.getDataSizeBytes().intValue() : null);
|
||||
d.setStartedAt(e.getStartedAt());
|
||||
d.setCompletedAt(e.getCompletedAt());
|
||||
d.setDurationSeconds(e.getDurationSeconds());
|
||||
d.setErrorMessage(e.getErrorMessage());
|
||||
return d;
|
||||
}
|
||||
|
||||
// GET /executions/{id}
|
||||
@Override
|
||||
public ResponseEntity<TaskExecutionDetail> executionsIdGet(String id) {
|
||||
var exec = executionService.get(id);
|
||||
return exec == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(toDetail(exec));
|
||||
}
|
||||
|
||||
// DELETE /executions/{id}
|
||||
@Override
|
||||
public ResponseEntity<Void> executionsIdDelete(String id) {
|
||||
executionService.stop(id); // 幂等处理,在service内部判断状态
|
||||
return ResponseEntity.noContent().build();
|
||||
}
|
||||
|
||||
// POST /tasks/{id}/execute -> 201
|
||||
@Override
|
||||
public ResponseEntity<TaskExecutionResponse> tasksIdExecutePost(String id) {
|
||||
var task = taskService.get(id);
|
||||
if (task == null) { return ResponseEntity.notFound().build(); }
|
||||
var latestExec = executionService.getLatestByTaskId(id);
|
||||
if (latestExec != null && latestExec.getStatus() == com.datamate.collection.domain.model.TaskStatus.RUNNING) {
|
||||
TaskExecutionResponse r = new TaskExecutionResponse();
|
||||
r.setId(latestExec.getId());
|
||||
r.setTaskId(latestExec.getTaskId());
|
||||
r.setTaskName(latestExec.getTaskName());
|
||||
r.setStatus(TaskStatus.fromValue(latestExec.getStatus().name()));
|
||||
r.setStartedAt(latestExec.getStartedAt());
|
||||
return ResponseEntity.status(HttpStatus.CREATED).body(r); // 返回已有运行实例
|
||||
}
|
||||
var exec = taskService.startExecution(task);
|
||||
TaskExecutionResponse r = new TaskExecutionResponse();
|
||||
r.setId(exec.getId());
|
||||
r.setTaskId(exec.getTaskId());
|
||||
r.setTaskName(exec.getTaskName());
|
||||
r.setStatus(TaskStatus.fromValue(exec.getStatus().name()));
|
||||
r.setStartedAt(exec.getStartedAt());
|
||||
return ResponseEntity.status(HttpStatus.CREATED).body(r);
|
||||
}
|
||||
|
||||
// GET /tasks/{id}/executions -> 分页
|
||||
@Override
|
||||
public ResponseEntity<PagedTaskExecutions> tasksIdExecutionsGet(String id, Integer page, Integer size) {
|
||||
if (page == null || page < 0) { page = 0; }
|
||||
if (size == null || size <= 0) { size = 20; }
|
||||
var list = executionService.list(id, null, null, null, page, size);
|
||||
long total = executionService.count(id, null, null, null);
|
||||
PagedTaskExecutions p = new PagedTaskExecutions();
|
||||
p.setContent(list.stream().map(this::toDetail).collect(Collectors.toList()));
|
||||
p.setNumber(page);
|
||||
p.setSize(size);
|
||||
p.setTotalElements((int) total);
|
||||
p.setTotalPages(size == 0 ? 1 : (int) Math.ceil(total * 1.0 / size));
|
||||
return ResponseEntity.ok(p);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
datamate:
|
||||
data-collection:
|
||||
# DataX配置
|
||||
datax:
|
||||
home-path: ${DATAX_HOME:D:/datax}
|
||||
python-path: ${DATAX_PYTHON_PATH:python3}
|
||||
job-config-path: ${DATAX_JOB_PATH:./data/temp/datax/jobs}
|
||||
log-path: ${DATAX_LOG_PATH:./logs/datax}
|
||||
max-memory: ${DATAX_MAX_MEMORY:2048}
|
||||
channel-count: ${DATAX_CHANNEL_COUNT:5}
|
||||
|
||||
# 执行配置
|
||||
execution:
|
||||
max-concurrent-tasks: ${DATA_COLLECTION_MAX_CONCURRENT_TASKS:10}
|
||||
task-timeout-minutes: ${DATA_COLLECTION_TASK_TIMEOUT:120}
|
||||
retry-count: ${DATA_COLLECTION_RETRY_COUNT:3}
|
||||
retry-interval-seconds: ${DATA_COLLECTION_RETRY_INTERVAL:30}
|
||||
|
||||
# 监控配置
|
||||
monitoring:
|
||||
status-check-interval-seconds: ${DATA_COLLECTION_STATUS_CHECK_INTERVAL:30}
|
||||
log-retention-days: ${DATA_COLLECTION_LOG_RETENTION:30}
|
||||
enable-metrics: ${DATA_COLLECTION_ENABLE_METRICS:true}
|
||||
@@ -0,0 +1,188 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">

<!-- Mapper for collection tasks (t_dc_collection_tasks).
     NOTE(review): this mapper also hosts DataX template statements
     (t_dc_datax_templates) — consider a dedicated template mapper. -->
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper">

    <!-- Result map: task row -> CollectionTask domain model -->
    <resultMap id="CollectionTaskResultMap" type="com.datamate.collection.domain.model.CollectionTask">
        <id property="id" column="id"/>
        <result property="name" column="name"/>
        <result property="description" column="description"/>
        <result property="config" column="config"/>
        <result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
        <result property="syncMode" column="sync_mode"/>
        <result property="scheduleExpression" column="schedule_expression"/>
        <result property="retryCount" column="retry_count"/>
        <result property="timeoutSeconds" column="timeout_seconds"/>
        <result property="maxRecords" column="max_records"/>
        <result property="sortField" column="sort_field"/>
        <result property="lastExecutionId" column="last_execution_id"/>
        <result property="createdAt" column="created_at"/>
        <result property="updatedAt" column="updated_at"/>
        <result property="createdBy" column="created_by"/>
        <result property="updatedBy" column="updated_by"/>
    </resultMap>

    <!-- Result map: template row -> DataxTemplate domain model -->
    <resultMap id="DataxTemplateResultMap" type="com.datamate.collection.domain.model.DataxTemplate">
        <id column="id" property="id" jdbcType="VARCHAR"/>
        <result column="name" property="name" jdbcType="VARCHAR"/>
        <result column="source_type" property="sourceType" jdbcType="VARCHAR"/>
        <result column="target_type" property="targetType" jdbcType="VARCHAR"/>
        <result column="template_content" property="templateContent" jdbcType="VARCHAR"/>
        <result column="description" property="description" jdbcType="VARCHAR"/>
        <result column="version" property="version" jdbcType="VARCHAR"/>
        <result column="is_system" property="isSystem" jdbcType="BOOLEAN"/>
        <result column="created_at" property="createdAt" jdbcType="TIMESTAMP"/>
        <result column="updated_at" property="updatedAt" jdbcType="TIMESTAMP"/>
        <result column="created_by" property="createdBy" jdbcType="VARCHAR"/>
    </resultMap>

    <!-- Base Column List (tasks) -->
    <sql id="Base_Column_List">
        id,
        name, description, config, status, sync_mode,
        schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
        last_execution_id, created_at, updated_at, created_by, updated_by
    </sql>

    <!-- Template Column List -->
    <sql id="Template_Column_List">
        id, name, source_type, target_type, template_content, description, version, is_system, created_at, updated_at, created_by
    </sql>

    <!-- Insert a new task -->
    <insert id="insert" parameterType="com.datamate.collection.domain.model.CollectionTask">
        INSERT INTO t_dc_collection_tasks (id, name, description, config, status, sync_mode,
                                           schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
                                           last_execution_id, created_at, updated_at, created_by, updated_by)
        VALUES (#{id}, #{name}, #{description}, #{config}, #{status}, #{syncMode},
                #{scheduleExpression}, #{retryCount}, #{timeoutSeconds}, #{maxRecords}, #{sortField},
                #{lastExecutionId}, #{createdAt}, #{updatedAt}, #{createdBy}, #{updatedBy})
    </insert>

    <!-- Full-row update by primary key (created_at / created_by untouched) -->
    <update id="update" parameterType="com.datamate.collection.domain.model.CollectionTask">
        UPDATE t_dc_collection_tasks
        SET name = #{name},
            description = #{description},
            config = #{config},
            status = #{status},
            sync_mode = #{syncMode},
            schedule_expression = #{scheduleExpression},
            retry_count = #{retryCount},
            timeout_seconds = #{timeoutSeconds},
            max_records = #{maxRecords},
            sort_field = #{sortField},
            last_execution_id = #{lastExecutionId},
            updated_at = #{updatedAt},
            updated_by = #{updatedBy}
        WHERE id = #{id}
    </update>

    <!-- Delete by ID -->
    <delete id="deleteById" parameterType="java.lang.String">
        DELETE FROM t_dc_collection_tasks WHERE id = #{id}
    </delete>

    <!-- Select by ID -->
    <select id="selectById" parameterType="java.lang.String" resultMap="CollectionTaskResultMap">
        SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks WHERE id = #{id}
    </select>

    <!-- Select by (exact) name -->
    <select id="selectByName" parameterType="java.lang.String" resultMap="CollectionTaskResultMap">
        SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks WHERE name = #{name}
    </select>

    <!-- Select by status, newest first -->
    <select id="selectByStatus" parameterType="java.lang.String" resultMap="CollectionTaskResultMap">
        SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks WHERE status = #{status} ORDER BY created_at DESC
    </select>

    <!-- List tasks with optional status / name-substring filters and
         MySQL-style offset/limit pagination -->
    <select id="selectAll" resultMap="CollectionTaskResultMap">
        SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
        <where>
            <if test="status != null and status != ''">
                AND status = #{status}
            </if>
            <if test="name != null and name != ''">
                AND name LIKE CONCAT('%', #{name}, '%')
            </if>
        </where>
        ORDER BY created_at DESC
        <if test="offset != null and limit != null">
            LIMIT #{offset}, #{limit}
        </if>
    </select>

    <!-- Count tasks matching the same filters as selectAll.
         NOTE(review): this also filters on source_datasource_id /
         target_datasource_id, columns that do not appear in Base_Column_List
         or the insert statement — confirm they exist on
         t_dc_collection_tasks, and that count stays consistent with
         selectAll's filter set. -->
    <select id="count" resultType="java.lang.Long">
        SELECT COUNT(*) FROM t_dc_collection_tasks
        <where>
            <if test="status != null and status != ''">
                AND status = #{status}
            </if>
            <if test="name != null and name != ''">
                AND name LIKE CONCAT('%', #{name}, '%')
            </if>
            <if test="sourceDataSourceId != null and sourceDataSourceId != ''">
                AND source_datasource_id = #{sourceDataSourceId}
            </if>
            <if test="targetDataSourceId != null and targetDataSourceId != ''">
                AND target_datasource_id = #{targetDataSourceId}
            </if>
        </where>
    </select>

    <!-- Update status only (touches updated_at with DB time) -->
    <update id="updateStatus">
        UPDATE t_dc_collection_tasks SET status = #{status}, updated_at = NOW() WHERE id = #{id}
    </update>

    <!-- Record the most recent execution id for a task -->
    <update id="updateLastExecution">
        UPDATE t_dc_collection_tasks SET last_execution_id = #{lastExecutionId}, updated_at = NOW() WHERE id = #{id}
    </update>

    <!-- Tasks eligible for scheduling: READY/RUNNING with a cron expression -->
    <select id="selectActiveTasks" resultMap="CollectionTaskResultMap">
        SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
        WHERE status IN ('READY', 'RUNNING')
        AND schedule_expression IS NOT NULL
        ORDER BY created_at DESC
    </select>

    <!-- List DataX templates, system templates first -->
    <select id="selectList" resultMap="DataxTemplateResultMap">
        SELECT <include refid="Template_Column_List"/> FROM t_dc_datax_templates
        <where>
            <if test="sourceType != null and sourceType != ''">
                AND source_type = #{sourceType}
            </if>
            <if test="targetType != null and targetType != ''">
                AND target_type = #{targetType}
            </if>
        </where>
        ORDER BY is_system DESC, created_at DESC
        <if test="limit > 0">
            LIMIT #{offset}, #{limit}
        </if>
    </select>

    <!-- Count DataX templates with the same filters as selectList -->
    <select id="countTemplates" resultType="java.lang.Integer">
        SELECT COUNT(1) FROM t_dc_datax_templates
        <where>
            <if test="sourceType != null and sourceType != ''">
                AND source_type = #{sourceType}
            </if>
            <if test="targetType != null and targetType != ''">
                AND target_type = #{targetType}
            </if>
        </where>
    </select>

</mapper>
|
||||
@@ -0,0 +1,191 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">

<!-- Mapper for task execution records (t_dc_task_executions).
     Fix: bare '<' in element text is not well-formed XML and breaks mapper
     parsing — '<'/'<=' comparisons are escaped as &lt; / &lt;= below. -->
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper">

    <!-- Result map: execution row -> TaskExecution domain model -->
    <resultMap id="TaskExecutionResultMap" type="com.datamate.collection.domain.model.TaskExecution">
        <id property="id" column="id"/>
        <result property="taskId" column="task_id"/>
        <result property="taskName" column="task_name"/>
        <result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
        <result property="progress" column="progress"/>
        <result property="recordsTotal" column="records_total"/>
        <result property="recordsProcessed" column="records_processed"/>
        <result property="recordsSuccess" column="records_success"/>
        <result property="recordsFailed" column="records_failed"/>
        <result property="throughput" column="throughput"/>
        <result property="dataSizeBytes" column="data_size_bytes"/>
        <result property="startedAt" column="started_at"/>
        <result property="completedAt" column="completed_at"/>
        <result property="durationSeconds" column="duration_seconds"/>
        <result property="errorMessage" column="error_message"/>
        <result property="dataxJobId" column="datax_job_id"/>
        <result property="config" column="config"/>
        <result property="result" column="result"/>
        <result property="createdAt" column="created_at"/>
    </resultMap>

    <!-- Base Column List -->
    <sql id="Base_Column_List">
        id, task_id, task_name, status, progress, records_total, records_processed,
        records_success, records_failed, throughput, data_size_bytes, started_at,
        completed_at, duration_seconds, error_message, datax_job_id, config, result, created_at
    </sql>

    <!-- Insert a new execution record -->
    <insert id="insert" parameterType="com.datamate.collection.domain.model.TaskExecution">
        INSERT INTO t_dc_task_executions (
            id, task_id, task_name, status, progress, records_total, records_processed,
            records_success, records_failed, throughput, data_size_bytes, started_at,
            completed_at, duration_seconds, error_message, datax_job_id, config, result, created_at
        ) VALUES (
            #{id}, #{taskId}, #{taskName}, #{status}, #{progress}, #{recordsTotal}, #{recordsProcessed},
            #{recordsSuccess}, #{recordsFailed}, #{throughput}, #{dataSizeBytes}, #{startedAt},
            #{completedAt}, #{durationSeconds}, #{errorMessage}, #{dataxJobId}, #{config}, #{result}, #{createdAt}
        )
    </insert>

    <!-- Update mutable execution fields by primary key -->
    <update id="update" parameterType="com.datamate.collection.domain.model.TaskExecution">
        UPDATE t_dc_task_executions
        SET status = #{status},
            progress = #{progress},
            records_total = #{recordsTotal},
            records_processed = #{recordsProcessed},
            records_success = #{recordsSuccess},
            records_failed = #{recordsFailed},
            throughput = #{throughput},
            data_size_bytes = #{dataSizeBytes},
            completed_at = #{completedAt},
            duration_seconds = #{durationSeconds},
            error_message = #{errorMessage},
            result = #{result}
        WHERE id = #{id}
    </update>

    <!-- Delete by ID -->
    <delete id="deleteById" parameterType="java.lang.String">
        DELETE FROM t_dc_task_executions WHERE id = #{id}
    </delete>

    <!-- Select by ID -->
    <select id="selectById" parameterType="java.lang.String" resultMap="TaskExecutionResultMap">
        SELECT <include refid="Base_Column_List"/>
        FROM t_dc_task_executions
        WHERE id = #{id}
    </select>

    <!-- Executions of one task, newest first, optionally limited -->
    <select id="selectByTaskId" resultMap="TaskExecutionResultMap">
        SELECT <include refid="Base_Column_List"/>
        FROM t_dc_task_executions
        WHERE task_id = #{taskId}
        ORDER BY started_at DESC
        <if test="limit != null">
            LIMIT #{limit}
        </if>
    </select>

    <!-- Executions in a given status, newest first -->
    <select id="selectByStatus" parameterType="java.lang.String" resultMap="TaskExecutionResultMap">
        SELECT <include refid="Base_Column_List"/>
        FROM t_dc_task_executions
        WHERE status = #{status}
        ORDER BY started_at DESC
    </select>

    <!-- List executions with optional task/status/date-range filters and
         MySQL-style offset/limit pagination -->
    <select id="selectAll" resultMap="TaskExecutionResultMap">
        SELECT <include refid="Base_Column_List"/>
        FROM t_dc_task_executions
        <where>
            <if test="taskId != null and taskId != ''">
                AND task_id = #{taskId}
            </if>
            <if test="status != null and status != ''">
                AND status = #{status}
            </if>
            <if test="startDate != null">
                AND started_at >= #{startDate}
            </if>
            <if test="endDate != null">
                AND started_at &lt;= #{endDate}
            </if>
        </where>
        ORDER BY started_at DESC
        <if test="offset != null and limit != null">
            LIMIT #{offset}, #{limit}
        </if>
    </select>

    <!-- Count executions matching the same filters as selectAll -->
    <select id="count" resultType="java.lang.Long">
        SELECT COUNT(*)
        FROM t_dc_task_executions
        <where>
            <if test="taskId != null and taskId != ''">
                AND task_id = #{taskId}
            </if>
            <if test="status != null and status != ''">
                AND status = #{status}
            </if>
            <if test="startDate != null">
                AND started_at >= #{startDate}
            </if>
            <if test="endDate != null">
                AND started_at &lt;= #{endDate}
            </if>
        </where>
    </select>

    <!-- Lightweight progress heartbeat while the task is running -->
    <update id="updateProgress">
        UPDATE t_dc_task_executions
        SET status = #{status},
            progress = #{progress},
            records_processed = #{recordsProcessed},
            throughput = #{throughput}
        WHERE id = #{id}
    </update>

    <!-- Finalize an execution: force progress to 100% and record outcome -->
    <update id="completeExecution">
        UPDATE t_dc_task_executions
        SET status = #{status},
            progress = 100.00,
            completed_at = #{completedAt},
            duration_seconds = #{durationSeconds},
            records_success = #{recordsSuccess},
            records_failed = #{recordsFailed},
            data_size_bytes = #{dataSizeBytes},
            error_message = #{errorMessage},
            result = #{result}
        WHERE id = #{id}
    </update>

    <!-- Currently running executions, oldest first -->
    <select id="selectRunningExecutions" resultMap="TaskExecutionResultMap">
        SELECT <include refid="Base_Column_List"/>
        FROM t_dc_task_executions
        WHERE status = 'RUNNING'
        ORDER BY started_at ASC
    </select>

    <!-- Most recent execution of a task -->
    <select id="selectLatestByTaskId" parameterType="java.lang.String" resultMap="TaskExecutionResultMap">
        SELECT <include refid="Base_Column_List"/>
        FROM t_dc_task_executions
        WHERE task_id = #{taskId}
        ORDER BY started_at DESC
        LIMIT 1
    </select>

    <!-- Retention cleanup: drop executions started before the cutoff -->
    <delete id="deleteOldExecutions">
        DELETE FROM t_dc_task_executions
        WHERE started_at &lt; #{beforeDate}
    </delete>

</mapper>
|
||||
92
backend/services/data-evaluation-service/pom.xml
Normal file
@@ -0,0 +1,92 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module: Data Evaluation Service. API code is generated from the
     OpenAPI spec (data-evaluation.yaml) at build time. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
         http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.datamate</groupId>
        <artifactId>data-mate-platform</artifactId>
        <version>1.0.0-SNAPSHOT</version>
        <relativePath>../../pom.xml</relativePath>
    </parent>

    <artifactId>data-evaluation-service</artifactId>
    <name>Data Evaluation Service</name>
    <description>数据评估服务</description>

    <dependencies>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>domain-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- NOTE(review): 'mysql:mysql-connector-java' is the legacy
             coordinate; newer releases ship as 'com.mysql:mysql-connector-j'
             — confirm ${mysql.version} resolves under these coordinates. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-starter-openfeign</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springdoc</groupId>
            <artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
        </dependency>
        <!-- Required by OpenAPI-generated models -->
        <dependency>
            <groupId>org.openapitools</groupId>
            <artifactId>jackson-databind-nullable</artifactId>
        </dependency>
        <dependency>
            <groupId>jakarta.validation</groupId>
            <artifactId>jakarta.validation-api</artifactId>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
            <!-- Generates Spring API interfaces + DTOs from the OpenAPI spec -->
            <plugin>
                <groupId>org.openapitools</groupId>
                <artifactId>openapi-generator-maven-plugin</artifactId>
                <version>6.6.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>generate</goal>
                        </goals>
                        <configuration>
                            <inputSpec>${project.basedir}/../../openapi/specs/data-evaluation.yaml</inputSpec>
                            <generatorName>spring</generatorName>
                            <output>${project.build.directory}/generated-sources/openapi</output>
                            <apiPackage>com.datamate.evaluation.interfaces.api</apiPackage>
                            <modelPackage>com.datamate.evaluation.interfaces.dto</modelPackage>
                            <configOptions>
                                <interfaceOnly>true</interfaceOnly>
                                <useTags>true</useTags>
                                <useSpringBoot3>true</useSpringBoot3>
                                <documentationProvider>springdoc</documentationProvider>
                            </configOptions>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
|
||||
|
||||
|
||||
113
backend/services/data-management-service/pom.xml
Normal file
@@ -0,0 +1,113 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module: Data Management Service. Packaged with an 'exec'
     classifier so the plain JAR can also be consumed as a library. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
         http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.datamate</groupId>
        <artifactId>data-mate-platform</artifactId>
        <version>1.0.0-SNAPSHOT</version>
        <relativePath>../../pom.xml</relativePath>
    </parent>

    <artifactId>data-management-service</artifactId>
    <name>Data Management Service</name>
    <description>数据管理服务</description>

    <dependencies>
        <dependency>
            <groupId>com.datamate</groupId>
            <artifactId>domain-common</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>com.baomidou</groupId>
            <artifactId>mybatis-plus-spring-boot3-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>
        <!-- NOTE(review): legacy coordinates — newer releases use
             'com.mysql:mysql-connector-j'; confirm ${mysql.version}. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-starter-openfeign</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springdoc</groupId>
            <artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
        </dependency>
        <dependency>
            <groupId>org.openapitools</groupId>
            <artifactId>jackson-databind-nullable</artifactId>
        </dependency>
        <dependency>
            <groupId>jakarta.validation</groupId>
            <artifactId>jakarta.validation-api</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.data</groupId>
            <artifactId>spring-data-commons</artifactId>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <arguments>true</arguments>
                    <classifier>exec</classifier>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                    <annotationProcessorPaths>
                        <!-- Order matters: Lombok and its MapStruct binding
                             must precede the MapStruct processor -->
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                            <version>${lombok.version}</version>
                        </path>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok-mapstruct-binding</artifactId>
                            <version>${lombok-mapstruct-binding.version}</version>
                        </path>
                        <path>
                            <groupId>org.mapstruct</groupId>
                            <artifactId>mapstruct-processor</artifactId>
                            <version>${mapstruct.version}</version>
                        </path>
                    </annotationProcessorPaths>
                    <compilerArgs>
                        <arg>-parameters</arg>
                        <arg>-Amapstruct.defaultComponentModel=spring</arg>
                    </compilerArgs>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.datamate.datamanagement;

import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;

/**
 * Data Management Service configuration — multi-source ingestion, metadata,
 * and lineage governance. Enables Feign clients for the infrastructure layer
 * and async execution for background processing.
 */
@Configuration
@EnableFeignClients(basePackages = "com.datamate.datamanagement.infrastructure.client")
@EnableAsync
@ComponentScan(basePackages = {
        "com.datamate.datamanagement",
        "com.datamate.shared"
})
public class DataManagementServiceConfiguration {
    // Configuration-only class: the service is provided as a JAR and wired
    // into a host application through this class.
}
|
||||
@@ -0,0 +1,288 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import com.datamate.datamanagement.interfaces.dto.*;
|
||||
import com.datamate.common.infrastructure.exception.BusinessAssert;
|
||||
import com.datamate.common.interfaces.PagedResponse;
|
||||
import com.datamate.datamanagement.domain.model.dataset.Dataset;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.domain.model.dataset.Tag;
|
||||
import com.datamate.datamanagement.infrastructure.client.CollectionTaskClient;
|
||||
import com.datamate.datamanagement.infrastructure.client.dto.CollectionTaskDetailResponse;
|
||||
import com.datamate.datamanagement.infrastructure.client.dto.LocalCollectionConfig;
|
||||
import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
|
||||
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.*;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 数据集应用服务(对齐 DB schema,使用 UUID 字符串主键)
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@Transactional
|
||||
@RequiredArgsConstructor
|
||||
public class DatasetApplicationService {
|
||||
private final DatasetRepository datasetRepository;
|
||||
private final TagMapper tagMapper;
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
private final CollectionTaskClient collectionTaskClient;
|
||||
private final FileMetadataService fileMetadataService;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
@Value("${dataset.base.path:/dataset}")
|
||||
private String datasetBasePath;
|
||||
|
||||
    /**
     * Create a new dataset.
     * <p>
     * Fails via {@code BusinessAssert} with DATASET_ALREADY_EXISTS when a
     * dataset with the same name already exists. When the request carries a
     * data source id, a file scan is kicked off to register files for the
     * new dataset.
     *
     * @param createDatasetRequest creation payload (name, description, tags,
     *                             optional data source id)
     * @return the persisted dataset
     */
    @Transactional
    public Dataset createDataset(CreateDatasetRequest createDatasetRequest) {
        BusinessAssert.isTrue(datasetRepository.findByName(createDatasetRequest.getName()) == null, DataManagementErrorCode.DATASET_ALREADY_EXISTS);
        // Build the dataset entity from the request
        Dataset dataset = DatasetConverter.INSTANCE.convertToDataset(createDatasetRequest);
        dataset.initCreateParam(datasetBasePath);
        // Resolve tag names to Tag entities, creating any that are missing
        Set<Tag> processedTags = Optional.ofNullable(createDatasetRequest.getTags())
                .filter(CollectionUtils::isNotEmpty)
                .map(this::processTagNames)
                .orElseGet(HashSet::new);
        dataset.setTags(processedTags);
        datasetRepository.save(dataset);

        // TODO: decouple this logic
        // NOTE(review): processDataSourceAsync is annotated @Async but is
        // invoked via 'this', which bypasses the Spring proxy — the call most
        // likely runs synchronously. Confirm, and consider self-injection or
        // moving the async method to a separate bean.
        if (StringUtils.hasText(createDatasetRequest.getDataSource())) {
            // Data source id present: scan files and persist them asynchronously
            processDataSourceAsync(dataset.getId(), createDatasetRequest.getDataSource());
        }
        return dataset;
    }
|
||||
|
||||
public Dataset updateDataset(String datasetId, UpdateDatasetRequest updateDatasetRequest) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
|
||||
if (StringUtils.hasText(updateDatasetRequest.getName())) {
|
||||
dataset.setName(updateDatasetRequest.getName());
|
||||
}
|
||||
if (StringUtils.hasText(updateDatasetRequest.getDescription())) {
|
||||
dataset.setDescription(updateDatasetRequest.getDescription());
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(updateDatasetRequest.getTags())) {
|
||||
dataset.setTags(processTagNames(updateDatasetRequest.getTags()));
|
||||
}
|
||||
if (Objects.nonNull(updateDatasetRequest.getStatus())) {
|
||||
dataset.setStatus(updateDatasetRequest.getStatus());
|
||||
}
|
||||
if (StringUtils.hasText(updateDatasetRequest.getDataSource())) {
|
||||
// 数据源id不为空,使用异步线程进行文件扫盘落库
|
||||
processDataSourceAsync(dataset.getId(), updateDatasetRequest.getDataSource());
|
||||
}
|
||||
datasetRepository.updateById(dataset);
|
||||
return dataset;
|
||||
}
|
||||
|
||||
    /**
     * Delete a dataset by id.
     * NOTE(review): dataset files and tag usage counts are not touched here —
     * confirm cleanup is handled elsewhere (e.g. DB cascade).
     *
     * @param datasetId id of the dataset to remove
     */
    public void deleteDataset(String datasetId) {
        datasetRepository.removeById(datasetId);
    }
|
||||
|
||||
    /**
     * Fetch a dataset by id; fails with DATASET_NOT_FOUND when absent.
     *
     * @param datasetId id of the dataset
     * @return the dataset (never null)
     */
    @Transactional(readOnly = true)
    public Dataset getDataset(String datasetId) {
        Dataset dataset = datasetRepository.getById(datasetId);
        BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
        return dataset;
    }
|
||||
|
||||
/**
|
||||
* 分页查询数据集
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public PagedResponse<DatasetResponse> getDatasets(DatasetPagingQuery query) {
|
||||
IPage<Dataset> page = new Page<>(query.getPage(), query.getSize());
|
||||
page = datasetRepository.findByCriteria(page, query);
|
||||
return PagedResponse.of(DatasetConverter.INSTANCE.convertToResponse(page.getRecords()), page.getCurrent(), page.getTotal(), page.getPages());
|
||||
}
|
||||
|
||||
    /**
     * Resolve tag names into {@code Tag} entities, creating any that do not
     * exist yet and bumping each tag's usage counter.
     *
     * @param tagNames tag names from the request
     * @return resolved tags (deduplicated by Tag equality)
     */
    private Set<Tag> processTagNames(List<String> tagNames) {
        Set<Tag> tags = new HashSet<>();
        for (String tagName : tagNames) {
            Tag tag = tagMapper.findByName(tagName);
            if (tag == null) {
                // Unknown tag: create it with the default color and a fresh UUID id.
                Tag newTag = new Tag(tagName, null, null, "#007bff");
                newTag.setUsageCount(0L);
                newTag.setId(UUID.randomUUID().toString());
                tagMapper.insert(newTag);
                tag = newTag;
            }
            // Bump the usage counter (newly created tags go 0 -> 1) and persist it.
            tag.setUsageCount(tag.getUsageCount() == null ? 1L : tag.getUsageCount() + 1);
            tagMapper.updateUsageCount(tag.getId(), tag.getUsageCount());
            tags.add(tag);
        }
        return tags;
    }
|
||||
|
||||
/**
|
||||
* 获取数据集统计信息
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Map<String, Object> getDatasetStatistics(String datasetId) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
if (dataset == null) {
|
||||
throw new IllegalArgumentException("Dataset not found: " + datasetId);
|
||||
}
|
||||
|
||||
Map<String, Object> statistics = new HashMap<>();
|
||||
|
||||
// 基础统计
|
||||
Long totalFiles = datasetFileRepository.countByDatasetId(datasetId);
|
||||
Long completedFiles = datasetFileRepository.countCompletedByDatasetId(datasetId);
|
||||
Long totalSize = datasetFileRepository.sumSizeByDatasetId(datasetId);
|
||||
|
||||
statistics.put("totalFiles", totalFiles != null ? totalFiles.intValue() : 0);
|
||||
statistics.put("completedFiles", completedFiles != null ? completedFiles.intValue() : 0);
|
||||
statistics.put("totalSize", totalSize != null ? totalSize : 0L);
|
||||
|
||||
// 完成率计算
|
||||
float completionRate = 0.0f;
|
||||
if (totalFiles != null && totalFiles > 0) {
|
||||
completionRate = (completedFiles != null ? completedFiles.floatValue() : 0.0f) / totalFiles.floatValue() * 100.0f;
|
||||
}
|
||||
statistics.put("completionRate", completionRate);
|
||||
|
||||
// 文件类型分布统计
|
||||
Map<String, Integer> fileTypeDistribution = new HashMap<>();
|
||||
List<DatasetFile> allFiles = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
if (allFiles != null) {
|
||||
for (DatasetFile file : allFiles) {
|
||||
String fileType = file.getFileType() != null ? file.getFileType() : "unknown";
|
||||
fileTypeDistribution.put(fileType, fileTypeDistribution.getOrDefault(fileType, 0) + 1);
|
||||
}
|
||||
}
|
||||
statistics.put("fileTypeDistribution", fileTypeDistribution);
|
||||
|
||||
// 状态分布统计
|
||||
Map<String, Integer> statusDistribution = new HashMap<>();
|
||||
if (allFiles != null) {
|
||||
for (DatasetFile file : allFiles) {
|
||||
String status = file.getStatus() != null ? file.getStatus() : "unknown";
|
||||
statusDistribution.put(status, statusDistribution.getOrDefault(status, 0) + 1);
|
||||
}
|
||||
}
|
||||
statistics.put("statusDistribution", statusDistribution);
|
||||
|
||||
return statistics;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有数据集的汇总统计信息
|
||||
*/
|
||||
public AllDatasetStatisticsResponse getAllDatasetStatistics() {
|
||||
return datasetRepository.getAllDatasetStatistics();
|
||||
}
|
||||
|
||||
/**
|
||||
* 异步处理数据源文件扫描
|
||||
*
|
||||
* @param datasetId 数据集ID
|
||||
* @param dataSourceId 数据源ID(归集任务ID)
|
||||
*/
|
||||
@Async
|
||||
public void processDataSourceAsync(String datasetId, String dataSourceId) {
|
||||
try {
|
||||
log.info("开始处理数据源文件扫描,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId);
|
||||
|
||||
// 1. 调用数据归集服务获取任务详情
|
||||
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
|
||||
if (taskDetail == null) {
|
||||
log.error("获取归集任务详情失败,任务ID: {}", dataSourceId);
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("获取到归集任务详情: {}", taskDetail);
|
||||
|
||||
// 2. 解析任务配置
|
||||
LocalCollectionConfig config = parseTaskConfig(taskDetail.getConfig());
|
||||
if (config == null) {
|
||||
log.error("解析任务配置失败,任务ID: {}", dataSourceId);
|
||||
return;
|
||||
}
|
||||
|
||||
// 4. 获取文件路径列表
|
||||
List<String> filePaths = config.getFilePaths();
|
||||
if (CollectionUtils.isEmpty(filePaths)) {
|
||||
log.warn("文件路径列表为空,任务ID: {}", dataSourceId);
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("开始扫描文件,共 {} 个文件路径", filePaths.size());
|
||||
|
||||
// 5. 扫描文件元数据
|
||||
List<DatasetFile> datasetFiles = fileMetadataService.scanFiles(filePaths, datasetId);
|
||||
// 查询数据集中已存在的文件
|
||||
List<DatasetFile> existDatasetFileList = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
Map<String, DatasetFile> existDatasetFilePathMap = existDatasetFileList.stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
|
||||
// 6. 批量插入数据集文件表
|
||||
if (CollectionUtils.isNotEmpty(datasetFiles)) {
|
||||
for (DatasetFile datasetFile : datasetFiles) {
|
||||
if (existDatasetFilePathMap.containsKey(datasetFile.getFilePath())) {
|
||||
DatasetFile existDatasetFile = existDatasetFilePathMap.get(datasetFile.getFilePath());
|
||||
dataset.removeFile(existDatasetFile);
|
||||
existDatasetFile.setFileSize(datasetFile.getFileSize());
|
||||
dataset.addFile(existDatasetFile);
|
||||
datasetFileRepository.updateById(existDatasetFile);
|
||||
} else {
|
||||
dataset.addFile(datasetFile);
|
||||
datasetFileRepository.save(datasetFile);
|
||||
}
|
||||
}
|
||||
log.info("文件元数据写入完成,共写入 {} 条记录", datasetFiles.size());
|
||||
} else {
|
||||
log.warn("未扫描到有效文件");
|
||||
}
|
||||
datasetRepository.updateById(dataset);
|
||||
} catch (Exception e) {
|
||||
log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析任务配置
|
||||
*/
|
||||
private LocalCollectionConfig parseTaskConfig(Map<String, Object> configMap) {
|
||||
try {
|
||||
if (configMap == null || configMap.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return objectMapper.convertValue(configMap, LocalCollectionConfig.class);
|
||||
} catch (Exception e) {
|
||||
log.error("解析任务配置失败", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,306 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.common.domain.model.ChunkUploadPreRequest;
|
||||
import com.datamate.common.domain.model.FileUploadResult;
|
||||
import com.datamate.common.domain.service.FileService;
|
||||
import com.datamate.common.domain.utils.AnalyzerUtils;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
import com.datamate.datamanagement.domain.contants.DatasetConstant;
|
||||
import com.datamate.datamanagement.domain.model.dataset.Dataset;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo;
|
||||
import com.datamate.datamanagement.domain.model.dataset.StatusConstants;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
|
||||
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
|
||||
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
|
||||
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import jakarta.servlet.http.HttpServletResponse;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.ibatis.session.RowBounds;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.UrlResource;
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.PageImpl;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.UUID;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
/**
|
||||
* 数据集文件应用服务
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@Transactional
|
||||
public class DatasetFileApplicationService {
|
||||
|
||||
private final DatasetFileRepository datasetFileRepository;
|
||||
private final DatasetRepository datasetRepository;
|
||||
private final Path fileStorageLocation;
|
||||
private final FileService fileService;
|
||||
|
||||
@Value("${dataset.base.path:/dataset}")
|
||||
private String datasetBasePath;
|
||||
|
||||
@Autowired
|
||||
public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
|
||||
DatasetRepository datasetRepository, FileService fileService,
|
||||
@Value("${app.file.upload-dir:./dataset}") String uploadDir) {
|
||||
this.datasetFileRepository = datasetFileRepository;
|
||||
this.datasetRepository = datasetRepository;
|
||||
this.fileStorageLocation = Paths.get(uploadDir).toAbsolutePath().normalize();
|
||||
this.fileService = fileService;
|
||||
try {
|
||||
Files.createDirectories(this.fileStorageLocation);
|
||||
} catch (Exception ex) {
|
||||
throw new RuntimeException("Could not create the directory where the uploaded files will be stored.", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 上传文件到数据集
|
||||
*/
|
||||
public DatasetFile uploadFile(String datasetId, MultipartFile file) {
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
if (dataset == null) {
|
||||
throw new IllegalArgumentException("Dataset not found: " + datasetId);
|
||||
}
|
||||
|
||||
String originalFilename = file.getOriginalFilename();
|
||||
String fileName = originalFilename != null ? originalFilename : "file";
|
||||
try {
|
||||
// 保存文件到磁盘
|
||||
Path targetLocation = this.fileStorageLocation.resolve(datasetId + File.separator + fileName);
|
||||
// 确保目标目录存在
|
||||
Files.createDirectories(targetLocation);
|
||||
Files.copy(file.getInputStream(), targetLocation, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// 创建文件实体(UUID 主键)
|
||||
DatasetFile datasetFile = new DatasetFile();
|
||||
datasetFile.setId(UUID.randomUUID().toString());
|
||||
datasetFile.setDatasetId(datasetId);
|
||||
datasetFile.setFileName(fileName);
|
||||
datasetFile.setFilePath(targetLocation.toString());
|
||||
datasetFile.setFileType(getFileExtension(originalFilename));
|
||||
datasetFile.setFileSize(file.getSize());
|
||||
datasetFile.setUploadTime(LocalDateTime.now());
|
||||
datasetFile.setStatus(StatusConstants.DatasetFileStatuses.COMPLETED);
|
||||
|
||||
// 保存到数据库
|
||||
datasetFileRepository.save(datasetFile);
|
||||
|
||||
// 更新数据集统计
|
||||
dataset.addFile(datasetFile);
|
||||
datasetRepository.updateById(dataset);
|
||||
|
||||
return datasetFileRepository.findByDatasetIdAndFileName(datasetId, fileName);
|
||||
|
||||
} catch (IOException ex) {
|
||||
log.error("Could not store file {}", fileName, ex);
|
||||
throw new RuntimeException("Could not store file " + fileName, ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取数据集文件列表
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Page<DatasetFile> getDatasetFiles(String datasetId, String fileType,
|
||||
String status, Pageable pageable) {
|
||||
RowBounds bounds = new RowBounds(pageable.getPageNumber() * pageable.getPageSize(), pageable.getPageSize());
|
||||
List<DatasetFile> content = datasetFileRepository.findByCriteria(datasetId, fileType, status, bounds);
|
||||
long total = content.size() < pageable.getPageSize() && pageable.getPageNumber() == 0 ? content.size() : content.size() + (long) pageable.getPageNumber() * pageable.getPageSize();
|
||||
return new PageImpl<>(content, pageable, total);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件详情
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public DatasetFile getDatasetFile(String datasetId, String fileId) {
|
||||
DatasetFile file = datasetFileRepository.getById(fileId);
|
||||
if (file == null) {
|
||||
throw new IllegalArgumentException("File not found: " + fileId);
|
||||
}
|
||||
if (!file.getDatasetId().equals(datasetId)) {
|
||||
throw new IllegalArgumentException("File does not belong to the specified dataset");
|
||||
}
|
||||
return file;
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除文件
|
||||
*/
|
||||
public void deleteDatasetFile(String datasetId, String fileId) {
|
||||
DatasetFile file = getDatasetFile(datasetId, fileId);
|
||||
try {
|
||||
Path filePath = Paths.get(file.getFilePath());
|
||||
Files.deleteIfExists(filePath);
|
||||
} catch (IOException ex) {
|
||||
// ignore
|
||||
}
|
||||
datasetFileRepository.removeById(fileId);
|
||||
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
// 简单刷新统计(精确处理可从DB统计)
|
||||
dataset.setFileCount(Math.max(0, dataset.getFileCount() - 1));
|
||||
dataset.setSizeBytes(Math.max(0, dataset.getSizeBytes() - (file.getFileSize() != null ? file.getFileSize() : 0)));
|
||||
datasetRepository.updateById(dataset);
|
||||
}
|
||||
|
||||
/**
|
||||
* 下载文件
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Resource downloadFile(String datasetId, String fileId) {
|
||||
DatasetFile file = getDatasetFile(datasetId, fileId);
|
||||
try {
|
||||
Path filePath = Paths.get(file.getFilePath()).normalize();
|
||||
Resource resource = new UrlResource(filePath.toUri());
|
||||
if (resource.exists()) {
|
||||
return resource;
|
||||
} else {
|
||||
throw new RuntimeException("File not found: " + file.getFileName());
|
||||
}
|
||||
} catch (MalformedURLException ex) {
|
||||
throw new RuntimeException("File not found: " + file.getFileName(), ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 下载文件
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) {
|
||||
List<DatasetFile> allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId);
|
||||
response.setContentType("application/zip");
|
||||
String zipName = String.format("dataset_%s.zip",
|
||||
LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")));
|
||||
response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName);
|
||||
try (ZipOutputStream zos = new ZipOutputStream(response.getOutputStream())) {
|
||||
for (DatasetFile file : allByDatasetId) {
|
||||
addToZipFile(file, zos);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to download files in batches.", e);
|
||||
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
private void addToZipFile(DatasetFile file, ZipOutputStream zos) throws IOException {
|
||||
if (file.getFilePath() == null || !Files.exists(Paths.get(file.getFilePath()))) {
|
||||
log.warn("The file hasn't been found on filesystem, id: {}", file.getId());
|
||||
return;
|
||||
}
|
||||
try (InputStream fis = Files.newInputStream(Paths.get(file.getFilePath()));
|
||||
BufferedInputStream bis = new BufferedInputStream(fis)) {
|
||||
ZipEntry zipEntry = new ZipEntry(file.getFileName());
|
||||
zos.putNextEntry(zipEntry);
|
||||
byte[] buffer = new byte[8192];
|
||||
int length;
|
||||
while ((length = bis.read(buffer)) >= 0) {
|
||||
zos.write(buffer, 0, length);
|
||||
}
|
||||
zos.closeEntry();
|
||||
}
|
||||
}
|
||||
|
||||
private String getFileExtension(String fileName) {
|
||||
if (fileName == null || fileName.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
int lastDotIndex = fileName.lastIndexOf(".");
|
||||
if (lastDotIndex == -1) {
|
||||
return null;
|
||||
}
|
||||
return fileName.substring(lastDotIndex + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* 预上传
|
||||
*
|
||||
* @param chunkUploadRequest 上传请求
|
||||
* @param datasetId 数据集id
|
||||
* @return 请求id
|
||||
*/
|
||||
@Transactional
|
||||
public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) {
|
||||
ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build();
|
||||
request.setUploadPath(datasetBasePath + File.separator + datasetId);
|
||||
request.setTotalFileNum(chunkUploadRequest.getTotalFileNum());
|
||||
request.setServiceId(DatasetConstant.SERVICE_ID);
|
||||
DatasetFileUploadCheckInfo checkInfo = new DatasetFileUploadCheckInfo();
|
||||
checkInfo.setDatasetId(datasetId);
|
||||
checkInfo.setHasArchive(chunkUploadRequest.isHasArchive());
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
String checkInfoJson = objectMapper.writeValueAsString(checkInfo);
|
||||
request.setCheckInfo(checkInfoJson);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new IllegalArgumentException("Failed to serialize checkInfo to JSON", e);
|
||||
}
|
||||
return fileService.preUpload(request);
|
||||
}
|
||||
|
||||
/**
|
||||
* 切片上传
|
||||
*
|
||||
* @param uploadFileRequest 上传请求
|
||||
*/
|
||||
@Transactional
|
||||
public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) {
|
||||
FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest));
|
||||
saveFileInfoToDb(uploadResult, uploadFileRequest, datasetId);
|
||||
if (uploadResult.isAllFilesUploaded()) {
|
||||
// 解析文件,后续依据需求看是否添加校验文件元数据和解析半结构化文件的逻辑,
|
||||
}
|
||||
}
|
||||
|
||||
private void saveFileInfoToDb(FileUploadResult fileUploadResult, UploadFileRequest uploadFile, String datasetId) {
|
||||
if (Objects.isNull(fileUploadResult.getSavedFile())) {
|
||||
// 文件切片上传没有完成
|
||||
return;
|
||||
}
|
||||
Dataset dataset = datasetRepository.getById(datasetId);
|
||||
File savedFile = fileUploadResult.getSavedFile();
|
||||
LocalDateTime currentTime = LocalDateTime.now();
|
||||
DatasetFile datasetFile = DatasetFile.builder()
|
||||
.id(UUID.randomUUID().toString())
|
||||
.datasetId(datasetId)
|
||||
.fileSize(savedFile.length())
|
||||
.uploadTime(currentTime)
|
||||
.lastAccessTime(currentTime)
|
||||
.fileName(uploadFile.getFileName())
|
||||
.filePath(savedFile.getPath())
|
||||
.fileType(AnalyzerUtils.getExtension(uploadFile.getFileName()))
|
||||
.build();
|
||||
|
||||
datasetFileRepository.save(datasetFile);
|
||||
dataset.addFile(datasetFile);
|
||||
datasetRepository.updateById(dataset);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* 文件元数据扫描服务
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class FileMetadataService {
|
||||
|
||||
/**
|
||||
* 扫描文件路径列表,提取文件元数据
|
||||
* @param datasetId 数据集ID
|
||||
* @return 数据集文件列表
|
||||
*/
|
||||
public List<DatasetFile> scanFiles(List<String> filePaths, String datasetId) {
|
||||
List<DatasetFile> datasetFiles = new ArrayList<>();
|
||||
|
||||
if (filePaths == null || filePaths.isEmpty()) {
|
||||
log.warn("文件路径列表为空,跳过扫描");
|
||||
return datasetFiles;
|
||||
}
|
||||
|
||||
for (String filePath : filePaths) {
|
||||
try {
|
||||
Path path = Paths.get(filePath);
|
||||
|
||||
if (!Files.exists(path)) {
|
||||
log.warn("路径不存在: {}", filePath);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Files.isDirectory(path)) {
|
||||
scanDirectory(datasetId, filePath, path, datasetFiles);
|
||||
} else {
|
||||
// 如果是文件,直接处理
|
||||
DatasetFile datasetFile = extractFileMetadata(filePath, datasetId);
|
||||
if (datasetFile != null) {
|
||||
datasetFiles.add(datasetFile);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("扫描路径失败: {}, 错误: {}", filePath, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
log.info("文件扫描完成,共扫描 {} 个文件", datasetFiles.size());
|
||||
return datasetFiles;
|
||||
}
|
||||
|
||||
private void scanDirectory(String datasetId, String filePath, Path path,
|
||||
List<DatasetFile> datasetFiles) throws IOException {
|
||||
// 如果是目录,扫描该目录下的所有文件(非递归)
|
||||
List<Path> filesInDir = Files.list(path)
|
||||
.filter(Files::isRegularFile)
|
||||
.toList();
|
||||
|
||||
for (Path file : filesInDir) {
|
||||
try {
|
||||
DatasetFile datasetFile = extractFileMetadata(file.toString(), datasetId);
|
||||
if (datasetFile != null) {
|
||||
datasetFiles.add(datasetFile);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("处理目录中的文件失败: {}, 错误: {}", file, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
log.info("已扫描目录 {} 下的 {} 个文件", filePath, filesInDir.size());
|
||||
}
|
||||
/**
|
||||
* @param filePath 文件路径
|
||||
* @param datasetId 数据集ID
|
||||
* @return 数据集文件对象
|
||||
*/
|
||||
private DatasetFile extractFileMetadata(String filePath, String datasetId) throws IOException {
|
||||
Path path = Paths.get(filePath);
|
||||
|
||||
if (!Files.exists(path)) {
|
||||
log.warn("文件不存在: {}", filePath);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!Files.isRegularFile(path)) {
|
||||
log.warn("路径不是文件: {}", filePath);
|
||||
return null;
|
||||
}
|
||||
|
||||
String fileName = path.getFileName().toString();
|
||||
long fileSize = Files.size(path);
|
||||
String fileType = getFileExtension(fileName);
|
||||
|
||||
return DatasetFile.builder()
|
||||
.id(UUID.randomUUID().toString())
|
||||
.datasetId(datasetId)
|
||||
.fileName(fileName)
|
||||
.filePath(filePath)
|
||||
.fileSize(fileSize)
|
||||
.fileType(fileType)
|
||||
.uploadTime(LocalDateTime.now())
|
||||
.lastAccessTime(LocalDateTime.now())
|
||||
.status("UPLOADED")
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件扩展名
|
||||
*/
|
||||
private String getFileExtension(String fileName) {
|
||||
int lastDotIndex = fileName.lastIndexOf('.');
|
||||
if (lastDotIndex > 0 && lastDotIndex < fileName.length() - 1) {
|
||||
return fileName.substring(lastDotIndex + 1).toLowerCase();
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
package com.datamate.datamanagement.application;
|
||||
|
||||
import com.datamate.datamanagement.domain.model.dataset.Tag;
|
||||
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* 标签应用服务(UUID 主键)
|
||||
*/
|
||||
@Service
|
||||
@Transactional
|
||||
public class TagApplicationService {
|
||||
|
||||
private final TagMapper tagMapper;
|
||||
|
||||
@Autowired
|
||||
public TagApplicationService(TagMapper tagMapper) {
|
||||
this.tagMapper = tagMapper;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建标签
|
||||
*/
|
||||
public Tag createTag(String name, String color, String description) {
|
||||
// 检查名称是否已存在
|
||||
if (tagMapper.findByName(name) != null) {
|
||||
throw new IllegalArgumentException("Tag with name '" + name + "' already exists");
|
||||
}
|
||||
|
||||
Tag tag = new Tag(name, description, null, color);
|
||||
tag.setUsageCount(0L);
|
||||
tag.setId(UUID.randomUUID().toString());
|
||||
tagMapper.insert(tag);
|
||||
return tagMapper.findById(tag.getId());
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新标签
|
||||
*
|
||||
* @param tag 待更新的标签实体,必须包含有效的 ID
|
||||
* @return 更新结果
|
||||
*/
|
||||
@Transactional
|
||||
public Tag updateTag(Tag tag) {
|
||||
Tag existingTag = tagMapper.findById(tag.getId());
|
||||
if (existingTag == null) {
|
||||
throw new IllegalArgumentException("Tag not found: " + tag.getId());
|
||||
}
|
||||
existingTag.setName(tag.getName());
|
||||
existingTag.setColor(tag.getColor());
|
||||
existingTag.setDescription(tag.getDescription());
|
||||
tagMapper.update(existingTag);
|
||||
return tagMapper.findById(existingTag.getId());
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void deleteTag(List<String> tagIds) {
|
||||
List<Tag> tags = tagMapper.findByIdIn(tagIds);
|
||||
if (tags.stream().anyMatch(tag -> tag.getUsageCount() > 0)) {
|
||||
throw new IllegalArgumentException("Cannot delete tags that are in use");
|
||||
}
|
||||
if (CollectionUtils.isEmpty(tags)) {
|
||||
return;
|
||||
}
|
||||
tagMapper.deleteTagsById(tags.stream().map(Tag::getId).toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有标签
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public List<Tag> getAllTags() {
|
||||
return tagMapper.findAllByOrderByUsageCountDesc();
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据关键词搜索标签
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public List<Tag> searchTags(String keyword) {
|
||||
if (keyword == null || keyword.trim().isEmpty()) {
|
||||
return getAllTags();
|
||||
}
|
||||
return tagMapper.findByKeyword(keyword.trim());
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取标签详情
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Tag getTag(String tagId) {
|
||||
Tag tag = tagMapper.findById(tagId);
|
||||
if (tag == null) {
|
||||
throw new IllegalArgumentException("Tag not found: " + tagId);
|
||||
}
|
||||
return tag;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据名称获取标签
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public Tag getTagByName(String name) {
|
||||
Tag tag = tagMapper.findByName(name);
|
||||
if (tag == null) {
|
||||
throw new IllegalArgumentException("Tag not found: " + name);
|
||||
}
|
||||
return tag;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.datamate.datamanagement.common.enums;
|
||||
|
||||
/**
 * Dataset lifecycle status.
 *
 * <p>A dataset moves through the following states:
 * <p>DRAFT: the dataset is being created and is not yet complete.
 * <p>ACTIVE: the dataset is live; it can be queried, used, updated and deleted.
 * <p>PROCESSING: the dataset is being processed; once processing finishes it becomes ACTIVE.
 * <p>ARCHIVED: the dataset is archived; its files cannot be updated, but it can be unlocked back to ACTIVE.
 * <p>PUBLISHED: the dataset is published for external use; external users may query and consume it.
 * <p>DEPRECATED: the dataset is deprecated and should no longer be used.
 *
 * @author dallas
 * @since 2025-10-17
 */
public enum DatasetStatusType {
    /**
     * Draft — still being created.
     */
    DRAFT,
    /**
     * Active — live and fully usable.
     */
    ACTIVE,
    /**
     * Processing — transitions to ACTIVE when done.
     */
    PROCESSING,
    /**
     * Archived — read-only until unlocked.
     */
    ARCHIVED,
    /**
     * Published — available to external consumers.
     */
    PUBLISHED,
    /**
     * Deprecated — use is discouraged.
     */
    DEPRECATED
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package com.datamate.datamanagement.common.enums;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
* 数据集类型值对象
|
||||
*
|
||||
* @author DataMate
|
||||
* @since 2025-10-15
|
||||
*/
|
||||
public enum DatasetType {
|
||||
TEXT("text", "文本数据集"),
|
||||
IMAGE("image", "图像数据集"),
|
||||
AUDIO("audio", "音频数据集"),
|
||||
VIDEO("video", "视频数据集"),
|
||||
OTHER("other", "其他数据集");
|
||||
|
||||
@Getter
|
||||
private final String code;
|
||||
|
||||
@Getter
|
||||
private final String description;
|
||||
|
||||
DatasetType(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.datamate.datamanagement.domain.contants;
|
||||
|
||||
/**
 * Dataset-module constants.
 */
public interface DatasetConstant {
    /**
     * Service identifier used when registering uploads with the shared file service.
     */
    String SERVICE_ID = "DATA_MANAGEMENT";
}
|
||||
@@ -0,0 +1,146 @@
|
||||
package com.datamate.datamanagement.domain.model.dataset;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableField;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
|
||||
import com.datamate.common.domain.model.base.BaseEntity;
|
||||
import com.datamate.datamanagement.common.enums.DatasetStatusType;
|
||||
import com.datamate.datamanagement.common.enums.DatasetType;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 数据集实体(与数据库表 t_dm_datasets 对齐)
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@TableName(value = "t_dm_datasets", autoResultMap = true)
|
||||
public class Dataset extends BaseEntity<String> {
|
||||
/**
|
||||
* 数据集名称
|
||||
*/
|
||||
private String name;
|
||||
/**
|
||||
* 数据集描述
|
||||
*/
|
||||
private String description;
|
||||
/**
|
||||
* 数据集类型
|
||||
*/
|
||||
private DatasetType datasetType;
|
||||
/**
|
||||
* 数据集分类
|
||||
*/
|
||||
private String category;
|
||||
/**
|
||||
* 数据集路径
|
||||
*/
|
||||
private String path;
|
||||
/**
|
||||
* 数据集格式
|
||||
*/
|
||||
private String format;
|
||||
/**
|
||||
* 数据集模式信息,JSON格式, 用于解析当前数据集的文件结构
|
||||
*/
|
||||
private String schemaInfo;
|
||||
/**
|
||||
* 数据集大小(字节)
|
||||
*/
|
||||
private Long sizeBytes = 0L;
|
||||
/**
|
||||
* 文件数量
|
||||
*/
|
||||
private Long fileCount = 0L;
|
||||
/**
|
||||
* 记录数量
|
||||
*/
|
||||
private Long recordCount = 0L;
|
||||
/**
|
||||
* 数据集保留天数
|
||||
*/
|
||||
private Integer retentionDays = 0;
|
||||
/**
|
||||
* 标签列表, JSON格式
|
||||
*/
|
||||
@TableField(typeHandler = JacksonTypeHandler.class)
|
||||
private Collection<Tag> tags = new HashSet<>();
|
||||
/**
|
||||
* 额外元数据,JSON格式
|
||||
*/
|
||||
private String metadata;
|
||||
/**
|
||||
* 数据集状态
|
||||
*/
|
||||
private DatasetStatusType status;
|
||||
/**
|
||||
* 是否为公共数据集
|
||||
*/
|
||||
private Boolean isPublic = false;
|
||||
/**
|
||||
* 是否为精选数据集
|
||||
*/
|
||||
private Boolean isFeatured = false;
|
||||
/**
|
||||
* 数据集版本号
|
||||
*/
|
||||
private Long version = 0L;
|
||||
|
||||
@TableField(exist = false)
|
||||
private List<DatasetFile> files = new ArrayList<>();
|
||||
|
||||
public Dataset() {
|
||||
}
|
||||
|
||||
public Dataset(String name, String description, DatasetType datasetType, String category, String path,
|
||||
String format, DatasetStatusType status, String createdBy) {
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
this.datasetType = datasetType;
|
||||
this.category = category;
|
||||
this.path = path;
|
||||
this.format = format;
|
||||
this.status = status;
|
||||
this.createdBy = createdBy;
|
||||
this.createdAt = LocalDateTime.now();
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
|
||||
public void initCreateParam(String datasetBasePath) {
|
||||
this.id = UUID.randomUUID().toString();
|
||||
this.path = datasetBasePath + File.separator + this.id;
|
||||
this.status = DatasetStatusType.DRAFT;
|
||||
}
|
||||
|
||||
public void updateBasicInfo(String name, String description, String category) {
|
||||
if (name != null && !name.isEmpty()) this.name = name;
|
||||
if (description != null) this.description = description;
|
||||
if (category != null) this.category = category;
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
|
||||
public void updateStatus(DatasetStatusType status, String updatedBy) {
|
||||
this.status = status;
|
||||
this.updatedBy = updatedBy;
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
|
||||
public void addFile(DatasetFile file) {
|
||||
this.files.add(file);
|
||||
this.fileCount = this.fileCount + 1;
|
||||
this.sizeBytes = this.sizeBytes + (file.getFileSize() != null ? file.getFileSize() : 0L);
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
|
||||
public void removeFile(DatasetFile file) {
|
||||
if (this.files.remove(file)) {
|
||||
this.fileCount = Math.max(0, this.fileCount - 1);
|
||||
this.sizeBytes = Math.max(0, this.sizeBytes - (file.getFileSize() != null ? file.getFileSize() : 0L));
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package com.datamate.datamanagement.domain.model.dataset;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.*;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Dataset file entity, mapped to table {@code t_dm_dataset_files}.
 *
 * <p>NOTE(review): this class has no equals/hashCode override, so collection
 * membership (e.g. {@code Dataset.files.remove}) falls back to reference
 * identity — confirm that is intended before relying on it.
 */
@Getter
@Setter
@Builder
@NoArgsConstructor
@AllArgsConstructor
@TableName("t_dm_dataset_files")
public class DatasetFile {
    @TableId
    private String id; // UUID
    private String datasetId; // UUID of the owning dataset
    private String fileName;
    private String filePath;
    private String fileType; // JPG/PNG/DCM/TXT
    private Long fileSize; // bytes
    private String checkSum;
    private List<String> tags; // no type handler declared — presumably mapped elsewhere; TODO confirm persistence
    private String metadata;
    private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
    private LocalDateTime uploadTime;
    private LocalDateTime lastAccessTime;
    private LocalDateTime createdAt;
    private LocalDateTime updatedAt;
}
|
||||