You've already forked DataMate
refactor: modify data collection to python implementation (#214)
* feature: LabelStudio jumps without login * refactor: modify data collection to python implementation * refactor: modify data collection to python implementation * refactor: modify data collection to python implementation * refactor: modify data collection to python implementation * refactor: modify data collection to python implementation * refactor: modify data collection to python implementation * fix: remove terrabase dependency * feature: add the collection task executions page and the collection template page * fix: fix the collection task creation * fix: fix the collection task creation
This commit is contained in:
@@ -27,25 +27,6 @@
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-gateway</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.terrabase</groupId>
|
||||
<artifactId>enterprise-impl-commercial</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>spring-web</artifactId>
|
||||
<groupId>org.springframework</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>spring-boot-starter-logging</artifactId>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- Log4j2 API -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
|
||||
@@ -4,9 +4,7 @@ import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.cloud.gateway.route.RouteLocator;
|
||||
import org.springframework.cloud.gateway.route.builder.RouteLocatorBuilder;
|
||||
import org.springframework.cloud.openfeign.EnableFeignClients;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
|
||||
/**
|
||||
* API Gateway & Auth Service Application
|
||||
@@ -14,8 +12,6 @@ import org.springframework.context.annotation.ComponentScan;
|
||||
* 提供路由、鉴权、限流等功能
|
||||
*/
|
||||
@SpringBootApplication
|
||||
@ComponentScan(basePackages = {"com.datamate.gateway", "com.terrabase"})
|
||||
@EnableFeignClients(basePackages = {"com.terrabase"})
|
||||
public class ApiGatewayApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
@@ -37,6 +33,10 @@ public class ApiGatewayApplication {
|
||||
.route("data-evaluation", r -> r.path("/api/evaluation/**")
|
||||
.uri("http://datamate-backend-python:18000"))
|
||||
|
||||
// 数据归集服务路由
|
||||
.route("data-collection", r -> r.path("/api/data-collection/**")
|
||||
.uri("http://datamate-backend-python:18000"))
|
||||
|
||||
.route("deer-flow-frontend", r -> r.path("/chat/**")
|
||||
.uri("http://deer-flow-frontend:3000"))
|
||||
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
package com.datamate.gateway.filter;
|
||||
|
||||
import com.terrabase.enterprise.api.UserManagementService;
|
||||
import com.terrabase.enterprise.api.dto.LoginUserDto;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.cloud.gateway.filter.GatewayFilterChain;
|
||||
import org.springframework.cloud.gateway.filter.GlobalFilter;
|
||||
@@ -21,16 +18,13 @@ public class UserContextFilter implements GlobalFilter {
|
||||
@Value("${commercial.switch:false}")
|
||||
private boolean isCommercial;
|
||||
|
||||
@Autowired
|
||||
private UserManagementService userManagementService;
|
||||
|
||||
@Override
|
||||
public Mono<Void> filter(ServerWebExchange exchange, GatewayFilterChain chain) {
|
||||
if (!isCommercial) {
|
||||
return chain.filter(exchange);
|
||||
}
|
||||
try {
|
||||
LoginUserDto loginUserDto = userManagementService.getCurrentUserInfo().getData().getFirst();
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("get current user info error", e);
|
||||
return chain.filter(exchange);
|
||||
|
||||
@@ -1,229 +0,0 @@
|
||||
# 数据归集服务 (Data Collection Service)
|
||||
|
||||
基于DataX的数据归集和同步服务,提供多数据源之间的数据同步功能。
|
||||
|
||||
## 功能特性
|
||||
|
||||
- 🔗 **多数据源支持**: 支持MySQL、PostgreSQL、Oracle、SQL Server等主流数据库
|
||||
- 📊 **任务管理**: 创建、配置、执行和监控数据同步任务
|
||||
- ⏰ **定时调度**: 支持Cron表达式的定时任务
|
||||
- 📈 **实时监控**: 任务执行进度、状态和性能指标监控
|
||||
- 📝 **执行日志**: 详细的任务执行日志记录
|
||||
- 🔌 **插件化**: DataX Reader/Writer插件化集成
|
||||
|
||||
## 技术架构
|
||||
|
||||
- **框架**: Spring Boot 3.x
|
||||
- **数据库**: MySQL + MyBatis
|
||||
- **同步引擎**: DataX
|
||||
- **API**: OpenAPI 3.0 自动生成
|
||||
- **架构模式**: DDD (领域驱动设计)
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
src/main/java/com/datamate/collection/
|
||||
├── DataCollectionApplication.java # 应用启动类
|
||||
├── domain/ # 领域层
|
||||
│ ├── model/ # 领域模型
|
||||
│ │ ├── DataSource.java # 数据源实体
|
||||
│ │ ├── CollectionTask.java # 归集任务实体
|
||||
│ │ ├── TaskExecution.java # 任务执行记录
|
||||
│ │ └── ExecutionLog.java # 执行日志
|
||||
│ └── service/ # 领域服务
|
||||
│ ├── DataSourceService.java
|
||||
│ ├── CollectionTaskService.java
|
||||
│ ├── TaskExecutionService.java
|
||||
│ └── impl/ # 服务实现
|
||||
├── infrastructure/ # 基础设施层
|
||||
│ ├── config/ # 配置类
|
||||
│ ├── datax/ # DataX执行引擎
|
||||
│ │ └── DataXExecutionEngine.java
|
||||
│ └── persistence/ # 持久化
|
||||
│ ├── mapper/ # MyBatis Mapper
|
||||
│ └── typehandler/ # 类型处理器
|
||||
└── interfaces/ # 接口层
|
||||
├── api/ # OpenAPI生成的接口
|
||||
├── dto/ # OpenAPI生成的DTO
|
||||
└── rest/ # REST控制器
|
||||
├── DataSourceController.java
|
||||
├── CollectionTaskController.java
|
||||
├── TaskExecutionController.java
|
||||
└── exception/ # 异常处理
|
||||
|
||||
src/main/resources/
|
||||
├── mappers/ # MyBatis XML映射文件
|
||||
├── application.properties # 应用配置
|
||||
└── ...
|
||||
```
|
||||
|
||||
## 环境要求
|
||||
|
||||
- Java 17+
|
||||
- Maven 3.6+
|
||||
- MySQL 8.0+
|
||||
- DataX 3.0+
|
||||
- Redis (可选,用于缓存)
|
||||
|
||||
## 配置说明
|
||||
|
||||
### 应用配置 (application.properties)
|
||||
|
||||
```properties
|
||||
# 服务端口
|
||||
server.port=8090
|
||||
|
||||
# 数据库配置
|
||||
spring.datasource.url=jdbc:mysql://localhost:3306/knowledge_base
|
||||
spring.datasource.username=root
|
||||
spring.datasource.password=123456
|
||||
|
||||
# DataX配置
|
||||
datax.home=/runtime/datax
|
||||
datax.python.path=/runtime/datax/bin/datax.py
|
||||
datax.job.timeout=7200
|
||||
datax.job.memory=2g
|
||||
```
|
||||
|
||||
### DataX配置
|
||||
|
||||
确保DataX已正确安装并配置:
|
||||
|
||||
1. 下载DataX到 `/runtime/datax` 目录
|
||||
2. 配置相关Reader/Writer插件
|
||||
3. 确保Python环境可用
|
||||
|
||||
## 数据库初始化
|
||||
|
||||
执行数据库初始化脚本:
|
||||
|
||||
```bash
|
||||
mysql -u root -p knowledge_base < scripts/db/data-collection-init.sql
|
||||
```
|
||||
|
||||
## 构建和运行
|
||||
|
||||
### 1. 编译项目
|
||||
|
||||
```bash
|
||||
cd backend/services/data-collection-service
|
||||
mvn clean compile
|
||||
```
|
||||
|
||||
这将触发OpenAPI代码生成。
|
||||
|
||||
### 2. 打包
|
||||
|
||||
```bash
|
||||
mvn clean package -DskipTests
|
||||
```
|
||||
|
||||
### 3. 运行
|
||||
|
||||
作为独立服务运行:
|
||||
```bash
|
||||
java -jar target/data-collection-service-1.0.0-SNAPSHOT.jar
|
||||
```
|
||||
|
||||
或通过main-application统一启动:
|
||||
```bash
|
||||
cd backend/services/main-application
|
||||
mvn spring-boot:run
|
||||
```
|
||||
|
||||
## API文档
|
||||
|
||||
服务启动后,可通过以下地址访问API文档:
|
||||
|
||||
- Swagger UI: http://localhost:8090/swagger-ui.html
|
||||
- OpenAPI JSON: http://localhost:8090/v3/api-docs
|
||||
|
||||
## 主要API端点
|
||||
|
||||
### 数据源管理
|
||||
|
||||
- `GET /api/v1/collection/datasources` - 获取数据源列表
|
||||
- `POST /api/v1/collection/datasources` - 创建数据源
|
||||
- `GET /api/v1/collection/datasources/{id}` - 获取数据源详情
|
||||
- `PUT /api/v1/collection/datasources/{id}` - 更新数据源
|
||||
- `DELETE /api/v1/collection/datasources/{id}` - 删除数据源
|
||||
- `POST /api/v1/collection/datasources/{id}/test` - 测试连接
|
||||
|
||||
### 归集任务管理
|
||||
|
||||
- `GET /api/v1/collection/tasks` - 获取任务列表
|
||||
- `POST /api/v1/collection/tasks` - 创建任务
|
||||
- `GET /api/v1/collection/tasks/{id}` - 获取任务详情
|
||||
- `PUT /api/v1/collection/tasks/{id}` - 更新任务
|
||||
- `DELETE /api/v1/collection/tasks/{id}` - 删除任务
|
||||
|
||||
### 任务执行管理
|
||||
|
||||
- `POST /api/v1/collection/tasks/{id}/execute` - 执行任务
|
||||
- `POST /api/v1/collection/tasks/{id}/stop` - 停止任务
|
||||
- `GET /api/v1/collection/executions` - 获取执行历史
|
||||
- `GET /api/v1/collection/executions/{executionId}` - 获取执行详情
|
||||
- `GET /api/v1/collection/executions/{executionId}/logs` - 获取执行日志
|
||||
|
||||
### 监控统计
|
||||
|
||||
- `GET /api/v1/collection/monitor/statistics` - 获取统计信息
|
||||
|
||||
## 开发指南
|
||||
|
||||
### 添加新的数据源类型
|
||||
|
||||
1. 在 `DataSource.DataSourceType` 枚举中添加新类型
|
||||
2. 在 `DataXExecutionEngine` 中添加对应的Reader/Writer映射
|
||||
3. 更新数据库表结构和初始化数据
|
||||
|
||||
### 自定义DataX插件
|
||||
|
||||
1. 将插件放置在 `/runtime/datax/plugin` 目录下
|
||||
2. 在 `DataXExecutionEngine` 中配置插件映射关系
|
||||
3. 根据插件要求调整配置模板
|
||||
|
||||
### 扩展监控指标
|
||||
|
||||
1. 在 `StatisticsService` 中添加新的统计逻辑
|
||||
2. 更新 `CollectionStatistics` DTO
|
||||
3. 在数据库中添加相应的统计表或字段
|
||||
|
||||
## 故障排查
|
||||
|
||||
### 常见问题
|
||||
|
||||
1. **DataX执行失败**
|
||||
- 检查DataX安装路径和Python环境
|
||||
- 确认数据源连接配置正确
|
||||
- 查看执行日志获取详细错误信息
|
||||
|
||||
2. **数据库连接失败**
|
||||
- 检查数据库配置和网络连通性
|
||||
- 确认数据库用户权限
|
||||
|
||||
3. **API调用失败**
|
||||
- 检查请求参数格式
|
||||
- 查看应用日志获取详细错误信息
|
||||
|
||||
### 日志查看
|
||||
|
||||
```bash
|
||||
# 应用日志
|
||||
tail -f logs/data-collection-service.log
|
||||
|
||||
# 任务执行日志
|
||||
curl http://localhost:8090/api/v1/collection/executions/{executionId}/logs
|
||||
```
|
||||
|
||||
## 贡献指南
|
||||
|
||||
1. Fork项目
|
||||
2. 创建特性分支: `git checkout -b feature/new-feature`
|
||||
3. 提交更改: `git commit -am 'Add new feature'`
|
||||
4. 推送分支: `git push origin feature/new-feature`
|
||||
5. 提交Pull Request
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT License
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 79 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 52 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 67 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 107 KiB |
@@ -1,163 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>services</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>data-collection-service</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Data Collection Service</name>
|
||||
<description>DataX-based data collection and aggregation service</description>
|
||||
|
||||
<dependencies>
|
||||
<!-- Spring Boot Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-validation</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-actuator</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Database -->
|
||||
<dependency>
|
||||
<groupId>com.mysql</groupId>
|
||||
<artifactId>mysql-connector-j</artifactId>
|
||||
<version>8.0.33</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Redis -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-data-redis</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- DataX Dependencies (集成DataX插件) -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-exec</artifactId>
|
||||
<version>1.3</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Connection Pool -->
|
||||
<dependency>
|
||||
<groupId>com.zaxxer</groupId>
|
||||
<artifactId>HikariCP</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Oracle JDBC Driver -->
|
||||
<dependency>
|
||||
<groupId>com.oracle.database.jdbc</groupId>
|
||||
<artifactId>ojdbc8</artifactId>
|
||||
<version>21.5.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- PostgreSQL JDBC Driver -->
|
||||
<dependency>
|
||||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- JSON Processing -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Shared Domain -->
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>domain-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-management-service</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- OpenAPI Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springdoc</groupId>
|
||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openapitools</groupId>
|
||||
<artifactId>jackson-databind-nullable</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>jakarta.validation</groupId>
|
||||
<artifactId>jakarta.validation-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Test Dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.16.1</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
<classifier>exec</classifier>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<configuration>
|
||||
<source>${maven.compiler.source}</source>
|
||||
<target>${maven.compiler.target}</target>
|
||||
<annotationProcessorPaths>
|
||||
<!-- 顺序很重要 -->
|
||||
<path>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>${lombok.version}</version>
|
||||
</path>
|
||||
<path>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok-mapstruct-binding</artifactId>
|
||||
<version>${lombok-mapstruct-binding.version}</version>
|
||||
</path>
|
||||
<path>
|
||||
<groupId>org.mapstruct</groupId>
|
||||
<artifactId>mapstruct-processor</artifactId>
|
||||
<version>${mapstruct.version}</version>
|
||||
</path>
|
||||
</annotationProcessorPaths>
|
||||
<compilerArgs>
|
||||
<arg>-parameters</arg>
|
||||
<arg>-Amapstruct.defaultComponentModel=spring</arg>
|
||||
</compilerArgs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -1,25 +0,0 @@
|
||||
package com.datamate.collection;
|
||||
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.scheduling.annotation.EnableAsync;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
import org.springframework.transaction.annotation.EnableTransactionManagement;
|
||||
|
||||
/**
 * Data collection service configuration class.
 *
 * DataX-based data collection and synchronization service; supports
 * collecting and aggregating data from multiple kinds of data sources.
 */
@SpringBootApplication
@EnableAsync
@EnableScheduling
@EnableTransactionManagement
@ComponentScan(basePackages = {
    "com.datamate.collection",
    "com.datamate.datamanagement",
    "com.datamate.shared"
})
public class DataCollectionServiceConfiguration {
    // Configuration class for JAR packaging - no main method needed
}
|
||||
@@ -1,73 +0,0 @@
|
||||
package com.datamate.collection.application;
|
||||
|
||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
import com.datamate.collection.domain.model.entity.TaskExecution;
|
||||
import com.datamate.collection.domain.repository.CollectionTaskRepository;
|
||||
import com.datamate.collection.common.enums.SyncMode;
|
||||
import com.datamate.common.domain.utils.ChunksSaver;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@Slf4j
@Service
@RequiredArgsConstructor
public class CollectionTaskService {
    private final TaskExecutionService taskExecutionService;
    private final CollectionTaskRepository collectionTaskRepository;

    /**
     * Creates and persists a collection task, then immediately triggers a
     * DataX run when the task's sync mode is ONCE.
     *
     * @param task      the task to create; id, target path, status and
     *                  timestamps are initialized here via initCreateParam()
     * @param datasetId dataset to post-process into after a successful run;
     *                  may be blank (see TaskExecutionService.runAsync)
     * @return the persisted task
     */
    @Transactional
    public CollectionTask create(CollectionTask task, String datasetId) {
        task.initCreateParam();
        collectionTaskRepository.save(task);
        executeTaskNow(task, datasetId);
        return task;
    }

    // Triggers an immediate asynchronous execution for one-shot (ONCE) tasks.
    // SCHEDULED tasks are not started here — presumably a scheduler picks them
    // up via selectActiveTasks(); TODO confirm.
    private void executeTaskNow(CollectionTask task, String datasetId) {
        if (Objects.equals(task.getSyncMode(), SyncMode.ONCE)) {
            TaskExecution exec = taskExecutionService.createExecution(task);
            // Fall back to a one-hour timeout when the task does not specify one.
            int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
            taskExecutionService.runAsync(task, exec.getId(), timeout, datasetId);
            log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), LocalDateTime.now(), exec.getId());
        }
    }

    /**
     * Updates an existing task and re-triggers execution for ONCE tasks.
     *
     * @param task      task carrying the new field values (must have an id)
     * @param datasetId dataset to post-process into after a successful run
     * @return the updated task
     */
    @Transactional
    public CollectionTask update(CollectionTask task, String datasetId) {
        task.setUpdatedAt(LocalDateTime.now());
        // Rewrite destPath/filePaths in the DataX config to this task's folder.
        task.addPath();
        collectionTaskRepository.updateById(task);
        executeTaskNow(task, datasetId);
        return task;
    }

    /**
     * Deletes a task together with its locally collected files under
     * /dataset/local/&lt;id&gt;.
     *
     * @param id task id
     */
    @Transactional
    public void delete(String id) {
        CollectionTask task = collectionTaskRepository.getById(id);
        if (task != null) {
            ChunksSaver.deleteFolder("/dataset/local/" + task.getId());
        }
        collectionTaskRepository.removeById(id);
    }

    /** Returns the task with the given id, or null if absent. */
    public CollectionTask get(String id) {
        return collectionTaskRepository.getById(id);
    }

    /** Pages through tasks matching the given MyBatis-Plus query wrapper. */
    public IPage<CollectionTask> getTasks(Page<CollectionTask> page, LambdaQueryWrapper<CollectionTask> wrapper) {
        return collectionTaskRepository.page(page, wrapper);
    }

    /** Returns all tasks the repository considers active. */
    public List<CollectionTask> selectActiveTasks() {
        return collectionTaskRepository.selectActiveTasks();
    }
}
|
||||
@@ -1,65 +0,0 @@
|
||||
package com.datamate.collection.application;
|
||||
|
||||
import com.datamate.collection.common.enums.TemplateType;
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
import com.datamate.collection.domain.model.entity.TaskExecution;
|
||||
import com.datamate.collection.common.enums.TaskStatus;
|
||||
import com.datamate.collection.domain.process.ProcessRunner;
|
||||
import com.datamate.collection.domain.repository.CollectionTaskRepository;
|
||||
import com.datamate.collection.domain.repository.TaskExecutionRepository;
|
||||
import com.datamate.datamanagement.application.DatasetApplicationService;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
@Slf4j
@Service
@RequiredArgsConstructor
public class TaskExecutionService {
    private final ProcessRunner processRunner;
    private final TaskExecutionRepository executionRepository;
    private final CollectionTaskRepository collectionTaskRepository;
    private final DatasetApplicationService datasetApplicationService;

    /**
     * Creates a RUNNING execution record for the given task, marks the task
     * itself as RUNNING and remembers this execution as its latest one.
     *
     * @param task the task being executed
     * @return the persisted execution record
     */
    @Transactional
    public TaskExecution createExecution(CollectionTask task) {
        TaskExecution exec = TaskExecution.initTaskExecution();
        exec.setTaskId(task.getId());
        exec.setTaskName(task.getName());
        executionRepository.save(exec);
        collectionTaskRepository.updateLastExecution(task.getId(), exec.getId());
        collectionTaskRepository.updateStatus(task.getId(), TaskStatus.RUNNING.name());
        return exec;
    }

    /** Returns the most recent execution of the given task, or null. */
    public TaskExecution selectLatestByTaskId(String taskId) {
        return executionRepository.selectLatestByTaskId(taskId);
    }

    /**
     * Runs the DataX job for the task asynchronously, then records the final
     * SUCCESS/FAILED state on both the execution record and the task. On
     * success, optionally kicks off dataset post-processing.
     *
     * @param task           the task to run
     * @param executionId    id of the execution record created beforehand
     * @param timeoutSeconds per-run timeout handed to the process runner
     * @param datasetId      dataset to post-process into; skipped when blank
     */
    @Async
    @Transactional
    public void runAsync(CollectionTask task, String executionId, int timeoutSeconds, String datasetId) {
        try {
            int code = processRunner.runJob(task, executionId, timeoutSeconds);
            log.info("DataX finished with code {} for execution {}", code, executionId);
            // Simplified: a normal process exit is treated as success; record
            // counters are written as zero rather than parsed from DataX output.
            executionRepository.completeExecution(executionId, TaskStatus.SUCCESS.name(), LocalDateTime.now(),
                    0, 0L, 0L, 0L, null);
            collectionTaskRepository.updateStatus(task.getId(), TaskStatus.SUCCESS.name());
            if (StringUtils.isNotBlank(datasetId)) {
                datasetApplicationService.processDataSourceAsync(datasetId, task.getId());
            }
        } catch (Exception e) {
            log.error("DataX execution failed", e);
            executionRepository.completeExecution(executionId, TaskStatus.FAILED.name(), LocalDateTime.now(),
                    0, 0L, 0L, 0L, e.getMessage());
            collectionTaskRepository.updateStatus(task.getId(), TaskStatus.FAILED.name());
        }
    }
}
|
||||
@@ -1,12 +0,0 @@
|
||||
package com.datamate.collection.common.enums;
|
||||
|
||||
/**
 * Synchronization mode: one-shot (ONCE) or scheduled (SCHEDULED).
 */
public enum SyncMode {
    /** One-shot: executed once, immediately. */
    ONCE,
    /** Scheduled: executed on a schedule. */
    SCHEDULED
}
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
package com.datamate.collection.common.enums;
|
||||
|
||||
/**
 * Unified status enum shared by tasks and their executions.
 *
 * Values: DRAFT, READY, RUNNING, SUCCESS (replaces the former
 * COMPLETED/SUCCESS pair), FAILED, STOPPED.
 *
 * @author Data Mate Platform Team
 */
public enum TaskStatus {
    /** Draft. */
    DRAFT,
    /** Ready to run. */
    READY,
    /** Currently running. */
    RUNNING,
    /** Finished successfully (corresponds to the former COMPLETED). */
    SUCCESS,
    /** Failed. */
    FAILED,
    /** Stopped. */
    STOPPED
}
|
||||
@@ -1,11 +0,0 @@
|
||||
package com.datamate.collection.common.enums;
|
||||
|
||||
/**
 * Collection template type enum.
 */
public enum TemplateType {
    /** Network-attached storage source. */
    NAS,
    /** Object storage (OBS) source. */
    OBS,
    /** MySQL database source. */
    MYSQL
}
|
||||
@@ -1,62 +0,0 @@
|
||||
package com.datamate.collection.domain.model.entity;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.datamate.collection.common.enums.SyncMode;
|
||||
import com.datamate.collection.common.enums.TaskStatus;
|
||||
import com.datamate.collection.common.enums.TemplateType;
|
||||
import com.datamate.common.domain.model.base.BaseEntity;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* 数据采集任务实体(与数据库表 t_dc_collection_tasks 对齐)
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@TableName(value = "t_dc_collection_tasks", autoResultMap = true)
|
||||
public class CollectionTask extends BaseEntity<String> {
|
||||
private String name;
|
||||
private String description;
|
||||
private TemplateType taskType; // 任务类型
|
||||
private String targetPath; // 目标存储路径
|
||||
private String config; // DataX JSON 配置,包含源端和目标端配置信息
|
||||
private TaskStatus status;
|
||||
private SyncMode syncMode; // ONCE / SCHEDULED
|
||||
private String scheduleExpression;
|
||||
private Integer retryCount;
|
||||
private Integer timeoutSeconds;
|
||||
private Long maxRecords;
|
||||
private String sortField;
|
||||
private String lastExecutionId;
|
||||
|
||||
public void addPath() {
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
Map<String, Object> parameter = objectMapper.readValue(
|
||||
config,
|
||||
new TypeReference<>() {}
|
||||
);
|
||||
parameter.put("destPath", "/dataset/local/" + id);
|
||||
parameter.put("filePaths", Collections.singletonList(parameter.get("destPath")));
|
||||
config = objectMapper.writeValueAsString(parameter);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void initCreateParam() {
|
||||
this.id = UUID.randomUUID().toString();
|
||||
this.targetPath = "/dataset/local/" + id;
|
||||
this.status = TaskStatus.READY;
|
||||
this.createdAt = LocalDateTime.now();
|
||||
this.updatedAt = LocalDateTime.now();
|
||||
}
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
package com.datamate.collection.domain.model.entity;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
@Data
@EqualsAndHashCode(callSuper = false)
public class DataxTemplate {

    /** Template id (UUID). */
    private String id;

    /** Template name. */
    private String name;

    /** Source data source type. */
    private String sourceType;

    /** Target data source type. */
    private String targetType;

    /** Template content (JSON format). */
    private String templateContent;

    /** Template description. */
    private String description;

    /** Version number. */
    private String version;

    /** Whether this is a built-in (system) template. */
    private Boolean isSystem;

    /** Creation time. */
    private LocalDateTime createdAt;

    /** Last update time. */
    private LocalDateTime updatedAt;

    /** Creator. */
    private String createdBy;

    /** Last updater. */
    private String updatedBy;
}
|
||||
@@ -1,44 +0,0 @@
|
||||
package com.datamate.collection.domain.model.entity;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import com.datamate.collection.common.enums.TaskStatus;
|
||||
import com.datamate.common.domain.model.base.BaseEntity;
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
 * One execution (run) of a collection task; aligned with table
 * t_dc_task_executions.
 */
@Getter
@Setter
@TableName(value = "t_dc_task_executions", autoResultMap = true)
public class TaskExecution extends BaseEntity<String> {
    private String taskId;
    private String taskName; // denormalized task name for display
    private TaskStatus status;
    private Double progress; // initialized to 0.0; scale (0-1 vs 0-100) not fixed here — TODO confirm
    private Long recordsTotal;
    private Long recordsProcessed;
    private Long recordsSuccess;
    private Long recordsFailed;
    private Double throughput;
    private Long dataSizeBytes;
    private LocalDateTime startedAt;
    private LocalDateTime completedAt;
    private Integer durationSeconds;
    private String errorMessage;
    private String dataxJobId;
    private String config; // presumably a snapshot of the DataX job config for this run — TODO confirm
    private String result;

    /**
     * Static factory for a brand-new execution: random UUID id, RUNNING
     * status, zero progress, and start/creation timestamps set to now.
     */
    public static TaskExecution initTaskExecution() {
        TaskExecution exec = new TaskExecution();
        exec.setId(UUID.randomUUID().toString());
        exec.setStatus(TaskStatus.RUNNING);
        exec.setProgress(0.0);
        exec.setStartedAt(LocalDateTime.now());
        exec.setCreatedAt(LocalDateTime.now());
        return exec;
    }
}
|
||||
@@ -1,21 +0,0 @@
|
||||
package com.datamate.collection.domain.process;
|
||||
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
|
||||
/**
 * Collection job runner interface.
 *
 * @since 2025/10/23
 */
public interface ProcessRunner {
    /**
     * Runs a collection job.
     *
     * @param task           the task to run
     * @param executionId    execution record id (used e.g. for per-run log file naming)
     * @param timeoutSeconds timeout in seconds
     * @return exit code of the run
     * @throws Exception if the run fails
     */
    int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception;
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package com.datamate.collection.domain.repository;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.repository.IRepository;
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Repository for collection tasks.
 *
 * @since 2025/10/23
 */
public interface CollectionTaskRepository extends IRepository<CollectionTask> {
    /** Returns all tasks considered active. */
    List<CollectionTask> selectActiveTasks();

    /** Updates only the status of the given task. */
    void updateStatus(String id, String status);

    /** Records the id of the task's most recent execution. */
    void updateLastExecution(String id, String lastExecutionId);
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package com.datamate.collection.domain.repository;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.service.IService;
|
||||
import com.datamate.collection.domain.model.entity.TaskExecution;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
 * Repository for task execution records.
 *
 * @since 2025/10/23
 */
public interface TaskExecutionRepository extends IService<TaskExecution> {
    /** Returns the most recent execution of the given task, or null. */
    TaskExecution selectLatestByTaskId(String taskId);

    /**
     * Marks an execution as finished, writing its terminal status, completion
     * time, record counters and (for failures) the error message.
     */
    void completeExecution(String executionId, String status, LocalDateTime completedAt,
                           Integer recordsProcessed, Long recordsTotal,
                           Long recordsSuccess, Long recordsFailed, String errorMessage);
}
|
||||
@@ -1,147 +0,0 @@
|
||||
// java
|
||||
package com.datamate.collection.infrastructure.datax;

import com.datamate.collection.common.enums.TemplateType;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.process.ProcessRunner;
import com.datamate.collection.infrastructure.datax.config.MysqlConfig;
import com.datamate.collection.infrastructure.datax.config.NasConfig;
import com.datamate.collection.infrastructure.datax.config.ObsConfig;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.exec.*;
import org.apache.commons.io.output.TeeOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;

import java.io.*;
import java.nio.file.*;
import java.time.Duration;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Runs DataX jobs by materialising the task's config as a job JSON file and
 * invoking the DataX python launcher ({@code $DATAX_HOME/bin/datax.py}).
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class DataxProcessRunner implements ProcessRunner {

    /** Jackson mappers are thread-safe once configured; share one instead of allocating per job. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private final DataxProperties props;

    /**
     * Builds the job file, executes DataX and, on success, applies MYSQL-specific
     * post-processing (renaming output files to {@code .csv}).
     *
     * @param task           the collection task whose config drives the job
     * @param executionId    execution id, used to name the log file
     * @param timeoutSeconds watchdog timeout for the DataX process
     * @return the DataX process exit code (0 on success; a non-zero exit makes
     *         {@code DefaultExecutor.execute} throw, so post-processing only runs on success)
     * @throws Exception on I/O failure, bad config, timeout or non-zero exit
     */
    @Override
    public int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception {
        Path job = buildJobFile(task);
        int code = runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
        // Post-process after a successful run (no-op for non-MYSQL task types).
        postProcess(task);
        return code;
    }

    /** Launches {@code python datax.py <jobFile>} and tees its output to a per-execution log file. */
    private int runJob(File jobFile, String executionId, Duration timeout) throws Exception {
        File logFile = new File(props.getLogPath(), String.format("datax-%s.log", executionId));
        String python = props.getPythonPath();
        String dataxPy = props.getHomePath() + File.separator + "bin" + File.separator + "datax.py";

        // Build the command argument-by-argument: CommandLine.parse() splits on whitespace,
        // which silently corrupts installation or job paths containing spaces.
        CommandLine cl = new CommandLine(python);
        cl.addArgument(dataxPy, false);
        cl.addArgument(jobFile.getAbsolutePath(), false);
        log.info("Execute DataX: {}", cl);

        // Files.createDirectories fails loudly, unlike the silently-ignored mkdirs() result.
        Files.createDirectories(logFile.getParentFile().toPath());

        // Close the log streams once DataX finishes: PumpStreamHandler does not close
        // the streams it is given, so the previous code leaked two file descriptors per run.
        try (OutputStream outLog = new FileOutputStream(logFile, true);
             OutputStream errLog = new FileOutputStream(logFile, true)) {
            DefaultExecutor executor = new DefaultExecutor();
            executor.setStreamHandler(new PumpStreamHandler(
                    new TeeOutputStream(outLog, System.out),
                    new TeeOutputStream(errLog, System.err)));
            executor.setWatchdog(new ExecuteWatchdog(timeout.toMillis()));
            return executor.execute(cl);
        }
    }

    /**
     * Writes the task's DataX job JSON to {@code <jobConfigPath>/datax-job-<taskId>.json}.
     *
     * @throws IOException       on write failure
     * @throws BusinessException when the task carries no config
     */
    private Path buildJobFile(CollectionTask task) throws IOException {
        Files.createDirectories(Paths.get(props.getJobConfigPath()));
        String fileName = String.format("datax-job-%s.json", task.getId());
        Path path = Paths.get(props.getJobConfigPath(), fileName);
        // Validate before opening the writer so an empty config never leaves a truncated file behind.
        if (StringUtils.isBlank(task.getConfig())) {
            throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
        }
        String json = getJobConfig(task);
        log.info("Job config: {}", json);
        try (FileWriter fw = new FileWriter(path.toFile())) {
            fw.write(json);
        }
        return path;
    }

    /**
     * Converts the task's raw config string into the DataX job JSON for its template type.
     * The switch is exhaustive over {@code TemplateType}, so no default branch is needed.
     */
    private String getJobConfig(CollectionTask task) {
        try {
            TemplateType templateType = task.getTaskType();
            return switch (templateType) {
                case NAS -> {
                    NasConfig nasConfig = OBJECT_MAPPER.readValue(task.getConfig(), NasConfig.class);
                    yield nasConfig.toJobConfig(OBJECT_MAPPER, task);
                }
                case OBS -> {
                    ObsConfig obsConfig = OBJECT_MAPPER.readValue(task.getConfig(), ObsConfig.class);
                    yield obsConfig.toJobConfig(OBJECT_MAPPER, task);
                }
                case MYSQL -> {
                    MysqlConfig mysqlConfig = OBJECT_MAPPER.readValue(task.getConfig(), MysqlConfig.class);
                    yield mysqlConfig.toJobConfig(OBJECT_MAPPER, task);
                }
            };
        } catch (Exception e) {
            log.error("Failed to parse task config", e);
            throw new RuntimeException("Failed to parse task config", e);
        }
    }

    /**
     * For MYSQL tasks only: renames every regular file under the target path that
     * does not already end in {@code .csv} to {@code <name>.csv}. Rename failures
     * are logged and skipped so one bad file does not abort the whole pass.
     */
    private void postProcess(CollectionTask task) throws IOException {
        if (task.getTaskType() != TemplateType.MYSQL) {
            return;
        }
        String targetPath = task.getTargetPath();
        Path dir = Paths.get(targetPath);
        if (!Files.exists(dir) || !Files.isDirectory(dir)) {
            log.info("Target path {} does not exist or is not a directory for task {}, skip post processing.", targetPath, task.getId());
            return;
        }

        try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
            for (Path path : stream) {
                if (!Files.isRegularFile(path)) continue;
                String name = path.getFileName().toString();
                if (name.toLowerCase().endsWith(".csv")) continue;

                Path target = dir.resolve(name + ".csv");
                try {
                    Files.move(path, target, StandardCopyOption.REPLACE_EXISTING);
                    log.info("Renamed file for task {}: {} -> {}", task.getId(), name, target.getFileName().toString());
                } catch (IOException ex) {
                    log.warn("Failed to rename file {} for task {}: {}", path, task.getId(), ex.getMessage(), ex);
                }
            }
        } catch (IOException ioe) {
            log.warn("Error scanning target directory {} for task {}: {}", targetPath, task.getId(), ioe.getMessage(), ioe);
        }
    }
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.datax;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Data
|
||||
@Configuration
|
||||
@ConfigurationProperties(prefix = "datamate.data-collection.datax")
|
||||
public class DataxProperties {
|
||||
private String homePath; // DATAX_HOME
|
||||
private String pythonPath; // python 可执行文件
|
||||
private String jobConfigPath; // 生成的作业文件目录
|
||||
private String logPath; // 运行日志目录
|
||||
private Integer maxMemory = 2048;
|
||||
private Integer channelCount = 5;
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.datax.config;

/**
 * Marker interface for DataX source/target configuration types.
 */
public interface BaseConfig {
}
|
||||
@@ -1,73 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.datax.config;

import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * MYSQL collection configuration: reads via DataX {@code mysqlreader} and
 * writes CSV files via {@code txtfilewriter}.
 */
@Getter
@Setter
public class MysqlConfig {
    private String jdbcUrl;

    private String username;

    private String password;

    private String querySql;

    private List<String> headers;

    /**
     * Serialises this configuration into the DataX job JSON string.
     *
     * @param objectMapper mapper used for serialisation
     * @param task         task supplying the CSV output directory
     * @return the job JSON
     * @throws Exception if serialisation fails
     */
    public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
        // Reader side: credentials plus a single connection entry.
        Map<String, Object> readerParam = new HashMap<>();
        Map<String, Object> conn = new HashMap<>();
        if (username != null) {
            readerParam.put("username", username);
        }
        if (password != null) {
            readerParam.put("password", password);
        }
        if (jdbcUrl != null) {
            conn.put("jdbcUrl", Collections.singletonList(jdbcUrl));
        }
        if (querySql != null) {
            conn.put("querySql", Collections.singletonList(querySql));
        }
        readerParam.put("connection", Collections.singletonList(conn));

        Map<String, Object> reader = new HashMap<>();
        reader.put("name", "mysqlreader");
        reader.put("parameter", readerParam);

        // Writer side: CSV file output under the task's target path.
        Map<String, Object> writerParam = new HashMap<>();
        if (CollectionUtils.isNotEmpty(headers)) {
            writerParam.put("header", headers);
        }
        writerParam.put("path", task.getTargetPath());
        writerParam.put("fileName", "collectionResult");
        writerParam.put("writeMode", "truncate");
        writerParam.put("dateFormat", "yyyy-MM-dd HH:mm:ss");
        writerParam.put("fileFormat", "csv");
        writerParam.put("encoding", "UTF-8");
        writerParam.put("fieldDelimiter", ",");

        Map<String, Object> writer = new HashMap<>();
        writer.put("name", "txtfilewriter");
        writer.put("parameter", writerParam);

        Map<String, Object> content = new HashMap<>();
        content.put("reader", reader);
        content.put("writer", writer);

        Map<String, Object> speed = new HashMap<>();
        speed.put("channel", 1);
        Map<String, Object> setting = new HashMap<>();
        setting.put("speed", speed);

        Map<String, Object> job = new HashMap<>();
        job.put("content", List.of(content));
        job.put("setting", setting);

        Map<String, Object> root = new HashMap<>();
        root.put("job", job);
        return objectMapper.writeValueAsString(root);
    }
}
|
||||
@@ -1,54 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.datax.config;

import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * NAS collection configuration: transfers files via DataX
 * {@code nfsreader}/{@code nfswriter}.
 */
@Getter
@Setter
public class NasConfig implements BaseConfig {
    private String ip;

    private String path;

    private List<String> files;

    /**
     * Serialises this configuration into the DataX job JSON string.
     * Reader and writer intentionally share the same parameter map.
     *
     * @param objectMapper mapper used for serialisation
     * @param task         task supplying the destination path
     * @return the job JSON
     * @throws Exception if serialisation fails
     */
    public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
        Map<String, Object> parameter = new HashMap<>();
        if (ip != null) {
            parameter.put("ip", ip);
        }
        if (path != null) {
            parameter.put("path", path);
        }
        if (files != null) {
            parameter.put("files", files);
        }
        parameter.put("destPath", task.getTargetPath());

        Map<String, Object> reader = new HashMap<>();
        reader.put("name", "nfsreader");
        reader.put("parameter", parameter);

        Map<String, Object> writer = new HashMap<>();
        writer.put("name", "nfswriter");
        writer.put("parameter", parameter);

        Map<String, Object> content = new HashMap<>();
        content.put("reader", reader);
        content.put("writer", writer);

        Map<String, Object> speed = new HashMap<>();
        speed.put("channel", 2);
        Map<String, Object> setting = new HashMap<>();
        setting.put("speed", speed);

        Map<String, Object> job = new HashMap<>();
        job.put("content", List.of(content));
        job.put("setting", setting);

        Map<String, Object> root = new HashMap<>();
        root.put("job", job);
        return objectMapper.writeValueAsString(root);
    }
}
|
||||
@@ -1,61 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.datax.config;

import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * OBS collection configuration: transfers objects via DataX
 * {@code obsreader}/{@code obswriter}.
 *
 * @since 2025/11/18
 */
@Getter
@Setter
public class ObsConfig implements BaseConfig {
    private String endpoint;
    private String bucket;
    private String accessKey;
    private String secretKey;
    private String prefix;

    /**
     * Serialises this configuration into the DataX job JSON string.
     * Reader and writer intentionally share the same parameter map.
     *
     * @param objectMapper mapper used for serialisation
     * @param task         task supplying the destination path
     * @return the job JSON
     * @throws Exception if serialisation fails
     */
    public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
        Map<String, Object> parameter = new HashMap<>();
        if (endpoint != null) {
            parameter.put("endpoint", endpoint);
        }
        if (bucket != null) {
            parameter.put("bucket", bucket);
        }
        if (accessKey != null) {
            parameter.put("accessKey", accessKey);
        }
        if (secretKey != null) {
            parameter.put("secretKey", secretKey);
        }
        if (prefix != null) {
            parameter.put("prefix", prefix);
        }
        parameter.put("destPath", task.getTargetPath());

        Map<String, Object> reader = new HashMap<>();
        reader.put("name", "obsreader");
        reader.put("parameter", parameter);

        Map<String, Object> writer = new HashMap<>();
        writer.put("name", "obswriter");
        writer.put("parameter", parameter);

        Map<String, Object> content = new HashMap<>();
        content.put("reader", reader);
        content.put("writer", writer);

        Map<String, Object> speed = new HashMap<>();
        speed.put("channel", 2);
        Map<String, Object> setting = new HashMap<>();
        setting.put("speed", speed);

        Map<String, Object> job = new HashMap<>();
        job.put("content", List.of(content));
        job.put("setting", setting);

        Map<String, Object> root = new HashMap<>();
        root.put("job", job);
        return objectMapper.writeValueAsString(root);
    }
}
|
||||
@@ -1,15 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.persistence.mapper;

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.collection.domain.model.entity.CollectionTask;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;

import java.util.List;

/**
 * MyBatis mapper for the collection-task table.
 */
@Mapper
public interface CollectionTaskMapper extends BaseMapper<CollectionTask> {
    /**
     * Updates a task's status.
     *
     * @return number of rows affected
     */
    int updateStatus(@Param("id") String id, @Param("status") String status);

    /**
     * Stores the id of a task's most recent execution.
     *
     * @return number of rows affected
     */
    int updateLastExecution(@Param("id") String id, @Param("lastExecutionId") String lastExecutionId);

    /** Lists all tasks marked as active. */
    List<CollectionTask> selectActiveTasks();
}
|
||||
@@ -1,22 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.persistence.mapper;

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.collection.domain.model.entity.TaskExecution;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;

import java.time.LocalDateTime;

/**
 * MyBatis mapper for task-execution records.
 */
@Mapper
public interface TaskExecutionMapper extends BaseMapper<TaskExecution> {
    /** Fetches the most recent execution of the given task. */
    TaskExecution selectLatestByTaskId(@Param("taskId") String taskId);

    /**
     * Marks an execution as finished, storing its terminal status,
     * completion time and record statistics.
     */
    void completeExecution(@Param("executionId") String executionId,
                           @Param("status") String status,
                           @Param("completedAt") LocalDateTime completedAt,
                           @Param("recordsProcessed") Integer recordsProcessed,
                           @Param("recordsTotal") Long recordsTotal,
                           @Param("recordsSuccess") Long recordsSuccess,
                           @Param("recordsFailed") Long recordsFailed,
                           @Param("errorMessage") String errorMessage);
}
|
||||
@@ -1,36 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.persistence.repository;

import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;

import java.util.List;

/**
 * MyBatis-Plus backed implementation of {@link CollectionTaskRepository};
 * delegates the custom queries to {@link CollectionTaskMapper}.
 *
 * @since 2025/10/23
 */
@Repository
@RequiredArgsConstructor
public class CollectionTaskRepositoryImpl extends CrudRepository<CollectionTaskMapper, CollectionTask> implements CollectionTaskRepository {
    private final CollectionTaskMapper mapper;

    /** {@inheritDoc} */
    @Override
    public List<CollectionTask> selectActiveTasks() {
        return mapper.selectActiveTasks();
    }

    /** {@inheritDoc} */
    @Override
    public void updateStatus(String id, String status) {
        mapper.updateStatus(id, status);
    }

    /** {@inheritDoc} */
    @Override
    public void updateLastExecution(String id, String lastExecutionId) {
        mapper.updateLastExecution(id, lastExecutionId);
    }
}
|
||||
@@ -1,37 +0,0 @@
|
||||
package com.datamate.collection.infrastructure.persistence.repository;

import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.domain.repository.TaskExecutionRepository;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;

import java.time.LocalDateTime;

/**
 * MyBatis-Plus backed implementation of {@link TaskExecutionRepository};
 * delegates the custom queries to {@link TaskExecutionMapper}.
 *
 * @since 2025/10/23
 */
@Repository
@RequiredArgsConstructor
public class TaskExecutionRepositoryImpl extends ServiceImpl<TaskExecutionMapper, TaskExecution>
        implements TaskExecutionRepository {

    private final TaskExecutionMapper mapper;

    /** {@inheritDoc} */
    @Override
    public TaskExecution selectLatestByTaskId(String taskId) {
        return mapper.selectLatestByTaskId(taskId);
    }

    /** {@inheritDoc} */
    @Override
    public void completeExecution(String executionId, String status, LocalDateTime completedAt,
                                  Integer recordsProcessed, Long recordsTotal,
                                  Long recordsSuccess, Long recordsFailed, String errorMessage) {
        mapper.completeExecution(executionId, status, completedAt,
                recordsProcessed, recordsTotal,
                recordsSuccess, recordsFailed, errorMessage);
    }
}
|
||||
@@ -1,59 +0,0 @@
|
||||
package com.datamate.collection.interfaces.converter;

import com.baomidou.mybatisplus.core.metadata.IPage;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;

import java.util.List;
import java.util.Map;

/**
 * MapStruct converter between {@link CollectionTask} entities and the REST DTOs,
 * translating the JSON config column to/from a {@code Map}.
 */
@Mapper
public interface CollectionTaskConverter {
    CollectionTaskConverter INSTANCE = Mappers.getMapper(CollectionTaskConverter.class);

    /**
     * Shared mapper: Jackson's ObjectMapper is thread-safe after configuration, so
     * one instance is reused instead of allocating a new one per conversion call.
     */
    ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Converts an entity to its response DTO, parsing the JSON config string into a map. */
    @Mapping(source = "config", target = "config", qualifiedByName = "parseJsonToMap")
    CollectionTaskResponse toResponse(CollectionTask task);

    /** Converts a list of entities to response DTOs. */
    List<CollectionTaskResponse> toResponse(List<CollectionTask> tasks);

    /** Converts a creation request to an entity, serialising the config map to JSON. */
    @Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
    CollectionTask toCollectionTask(CreateCollectionTaskRequest request);

    /** Converts an update request to an entity, serialising the config map to JSON. */
    @Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
    CollectionTask toCollectionTask(UpdateCollectionTaskRequest request);

    /** Converts a MyBatis-Plus page into the API paging envelope. */
    @Mapping(source = "current", target = "page")
    @Mapping(source = "size", target = "size")
    @Mapping(source = "total", target = "totalElements")
    @Mapping(source = "pages", target = "totalPages")
    @Mapping(source = "records", target = "content")
    PagedResponse<CollectionTaskResponse> toResponse(IPage<CollectionTask> tasks);

    /**
     * Parses a JSON object string into a map.
     *
     * @throws BusinessException with INVALID_PARAMETER when the string is not valid JSON
     */
    @Named("parseJsonToMap")
    default Map<String, Object> parseJsonToMap(String json) {
        try {
            return OBJECT_MAPPER.readValue(json, Map.class);
        } catch (Exception e) {
            throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
        }
    }

    /**
     * Serialises a map to a JSON string; a null map serialises as {@code {}}.
     *
     * @throws BusinessException with INVALID_PARAMETER when serialisation fails
     */
    @Named("mapToJsonString")
    default String mapToJsonString(Map<String, Object> map) {
        try {
            return OBJECT_MAPPER.writeValueAsString(map != null ? map : Map.of());
        } catch (Exception e) {
            throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
        }
    }
}
|
||||
@@ -1,25 +0,0 @@
|
||||
package com.datamate.collection.interfaces.dto;
|
||||
|
||||
import com.datamate.collection.common.enums.TaskStatus;
|
||||
import com.datamate.common.interfaces.PagingQuery;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* 归集任务分页查询参数
|
||||
*
|
||||
* @since 2025/10/23
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
public class CollectionTaskPagingQuery extends PagingQuery {
|
||||
/**
|
||||
* 任务状态
|
||||
*/
|
||||
private TaskStatus status;
|
||||
|
||||
/**
|
||||
* 任务名称
|
||||
*/
|
||||
private String keyword;
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
package com.datamate.collection.interfaces.dto;

import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;

import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
import jakarta.validation.Valid;

/**
 * REST response representing a single collection task.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CollectionTaskResponse {
    /** Task identifier. */
    private String id;

    /** Task name. */
    private String name;

    /** Task description. */
    private String description;

    /** Destination path the collected data is written to. */
    private String targetPath;

    /** Collection configuration (source/target), parsed from the stored JSON. */
    private Map<String, Object> config = new HashMap<>();

    /** Current task status. */
    private TaskStatus status;

    /** Synchronisation mode (e.g. one-off vs scheduled). */
    private SyncMode syncMode;

    /** Cron expression, present for scheduled tasks. */
    private String scheduleExpression;

    /** Identifier of the most recent execution, if any. */
    private String lastExecutionId;

    /** Creation timestamp (ISO date-time). */
    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime createdAt;

    /** Last-update timestamp (ISO date-time). */
    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime updatedAt;

    /** Dataset linked to the task, when one was created alongside it. */
    private DatasetResponse dataset;
}
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
package com.datamate.collection.interfaces.dto;

import com.datamate.collection.common.enums.SyncMode;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.HashMap;
import java.util.Map;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;

import jakarta.validation.Valid;
import jakarta.validation.constraints.*;
import io.swagger.v3.oas.annotations.media.Schema;

/**
 * Request body for creating a collection task, optionally together with a dataset.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CreateCollectionTaskRequest {
    /** Task name (1-100 chars, required). */
    @NotNull
    @Size(min = 1, max = 100)
    @Schema(name = "name", description = "任务名称", requiredMode = Schema.RequiredMode.REQUIRED)
    @JsonProperty("name")
    private String name;

    /** Optional task description (max 500 chars). */
    @Size(max = 500)
    @Schema(name = "description", description = "任务描述", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("description")
    private String description;

    /** Template/source type of the task (required). */
    @NotNull
    @Schema(name = "taskType", description = "任务类型", requiredMode = Schema.RequiredMode.REQUIRED)
    @JsonProperty("taskType")
    private TemplateType taskType;

    /** Collection configuration covering source and target settings (required). */
    @Valid
    @NotNull
    @Schema(name = "config", description = "归集配置,包含源端和目标端配置信息", requiredMode = Schema.RequiredMode.REQUIRED)
    @JsonProperty("config")
    private Map<String, Object> config = new HashMap<>();

    /** Synchronisation mode (required). */
    @NotNull
    @Valid
    @Schema(name = "syncMode", requiredMode = Schema.RequiredMode.REQUIRED)
    @JsonProperty("syncMode")
    private SyncMode syncMode;

    /** Cron expression; required when syncMode is SCHEDULED. */
    @Schema(name = "scheduleExpression", description = "Cron调度表达式 (syncMode=SCHEDULED 时必填)", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("scheduleExpression")
    private String scheduleExpression;

    /** Optional parameters for creating a dataset alongside the task. */
    @Valid
    private CreateDatasetRequest dataset;
}
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
package com.datamate.collection.interfaces.dto;

import com.datamate.collection.common.enums.SyncMode;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.HashMap;
import java.util.Map;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;

import jakarta.validation.Valid;
import jakarta.validation.constraints.*;
import io.swagger.v3.oas.annotations.media.Schema;

/**
 * Request body for updating an existing collection task; all fields optional.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class UpdateCollectionTaskRequest {
    /** New task name (1-100 chars). */
    @Size(min = 1, max = 100)
    @Schema(name = "name", description = "任务名称", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("name")
    private String name;

    /** New task description (max 500 chars). */
    @Size(max = 500)
    @Schema(name = "description", description = "任务描述", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("description")
    private String description;

    /** Updated collection configuration (source and target settings). */
    @Valid
    @Schema(name = "config", description = "归集配置,包含源端和目标端配置信息", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("config")
    private Map<String, Object> config = new HashMap<>();

    /** Updated synchronisation mode. */
    @Valid
    @Schema(name = "syncMode", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("syncMode")
    private SyncMode syncMode;

    /** Cron expression; required when syncMode is SCHEDULED. */
    @Schema(name = "scheduleExpression", description = "Cron调度表达式 (syncMode=SCHEDULED 时必填)", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
    @JsonProperty("scheduleExpression")
    private String scheduleExpression;

    /** Identifier of the dataset to link the task to. */
    private String datasetId;
}
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
package com.datamate.collection.interfaces.rest;

import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.collection.application.CollectionTaskService;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.converter.CollectionTaskConverter;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.datamanagement.application.DatasetApplicationService;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import jakarta.validation.Valid;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.bind.annotation.*;

import java.util.*;

/**
 * REST endpoints for managing collection tasks (CRUD + paged listing).
 */
@Slf4j
@RestController
@RequestMapping("/data-collection/tasks")
@RequiredArgsConstructor
public class CollectionTaskController {

    private final CollectionTaskService taskService;

    private final DatasetApplicationService datasetService;

    /**
     * Creates a task; when dataset-creation parameters are supplied the dataset is
     * created first and its id linked to the new task (one transaction for both).
     */
    @PostMapping
    @Transactional
    public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
        CollectionTask newTask = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
        DatasetResponse createdDataset = null;
        String linkedDatasetId = null;
        if (Objects.nonNull(request.getDataset())) {
            createdDataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
            linkedDatasetId = createdDataset.getId();
        }
        CollectionTaskResponse body = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(newTask, linkedDatasetId));
        body.setDataset(createdDataset);
        return ResponseEntity.ok().body(body);
    }

    /** Updates an existing task; 404 when the id is unknown. */
    @PutMapping("/{id}")
    public ResponseEntity<CollectionTaskResponse> updateTask(@PathVariable("id") String id, @Valid @RequestBody UpdateCollectionTaskRequest request) {
        if (taskService.get(id) == null) {
            return ResponseEntity.notFound().build();
        }
        CollectionTask changes = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
        changes.setId(id);
        CollectionTask updated = taskService.update(changes, request.getDatasetId());
        return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(updated));
    }

    /** Deletes a task by id. */
    @DeleteMapping("/{id}")
    public ResponseEntity<Void> deleteTask(@PathVariable("id") String id) {
        taskService.delete(id);
        return ResponseEntity.ok().build();
    }

    /** Returns one task's details; 404 when the id is unknown. */
    @GetMapping("/{id}")
    public ResponseEntity<CollectionTaskResponse> getTaskDetail(@PathVariable("id") String id) {
        CollectionTask found = taskService.get(id);
        if (found == null) {
            return ResponseEntity.notFound().build();
        }
        return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(found));
    }

    /** Lists tasks with optional status/keyword filters, newest first. */
    @GetMapping
    public ResponseEntity<PagedResponse<CollectionTaskResponse>> getTasks(@Valid CollectionTaskPagingQuery query) {
        Page<CollectionTask> page = new Page<>(query.getPage(), query.getSize());
        LambdaQueryWrapper<CollectionTask> wrapper = new LambdaQueryWrapper<CollectionTask>()
                .eq(query.getStatus() != null, CollectionTask::getStatus, query.getStatus())
                .like(StringUtils.isNotBlank(query.getKeyword()), CollectionTask::getName, query.getKeyword())
                .orderByDesc(CollectionTask::getCreatedAt);
        return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.getTasks(page, wrapper)));
    }
}
|
||||
@@ -1,64 +0,0 @@
|
||||
package com.datamate.collection.interfaces.scheduler;

import com.datamate.collection.application.CollectionTaskService;
import com.datamate.collection.application.TaskExecutionService;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.scheduling.support.CronExpression;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;

import java.time.LocalDateTime;
import java.util.List;

/**
 * Polls active collection tasks and triggers a DataX execution for each task
 * whose cron schedule has come due. A task is skipped while its latest
 * execution is still running, so at most one execution runs per task.
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class TaskSchedulerInitializer {

    private final CollectionTaskService collectionTaskService;
    private final TaskExecutionService taskExecutionService;

    /**
     * Scans active tasks at a fixed delay (default 10s, configurable via
     * {@code datamate.data-collection.scheduler.scan-interval-ms}) and fires
     * any task whose next cron occurrence is not after "now".
     */
    @Scheduled(fixedDelayString = "${datamate.data-collection.scheduler.scan-interval-ms:10000}")
    public void scanAndTrigger() {
        List<CollectionTask> tasks = collectionTaskService.selectActiveTasks();
        if (tasks == null || tasks.isEmpty()) {
            return;
        }
        LocalDateTime now = LocalDateTime.now();
        for (CollectionTask task : tasks) {
            String cronExpr = task.getScheduleExpression();
            if (!StringUtils.hasText(cronExpr)) {
                // Tasks without a cron expression are not scheduler-managed.
                continue;
            }
            try {
                // Skip the task while its most recent execution is still running.
                TaskExecution latest = taskExecutionService.selectLatestByTaskId(task.getId());
                if (latest != null && latest.getStatus() == TaskStatus.RUNNING) {
                    continue;
                }

                CronExpression cron = CronExpression.parse(cronExpr);
                // Base the "next fire" computation on the last start time; with no
                // history, back-date the base a year so a due schedule fires immediately.
                LocalDateTime base = latest != null && latest.getStartedAt() != null
                        ? latest.getStartedAt()
                        : now.minusYears(1);
                LocalDateTime nextTime = cron.next(base);

                if (nextTime != null && !nextTime.isAfter(now)) {
                    // Due: create an execution record and run DataX asynchronously.
                    TaskExecution exec = taskExecutionService.createExecution(task);
                    int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
                    taskExecutionService.runAsync(task, exec.getId(), timeout, null);
                    log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), now, exec.getId());
                }
            } catch (Exception ex) {
                // Pass the exception as the final argument so SLF4J records the stack
                // trace; the previous message-only log hid the failure's root cause.
                log.warn("Skip task {} due to invalid cron or scheduling error: {}", task.getId(), ex.getMessage(), ex);
            }
        }
    }
}
|
||||
@@ -1,23 +0,0 @@
|
||||
datamate:
|
||||
data-collection:
|
||||
# DataX配置
|
||||
datax:
|
||||
home-path: ${DATAX_HOME:D:/datax}
|
||||
python-path: ${DATAX_PYTHON_PATH:python3}
|
||||
job-config-path: ${DATAX_JOB_PATH:./data/temp/datax/jobs}
|
||||
log-path: ${DATAX_LOG_PATH:./logs/datax}
|
||||
max-memory: ${DATAX_MAX_MEMORY:2048}
|
||||
channel-count: ${DATAX_CHANNEL_COUNT:5}
|
||||
|
||||
# 执行配置
|
||||
execution:
|
||||
max-concurrent-tasks: ${DATA_COLLECTION_MAX_CONCURRENT_TASKS:10}
|
||||
task-timeout-minutes: ${DATA_COLLECTION_TASK_TIMEOUT:120}
|
||||
retry-count: ${DATA_COLLECTION_RETRY_COUNT:3}
|
||||
retry-interval-seconds: ${DATA_COLLECTION_RETRY_INTERVAL:30}
|
||||
|
||||
# 监控配置
|
||||
monitoring:
|
||||
status-check-interval-seconds: ${DATA_COLLECTION_STATUS_CHECK_INTERVAL:30}
|
||||
log-retention-days: ${DATA_COLLECTION_LOG_RETENTION:30}
|
||||
enable-metrics: ${DATA_COLLECTION_ENABLE_METRICS:true}
|
||||
@@ -1,51 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- MyBatis mapper for CollectionTask rows in t_dc_collection_tasks.
     Statement ids must match method names on CollectionTaskMapper. -->
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper">

    <!-- Result Map: maps snake_case columns to CollectionTask fields;
         status is stored as the enum name (EnumTypeHandler). -->
    <resultMap id="CollectionTaskResultMap" type="com.datamate.collection.domain.model.entity.CollectionTask">
        <id property="id" column="id"/>
        <result property="name" column="name"/>
        <result property="description" column="description"/>
        <result property="config" column="config"/>
        <result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
        <result property="syncMode" column="sync_mode"/>
        <result property="scheduleExpression" column="schedule_expression"/>
        <result property="retryCount" column="retry_count"/>
        <result property="timeoutSeconds" column="timeout_seconds"/>
        <result property="maxRecords" column="max_records"/>
        <result property="sortField" column="sort_field"/>
        <result property="lastExecutionId" column="last_execution_id"/>
        <result property="createdAt" column="created_at"/>
        <result property="updatedAt" column="updated_at"/>
        <result property="createdBy" column="created_by"/>
        <result property="updatedBy" column="updated_by"/>
    </resultMap>

    <!-- Base Column List (tasks): reusable column list for SELECTs against
         t_dc_collection_tasks. -->
    <sql id="Base_Column_List">
        id,
        name, description, config, status, sync_mode,
        schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
        last_execution_id, created_at, updated_at, created_by, updated_by
    </sql>

    <!-- Update Status: sets the task status and bumps updated_at. -->
    <update id="updateStatus">
        UPDATE t_dc_collection_tasks SET status = #{status}, updated_at = NOW() WHERE id = #{id}
    </update>

    <!-- Update Last Execution: records the id of the most recent execution. -->
    <update id="updateLastExecution">
        UPDATE t_dc_collection_tasks SET last_execution_id = #{lastExecutionId}, updated_at = NOW() WHERE id = #{id}
    </update>

    <!-- Select Active Tasks for Scheduling: tasks eligible for the cron scanner —
         READY or RUNNING and carrying a schedule expression, newest first. -->
    <select id="selectActiveTasks" resultMap="CollectionTaskResultMap">
        SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
        WHERE status IN ('READY', 'RUNNING')
        AND schedule_expression IS NOT NULL
        ORDER BY created_at DESC
    </select>
</mapper>
|
||||
@@ -1,28 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- MyBatis mapper for TaskExecution rows in t_dc_task_executions.
     Statement ids must match method names on TaskExecutionMapper. -->
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper">

    <!-- Select Latest Execution by Task: the most recently started execution
         for a task (auto-mapped; result columns presumably align with
         TaskExecution's camelCase fields — confirm mapUnderscoreToCamelCase
         is enabled in the MyBatis configuration). -->
    <select id="selectLatestByTaskId" resultType="com.datamate.collection.domain.model.entity.TaskExecution">
        SELECT * FROM t_dc_task_executions
        WHERE task_id = #{taskId}
        ORDER BY started_at DESC
        LIMIT 1
    </select>

    <!-- Complete Execution: writes the terminal status, completion time and
         record counters for one execution. -->
    <update id="completeExecution">
        UPDATE t_dc_task_executions
        SET status = #{status},
        completed_at = #{completedAt},
        records_processed = #{recordsProcessed},
        records_total = #{recordsTotal},
        records_success = #{recordsSuccess},
        records_failed = #{recordsFailed},
        error_message = #{errorMessage},
        updated_at = NOW()
        WHERE id = #{executionId}
    </update>

</mapper>
|
||||
@@ -9,7 +9,7 @@ import org.springframework.web.bind.annotation.PathVariable;
|
||||
/**
|
||||
* 数据归集服务 Feign Client
|
||||
*/
|
||||
@FeignClient(name = "collection-service", url = "${collection.service.url:http://localhost:8080}")
|
||||
@FeignClient(name = "collection-service", url = "${collection.service.url:http://datamate-backend-python:18000}")
|
||||
public interface CollectionTaskClient {
|
||||
|
||||
/**
|
||||
|
||||
@@ -56,11 +56,6 @@
|
||||
<artifactId>data-management-service</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>data-collection-service</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.datamate</groupId>
|
||||
<artifactId>operator-market-service</artifactId>
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
|
||||
<!-- 核心服务 -->
|
||||
<module>data-management-service</module>
|
||||
<module>data-collection-service</module>
|
||||
<module>operator-market-service</module>
|
||||
<module>data-cleaning-service</module>
|
||||
<module>data-synthesis-service</module>
|
||||
|
||||
Reference in New Issue
Block a user