refactor: modify data collection to python implementation (#214)

* feature: LabelStudio jumps without login

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* fix: remove terrabase dependency

* feature: add the collection task executions page and the collection template page

* fix: fix the collection task creation

* fix: fix the collection task creation
hefanli
2025-12-30 18:48:43 +08:00
committed by GitHub
parent 80d4dfd285
commit 63f4e3e447
71 changed files with 1861 additions and 2557 deletions

View File

@@ -27,25 +27,6 @@
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-gateway</artifactId>
</dependency>
<dependency>
<groupId>com.terrabase</groupId>
<artifactId>enterprise-impl-commercial</artifactId>
<version>1.0.0</version>
<exclusions>
<exclusion>
<artifactId>spring-web</artifactId>
<groupId>org.springframework</groupId>
</exclusion>
<exclusion>
<artifactId>spring-boot-starter-web</artifactId>
<groupId>org.springframework.boot</groupId>
</exclusion>
<exclusion>
<artifactId>spring-boot-starter-logging</artifactId>
<groupId>org.springframework.boot</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- Log4j2 API -->
<dependency>
<groupId>org.springframework.boot</groupId>

View File

@@ -4,9 +4,7 @@ import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cloud.gateway.route.RouteLocator;
import org.springframework.cloud.gateway.route.builder.RouteLocatorBuilder;
import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
/**
* API Gateway & Auth Service Application
@@ -14,8 +12,6 @@ import org.springframework.context.annotation.ComponentScan;
* Provides routing, authentication, rate limiting, and related functions
*/
@SpringBootApplication
@ComponentScan(basePackages = {"com.datamate.gateway", "com.terrabase"})
@EnableFeignClients(basePackages = {"com.terrabase"})
public class ApiGatewayApplication {
public static void main(String[] args) {
@@ -37,6 +33,10 @@ public class ApiGatewayApplication {
.route("data-evaluation", r -> r.path("/api/evaluation/**")
.uri("http://datamate-backend-python:18000"))
// Data collection service route
.route("data-collection", r -> r.path("/api/data-collection/**")
.uri("http://datamate-backend-python:18000"))
.route("deer-flow-frontend", r -> r.path("/chat/**")
.uri("http://deer-flow-frontend:3000"))

View File

@@ -1,9 +1,6 @@
package com.datamate.gateway.filter;
import com.terrabase.enterprise.api.UserManagementService;
import com.terrabase.enterprise.api.dto.LoginUserDto;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cloud.gateway.filter.GatewayFilterChain;
import org.springframework.cloud.gateway.filter.GlobalFilter;
@@ -21,16 +18,13 @@ public class UserContextFilter implements GlobalFilter {
@Value("${commercial.switch:false}")
private boolean isCommercial;
@Autowired
private UserManagementService userManagementService;
@Override
public Mono<Void> filter(ServerWebExchange exchange, GatewayFilterChain chain) {
if (!isCommercial) {
return chain.filter(exchange);
}
try {
LoginUserDto loginUserDto = userManagementService.getCurrentUserInfo().getData().getFirst();
} catch (Exception e) {
log.error("get current user info error", e);
return chain.filter(exchange);

View File

@@ -1,229 +0,0 @@
# Data Collection Service
A DataX-based data collection and synchronization service that moves data between multiple data sources.
## Features
- 🔗 **Multiple data sources**: Supports mainstream databases such as MySQL, PostgreSQL, Oracle, and SQL Server
- 📊 **Task management**: Create, configure, run, and monitor data synchronization tasks
- ⏰ **Scheduling**: Cron-expression-based scheduled tasks
- 📈 **Real-time monitoring**: Execution progress, status, and performance metrics
- 📝 **Execution logs**: Detailed logging of every task run
- 🔌 **Pluggable**: DataX Reader/Writer plugin integration
## Architecture
- **Framework**: Spring Boot 3.x
- **Database**: MySQL + MyBatis
- **Sync engine**: DataX
- **API**: OpenAPI 3.0, auto-generated
- **Design**: DDD (Domain-Driven Design)
## Project Structure
```
src/main/java/com/datamate/collection/
├── DataCollectionApplication.java     # Application entry point
├── domain/                            # Domain layer
│   ├── model/                         # Domain models
│   │   ├── DataSource.java            # Data source entity
│   │   ├── CollectionTask.java        # Collection task entity
│   │   ├── TaskExecution.java         # Task execution record
│   │   └── ExecutionLog.java          # Execution log
│   └── service/                       # Domain services
│       ├── DataSourceService.java
│       ├── CollectionTaskService.java
│       ├── TaskExecutionService.java
│       └── impl/                      # Service implementations
├── infrastructure/                    # Infrastructure layer
│   ├── config/                        # Configuration classes
│   ├── datax/                         # DataX execution engine
│   │   └── DataXExecutionEngine.java
│   └── persistence/                   # Persistence
│       ├── mapper/                    # MyBatis mappers
│       └── typehandler/               # Type handlers
└── interfaces/                        # Interface layer
    ├── api/                           # OpenAPI-generated interfaces
    ├── dto/                           # OpenAPI-generated DTOs
    └── rest/                          # REST controllers
        ├── DataSourceController.java
        ├── CollectionTaskController.java
        ├── TaskExecutionController.java
        └── exception/                 # Exception handling
src/main/resources/
├── mappers/                           # MyBatis XML mapping files
├── application.properties             # Application configuration
└── ...
```
## Requirements
- Java 17+
- Maven 3.6+
- MySQL 8.0+
- DataX 3.0+
- Redis (optional, for caching)
## Configuration
### Application configuration (application.properties)
```properties
# Server port
server.port=8090
# Database
spring.datasource.url=jdbc:mysql://localhost:3306/knowledge_base
spring.datasource.username=root
spring.datasource.password=123456
# DataX
datax.home=/runtime/datax
datax.python.path=/runtime/datax/bin/datax.py
datax.job.timeout=7200
datax.job.memory=2g
```
### DataX setup
Make sure DataX is installed and configured:
1. Download DataX into the `/runtime/datax` directory
2. Configure the required Reader/Writer plugins
3. Make sure a Python environment is available (see the smoke test below)
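
To verify the installation, you can run the sample job bundled with the standard DataX distribution (this assumes the default tarball layout under `/runtime/datax`):

```bash
# DataX ships a self-check stream job; a clean run prints a
# transfer summary and confirms the Python environment works.
python /runtime/datax/bin/datax.py /runtime/datax/job/job.json
```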
## Database Initialization
Run the database initialization script:
```bash
mysql -u root -p knowledge_base < scripts/db/data-collection-init.sql
```
## Build and Run
### 1. Compile
```bash
cd backend/services/data-collection-service
mvn clean compile
```
This triggers OpenAPI code generation.
### 2. Package
```bash
mvn clean package -DskipTests
```
### 3. Run
As a standalone service:
```bash
java -jar target/data-collection-service-1.0.0-SNAPSHOT.jar
```
Or start it together with the other services via main-application:
```bash
cd backend/services/main-application
mvn spring-boot:run
```
## API Documentation
Once the service is running, the API documentation is available at:
- Swagger UI: http://localhost:8090/swagger-ui.html
- OpenAPI JSON: http://localhost:8090/v3/api-docs
## Main API Endpoints
### Data Source Management
- `GET /api/v1/collection/datasources` - List data sources
- `POST /api/v1/collection/datasources` - Create a data source
- `GET /api/v1/collection/datasources/{id}` - Get data source details
- `PUT /api/v1/collection/datasources/{id}` - Update a data source
- `DELETE /api/v1/collection/datasources/{id}` - Delete a data source
- `POST /api/v1/collection/datasources/{id}/test` - Test the connection (see the example below)
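
For example, creating and then testing a MySQL data source could look like this; the request fields are illustrative assumptions, so treat the Swagger UI as the authoritative schema:

```bash
# Create a MySQL data source (field names are assumptions, not the
# generated DTO contract)
curl -X POST http://localhost:8090/api/v1/collection/datasources \
  -H 'Content-Type: application/json' \
  -d '{
        "name": "orders-db",
        "type": "MYSQL",
        "jdbcUrl": "jdbc:mysql://localhost:3306/orders",
        "username": "root",
        "password": "123456"
      }'

# Test connectivity for the data source with id 42
curl -X POST http://localhost:8090/api/v1/collection/datasources/42/test
```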
### Collection Task Management
- `GET /api/v1/collection/tasks` - List tasks
- `POST /api/v1/collection/tasks` - Create a task
- `GET /api/v1/collection/tasks/{id}` - Get task details
- `PUT /api/v1/collection/tasks/{id}` - Update a task
- `DELETE /api/v1/collection/tasks/{id}` - Delete a task
### Task Execution Management
- `POST /api/v1/collection/tasks/{id}/execute` - Execute a task
- `POST /api/v1/collection/tasks/{id}/stop` - Stop a task
- `GET /api/v1/collection/executions` - List execution history
- `GET /api/v1/collection/executions/{executionId}` - Get execution details
- `GET /api/v1/collection/executions/{executionId}/logs` - Get execution logs (see the example below)
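
A typical flow is to trigger a task and then follow its logs; a minimal sketch with placeholder IDs:

```bash
# Trigger task 7; the response carries the new execution's ID
curl -X POST http://localhost:8090/api/v1/collection/tasks/7/execute

# Poll that execution's logs while it runs
curl http://localhost:8090/api/v1/collection/executions/{executionId}/logs
```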
### Monitoring
- `GET /api/v1/collection/monitor/statistics` - Get statistics
## Development Guide
### Adding a new data source type
1. Add the new type to the `DataSource.DataSourceType` enum
2. Add the corresponding Reader/Writer mapping in `DataXExecutionEngine`
3. Update the database schema and seed data
### Custom DataX plugins
1. Place the plugin under the `/runtime/datax/plugin` directory
2. Configure the plugin mapping in `DataXExecutionEngine`
3. Adjust the configuration template to match the plugin's requirements
### Extending monitoring metrics
1. Add the new statistics logic to `StatisticsService`
2. Update the `CollectionStatistics` DTO
3. Add the corresponding statistics tables or columns to the database
## Troubleshooting
### Common issues
1. **DataX execution fails**
   - Check the DataX installation path and Python environment
   - Verify the data source connection settings
   - Inspect the execution logs for details
2. **Database connection fails**
   - Check the database configuration and network connectivity
   - Verify the database user's permissions
3. **API calls fail**
   - Check the request parameter format
   - Inspect the application logs for details
### Viewing logs
```bash
# Application log
tail -f logs/data-collection-service.log
# Task execution log
curl http://localhost:8090/api/v1/collection/executions/{executionId}/logs
```
## Contributing
1. Fork the project
2. Create a feature branch: `git checkout -b feature/new-feature`
3. Commit your changes: `git commit -am 'Add new feature'`
4. Push the branch: `git push origin feature/new-feature`
5. Open a Pull Request
## License
MIT License

Binary files not shown: 4 images deleted (79 KiB, 52 KiB, 67 KiB, 107 KiB).

View File

@@ -1,163 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>services</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>data-collection-service</artifactId>
<packaging>jar</packaging>
<name>Data Collection Service</name>
<description>DataX-based data collection and aggregation service</description>
<dependencies>
<!-- Spring Boot Dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Database -->
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>8.0.33</version>
<scope>runtime</scope>
</dependency>
<!-- Redis -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- DataX Dependencies (DataX plugin integration) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>1.3</version>
</dependency>
<!-- Connection Pool -->
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
</dependency>
<!-- Oracle JDBC Driver -->
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>21.5.0.0</version>
</dependency>
<!-- PostgreSQL JDBC Driver -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<!-- JSON Processing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<!-- Shared Domain -->
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>data-management-service</artifactId>
<version>${project.version}</version>
</dependency>
<!-- OpenAPI Dependencies -->
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
<!-- Test Dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<skip>true</skip>
<classifier>exec</classifier>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<annotationProcessorPaths>
<!-- Order matters -->
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok-mapstruct-binding</artifactId>
<version>${lombok-mapstruct-binding.version}</version>
</path>
<path>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
</path>
</annotationProcessorPaths>
<compilerArgs>
<arg>-parameters</arg>
<arg>-Amapstruct.defaultComponentModel=spring</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -1,25 +0,0 @@
package com.datamate.collection;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;
/**
* Data collection service configuration class
*
* DataX-based data collection and synchronization service supporting collection from multiple data source types
*/
@SpringBootApplication
@EnableAsync
@EnableScheduling
@EnableTransactionManagement
@ComponentScan(basePackages = {
"com.datamate.collection",
"com.datamate.datamanagement",
"com.datamate.shared"
})
public class DataCollectionServiceConfiguration {
// Configuration class for JAR packaging - no main method needed
}

View File

@@ -1,73 +0,0 @@
package com.datamate.collection.application;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.common.domain.utils.ChunksSaver;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Objects;
@Slf4j
@Service
@RequiredArgsConstructor
public class CollectionTaskService {
private final TaskExecutionService taskExecutionService;
private final CollectionTaskRepository collectionTaskRepository;
@Transactional
public CollectionTask create(CollectionTask task, String datasetId) {
task.initCreateParam();
collectionTaskRepository.save(task);
executeTaskNow(task, datasetId);
return task;
}
private void executeTaskNow(CollectionTask task, String datasetId) {
if (Objects.equals(task.getSyncMode(), SyncMode.ONCE)) {
TaskExecution exec = taskExecutionService.createExecution(task);
int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
taskExecutionService.runAsync(task, exec.getId(), timeout, datasetId);
log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), LocalDateTime.now(), exec.getId());
}
}
@Transactional
public CollectionTask update(CollectionTask task, String datasetId) {
task.setUpdatedAt(LocalDateTime.now());
task.addPath();
collectionTaskRepository.updateById(task);
executeTaskNow(task, datasetId);
return task;
}
@Transactional
public void delete(String id) {
CollectionTask task = collectionTaskRepository.getById(id);
if (task != null) {
ChunksSaver.deleteFolder("/dataset/local/" + task.getId());
}
collectionTaskRepository.removeById(id);
}
public CollectionTask get(String id) {
return collectionTaskRepository.getById(id);
}
public IPage<CollectionTask> getTasks(Page<CollectionTask> page, LambdaQueryWrapper<CollectionTask> wrapper) {
return collectionTaskRepository.page(page, wrapper);
}
public List<CollectionTask> selectActiveTasks() {
return collectionTaskRepository.selectActiveTasks();
}
}

View File

@@ -1,65 +0,0 @@
package com.datamate.collection.application;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.domain.process.ProcessRunner;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.domain.repository.TaskExecutionRepository;
import com.datamate.datamanagement.application.DatasetApplicationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
@Slf4j
@Service
@RequiredArgsConstructor
public class TaskExecutionService {
private final ProcessRunner processRunner;
private final TaskExecutionRepository executionRepository;
private final CollectionTaskRepository collectionTaskRepository;
private final DatasetApplicationService datasetApplicationService;
@Transactional
public TaskExecution createExecution(CollectionTask task) {
TaskExecution exec = TaskExecution.initTaskExecution();
exec.setTaskId(task.getId());
exec.setTaskName(task.getName());
executionRepository.save(exec);
collectionTaskRepository.updateLastExecution(task.getId(), exec.getId());
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.RUNNING.name());
return exec;
}
public TaskExecution selectLatestByTaskId(String taskId) {
return executionRepository.selectLatestByTaskId(taskId);
}
@Async
@Transactional
public void runAsync(CollectionTask task, String executionId, int timeoutSeconds, String datasetId) {
try {
int code = processRunner.runJob(task, executionId, timeoutSeconds);
log.info("DataX finished with code {} for execution {}", code, executionId);
// Simplified: a normal exit is recorded as success
executionRepository.completeExecution(executionId, TaskStatus.SUCCESS.name(), LocalDateTime.now(),
0, 0L, 0L, 0L, null);
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.SUCCESS.name());
if (StringUtils.isNotBlank(datasetId)) {
datasetApplicationService.processDataSourceAsync(datasetId, task.getId());
}
} catch (Exception e) {
log.error("DataX execution failed", e);
executionRepository.completeExecution(executionId, TaskStatus.FAILED.name(), LocalDateTime.now(),
0, 0L, 0L, 0L, e.getMessage());
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.FAILED.name());
}
}
}

View File

@@ -1,12 +0,0 @@
package com.datamate.collection.common.enums;
/**
* Sync mode: one-off (ONCE) or scheduled (SCHEDULED)
*/
public enum SyncMode {
/** One-off (ONCE) */
ONCE,
/** Scheduled (SCHEDULED) */
SCHEDULED
}

View File

@@ -1,22 +0,0 @@
package com.datamate.collection.common.enums;
/**
* Unified task and execution status enum.
* Statuses: DRAFT (draft), READY (ready), RUNNING (running), SUCCESS (succeeded; replaces the former COMPLETED/SUCCESS), FAILED (failed), STOPPED (stopped)
*
* @author Data Mate Platform Team
*/
public enum TaskStatus {
/** Draft */
DRAFT,
/** Ready */
READY,
/** Running */
RUNNING,
/** Succeeded (replaces the former COMPLETED) */
SUCCESS,
/** Failed */
FAILED,
/** Stopped */
STOPPED
}

View File

@@ -1,11 +0,0 @@
package com.datamate.collection.common.enums;
/**
* Template type enum
*/
public enum TemplateType {
NAS,
OBS,
MYSQL
}

View File

@@ -1,62 +0,0 @@
package com.datamate.collection.domain.model.entity;
import com.baomidou.mybatisplus.annotation.TableName;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.common.domain.model.base.BaseEntity;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.Map;
import java.util.UUID;
/**
* Data collection task entity (aligned with database table t_dc_collection_tasks)
*/
@Getter
@Setter
@TableName(value = "t_dc_collection_tasks", autoResultMap = true)
public class CollectionTask extends BaseEntity<String> {
private String name;
private String description;
private TemplateType taskType; // task type
private String targetPath; // target storage path
private String config; // DataX job JSON containing source and target configuration
private TaskStatus status;
private SyncMode syncMode; // ONCE / SCHEDULED
private String scheduleExpression;
private Integer retryCount;
private Integer timeoutSeconds;
private Long maxRecords;
private String sortField;
private String lastExecutionId;
public void addPath() {
try {
ObjectMapper objectMapper = new ObjectMapper();
Map<String, Object> parameter = objectMapper.readValue(
config,
new TypeReference<>() {}
);
parameter.put("destPath", "/dataset/local/" + id);
parameter.put("filePaths", Collections.singletonList(parameter.get("destPath")));
config = objectMapper.writeValueAsString(parameter);
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
public void initCreateParam() {
this.id = UUID.randomUUID().toString();
this.targetPath = "/dataset/local/" + id;
this.status = TaskStatus.READY;
this.createdAt = LocalDateTime.now();
this.updatedAt = LocalDateTime.now();
}
}

View File

@@ -1,71 +0,0 @@
package com.datamate.collection.domain.model.entity;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.time.LocalDateTime;
@Data
@EqualsAndHashCode(callSuper = false)
public class DataxTemplate {
/**
* Template ID (UUID)
*/
private String id;
/**
* Template name
*/
private String name;
/**
* Source data source type
*/
private String sourceType;
/**
* Target data source type
*/
private String targetType;
/**
* Template content (JSON)
*/
private String templateContent;
/**
* Template description
*/
private String description;
/**
* Version
*/
private String version;
/**
* Whether this is a built-in system template
*/
private Boolean isSystem;
/**
* Creation time
*/
private LocalDateTime createdAt;
/**
* Update time
*/
private LocalDateTime updatedAt;
/**
* Creator
*/
private String createdBy;
/**
* Updater
*/
private String updatedBy;
}

View File

@@ -1,44 +0,0 @@
package com.datamate.collection.domain.model.entity;
import com.baomidou.mybatisplus.annotation.TableName;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.common.domain.model.base.BaseEntity;
import lombok.Data;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
import java.util.UUID;
@Getter
@Setter
@TableName(value = "t_dc_task_executions", autoResultMap = true)
public class TaskExecution extends BaseEntity<String> {
private String taskId;
private String taskName;
private TaskStatus status;
private Double progress;
private Long recordsTotal;
private Long recordsProcessed;
private Long recordsSuccess;
private Long recordsFailed;
private Double throughput;
private Long dataSizeBytes;
private LocalDateTime startedAt;
private LocalDateTime completedAt;
private Integer durationSeconds;
private String errorMessage;
private String dataxJobId;
private String config;
private String result;
public static TaskExecution initTaskExecution() {
TaskExecution exec = new TaskExecution();
exec.setId(UUID.randomUUID().toString());
exec.setStatus(TaskStatus.RUNNING);
exec.setProgress(0.0);
exec.setStartedAt(LocalDateTime.now());
exec.setCreatedAt(LocalDateTime.now());
return exec;
}
}

View File

@@ -1,21 +0,0 @@
package com.datamate.collection.domain.process;
import com.datamate.collection.domain.model.entity.CollectionTask;
/**
* Collection job runner interface
*
* @since 2025/10/23
*/
public interface ProcessRunner {
/**
* Run a collection job
*
* @param task the task
* @param executionId execution ID
* @param timeoutSeconds timeout in seconds
* @return process exit code
* @throws Exception if execution fails
*/
int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception;
}

View File

@@ -1,19 +0,0 @@
package com.datamate.collection.domain.repository;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.collection.domain.model.entity.CollectionTask;
import java.util.List;
/**
* Collection task repository
*
* @since 2025/10/23
*/
public interface CollectionTaskRepository extends IRepository<CollectionTask> {
List<CollectionTask> selectActiveTasks();
void updateStatus(String id, String status);
void updateLastExecution(String id, String lastExecutionId);
}

View File

@@ -1,19 +0,0 @@
package com.datamate.collection.domain.repository;
import com.baomidou.mybatisplus.extension.service.IService;
import com.datamate.collection.domain.model.entity.TaskExecution;
import java.time.LocalDateTime;
/**
* TaskExecutionRepository
*
* @since 2025/10/23
*/
public interface TaskExecutionRepository extends IService<TaskExecution> {
TaskExecution selectLatestByTaskId(String taskId);
void completeExecution(String executionId, String status, LocalDateTime completedAt,
Integer recordsProcessed, Long recordsTotal,
Long recordsSuccess, Long recordsFailed, String errorMessage);
}

View File

@@ -1,147 +0,0 @@
package com.datamate.collection.infrastructure.datax;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.process.ProcessRunner;
import com.datamate.collection.infrastructure.datax.config.MysqlConfig;
import com.datamate.collection.infrastructure.datax.config.NasConfig;
import com.datamate.collection.infrastructure.datax.config.ObsConfig;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.exec.*;
import org.apache.commons.io.output.TeeOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.file.*;
import java.time.Duration;
import java.util.*;
import java.util.regex.Pattern;
@Slf4j
@Component
@RequiredArgsConstructor
public class DataxProcessRunner implements ProcessRunner {
private final DataxProperties props;
@Override
public int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception {
Path job = buildJobFile(task);
int code = runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
// Post-process after the job finishes (MYSQL tasks only)
postProcess(task);
return code;
}
private int runJob(File jobFile, String executionId, Duration timeout) throws Exception {
File logFile = new File(props.getLogPath(), String.format("datax-%s.log", executionId));
String python = props.getPythonPath();
String dataxPy = props.getHomePath() + File.separator + "bin" + File.separator + "datax.py";
String cmd = String.format("%s %s %s", python, dataxPy, jobFile.getAbsolutePath());
log.info("Execute DataX: {}", cmd);
CommandLine cl = CommandLine.parse(cmd);
DefaultExecutor executor = getExecutor(timeout, logFile);
return executor.execute(cl);
}
private static DefaultExecutor getExecutor(Duration timeout, File logFile) throws FileNotFoundException {
DefaultExecutor executor = new DefaultExecutor();
// Tee the process output to the log file (append) and the console
File parent = logFile.getParentFile();
if (!parent.exists()) {
parent.mkdirs();
}
ExecuteStreamHandler streamHandler = new PumpStreamHandler(
new TeeOutputStream(new FileOutputStream(logFile, true), System.out),
new TeeOutputStream(new FileOutputStream(logFile, true), System.err)
);
executor.setStreamHandler(streamHandler);
ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout.toMillis());
executor.setWatchdog(watchdog);
return executor;
}
private Path buildJobFile(CollectionTask task) throws IOException {
Files.createDirectories(Paths.get(props.getJobConfigPath()));
String fileName = String.format("datax-job-%s.json", task.getId());
Path path = Paths.get(props.getJobConfigPath(), fileName);
// Simplified: the task's config field is used directly as the DataX job JSON
try (FileWriter fw = new FileWriter(path.toFile())) {
if (StringUtils.isBlank(task.getConfig())) {
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
String json = getJobConfig(task);
log.info("Job config: {}", json);
fw.write(json);
}
return path;
}
private String getJobConfig(CollectionTask task) {
try {
ObjectMapper objectMapper = new ObjectMapper();
TemplateType templateType = task.getTaskType();
return switch (templateType) {
case NAS -> {
// NAS-specific handling
NasConfig nasConfig = objectMapper.readValue(task.getConfig(), NasConfig.class);
yield nasConfig.toJobConfig(objectMapper, task);
}
case OBS -> {
ObsConfig obsConfig = objectMapper.readValue(task.getConfig(), ObsConfig.class);
yield obsConfig.toJobConfig(objectMapper, task);
}
case MYSQL -> {
MysqlConfig mysqlConfig = objectMapper.readValue(task.getConfig(), MysqlConfig.class);
yield mysqlConfig.toJobConfig(objectMapper, task);
}
};
} catch (Exception e) {
log.error("Failed to parse task config", e);
throw new RuntimeException("Failed to parse task config", e);
}
}
private void postProcess(CollectionTask task) throws IOException {
if (task.getTaskType() != TemplateType.MYSQL) {
return;
}
String targetPath = task.getTargetPath();
// Rename any file under targetPath that does not end in .csv so that it does
Path dir = Paths.get(targetPath);
if (!Files.exists(dir) || !Files.isDirectory(dir)) {
log.info("Target path {} does not exist or is not a directory for task {}, skip post processing.", targetPath, task.getId());
return;
}
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
for (Path path : stream) {
if (!Files.isRegularFile(path)) continue;
String name = path.getFileName().toString();
if (name.toLowerCase().endsWith(".csv")) continue;
Path target = dir.resolve(name + ".csv");
try {
Files.move(path, target, StandardCopyOption.REPLACE_EXISTING);
log.info("Renamed file for task {}: {} -> {}", task.getId(), name, target.getFileName().toString());
} catch (IOException ex) {
log.warn("Failed to rename file {} for task {}: {}", path, task.getId(), ex.getMessage(), ex);
}
}
} catch (IOException ioe) {
log.warn("Error scanning target directory {} for task {}: {}", targetPath, task.getId(), ioe.getMessage(), ioe);
}
}
}

View File

@@ -1,17 +0,0 @@
package com.datamate.collection.infrastructure.datax;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
@Data
@Configuration
@ConfigurationProperties(prefix = "datamate.data-collection.datax")
public class DataxProperties {
private String homePath; // DATAX_HOME
private String pythonPath; // python executable
private String jobConfigPath; // directory for generated job config files
private String logPath; // runtime log directory
private Integer maxMemory = 2048;
private Integer channelCount = 5;
}

View File

@@ -1,4 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
public interface BaseConfig {
}

View File

@@ -1,73 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Getter
@Setter
public class MysqlConfig {
private String jdbcUrl;
private String username;
private String password;
private String querySql;
private List<String> headers;
/**
* Builds the DataX job JSON string from this MySQL configuration.
*/
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
Map<String, Object> mysqlParameter = new HashMap<>();
Map<String, Object> connection = new HashMap<>();
if (username != null) mysqlParameter.put("username", username);
if (password != null) mysqlParameter.put("password", password);
if (jdbcUrl != null) connection.put("jdbcUrl", Collections.singletonList(jdbcUrl));
if (querySql != null) connection.put("querySql", Collections.singletonList(querySql));
mysqlParameter.put("connection", Collections.singletonList(connection));
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "mysqlreader");
reader.put("parameter", mysqlParameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
Map<String, Object> writerParameter = new HashMap<>();
writer.put("name", "txtfilewriter");
if (CollectionUtils.isNotEmpty(headers)) {
writerParameter.put("header", headers);
}
writerParameter.put("path", task.getTargetPath());
writerParameter.put("fileName", "collectionResult");
writerParameter.put("writeMode", "truncate");
writerParameter.put("dateFormat", "yyyy-MM-dd HH:mm:ss");
writerParameter.put("fileFormat", "csv");
writerParameter.put("encoding", "UTF-8");
writerParameter.put("fieldDelimiter", ",");
writer.put("parameter", writerParameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 1);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
}
}

View File

@@ -1,54 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Getter
@Setter
public class NasConfig implements BaseConfig {
private String ip;
private String path;
private List<String> files;
/**
* Builds the DataX job JSON string from this NAS configuration.
*/
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
Map<String, Object> parameter = new HashMap<>();
if (ip != null) parameter.put("ip", ip);
if (path != null) parameter.put("path", path);
if (files != null) parameter.put("files", files);
parameter.put("destPath", task.getTargetPath());
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "nfsreader");
reader.put("parameter", parameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
writer.put("name", "nfswriter");
writer.put("parameter", parameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 2);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
}
}

View File

@@ -1,61 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* OBS collection configuration class
*
* @since 2025/11/18
*/
@Getter
@Setter
public class ObsConfig implements BaseConfig {
private String endpoint;
private String bucket;
private String accessKey;
private String secretKey;
private String prefix;
/**
* Builds the DataX job JSON string from this OBS configuration.
*/
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
Map<String, Object> parameter = new HashMap<>();
if (endpoint != null) parameter.put("endpoint", endpoint);
if (bucket != null) parameter.put("bucket", bucket);
if (accessKey != null) parameter.put("accessKey", accessKey);
if (secretKey != null) parameter.put("secretKey", secretKey);
if (prefix != null) parameter.put("prefix", prefix);
parameter.put("destPath", task.getTargetPath());
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "obsreader");
reader.put("parameter", parameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
writer.put("name", "obswriter");
writer.put("parameter", parameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 2);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
}
}

View File

@@ -1,15 +0,0 @@
package com.datamate.collection.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.collection.domain.model.entity.CollectionTask;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
@Mapper
public interface CollectionTaskMapper extends BaseMapper<CollectionTask> {
int updateStatus(@Param("id") String id, @Param("status") String status);
int updateLastExecution(@Param("id") String id, @Param("lastExecutionId") String lastExecutionId);
List<CollectionTask> selectActiveTasks();
}

View File

@@ -1,22 +0,0 @@
package com.datamate.collection.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.collection.domain.model.entity.TaskExecution;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.time.LocalDateTime;
@Mapper
public interface TaskExecutionMapper extends BaseMapper<TaskExecution> {
TaskExecution selectLatestByTaskId(@Param("taskId") String taskId);
void completeExecution(@Param("executionId") String executionId,
@Param("status") String status,
@Param("completedAt") LocalDateTime completedAt,
@Param("recordsProcessed") Integer recordsProcessed,
@Param("recordsTotal") Long recordsTotal,
@Param("recordsSuccess") Long recordsSuccess,
@Param("recordsFailed") Long recordsFailed,
@Param("errorMessage") String errorMessage);
}

View File

@@ -1,36 +0,0 @@
package com.datamate.collection.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
* CollectionTaskRepositoryImpl
*
* @since 2025/10/23
*/
@Repository
@RequiredArgsConstructor
public class CollectionTaskRepositoryImpl extends CrudRepository<CollectionTaskMapper, CollectionTask> implements CollectionTaskRepository {
private final CollectionTaskMapper collectionTaskMapper;
@Override
public List<CollectionTask> selectActiveTasks() {
return collectionTaskMapper.selectActiveTasks();
}
@Override
public void updateStatus(String id, String status) {
collectionTaskMapper.updateStatus(id, status);
}
@Override
public void updateLastExecution(String id, String lastExecutionId) {
collectionTaskMapper.updateLastExecution(id, lastExecutionId);
}
}

View File

@@ -1,37 +0,0 @@
package com.datamate.collection.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.domain.repository.TaskExecutionRepository;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;
import java.time.LocalDateTime;
/**
* TaskExecutionRepositoryImpl
*
* @since 2025/10/23
*/
@Repository
@RequiredArgsConstructor
public class TaskExecutionRepositoryImpl extends ServiceImpl<TaskExecutionMapper, TaskExecution>
implements TaskExecutionRepository {
private final TaskExecutionMapper taskExecutionMapper;
@Override
public TaskExecution selectLatestByTaskId(String taskId) {
return taskExecutionMapper.selectLatestByTaskId(taskId);
}
@Override
public void completeExecution(String executionId, String status, LocalDateTime completedAt,
Integer recordsProcessed, Long recordsTotal,
Long recordsSuccess, Long recordsFailed, String errorMessage) {
taskExecutionMapper.completeExecution(executionId, status, completedAt,
recordsProcessed, recordsTotal,
recordsSuccess, recordsFailed, errorMessage);
}
}

View File

@@ -1,59 +0,0 @@
package com.datamate.collection.interfaces.converter;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;
import java.util.List;
import java.util.Map;
@Mapper
public interface CollectionTaskConverter {
CollectionTaskConverter INSTANCE = Mappers.getMapper(CollectionTaskConverter.class);
@Mapping(source = "config", target = "config", qualifiedByName = "parseJsonToMap")
CollectionTaskResponse toResponse(CollectionTask task);
List<CollectionTaskResponse> toResponse(List<CollectionTask> tasks);
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
CollectionTask toCollectionTask(CreateCollectionTaskRequest request);
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
CollectionTask toCollectionTask(UpdateCollectionTaskRequest request);
@Mapping(source = "current", target = "page")
@Mapping(source = "size", target = "size")
@Mapping(source = "total", target = "totalElements")
@Mapping(source = "pages", target = "totalPages")
@Mapping(source = "records", target = "content")
PagedResponse<CollectionTaskResponse> toResponse(IPage<CollectionTask> tasks);
@Named("parseJsonToMap")
default Map<String, Object> parseJsonToMap(String json) {
try {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(json, Map.class);
} catch (Exception e) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
}
}
@Named("mapToJsonString")
default String mapToJsonString(Map<String, Object> map) {
try {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(map != null ? map : Map.of());
} catch (Exception e) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
}
}
}

View File

@@ -1,25 +0,0 @@
package com.datamate.collection.interfaces.dto;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.common.interfaces.PagingQuery;
import lombok.Getter;
import lombok.Setter;
/**
* Paging query parameters for collection tasks
*
* @since 2025/10/23
*/
@Getter
@Setter
public class CollectionTaskPagingQuery extends PagingQuery {
/**
* Task status
*/
private TaskStatus status;
/**
* Task name keyword
*/
private String keyword;
}

View File

@@ -1,52 +0,0 @@
package com.datamate.collection.interfaces.dto;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
import jakarta.validation.Valid;
/**
* CollectionTaskResponse
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CollectionTaskResponse {
private String id;
private String name;
private String description;
private String targetPath;
private Map<String, Object> config = new HashMap<>();
private TaskStatus status;
private SyncMode syncMode;
private String scheduleExpression;
private String lastExecutionId;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
private DatasetResponse dataset;
}

View File

@@ -1,64 +0,0 @@
package com.datamate.collection.interfaces.dto;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.HashMap;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import jakarta.validation.Valid;
import jakarta.validation.constraints.*;
import io.swagger.v3.oas.annotations.media.Schema;
/**
* CreateCollectionTaskRequest
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CreateCollectionTaskRequest {
@NotNull
@Size(min = 1, max = 100)
@Schema(name = "name", description = "任务名称", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("name")
private String name;
@Size(max = 500)
@Schema(name = "description", description = "任务描述", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("description")
private String description;
@NotNull
@Schema(name = "taskType", description = "任务类型", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("taskType")
private TemplateType taskType;
@Valid
@NotNull
@Schema(name = "config", description = "归集配置,包含源端和目标端配置信息", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("config")
private Map<String, Object> config = new HashMap<>();
@NotNull
@Valid
@Schema(name = "syncMode", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("syncMode")
private SyncMode syncMode;
@Schema(name = "scheduleExpression", description = "Cron调度表达式 (syncMode=SCHEDULED 时必填)", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("scheduleExpression")
private String scheduleExpression;
/** Parameters for creating the dataset */
@Valid
private CreateDatasetRequest dataset;
}

View File

@@ -1,53 +0,0 @@
package com.datamate.collection.interfaces.dto;
import com.datamate.collection.common.enums.SyncMode;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.HashMap;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import jakarta.validation.Valid;
import jakarta.validation.constraints.*;
import io.swagger.v3.oas.annotations.media.Schema;
/**
* UpdateCollectionTaskRequest
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class UpdateCollectionTaskRequest {
@Size(min = 1, max = 100)
@Schema(name = "name", description = "任务名称", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("name")
private String name;
@Size(max = 500)
@Schema(name = "description", description = "任务描述", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("description")
private String description;
@Valid
@Schema(name = "config", description = "归集配置,包含源端和目标端配置信息", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("config")
private Map<String, Object> config = new HashMap<>();
@Valid
@Schema(name = "syncMode", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("syncMode")
private SyncMode syncMode;
@Schema(name = "scheduleExpression", description = "Cron调度表达式 (syncMode=SCHEDULED 时必填)", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("scheduleExpression")
private String scheduleExpression;
/** Dataset ID */
private String datasetId;
}

View File

@@ -1,79 +0,0 @@
package com.datamate.collection.interfaces.rest;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.collection.application.CollectionTaskService;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.converter.CollectionTaskConverter;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.datamanagement.application.DatasetApplicationService;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import jakarta.validation.Valid;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.bind.annotation.*;
import java.util.*;
@Slf4j
@RestController
@RequestMapping("/data-collection/tasks")
@RequiredArgsConstructor
public class CollectionTaskController {
private final CollectionTaskService taskService;
private final DatasetApplicationService datasetService;
@PostMapping
@Transactional
public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
String datasetId = null;
DatasetResponse dataset = null;
if (Objects.nonNull(request.getDataset())) {
dataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
datasetId = dataset.getId();
}
CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task, datasetId));
response.setDataset(dataset);
return ResponseEntity.ok().body(response);
}
@PutMapping("/{id}")
public ResponseEntity<CollectionTaskResponse> updateTask(@PathVariable("id") String id, @Valid @RequestBody UpdateCollectionTaskRequest request) {
if (taskService.get(id) == null) {
return ResponseEntity.notFound().build();
}
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
task.setId(id);
return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.update(task, request.getDatasetId())));
}
@DeleteMapping("/{id}")
public ResponseEntity<Void> deleteTask(@PathVariable("id") String id) {
taskService.delete(id);
return ResponseEntity.ok().build();
}
@GetMapping("/{id}")
public ResponseEntity<CollectionTaskResponse> getTaskDetail(@PathVariable("id") String id) {
CollectionTask task = taskService.get(id);
return task == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(task));
}
@GetMapping
public ResponseEntity<PagedResponse<CollectionTaskResponse>> getTasks(@Valid CollectionTaskPagingQuery query) {
Page<CollectionTask> page = new Page<>(query.getPage(), query.getSize());
LambdaQueryWrapper<CollectionTask> wrapper = new LambdaQueryWrapper<CollectionTask>()
.eq(query.getStatus() != null, CollectionTask::getStatus, query.getStatus())
.like(StringUtils.isNotBlank(query.getKeyword()), CollectionTask::getName, query.getKeyword())
.orderByDesc(CollectionTask::getCreatedAt);
return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.getTasks(page, wrapper)));
}
}

View File

@@ -1,64 +0,0 @@
package com.datamate.collection.interfaces.scheduler;
import com.datamate.collection.application.CollectionTaskService;
import com.datamate.collection.application.TaskExecutionService;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.scheduling.support.CronExpression;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import java.time.LocalDateTime;
import java.util.List;
@Slf4j
@Component
@RequiredArgsConstructor
public class TaskSchedulerInitializer {
private final CollectionTaskService collectionTaskService;
private final TaskExecutionService taskExecutionService;
// Periodically scan active collection tasks and trigger any whose cron schedule is due
@Scheduled(fixedDelayString = "${datamate.data-collection.scheduler.scan-interval-ms:10000}")
public void scanAndTrigger() {
List<CollectionTask> tasks = collectionTaskService.selectActiveTasks();
if (tasks == null || tasks.isEmpty()) {
return;
}
LocalDateTime now = LocalDateTime.now();
for (CollectionTask task : tasks) {
String cronExpr = task.getScheduleExpression();
if (!StringUtils.hasText(cronExpr)) {
continue;
}
try {
// Skip this task if its latest execution is still running
TaskExecution latest = taskExecutionService.selectLatestByTaskId(task.getId());
if (latest != null && latest.getStatus() == TaskStatus.RUNNING) {
continue;
}
CronExpression cron = CronExpression.parse(cronExpr);
LocalDateTime base = latest != null && latest.getStartedAt() != null
? latest.getStartedAt()
: now.minusYears(1); // no execution history: push the base time back so the due check can fire
LocalDateTime nextTime = cron.next(base);
if (nextTime != null && !nextTime.isAfter(now)) {
// Due: trigger one execution
TaskExecution exec = taskExecutionService.createExecution(task);
int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
taskExecutionService.runAsync(task, exec.getId(), timeout, null);
log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), now, exec.getId());
}
} catch (Exception ex) {
log.warn("Skip task {} due to invalid cron or scheduling error: {}", task.getId(), ex.getMessage());
}
}
}
}

View File

@@ -1,23 +0,0 @@
datamate:
data-collection:
# DataX configuration
datax:
home-path: ${DATAX_HOME:D:/datax}
python-path: ${DATAX_PYTHON_PATH:python3}
job-config-path: ${DATAX_JOB_PATH:./data/temp/datax/jobs}
log-path: ${DATAX_LOG_PATH:./logs/datax}
max-memory: ${DATAX_MAX_MEMORY:2048}
channel-count: ${DATAX_CHANNEL_COUNT:5}
# Execution settings
execution:
max-concurrent-tasks: ${DATA_COLLECTION_MAX_CONCURRENT_TASKS:10}
task-timeout-minutes: ${DATA_COLLECTION_TASK_TIMEOUT:120}
retry-count: ${DATA_COLLECTION_RETRY_COUNT:3}
retry-interval-seconds: ${DATA_COLLECTION_RETRY_INTERVAL:30}
# Monitoring settings
monitoring:
status-check-interval-seconds: ${DATA_COLLECTION_STATUS_CHECK_INTERVAL:30}
log-retention-days: ${DATA_COLLECTION_LOG_RETENTION:30}
enable-metrics: ${DATA_COLLECTION_ENABLE_METRICS:true}
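
Every value above resolves from an environment variable with an inline default, so deployments can override the DataX paths at launch without editing the file. A minimal sketch, assuming the jar name from the service README:

```bash
DATAX_HOME=/runtime/datax \
DATAX_PYTHON_PATH=python3 \
DATAX_LOG_PATH=/var/log/datax \
java -jar data-collection-service-1.0.0-SNAPSHOT.jar
```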

View File

@@ -1,51 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper">
<!-- Result Map -->
<resultMap id="CollectionTaskResultMap" type="com.datamate.collection.domain.model.entity.CollectionTask">
<id property="id" column="id"/>
<result property="name" column="name"/>
<result property="description" column="description"/>
<result property="config" column="config"/>
<result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
<result property="syncMode" column="sync_mode"/>
<result property="scheduleExpression" column="schedule_expression"/>
<result property="retryCount" column="retry_count"/>
<result property="timeoutSeconds" column="timeout_seconds"/>
<result property="maxRecords" column="max_records"/>
<result property="sortField" column="sort_field"/>
<result property="lastExecutionId" column="last_execution_id"/>
<result property="createdAt" column="created_at"/>
<result property="updatedAt" column="updated_at"/>
<result property="createdBy" column="created_by"/>
<result property="updatedBy" column="updated_by"/>
</resultMap>
<!-- Base Column List (tasks) -->
<sql id="Base_Column_List">
id,
name, description, config, status, sync_mode,
schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
last_execution_id, created_at, updated_at, created_by, updated_by
</sql>
<!-- Update Status -->
<update id="updateStatus">
UPDATE t_dc_collection_tasks SET status = #{status}, updated_at = NOW() WHERE id = #{id}
</update>
<!-- Update Last Execution -->
<update id="updateLastExecution">
UPDATE t_dc_collection_tasks SET last_execution_id = #{lastExecutionId}, updated_at = NOW() WHERE id = #{id}
</update>
<!-- Select Active Tasks for Scheduling -->
<select id="selectActiveTasks" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
WHERE status IN ('READY', 'RUNNING')
AND schedule_expression IS NOT NULL
ORDER BY created_at DESC
</select>
</mapper>

View File

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper">
<!-- Select Latest Execution by Task -->
<select id="selectLatestByTaskId" resultType="com.datamate.collection.domain.model.entity.TaskExecution">
SELECT * FROM t_dc_task_executions
WHERE task_id = #{taskId}
ORDER BY started_at DESC
LIMIT 1
</select>
<!-- Complete Execution -->
<update id="completeExecution">
UPDATE t_dc_task_executions
SET status = #{status},
completed_at = #{completedAt},
records_processed = #{recordsProcessed},
records_total = #{recordsTotal},
records_success = #{recordsSuccess},
records_failed = #{recordsFailed},
error_message = #{errorMessage},
updated_at = NOW()
WHERE id = #{executionId}
</update>
</mapper>

View File

@@ -9,7 +9,7 @@ import org.springframework.web.bind.annotation.PathVariable;
/**
* Feign client for the data collection service
*/
@FeignClient(name = "collection-service", url = "${collection.service.url:http://localhost:8080}")
@FeignClient(name = "collection-service", url = "${collection.service.url:http://datamate-backend-python:18000}")
public interface CollectionTaskClient {
/**

View File

@@ -56,11 +56,6 @@
<artifactId>data-management-service</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>data-collection-service</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>operator-market-service</artifactId>

View File

@@ -24,7 +24,6 @@
<!-- Core services -->
<module>data-management-service</module>
<module>data-collection-service</module>
<module>operator-market-service</module>
<module>data-cleaning-service</module>
<module>data-synthesis-service</module>