refactor: modify data collection to python implementation (#214)

* feature: LabelStudio jumps without login

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* refactor: modify data collection to python implementation

* fix: remove terrabase dependency

* feature: add the collection task executions page and the collection template page

* fix: fix the collection task creation

* fix: fix the collection task creation
hefanli
2025-12-30 18:48:43 +08:00
committed by GitHub
parent 80d4dfd285
commit 63f4e3e447
71 changed files with 1861 additions and 2557 deletions

View File

@@ -27,25 +27,6 @@
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-gateway</artifactId>
</dependency>
<dependency>
<groupId>com.terrabase</groupId>
<artifactId>enterprise-impl-commercial</artifactId>
<version>1.0.0</version>
<exclusions>
<exclusion>
<artifactId>spring-web</artifactId>
<groupId>org.springframework</groupId>
</exclusion>
<exclusion>
<artifactId>spring-boot-starter-web</artifactId>
<groupId>org.springframework.boot</groupId>
</exclusion>
<exclusion>
<artifactId>spring-boot-starter-logging</artifactId>
<groupId>org.springframework.boot</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- Log4j2 API -->
<dependency>
<groupId>org.springframework.boot</groupId>

View File

@@ -4,9 +4,7 @@ import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cloud.gateway.route.RouteLocator;
import org.springframework.cloud.gateway.route.builder.RouteLocatorBuilder;
import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
/**
* API Gateway & Auth Service Application
@@ -14,8 +12,6 @@ import org.springframework.context.annotation.ComponentScan;
* Provides routing, authentication, rate limiting, and related features
*/
@SpringBootApplication
@ComponentScan(basePackages = {"com.datamate.gateway", "com.terrabase"})
@EnableFeignClients(basePackages = {"com.terrabase"})
public class ApiGatewayApplication {
public static void main(String[] args) {
@@ -37,6 +33,10 @@ public class ApiGatewayApplication {
.route("data-evaluation", r -> r.path("/api/evaluation/**")
.uri("http://datamate-backend-python:18000"))
// Data collection service route
.route("data-collection", r -> r.path("/api/data-collection/**")
.uri("http://datamate-backend-python:18000"))
.route("deer-flow-frontend", r -> r.path("/chat/**")
.uri("http://deer-flow-frontend:3000"))

View File

@@ -1,9 +1,6 @@
package com.datamate.gateway.filter;
import com.terrabase.enterprise.api.UserManagementService;
import com.terrabase.enterprise.api.dto.LoginUserDto;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cloud.gateway.filter.GatewayFilterChain;
import org.springframework.cloud.gateway.filter.GlobalFilter;
@@ -21,16 +18,13 @@ public class UserContextFilter implements GlobalFilter {
@Value("${commercial.switch:false}")
private boolean isCommercial;
@Autowired
private UserManagementService userManagementService;
@Override
public Mono<Void> filter(ServerWebExchange exchange, GatewayFilterChain chain) {
if (!isCommercial) {
return chain.filter(exchange);
}
try {
LoginUserDto loginUserDto = userManagementService.getCurrentUserInfo().getData().getFirst();
} catch (Exception e) {
log.error("get current user info error", e);
return chain.filter(exchange);

View File

@@ -1,229 +0,0 @@
# Data Collection Service
A DataX-based data collection and synchronization service that syncs data between multiple data sources.
## Features
- 🔗 **Multiple data sources**: supports mainstream databases such as MySQL, PostgreSQL, Oracle, and SQL Server
- 📊 **Task management**: create, configure, run, and monitor data sync tasks
- **Scheduling**: Cron-expression-based scheduled tasks
- 📈 **Real-time monitoring**: progress, status, and performance metrics for task executions
- 📝 **Execution logs**: detailed logging for every task run
- 🔌 **Pluggable**: DataX Reader/Writer plugin integration
## Technical Architecture
- **Framework**: Spring Boot 3.x
- **Database**: MySQL + MyBatis
- **Sync engine**: DataX
- **API**: generated from OpenAPI 3.0
- **Architecture style**: DDD (domain-driven design)
## Project Structure
```
src/main/java/com/datamate/collection/
├── DataCollectionApplication.java     # application entry point
├── domain/                            # domain layer
│   ├── model/                         # domain models
│   │   ├── DataSource.java            # data source entity
│   │   ├── CollectionTask.java        # collection task entity
│   │   ├── TaskExecution.java         # task execution record
│   │   └── ExecutionLog.java          # execution log
│   └── service/                       # domain services
│       ├── DataSourceService.java
│       ├── CollectionTaskService.java
│       ├── TaskExecutionService.java
│       └── impl/                      # service implementations
├── infrastructure/                    # infrastructure layer
│   ├── config/                        # configuration classes
│   ├── datax/                         # DataX execution engine
│   │   └── DataXExecutionEngine.java
│   └── persistence/                   # persistence
│       ├── mapper/                    # MyBatis mappers
│       └── typehandler/               # type handlers
└── interfaces/                        # interface layer
    ├── api/                           # OpenAPI-generated interfaces
    ├── dto/                           # OpenAPI-generated DTOs
    └── rest/                          # REST controllers
        ├── DataSourceController.java
        ├── CollectionTaskController.java
        ├── TaskExecutionController.java
        └── exception/                 # exception handling
src/main/resources/
├── mappers/                           # MyBatis XML mapping files
├── application.properties             # application configuration
└── ...
```
## Requirements
- Java 17+
- Maven 3.6+
- MySQL 8.0+
- DataX 3.0+
- Redis (optional, for caching)
## Configuration
### Application configuration (application.properties)
```properties
# Server port
server.port=8090
# Database
spring.datasource.url=jdbc:mysql://localhost:3306/knowledge_base
spring.datasource.username=root
spring.datasource.password=123456
# DataX
datax.home=/runtime/datax
datax.python.path=/runtime/datax/bin/datax.py
datax.job.timeout=7200
datax.job.memory=2g
```
### DataX setup
Make sure DataX is installed and configured correctly (a quick self-check sketch follows this list):
1. Download DataX into the `/runtime/datax` directory
2. Configure the required Reader/Writer plugins
3. Make sure a Python environment is available
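
One quick way to verify the installation is to run the sample job bundled with the DataX distribution. This is only a sketch: it assumes the paths configured above and the standard `job/job.json` sample that ships with DataX; adjust both to your environment.

```java
import java.io.IOException;

// Minimal self-check: run DataX's bundled sample job and report the exit code.
// Paths follow the application.properties values above; adjust as needed.
public class DataxSelfCheck {
    public static void main(String[] args) throws IOException, InterruptedException {
        ProcessBuilder pb = new ProcessBuilder(
                "python", "/runtime/datax/bin/datax.py", "/runtime/datax/job/job.json");
        pb.inheritIO(); // stream DataX output to this console
        int code = pb.start().waitFor();
        System.out.println("DataX exit code: " + code); // 0 means the install works
    }
}
```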
## Database Initialization
Run the database initialization script:
```bash
mysql -u root -p knowledge_base < scripts/db/data-collection-init.sql
```
## Build and Run
### 1. Compile
```bash
cd backend/services/data-collection-service
mvn clean compile
```
This triggers OpenAPI code generation.
### 2. Package
```bash
mvn clean package -DskipTests
```
### 3. Run
Run as a standalone service:
```bash
java -jar target/data-collection-service-1.0.0-SNAPSHOT.jar
```
Or start it through main-application:
```bash
cd backend/services/main-application
mvn spring-boot:run
```
## API Documentation
Once the service is running, the API docs are available at:
- Swagger UI: http://localhost:8090/swagger-ui.html
- OpenAPI JSON: http://localhost:8090/v3/api-docs
## Main API Endpoints
### Data source management
- `GET /api/v1/collection/datasources` - list data sources
- `POST /api/v1/collection/datasources` - create a data source
- `GET /api/v1/collection/datasources/{id}` - get data source details
- `PUT /api/v1/collection/datasources/{id}` - update a data source
- `DELETE /api/v1/collection/datasources/{id}` - delete a data source
- `POST /api/v1/collection/datasources/{id}/test` - test the connection
### Collection task management
- `GET /api/v1/collection/tasks` - list tasks
- `POST /api/v1/collection/tasks` - create a task
- `GET /api/v1/collection/tasks/{id}` - get task details
- `PUT /api/v1/collection/tasks/{id}` - update a task
- `DELETE /api/v1/collection/tasks/{id}` - delete a task
### Task execution management
- `POST /api/v1/collection/tasks/{id}/execute` - run a task
- `POST /api/v1/collection/tasks/{id}/stop` - stop a task
- `GET /api/v1/collection/executions` - list execution history
- `GET /api/v1/collection/executions/{executionId}` - get execution details
- `GET /api/v1/collection/executions/{executionId}/logs` - get execution logs
### Monitoring
- `GET /api/v1/collection/monitor/statistics` - get statistics
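
For example, a one-off task can be created over HTTP. The sketch below is illustrative only: the field names follow the `CreateCollectionTaskRequest` DTO elsewhere in this change, and the NAS connection values are placeholders.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

// Sketch: POST a one-off NAS collection task to the endpoint listed above.
public class CreateTaskExample {
    public static void main(String[] args) throws Exception {
        String body = """
                {"name": "nas-import",
                 "taskType": "NAS",
                 "syncMode": "ONCE",
                 "config": {"ip": "10.0.0.1", "path": "/share"}}""";
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8090/api/v1/collection/tasks"))
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(body))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```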
## Development Guide
### Adding a new data source type
1. Add the new type to the `DataSource.DataSourceType` enum
2. Add the corresponding Reader/Writer mapping in `DataXExecutionEngine`
3. Update the database schema and seed data
### Custom DataX plugins
1. Place the plugin under the `/runtime/datax/plugin` directory
2. Configure the plugin mapping in `DataXExecutionEngine`
3. Adjust the configuration template to match the plugin's requirements
### Extending monitoring metrics
1. Add the new statistics logic to `StatisticsService`
2. Update the `CollectionStatistics` DTO
3. Add the corresponding statistics tables or columns to the database
## Troubleshooting
### Common issues
1. **DataX execution fails**
   - Check the DataX install path and the Python environment
   - Verify the data source connection configuration
   - Check the execution logs for details
2. **Database connection fails**
   - Check the database configuration and network connectivity
   - Verify the database user's privileges
3. **API calls fail**
   - Check the request parameter format
   - Check the application logs for details
### Viewing logs
```bash
# Application log
tail -f logs/data-collection-service.log
# Task execution log
curl http://localhost:8090/api/v1/collection/executions/{executionId}/logs
```
## Contributing
1. Fork the project
2. Create a feature branch: `git checkout -b feature/new-feature`
3. Commit your changes: `git commit -am 'Add new feature'`
4. Push the branch: `git push origin feature/new-feature`
5. Open a Pull Request
## License
MIT License

Binary file not shown (deleted image; 79 KiB)

Binary file not shown (deleted image; 52 KiB)

Binary file not shown (deleted image; 67 KiB)

Binary file not shown (deleted image; 107 KiB)

View File

@@ -1,163 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>services</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>data-collection-service</artifactId>
<packaging>jar</packaging>
<name>Data Collection Service</name>
<description>DataX-based data collection and aggregation service</description>
<dependencies>
<!-- Spring Boot Dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Database -->
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>8.0.33</version>
<scope>runtime</scope>
</dependency>
<!-- Redis -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- DataX Dependencies (DataX plugin integration) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>1.3</version>
</dependency>
<!-- Connection Pool -->
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
</dependency>
<!-- Oracle JDBC Driver -->
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>21.5.0.0</version>
</dependency>
<!-- PostgreSQL JDBC Driver -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<!-- JSON Processing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<!-- Shared Domain -->
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>data-management-service</artifactId>
<version>${project.version}</version>
</dependency>
<!-- OpenAPI Dependencies -->
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
<!-- Test Dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<skip>true</skip>
<classifier>exec</classifier>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<annotationProcessorPaths>
<!-- Order matters -->
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok-mapstruct-binding</artifactId>
<version>${lombok-mapstruct-binding.version}</version>
</path>
<path>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
</path>
</annotationProcessorPaths>
<compilerArgs>
<arg>-parameters</arg>
<arg>-Amapstruct.defaultComponentModel=spring</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -1,25 +0,0 @@
package com.datamate.collection;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;
/**
* Data collection service configuration class
*
* DataX-based data collection and sync service, supporting ingestion from multiple data source types
*/
@SpringBootApplication
@EnableAsync
@EnableScheduling
@EnableTransactionManagement
@ComponentScan(basePackages = {
"com.datamate.collection",
"com.datamate.datamanagement",
"com.datamate.shared"
})
public class DataCollectionServiceConfiguration {
// Configuration class for JAR packaging - no main method needed
}

View File

@@ -1,73 +0,0 @@
package com.datamate.collection.application;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.common.domain.utils.ChunksSaver;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Objects;
@Slf4j
@Service
@RequiredArgsConstructor
public class CollectionTaskService {
private final TaskExecutionService taskExecutionService;
private final CollectionTaskRepository collectionTaskRepository;
@Transactional
public CollectionTask create(CollectionTask task, String datasetId) {
task.initCreateParam();
collectionTaskRepository.save(task);
executeTaskNow(task, datasetId);
return task;
}
private void executeTaskNow(CollectionTask task, String datasetId) {
if (Objects.equals(task.getSyncMode(), SyncMode.ONCE)) {
TaskExecution exec = taskExecutionService.createExecution(task);
int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
taskExecutionService.runAsync(task, exec.getId(), timeout, datasetId);
log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), LocalDateTime.now(), exec.getId());
}
}
@Transactional
public CollectionTask update(CollectionTask task, String datasetId) {
task.setUpdatedAt(LocalDateTime.now());
task.addPath();
collectionTaskRepository.updateById(task);
executeTaskNow(task, datasetId);
return task;
}
@Transactional
public void delete(String id) {
CollectionTask task = collectionTaskRepository.getById(id);
if (task != null) {
ChunksSaver.deleteFolder("/dataset/local/" + task.getId());
}
collectionTaskRepository.removeById(id);
}
public CollectionTask get(String id) {
return collectionTaskRepository.getById(id);
}
public IPage<CollectionTask> getTasks(Page<CollectionTask> page, LambdaQueryWrapper<CollectionTask> wrapper) {
return collectionTaskRepository.page(page, wrapper);
}
public List<CollectionTask> selectActiveTasks() {
return collectionTaskRepository.selectActiveTasks();
}
}

View File

@@ -1,65 +0,0 @@
package com.datamate.collection.application;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.domain.process.ProcessRunner;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.domain.repository.TaskExecutionRepository;
import com.datamate.datamanagement.application.DatasetApplicationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
@Slf4j
@Service
@RequiredArgsConstructor
public class TaskExecutionService {
private final ProcessRunner processRunner;
private final TaskExecutionRepository executionRepository;
private final CollectionTaskRepository collectionTaskRepository;
private final DatasetApplicationService datasetApplicationService;
@Transactional
public TaskExecution createExecution(CollectionTask task) {
TaskExecution exec = TaskExecution.initTaskExecution();
exec.setTaskId(task.getId());
exec.setTaskName(task.getName());
executionRepository.save(exec);
collectionTaskRepository.updateLastExecution(task.getId(), exec.getId());
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.RUNNING.name());
return exec;
}
public TaskExecution selectLatestByTaskId(String taskId) {
return executionRepository.selectLatestByTaskId(taskId);
}
@Async
@Transactional
public void runAsync(CollectionTask task, String executionId, int timeoutSeconds, String datasetId) {
try {
int code = processRunner.runJob(task, executionId, timeoutSeconds);
log.info("DataX finished with code {} for execution {}", code, executionId);
// Simplified: a clean exit means success
executionRepository.completeExecution(executionId, TaskStatus.SUCCESS.name(), LocalDateTime.now(),
0, 0L, 0L, 0L, null);
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.SUCCESS.name());
if (StringUtils.isNotBlank(datasetId)) {
datasetApplicationService.processDataSourceAsync(datasetId, task.getId());
}
} catch (Exception e) {
log.error("DataX execution failed", e);
executionRepository.completeExecution(executionId, TaskStatus.FAILED.name(), LocalDateTime.now(),
0, 0L, 0L, 0L, e.getMessage());
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.FAILED.name());
}
}
}

View File

@@ -1,12 +0,0 @@
package com.datamate.collection.common.enums;
/**
* Sync mode: one-off (ONCE) or scheduled (SCHEDULED)
*/
public enum SyncMode {
/** One-off */
ONCE,
/** Scheduled */
SCHEDULED
}

View File

@@ -1,22 +0,0 @@
package com.datamate.collection.common.enums;
/**
* Unified task and execution status enum:
* - DRAFT: draft
* - READY: ready
* - RUNNING: running
* - SUCCESS: finished successfully (replaces the old COMPLETED/SUCCESS)
* - FAILED: failed
* - STOPPED: stopped
*
* @author Data Mate Platform Team
*/
public enum TaskStatus {
/** Draft */
DRAFT,
/** Ready */
READY,
/** Running */
RUNNING,
/** Finished successfully (replaces the old COMPLETED) */
SUCCESS,
/** Failed */
FAILED,
/** Stopped */
STOPPED
}

View File

@@ -1,11 +0,0 @@
package com.datamate.collection.common.enums;
/**
* Template type enum
*
*/
public enum TemplateType {
NAS,
OBS,
MYSQL
}

View File

@@ -1,62 +0,0 @@
package com.datamate.collection.domain.model.entity;
import com.baomidou.mybatisplus.annotation.TableName;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.common.domain.model.base.BaseEntity;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.Map;
import java.util.UUID;
/**
* Collection task entity (aligned with the t_dc_collection_tasks table)
*/
@Getter
@Setter
@TableName(value = "t_dc_collection_tasks", autoResultMap = true)
public class CollectionTask extends BaseEntity<String> {
private String name;
private String description;
private TemplateType taskType; // task type
private String targetPath; // target storage path
private String config; // DataX job JSON, containing source and target settings
private TaskStatus status;
private SyncMode syncMode; // ONCE / SCHEDULED
private String scheduleExpression;
private Integer retryCount;
private Integer timeoutSeconds;
private Long maxRecords;
private String sortField;
private String lastExecutionId;
public void addPath() {
try {
ObjectMapper objectMapper = new ObjectMapper();
Map<String, Object> parameter = objectMapper.readValue(
config,
new TypeReference<>() {}
);
parameter.put("destPath", "/dataset/local/" + id);
parameter.put("filePaths", Collections.singletonList(parameter.get("destPath")));
config = objectMapper.writeValueAsString(parameter);
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
public void initCreateParam() {
this.id = UUID.randomUUID().toString();
this.targetPath = "/dataset/local/" + id;
this.status = TaskStatus.READY;
this.createdAt = LocalDateTime.now();
this.updatedAt = LocalDateTime.now();
}
}
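
To make the `addPath()` rewrite above concrete, here is a standalone sketch using a hypothetical task id and a minimal NAS-style config:

```java
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.util.Collections;
import java.util.Map;

// Standalone sketch of the addPath() rewrite, with hypothetical values.
public class AddPathDemo {
    public static void main(String[] args) throws Exception {
        ObjectMapper objectMapper = new ObjectMapper();
        String config = "{\"ip\":\"10.0.0.1\",\"path\":\"/share\"}";
        String id = "42"; // hypothetical task id
        Map<String, Object> parameter =
                objectMapper.readValue(config, new TypeReference<Map<String, Object>>() {});
        parameter.put("destPath", "/dataset/local/" + id);
        parameter.put("filePaths", Collections.singletonList(parameter.get("destPath")));
        // Prints: {"ip":"10.0.0.1","path":"/share","destPath":"/dataset/local/42",
        //          "filePaths":["/dataset/local/42"]}
        System.out.println(objectMapper.writeValueAsString(parameter));
    }
}
```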

View File

@@ -1,71 +0,0 @@
package com.datamate.collection.domain.model.entity;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.time.LocalDateTime;
@Data
@EqualsAndHashCode(callSuper = false)
public class DataxTemplate {
/**
* 模板ID(UUID)
*/
private String id;
/**
* 模板名称
*/
private String name;
/**
* 源数据源类型
*/
private String sourceType;
/**
* 目标数据源类型
*/
private String targetType;
/**
* 模板内容(JSON格式)
*/
private String templateContent;
/**
* 模板描述
*/
private String description;
/**
* 版本号
*/
private String version;
/**
* 是否为系统模板
*/
private Boolean isSystem;
/**
* 创建时间
*/
private LocalDateTime createdAt;
/**
* 更新时间
*/
private LocalDateTime updatedAt;
/**
* 创建者
*/
private String createdBy;
/**
* 更新者
*/
private String updatedBy;
}

View File

@@ -1,44 +0,0 @@
package com.datamate.collection.domain.model.entity;
import com.baomidou.mybatisplus.annotation.TableName;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.common.domain.model.base.BaseEntity;
import lombok.Data;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
import java.util.UUID;
@Getter
@Setter
@TableName(value = "t_dc_task_executions", autoResultMap = true)
public class TaskExecution extends BaseEntity<String> {
private String taskId;
private String taskName;
private TaskStatus status;
private Double progress;
private Long recordsTotal;
private Long recordsProcessed;
private Long recordsSuccess;
private Long recordsFailed;
private Double throughput;
private Long dataSizeBytes;
private LocalDateTime startedAt;
private LocalDateTime completedAt;
private Integer durationSeconds;
private String errorMessage;
private String dataxJobId;
private String config;
private String result;
public static TaskExecution initTaskExecution() {
TaskExecution exec = new TaskExecution();
exec.setId(UUID.randomUUID().toString());
exec.setStatus(TaskStatus.RUNNING);
exec.setProgress(0.0);
exec.setStartedAt(LocalDateTime.now());
exec.setCreatedAt(LocalDateTime.now());
return exec;
}
}

View File

@@ -1,21 +0,0 @@
package com.datamate.collection.domain.process;
import com.datamate.collection.domain.model.entity.CollectionTask;
/**
* Collection job runner interface
*
* @since 2025/10/23
*/
public interface ProcessRunner {
/**
* Run a collection job
*
* @param task the task to run
* @param executionId execution ID
* @param timeoutSeconds timeout in seconds
* @return execution result (process exit code)
* @throws Exception if execution fails
*/
int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception;
}

View File

@@ -1,19 +0,0 @@
package com.datamate.collection.domain.repository;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.collection.domain.model.entity.CollectionTask;
import java.util.List;
/**
* Collection task repository
*
* @since 2025/10/23
*/
public interface CollectionTaskRepository extends IRepository<CollectionTask> {
List<CollectionTask> selectActiveTasks();
void updateStatus(String id, String status);
void updateLastExecution(String id, String lastExecutionId);
}

View File

@@ -1,19 +0,0 @@
package com.datamate.collection.domain.repository;
import com.baomidou.mybatisplus.extension.service.IService;
import com.datamate.collection.domain.model.entity.TaskExecution;
import java.time.LocalDateTime;
/**
* TaskExecutionRepository
*
* @since 2025/10/23
*/
public interface TaskExecutionRepository extends IService<TaskExecution> {
TaskExecution selectLatestByTaskId(String taskId);
void completeExecution(String executionId, String status, LocalDateTime completedAt,
Integer recordsProcessed, Long recordsTotal,
Long recordsSuccess, Long recordsFailed, String errorMessage);
}

View File

@@ -1,147 +0,0 @@
package com.datamate.collection.infrastructure.datax;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.process.ProcessRunner;
import com.datamate.collection.infrastructure.datax.config.MysqlConfig;
import com.datamate.collection.infrastructure.datax.config.NasConfig;
import com.datamate.collection.infrastructure.datax.config.ObsConfig;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.exec.*;
import org.apache.commons.io.output.TeeOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.file.*;
import java.time.Duration;
import java.util.*;
import java.util.regex.Pattern;
@Slf4j
@Component
@RequiredArgsConstructor
public class DataxProcessRunner implements ProcessRunner {
private final DataxProperties props;
@Override
public int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception {
Path job = buildJobFile(task);
int code = runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
// Post-process after a successful run (MYSQL tasks only)
postProcess(task);
return code;
}
private int runJob(File jobFile, String executionId, Duration timeout) throws Exception {
File logFile = new File(props.getLogPath(), String.format("datax-%s.log", executionId));
String python = props.getPythonPath();
String dataxPy = props.getHomePath() + File.separator + "bin" + File.separator + "datax.py";
String cmd = String.format("%s %s %s", python, dataxPy, jobFile.getAbsolutePath());
log.info("Execute DataX: {}", cmd);
CommandLine cl = CommandLine.parse(cmd);
DefaultExecutor executor = getExecutor(timeout, logFile);
return executor.execute(cl);
}
private static DefaultExecutor getExecutor(Duration timeout, File logFile) throws FileNotFoundException {
DefaultExecutor executor = new DefaultExecutor();
// Tee process output into the log file (append mode)
File parent = logFile.getParentFile();
if (!parent.exists()) {
parent.mkdirs();
}
ExecuteStreamHandler streamHandler = new PumpStreamHandler(
new TeeOutputStream(new FileOutputStream(logFile, true), System.out),
new TeeOutputStream(new FileOutputStream(logFile, true), System.err)
);
executor.setStreamHandler(streamHandler);
ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout.toMillis());
executor.setWatchdog(watchdog);
return executor;
}
private Path buildJobFile(CollectionTask task) throws IOException {
Files.createDirectories(Paths.get(props.getJobConfigPath()));
String fileName = String.format("datax-job-%s.json", task.getId());
Path path = Paths.get(props.getJobConfigPath(), fileName);
// Simplified: use the task's config field directly as the DataX job JSON
try (FileWriter fw = new FileWriter(path.toFile())) {
if (StringUtils.isBlank(task.getConfig())) {
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
String json = getJobConfig(task);
log.info("Job config: {}", json);
fw.write(json);
}
return path;
}
private String getJobConfig(CollectionTask task) {
try {
ObjectMapper objectMapper = new ObjectMapper();
TemplateType templateType = task.getTaskType();
return switch (templateType) {
case NAS -> {
// NAS-specific handling
NasConfig nasConfig = objectMapper.readValue(task.getConfig(), NasConfig.class);
yield nasConfig.toJobConfig(objectMapper, task);
}
case OBS -> {
ObsConfig obsConfig = objectMapper.readValue(task.getConfig(), ObsConfig.class);
yield obsConfig.toJobConfig(objectMapper, task);
}
case MYSQL -> {
MysqlConfig mysqlConfig = objectMapper.readValue(task.getConfig(), MysqlConfig.class);
yield mysqlConfig.toJobConfig(objectMapper, task);
}
};
} catch (Exception e) {
log.error("Failed to parse task config", e);
throw new RuntimeException("Failed to parse task config", e);
}
}
private void postProcess(CollectionTask task) throws IOException {
if (task.getTaskType() != TemplateType.MYSQL) {
return;
}
String targetPath = task.getTargetPath();
// Rename every file under targetPath that does not end in .csv so that it does
Path dir = Paths.get(targetPath);
if (!Files.exists(dir) || !Files.isDirectory(dir)) {
log.info("Target path {} does not exist or is not a directory for task {}, skip post processing.", targetPath, task.getId());
return;
}
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
for (Path path : stream) {
if (!Files.isRegularFile(path)) continue;
String name = path.getFileName().toString();
if (name.toLowerCase().endsWith(".csv")) continue;
Path target = dir.resolve(name + ".csv");
try {
Files.move(path, target, StandardCopyOption.REPLACE_EXISTING);
log.info("Renamed file for task {}: {} -> {}", task.getId(), name, target.getFileName().toString());
} catch (IOException ex) {
log.warn("Failed to rename file {} for task {}: {}", path, task.getId(), ex.getMessage(), ex);
}
}
} catch (IOException ioe) {
log.warn("Error scanning target directory {} for task {}: {}", targetPath, task.getId(), ioe.getMessage(), ioe);
}
}
}
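
One subtlety in the runner above: with commons-exec, a non-zero DataX exit code does not come back as a return value; `DefaultExecutor.execute` throws an `ExecuteException`, and a watchdog kill surfaces the same way. A minimal sketch, using `sleep` as a stand-in for `datax.py`:

```java
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.ExecuteWatchdog;

import java.time.Duration;

// Sketch of the timeout behavior above: the watchdog kills the process,
// and commons-exec reports the non-zero exit as an ExecuteException.
public class WatchdogDemo {
    public static void main(String[] args) throws Exception {
        CommandLine cl = CommandLine.parse("sleep 120"); // stand-in for datax.py
        DefaultExecutor executor = new DefaultExecutor();
        ExecuteWatchdog watchdog = new ExecuteWatchdog(Duration.ofSeconds(2).toMillis());
        executor.setWatchdog(watchdog);
        try {
            int code = executor.execute(cl); // blocks; returns only on exit code 0
            System.out.println("exit=" + code);
        } catch (ExecuteException e) {
            System.out.println("exit=" + e.getExitValue()
                    + " killedByWatchdog=" + watchdog.killedProcess());
        }
    }
}
```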

View File

@@ -1,17 +0,0 @@
package com.datamate.collection.infrastructure.datax;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
@Data
@Configuration
@ConfigurationProperties(prefix = "datamate.data-collection.datax")
public class DataxProperties {
private String homePath; // DATAX_HOME
private String pythonPath; // Python executable
private String jobConfigPath; // directory for generated job files
private String logPath; // runtime log directory
private Integer maxMemory = 2048;
private Integer channelCount = 5;
}

View File

@@ -1,4 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
public interface BaseConfig {
}

View File

@@ -1,73 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Getter
@Setter
public class MysqlConfig {
private String jdbcUrl;
private String username;
private String password;
private String querySql;
private List<String> headers;
/**
* Build the DataX job JSON string for this MySQL config.
*/
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
Map<String, Object> mysqlParameter = new HashMap<>();
Map<String, Object> connection = new HashMap<>();
if (username != null) mysqlParameter.put("username", username);
if (password != null) mysqlParameter.put("password", password);
if (jdbcUrl != null) connection.put("jdbcUrl", Collections.singletonList(jdbcUrl));
if (querySql != null) connection.put("querySql", Collections.singletonList(querySql));
mysqlParameter.put("connection", Collections.singletonList(connection));
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "mysqlreader");
reader.put("parameter", mysqlParameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
Map<String, Object> writerParameter = new HashMap<>();
writer.put("name", "txtfilewriter");
if (CollectionUtils.isNotEmpty(headers)) {
writerParameter.put("header", headers);
}
writerParameter.put("path", task.getTargetPath());
writerParameter.put("fileName", "collectionResult");
writerParameter.put("writeMode", "truncate");
writerParameter.put("dateFormat", "yyyy-MM-dd HH:mm:ss");
writerParameter.put("fileFormat", "csv");
writerParameter.put("encoding", "UTF-8");
writerParameter.put("fieldDelimiter", ",");
writer.put("parameter", writerParameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 1);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
}
}
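
A usage sketch for the builder above, with hypothetical connection values, showing the shape of the job JSON it emits:

```java
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.infrastructure.datax.config.MysqlConfig;
import com.fasterxml.jackson.databind.ObjectMapper;

// Sketch: build the DataX job JSON for a simple query (values are hypothetical).
public class MysqlJobConfigDemo {
    public static void main(String[] args) throws Exception {
        MysqlConfig cfg = new MysqlConfig();
        cfg.setJdbcUrl("jdbc:mysql://localhost:3306/demo");
        cfg.setUsername("demo");
        cfg.setPassword("secret");
        cfg.setQuerySql("SELECT id, name FROM t_user");

        CollectionTask task = new CollectionTask();
        task.setTargetPath("/dataset/local/42"); // hypothetical task target

        // Yields a mysqlreader -> txtfilewriter job:
        // {"job":{"content":[{"reader":{"name":"mysqlreader",...},
        //   "writer":{"name":"txtfilewriter",...}}],"setting":{"speed":{"channel":1}}}}
        System.out.println(cfg.toJobConfig(new ObjectMapper(), task));
    }
}
```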

View File

@@ -1,54 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Getter
@Setter
public class NasConfig implements BaseConfig {
private String ip;
private String path;
private List<String> files;
/**
* Build the DataX job JSON string for this NAS config.
*/
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
Map<String, Object> parameter = new HashMap<>();
if (ip != null) parameter.put("ip", ip);
if (path != null) parameter.put("path", path);
if (files != null) parameter.put("files", files);
parameter.put("destPath", task.getTargetPath());
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "nfsreader");
reader.put("parameter", parameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
writer.put("name", "nfswriter");
writer.put("parameter", parameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 2);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
}
}

View File

@@ -1,61 +0,0 @@
package com.datamate.collection.infrastructure.datax.config;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Getter;
import lombok.Setter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* OBS collection configuration
*
* @since 2025/11/18
*/
@Getter
@Setter
public class ObsConfig implements BaseConfig {
private String endpoint;
private String bucket;
private String accessKey;
private String secretKey;
private String prefix;
/**
* 将当前 OBS 配置构造成 DataX 所需的 job JSON 字符串。
*/
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
Map<String, Object> parameter = new HashMap<>();
if (endpoint != null) parameter.put("endpoint", endpoint);
if (bucket != null) parameter.put("bucket", bucket);
if (accessKey != null) parameter.put("accessKey", accessKey);
if (secretKey != null) parameter.put("secretKey", secretKey);
if (prefix != null) parameter.put("prefix", prefix);
parameter.put("destPath", task.getTargetPath());
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "obsreader");
reader.put("parameter", parameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
writer.put("name", "obswriter");
writer.put("parameter", parameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 2);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
}
}

View File

@@ -1,15 +0,0 @@
package com.datamate.collection.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.collection.domain.model.entity.CollectionTask;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
@Mapper
public interface CollectionTaskMapper extends BaseMapper<CollectionTask> {
int updateStatus(@Param("id") String id, @Param("status") String status);
int updateLastExecution(@Param("id") String id, @Param("lastExecutionId") String lastExecutionId);
List<CollectionTask> selectActiveTasks();
}

View File

@@ -1,22 +0,0 @@
package com.datamate.collection.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.collection.domain.model.entity.TaskExecution;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.time.LocalDateTime;
@Mapper
public interface TaskExecutionMapper extends BaseMapper<TaskExecution> {
TaskExecution selectLatestByTaskId(@Param("taskId") String taskId);
void completeExecution(@Param("executionId") String executionId,
@Param("status") String status,
@Param("completedAt") LocalDateTime completedAt,
@Param("recordsProcessed") Integer recordsProcessed,
@Param("recordsTotal") Long recordsTotal,
@Param("recordsSuccess") Long recordsSuccess,
@Param("recordsFailed") Long recordsFailed,
@Param("errorMessage") String errorMessage);
}

View File

@@ -1,36 +0,0 @@
package com.datamate.collection.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.repository.CollectionTaskRepository;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
* CollectionTaskRepositoryImpl
*
* @since 2025/10/23
*/
@Repository
@RequiredArgsConstructor
public class CollectionTaskRepositoryImpl extends CrudRepository<CollectionTaskMapper, CollectionTask> implements CollectionTaskRepository {
private final CollectionTaskMapper collectionTaskMapper;
@Override
public List<CollectionTask> selectActiveTasks() {
return collectionTaskMapper.selectActiveTasks();
}
@Override
public void updateStatus(String id, String status) {
collectionTaskMapper.updateStatus(id, status);
}
@Override
public void updateLastExecution(String id, String lastExecutionId) {
collectionTaskMapper.updateLastExecution(id, lastExecutionId);
}
}

View File

@@ -1,37 +0,0 @@
package com.datamate.collection.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.datamate.collection.domain.model.entity.TaskExecution;
import com.datamate.collection.domain.repository.TaskExecutionRepository;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository;
import java.time.LocalDateTime;
/**
* TaskExecutionRepositoryImpl
*
* @since 2025/10/23
*/
@Repository
@RequiredArgsConstructor
public class TaskExecutionRepositoryImpl extends ServiceImpl<TaskExecutionMapper, TaskExecution>
implements TaskExecutionRepository {
private final TaskExecutionMapper taskExecutionMapper;
@Override
public TaskExecution selectLatestByTaskId(String taskId) {
return taskExecutionMapper.selectLatestByTaskId(taskId);
}
@Override
public void completeExecution(String executionId, String status, LocalDateTime completedAt,
Integer recordsProcessed, Long recordsTotal,
Long recordsSuccess, Long recordsFailed, String errorMessage) {
taskExecutionMapper.completeExecution(executionId, status, completedAt,
recordsProcessed, recordsTotal,
recordsSuccess, recordsFailed, errorMessage);
}
}

View File

@@ -1,59 +0,0 @@
package com.datamate.collection.interfaces.converter;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;
import java.util.List;
import java.util.Map;
@Mapper
public interface CollectionTaskConverter {
CollectionTaskConverter INSTANCE = Mappers.getMapper(CollectionTaskConverter.class);
@Mapping(source = "config", target = "config", qualifiedByName = "parseJsonToMap")
CollectionTaskResponse toResponse(CollectionTask task);
List<CollectionTaskResponse> toResponse(List<CollectionTask> tasks);
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
CollectionTask toCollectionTask(CreateCollectionTaskRequest request);
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
CollectionTask toCollectionTask(UpdateCollectionTaskRequest request);
@Mapping(source = "current", target = "page")
@Mapping(source = "size", target = "size")
@Mapping(source = "total", target = "totalElements")
@Mapping(source = "pages", target = "totalPages")
@Mapping(source = "records", target = "content")
PagedResponse<CollectionTaskResponse> toResponse(IPage<CollectionTask> tasks);
@Named("parseJsonToMap")
default Map<String, Object> parseJsonToMap(String json) {
try {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(json, Map.class);
} catch (Exception e) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
}
}
@Named("mapToJsonString")
default String mapToJsonString(Map<String, Object> map) {
try {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(map != null ? map : Map.of());
} catch (Exception e) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
}
}
}
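
For reference, a round-trip sketch through this converter with hypothetical request values, showing the `mapToJsonString` and `parseJsonToMap` hooks at work:

```java
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.converter.CollectionTaskConverter;
import com.datamate.collection.interfaces.dto.CollectionTaskResponse;
import com.datamate.collection.interfaces.dto.CreateCollectionTaskRequest;

import java.util.Map;

// Sketch: MapStruct maps the DTO's Map config to the entity's JSON string,
// and back to a Map on the response side. Values are hypothetical.
public class ConverterDemo {
    public static void main(String[] args) {
        CreateCollectionTaskRequest request = new CreateCollectionTaskRequest();
        request.setName("nas-import");
        request.setConfig(Map.of("ip", "10.0.0.1", "path", "/share"));

        CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
        System.out.println(task.getConfig()); // JSON string, e.g. {"ip":"10.0.0.1","path":"/share"}

        CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(task);
        System.out.println(response.getConfig()); // parsed back into a Map
    }
}
```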

View File

@@ -1,25 +0,0 @@
package com.datamate.collection.interfaces.dto;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.common.interfaces.PagingQuery;
import lombok.Getter;
import lombok.Setter;
/**
* Paging query for collection tasks
*
* @since 2025/10/23
*/
@Getter
@Setter
public class CollectionTaskPagingQuery extends PagingQuery {
/**
* Task status
*/
private TaskStatus status;
/**
* Task name keyword
*/
private String keyword;
}

View File

@@ -1,52 +0,0 @@
package com.datamate.collection.interfaces.dto;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
import jakarta.validation.Valid;
/**
* CollectionTaskResponse
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CollectionTaskResponse {
private String id;
private String name;
private String description;
private String targetPath;
private Map<String, Object> config = new HashMap<>();
private TaskStatus status;
private SyncMode syncMode;
private String scheduleExpression;
private String lastExecutionId;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
private DatasetResponse dataset;
}

View File

@@ -1,64 +0,0 @@
package com.datamate.collection.interfaces.dto;
import com.datamate.collection.common.enums.SyncMode;
import com.datamate.collection.common.enums.TemplateType;
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.HashMap;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import jakarta.validation.Valid;
import jakarta.validation.constraints.*;
import io.swagger.v3.oas.annotations.media.Schema;
/**
* CreateCollectionTaskRequest
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CreateCollectionTaskRequest {
@NotNull
@Size(min = 1, max = 100)
@Schema(name = "name", description = "任务名称", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("name")
private String name;
@Size(max = 500)
@Schema(name = "description", description = "任务描述", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("description")
private String description;
@NotNull
@Schema(name = "taskType", description = "任务类型", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("taskType")
private TemplateType taskType;
@Valid
@NotNull
@Schema(name = "config", description = "归集配置,包含源端和目标端配置信息", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("config")
private Map<String, Object> config = new HashMap<>();
@NotNull
@Valid
@Schema(name = "syncMode", requiredMode = Schema.RequiredMode.REQUIRED)
@JsonProperty("syncMode")
private SyncMode syncMode;
@Schema(name = "scheduleExpression", description = "Cron调度表达式 (syncMode=SCHEDULED 时必填)", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("scheduleExpression")
private String scheduleExpression;
/** Parameters for creating the dataset */
@Valid
private CreateDatasetRequest dataset;
}

View File

@@ -1,53 +0,0 @@
package com.datamate.collection.interfaces.dto;
import com.datamate.collection.common.enums.SyncMode;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.HashMap;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import jakarta.validation.Valid;
import jakarta.validation.constraints.*;
import io.swagger.v3.oas.annotations.media.Schema;
/**
* UpdateCollectionTaskRequest
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class UpdateCollectionTaskRequest {
@Size(min = 1, max = 100)
@Schema(name = "name", description = "任务名称", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("name")
private String name;
@Size(max = 500)
@Schema(name = "description", description = "任务描述", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("description")
private String description;
@Valid
@Schema(name = "config", description = "归集配置,包含源端和目标端配置信息", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("config")
private Map<String, Object> config = new HashMap<>();
@Valid
@Schema(name = "syncMode", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("syncMode")
private SyncMode syncMode;
@Schema(name = "scheduleExpression", description = "Cron调度表达式 (syncMode=SCHEDULED 时必填)", requiredMode = Schema.RequiredMode.NOT_REQUIRED)
@JsonProperty("scheduleExpression")
private String scheduleExpression;
/** Dataset ID */
private String datasetId;
}

View File

@@ -1,79 +0,0 @@
package com.datamate.collection.interfaces.rest;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.collection.application.CollectionTaskService;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.interfaces.converter.CollectionTaskConverter;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.datamanagement.application.DatasetApplicationService;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import jakarta.validation.Valid;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.bind.annotation.*;
import java.util.*;
@Slf4j
@RestController
@RequestMapping("/data-collection/tasks")
@RequiredArgsConstructor
public class CollectionTaskController {
private final CollectionTaskService taskService;
private final DatasetApplicationService datasetService;
@PostMapping
@Transactional
public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
String datasetId = null;
DatasetResponse dataset = null;
if (Objects.nonNull(request.getDataset())) {
dataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
datasetId = dataset.getId();
}
CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task, datasetId));
response.setDataset(dataset);
return ResponseEntity.ok().body(response);
}
@PutMapping("/{id}")
public ResponseEntity<CollectionTaskResponse> updateTask(@PathVariable("id") String id, @Valid @RequestBody UpdateCollectionTaskRequest request) {
if (taskService.get(id) == null) {
return ResponseEntity.notFound().build();
}
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
task.setId(id);
return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.update(task, request.getDatasetId())));
}
@DeleteMapping("/{id}")
public ResponseEntity<Void> deleteTask(@PathVariable("id") String id) {
taskService.delete(id);
return ResponseEntity.ok().build();
}
@GetMapping("/{id}")
public ResponseEntity<CollectionTaskResponse> getTaskDetail(@PathVariable("id") String id) {
CollectionTask task = taskService.get(id);
return task == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(task));
}
@GetMapping
public ResponseEntity<PagedResponse<CollectionTaskResponse>> getTasks(@Valid CollectionTaskPagingQuery query) {
Page<CollectionTask> page = new Page<>(query.getPage(), query.getSize());
LambdaQueryWrapper<CollectionTask> wrapper = new LambdaQueryWrapper<CollectionTask>()
.eq(query.getStatus() != null, CollectionTask::getStatus, query.getStatus())
.like(StringUtils.isNotBlank(query.getKeyword()), CollectionTask::getName, query.getKeyword())
.orderByDesc(CollectionTask::getCreatedAt);
return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.getTasks(page, wrapper)));
}
}

View File

@@ -1,64 +0,0 @@
package com.datamate.collection.interfaces.scheduler;
import com.datamate.collection.application.CollectionTaskService;
import com.datamate.collection.application.TaskExecutionService;
import com.datamate.collection.common.enums.TaskStatus;
import com.datamate.collection.domain.model.entity.CollectionTask;
import com.datamate.collection.domain.model.entity.TaskExecution;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.scheduling.support.CronExpression;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import java.time.LocalDateTime;
import java.util.List;
@Slf4j
@Component
@RequiredArgsConstructor
public class TaskSchedulerInitializer {
private final CollectionTaskService collectionTaskService;
private final TaskExecutionService taskExecutionService;
// Periodically scan active collection tasks and trigger the ones whose cron schedule is due
@Scheduled(fixedDelayString = "${datamate.data-collection.scheduler.scan-interval-ms:10000}")
public void scanAndTrigger() {
List<CollectionTask> tasks = collectionTaskService.selectActiveTasks();
if (tasks == null || tasks.isEmpty()) {
return;
}
LocalDateTime now = LocalDateTime.now();
for (CollectionTask task : tasks) {
String cronExpr = task.getScheduleExpression();
if (!StringUtils.hasText(cronExpr)) {
continue;
}
try {
// Skip if the latest execution is still running
TaskExecution latest = taskExecutionService.selectLatestByTaskId(task.getId());
if (latest != null && latest.getStatus() == TaskStatus.RUNNING) {
continue;
}
CronExpression cron = CronExpression.parse(cronExpr);
LocalDateTime base = latest != null && latest.getStartedAt() != null
? latest.getStartedAt()
: now.minusYears(1); // no execution history: push the base time far back so the due check can fire
LocalDateTime nextTime = cron.next(base);
if (nextTime != null && !nextTime.isAfter(now)) {
// Due: trigger one execution
TaskExecution exec = taskExecutionService.createExecution(task);
int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
taskExecutionService.runAsync(task, exec.getId(), timeout, null);
log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), now, exec.getId());
}
} catch (Exception ex) {
log.warn("Skip task {} due to invalid cron or scheduling error: {}", task.getId(), ex.getMessage());
}
}
}
}
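
The due check in `scanAndTrigger()` reduces to a single predicate: compute the next fire time from the last start (or a far-past base when there is no history) and trigger when that time is not in the future. A standalone sketch with a hypothetical cron:

```java
import org.springframework.scheduling.support.CronExpression;

import java.time.LocalDateTime;

// Sketch of the due-check predicate used by scanAndTrigger(); values are hypothetical.
public class CronDueCheckDemo {
    public static void main(String[] args) {
        CronExpression cron = CronExpression.parse("0 0/5 * * * *"); // every 5 minutes
        LocalDateTime now = LocalDateTime.now();
        LocalDateTime base = now.minusMinutes(7); // pretend the last run started 7 min ago
        LocalDateTime next = cron.next(base);     // first fire time strictly after base
        boolean due = next != null && !next.isAfter(now);
        System.out.println("next=" + next + " due=" + due); // due=true here
    }
}
```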

View File

@@ -1,23 +0,0 @@
datamate:
  data-collection:
    # DataX configuration
    datax:
      home-path: ${DATAX_HOME:D:/datax}
      python-path: ${DATAX_PYTHON_PATH:python3}
      job-config-path: ${DATAX_JOB_PATH:./data/temp/datax/jobs}
      log-path: ${DATAX_LOG_PATH:./logs/datax}
      max-memory: ${DATAX_MAX_MEMORY:2048}
      channel-count: ${DATAX_CHANNEL_COUNT:5}
    # Execution settings
    execution:
      max-concurrent-tasks: ${DATA_COLLECTION_MAX_CONCURRENT_TASKS:10}
      task-timeout-minutes: ${DATA_COLLECTION_TASK_TIMEOUT:120}
      retry-count: ${DATA_COLLECTION_RETRY_COUNT:3}
      retry-interval-seconds: ${DATA_COLLECTION_RETRY_INTERVAL:30}
    # Monitoring settings
    monitoring:
      status-check-interval-seconds: ${DATA_COLLECTION_STATUS_CHECK_INTERVAL:30}
      log-retention-days: ${DATA_COLLECTION_LOG_RETENTION:30}
      enable-metrics: ${DATA_COLLECTION_ENABLE_METRICS:true}

View File

@@ -1,51 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper">
<!-- Result Map -->
<resultMap id="CollectionTaskResultMap" type="com.datamate.collection.domain.model.entity.CollectionTask">
<id property="id" column="id"/>
<result property="name" column="name"/>
<result property="description" column="description"/>
<result property="config" column="config"/>
<result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
<result property="syncMode" column="sync_mode"/>
<result property="scheduleExpression" column="schedule_expression"/>
<result property="retryCount" column="retry_count"/>
<result property="timeoutSeconds" column="timeout_seconds"/>
<result property="maxRecords" column="max_records"/>
<result property="sortField" column="sort_field"/>
<result property="lastExecutionId" column="last_execution_id"/>
<result property="createdAt" column="created_at"/>
<result property="updatedAt" column="updated_at"/>
<result property="createdBy" column="created_by"/>
<result property="updatedBy" column="updated_by"/>
</resultMap>
<!-- Base Column List (tasks) -->
<sql id="Base_Column_List">
id,
name, description, config, status, sync_mode,
schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
last_execution_id, created_at, updated_at, created_by, updated_by
</sql>
<!-- Update Status -->
<update id="updateStatus">
UPDATE t_dc_collection_tasks SET status = #{status}, updated_at = NOW() WHERE id = #{id}
</update>
<!-- Update Last Execution -->
<update id="updateLastExecution">
UPDATE t_dc_collection_tasks SET last_execution_id = #{lastExecutionId}, updated_at = NOW() WHERE id = #{id}
</update>
<!-- Select Active Tasks for Scheduling -->
<select id="selectActiveTasks" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
WHERE status IN ('READY', 'RUNNING')
AND schedule_expression IS NOT NULL
ORDER BY created_at DESC
</select>
</mapper>

View File

@@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper">
<!-- Select Latest Execution by Task -->
<select id="selectLatestByTaskId" resultType="com.datamate.collection.domain.model.entity.TaskExecution">
SELECT * FROM t_dc_task_executions
WHERE task_id = #{taskId}
ORDER BY started_at DESC
LIMIT 1
</select>
<!-- Complete Execution -->
<update id="completeExecution">
UPDATE t_dc_task_executions
SET status = #{status},
completed_at = #{completedAt},
records_processed = #{recordsProcessed},
records_total = #{recordsTotal},
records_success = #{recordsSuccess},
records_failed = #{recordsFailed},
error_message = #{errorMessage},
updated_at = NOW()
WHERE id = #{executionId}
</update>
</mapper>
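Likewise for the execution mapper. The new Python TaskExecution model drops the records_* counters, so only the status and completion fields carry over; a hedged SQLAlchemy sketch of the old completeExecution statement under that assumption:

from datetime import datetime
from typing import Optional

from sqlalchemy import update
from sqlalchemy.ext.asyncio import AsyncSession

from app.db.models.data_collection import TaskExecution


async def complete_execution(
    db: AsyncSession,
    execution_id: str,
    status: str,
    error_message: Optional[str] = None,
) -> None:
    # records_processed/total/success/failed have no columns in the new
    # model, so only status, completion time and error message are written.
    await db.execute(
        update(TaskExecution)
        .where(TaskExecution.id == execution_id)
        .values(
            status=status,
            completed_at=datetime.now(),
            error_message=error_message,
            updated_at=datetime.now(),
        )
    )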

View File

@@ -9,7 +9,7 @@ import org.springframework.web.bind.annotation.PathVariable;
/**
* 数据归集服务 Feign Client
*/
@FeignClient(name = "collection-service", url = "${collection.service.url:http://localhost:8080}")
@FeignClient(name = "collection-service", url = "${collection.service.url:http://datamate-backend-python:18000}")
public interface CollectionTaskClient {
/**

View File

@@ -56,11 +56,6 @@
<artifactId>data-management-service</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>data-collection-service</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>operator-market-service</artifactId>

View File

@@ -24,7 +24,6 @@
<!-- 核心服务 -->
<module>data-management-service</module>
<module>data-collection-service</module>
<module>operator-market-service</module>
<module>data-cleaning-service</module>
<module>data-synthesis-service</module>

View File

@@ -1,79 +1,60 @@
import { useState } from "react";
import { Input, Button, Radio, Form, App, Select } from "antd";
import { useEffect, useState } from "react";
import { Input, Button, Radio, Form, App, Select, InputNumber } from "antd";
import { Link, useNavigate } from "react-router";
import { ArrowLeft } from "lucide-react";
import { createTaskUsingPost } from "../collection.apis";
import { createTaskUsingPost, queryDataXTemplatesUsingGet } from "../collection.apis";
import SimpleCronScheduler from "@/pages/DataCollection/Create/SimpleCronScheduler";
import RadioCard from "@/components/RadioCard";
import { datasetTypes } from "@/pages/DataManagement/dataset.const";
import { SyncModeMap } from "../collection.const";
import { SyncMode } from "../collection.model";
import { DatasetSubType } from "@/pages/DataManagement/dataset.model";
const { TextArea } = Input;
const defaultTemplates = [
{
id: "NAS",
name: "NAS到本地",
description: "从NAS文件系统导入数据到本地文件系统",
config: {
reader: "nfsreader",
writer: "localwriter",
},
},
{
id: "OBS",
name: "OBS到本地",
description: "从OBS文件系统导入数据到本地文件系统",
config: {
reader: "obsreader",
writer: "localwriter",
},
},
{
id: "MYSQL",
name: "Mysql到本地",
description: "从Mysql中导入数据到本地文件系统",
config: {
reader: "mysqlreader",
writer: "localwriter",
},
},
];
const syncModeOptions = Object.values(SyncModeMap);
enum TemplateType {
NAS = "NAS",
OBS = "OBS",
MYSQL = "MYSQL",
}
type CollectionTemplate = {
id: string;
name: string;
description?: string;
sourceType?: string;
sourceName?: string;
targetType?: string;
targetName?: string;
templateContent?: {
parameter?: any;
reader?: any;
writer?: any;
};
builtIn?: boolean;
};
type TemplateFieldDef = {
name?: string;
type?: string;
description?: string;
required?: boolean;
options?: Array<{ label: string; value: string | number } | string | number>;
defaultValue?: any;
};
export default function CollectionTaskCreate() {
const navigate = useNavigate();
const [form] = Form.useForm();
const { message } = App.useApp();
const [templateType, setTemplateType] = useState<"default" | "custom">(
"default"
);
// 默认模板类型设为 NAS
const [selectedTemplate, setSelectedTemplate] = useState<TemplateType>(
TemplateType.NAS
);
const [customConfig, setCustomConfig] = useState("");
const [templates, setTemplates] = useState<CollectionTemplate[]>([]);
const [templatesLoading, setTemplatesLoading] = useState(false);
const [selectedTemplateId, setSelectedTemplateId] = useState<string | undefined>(undefined);
// 将 newTask 设为 any,并初始化 config.templateType 为 NAS
const [newTask, setNewTask] = useState<any>({
name: "",
description: "",
syncMode: SyncMode.ONCE,
cronExpression: "",
maxRetries: 10,
dataset: null,
config: { templateType: TemplateType.NAS },
createDataset: false,
scheduleExpression: "",
timeoutSeconds: 3600,
templateId: "",
config: {
parameter: {},
},
});
const [scheduleExpression, setScheduleExpression] = useState({
type: "once",
@@ -81,33 +62,37 @@ export default function CollectionTaskCreate() {
cronExpression: "0 0 0 * * ?",
});
const [isCreateDataset, setIsCreateDataset] = useState(false);
useEffect(() => {
const run = async () => {
setTemplatesLoading(true);
try {
const resp: any = await queryDataXTemplatesUsingGet({ page: 1, size: 1000 });
const list: CollectionTemplate[] = resp?.data?.content || [];
setTemplates(list);
} catch (e) {
message.error("加载归集模板失败");
} finally {
setTemplatesLoading(false);
}
};
run();
}, []);
const handleSubmit = async () => {
try {
await form.validateFields();
if (templateType === "default" && !selectedTemplate) {
window.alert("请选择默认模板");
return;
}
if (templateType === "custom" && !customConfig.trim()) {
window.alert("请填写自定义配置");
return;
}
// 构建最终 payload,不依赖异步 setState
const values = form.getFieldsValue(true);
const payload = {
...newTask,
taskType:
templateType === "default" ? selectedTemplate : "CUSTOM",
config: {
...((newTask && newTask.config) || {}),
...(templateType === "custom" ? { dataxJson: customConfig } : {}),
},
name: values.name,
description: values.description,
syncMode: values.syncMode,
scheduleExpression: values.scheduleExpression,
timeoutSeconds: values.timeoutSeconds,
templateId: values.templateId,
config: values.config,
};
console.log("创建任务 payload:", payload);
await createTaskUsingPost(payload);
message.success("任务创建成功");
navigate("/data/collection");
@@ -116,6 +101,102 @@ export default function CollectionTaskCreate() {
}
};
const selectedTemplate = templates.find((t) => t.id === selectedTemplateId);
const renderTemplateFields = (
section: "parameter" | "reader" | "writer",
defs: Record<string, TemplateFieldDef> | undefined
) => {
if (!defs || typeof defs !== "object") return null;
const items = Object.entries(defs).map(([key, def]) => {
const label = def?.name || key;
const description = def?.description;
const fieldType = (def?.type || "input").toLowerCase();
const required = def?.required !== false;
const rules = required
? [{ required: true, message: `请输入${label}` }]
: undefined;
if (fieldType === "password") {
return (
<Form.Item
key={`${section}.${key}`}
name={["config", section, key]}
label={label}
tooltip={description}
rules={rules}
>
<Input.Password placeholder={description || `请输入${label}`} />
</Form.Item>
);
}
if (fieldType === "textarea") {
return (
<Form.Item
key={`${section}.${key}`}
name={["config", section, key]}
label={label}
tooltip={description}
rules={rules}
className="md:col-span-2"
>
<TextArea rows={4} placeholder={description || `请输入${label}`} />
</Form.Item>
);
}
if (fieldType === "select") {
const options = (def?.options || []).map((opt: any) => {
if (typeof opt === "string" || typeof opt === "number") {
return { label: String(opt), value: opt };
}
return { label: opt?.label ?? String(opt?.value), value: opt?.value };
});
return (
<Form.Item
key={`${section}.${key}`}
name={["config", section, key]}
label={label}
tooltip={description}
rules={rules}
>
<Select placeholder={description || `请选择${label}`} options={options} />
</Form.Item>
);
}
return (
<Form.Item
key={`${section}.${key}`}
name={["config", section, key]}
label={label}
tooltip={description}
rules={rules}
>
<Input placeholder={description || `请输入${label}`} />
</Form.Item>
);
});
return (
<div className="grid grid-cols-1 md:grid-cols-2 gap-x-4 gap-y-2">
{items}
</div>
);
};
const getPropertyCountSafe = (obj: any) => {
// 类型检查
if (obj === null || obj === undefined) {
return 0;
}
// 处理普通对象
return Object.keys(obj).length;
}
return (
<div className="h-full flex flex-col">
<div className="flex items-center justify-between mb-2">
@@ -130,10 +211,11 @@ export default function CollectionTaskCreate() {
</div>
<div className="flex-overflow-auto border-card">
<div className="flex-1 overflow-auto p-6">
<div className="flex-1 overflow-auto p-4">
<Form
form={form}
layout="vertical"
className="[&_.ant-form-item]:mb-3 [&_.ant-form-item-label]:pb-1"
initialValues={newTask}
onValuesChange={(_, allValues) => {
setNewTask({ ...newTask, ...allValues });
@@ -142,19 +224,36 @@ export default function CollectionTaskCreate() {
{/* 基本信息 */}
<h2 className="font-medium text-gray-900 text-lg mb-2"></h2>
<Form.Item
label="名称"
name="name"
rules={[{ required: true, message: "请输入任务名称" }]}
>
<Input placeholder="请输入任务名称" />
</Form.Item>
<Form.Item label="描述" name="description">
<TextArea placeholder="请输入任务描述" rows={3} />
</Form.Item>
<div className="grid grid-cols-1 md:grid-cols-2 gap-x-4 gap-y-2">
<Form.Item
label="名称"
name="name"
rules={[{ required: true, message: "请输入任务名称" }]}
>
<Input placeholder="请输入任务名称" />
</Form.Item>
<Form.Item
label="超时时间(秒)"
name="timeoutSeconds"
rules={[{ required: true, message: "请输入超时时间" }]}
initialValue={3600}
>
<InputNumber
className="w-full"
min={1}
precision={0}
placeholder="默认 3600"
/>
</Form.Item>
<Form.Item className="md:col-span-2" label="描述" name="description">
<TextArea placeholder="请输入任务描述" rows={2} />
</Form.Item>
</div>
{/* 同步配置 */}
<h2 className="font-medium text-gray-900 pt-6 mb-2 text-lg">
<h2 className="font-medium text-gray-900 pt-2 mb-1 text-lg">
</h2>
<Form.Item name="syncMode" label="同步方式">
@@ -180,7 +279,7 @@ export default function CollectionTaskCreate() {
rules={[{ required: true, message: "请输入Cron表达式" }]}
>
<SimpleCronScheduler
className="px-2 rounded"
className="px-2 py-1 rounded"
value={scheduleExpression}
onChange={(value) => {
setScheduleExpression(value);
@@ -194,271 +293,90 @@ export default function CollectionTaskCreate() {
)}
{/* 模板配置 */}
<h2 className="font-medium text-gray-900 pt-6 mb-2 text-lg">
<h2 className="font-medium text-gray-900 pt-4 mb-2 text-lg">
</h2>
{/* <Form.Item label="模板类型">
<Radio.Group
value={templateType}
onChange={(e) => setTemplateType(e.target.value)}
>
<Radio value="default">使用默认模板</Radio>
<Radio value="custom">自定义DataX JSON配置</Radio>
</Radio.Group>
</Form.Item> */}
{templateType === "default" && (
<>
{
<Form.Item label="选择模板">
<div className="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4">
{defaultTemplates.map((template) => (
<div
key={template.id}
className={`border p-4 rounded-md hover:shadow-lg transition-shadow ${
selectedTemplate === template.id
? "border-blue-500"
: "border-gray-300"
}`}
onClick={() => {
setSelectedTemplate(template.id as TemplateType);
// 使用函数式更新,合并之前的 config
setNewTask((prev: any) => ({
...prev,
config: {
templateType: template.id,
},
}));
// 同步表单显示
form.setFieldsValue({
config: { templateType: template.id },
});
}}
>
<div className="font-medium">{template.name}</div>
<div className="text-gray-500">
{template.description}
</div>
<div className="text-gray-400">
{template.config.reader} {template.config.writer}
</div>
</div>
))}
<Form.Item
label="选择模板"
name="templateId"
rules={[{ required: true, message: "请选择归集模板" }]}
>
<Select
placeholder="请选择归集模板"
loading={templatesLoading}
onChange={(templateId) => {
setSelectedTemplateId(templateId);
form.setFieldsValue({
templateId,
config: {},
});
setNewTask((prev: any) => ({
...prev,
templateId,
config: {},
}));
}}
optionRender={(option) => {
const tpl = templates.find((t) => t.id === option.value);
return (
<div>
<div className="font-medium">{tpl?.name || option.label}</div>
<div className="text-xs text-gray-500 line-clamp-2">
{tpl?.description || ""}
</div>
</div>
</Form.Item>
}
{/* nas import */}
{selectedTemplate === TemplateType.NAS && (
<div className="grid grid-cols-2 gap-3 px-2 bg-blue-50 rounded">
<Form.Item
name={["config", "ip"]}
rules={[{ required: true, message: "请输入NAS地址" }]}
label="NAS地址"
>
<Input placeholder="192.168.1.100" />
</Form.Item>
<Form.Item
name={["config", "path"]}
rules={[{ required: true, message: "请输入共享路径" }]}
label="共享路径"
>
<Input placeholder="/share/importConfig" />
</Form.Item>
<Form.Item
name={["config", "files"]}
label="文件列表"
className="col-span-2"
>
<Select placeholder="请选择文件列表" mode="tags" />
</Form.Item>
</div>
)}
);
}}
options={templates.map((template) => ({
label: template.name,
value: template.id,
}))}
/>
</Form.Item>
{/* obs import */}
{selectedTemplate === TemplateType.OBS && (
<div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg">
<Form.Item
name={["config", "endpoint"]}
rules={[{ required: true }]}
label="Endpoint"
>
<Input
className="h-8 text-xs"
placeholder="obs.cn-north-4.myhuaweicloud.com"
/>
</Form.Item>
<Form.Item
name={["config", "bucket"]}
rules={[{ required: true }]}
label="Bucket"
>
<Input className="h-8 text-xs" placeholder="my-bucket" />
</Form.Item>
<Form.Item
name={["config", "accessKey"]}
rules={[{ required: true }]}
label="Access Key"
>
<Input className="h-8 text-xs" placeholder="Access Key" />
</Form.Item>
<Form.Item
name={["config", "secretKey"]}
rules={[{ required: true }]}
label="Secret Key"
>
<Input
type="password"
className="h-8 text-xs"
placeholder="Secret Key"
/>
</Form.Item>
<Form.Item
name={["config", "prefix"]}
rules={[{ required: true }]}
label="Prefix"
>
<Input className="h-8 text-xs" placeholder="Prefix" />
</Form.Item>
</div>
)}
{/* mysql import */}
{selectedTemplate === TemplateType.MYSQL && (
<div className="grid grid-cols-2 gap-3 px-2 bg-blue-50 rounded">
<Form.Item
name={["config", "jdbcUrl"]}
rules={[{ required: true, message: "请输入数据库链接" }]}
label="数据库链接"
className="col-span-2"
>
<Input placeholder="jdbc:mysql://localhost:3306/mysql?useUnicode=true&characterEncoding=utf8" />
</Form.Item>
<Form.Item
name={["config", "username"]}
rules={[{ required: true, message: "请输入用户名" }]}
label="用户名"
>
<Input placeholder="mysql" />
</Form.Item>
<Form.Item
name={["config", "password"]}
rules={[{ required: true, message: "请输入密码" }]}
label="密码"
>
<Input type="password" className="h-8 text-xs" placeholder="Secret Key" />
</Form.Item>
<Form.Item
name={["config", "querySql"]}
rules={[{ required: true, message: "请输入查询语句" }]}
label="查询语句"
>
<Input placeholder="select * from your_table" />
</Form.Item>
<Form.Item
name={["config", "headers"]}
label="列名"
className="col-span-2"
>
<Select placeholder="请输入列名" mode="tags" />
</Form.Item>
</div>
)}
</>
)}
{templateType === "custom" && (
<Form.Item label="DataX JSON配置">
<TextArea
placeholder="请输入DataX JSON配置..."
value={customConfig}
onChange={(e) => setCustomConfig(e.target.value)}
rows={12}
className="w-full"
/>
</Form.Item>
)}
{/* 数据集配置 */}
{templateType === "default" && (
{selectedTemplate ? (
<>
<h2 className="font-medium text-gray-900 my-4 text-lg">
</h2>
<Form.Item
label="是否创建数据集"
name="createDataset"
required
rules={[{ required: true, message: "请选择是否创建数据集" }]}
tooltip={"支持后续在【数据管理】中手动创建数据集并关联至此任务。"}
>
<Radio.Group
value={isCreateDataset}
onChange={(e) => {
const value = e.target.value;
let datasetInit = null;
if (value === true) {
datasetInit = {};
}
form.setFieldsValue({
dataset: datasetInit,
});
setNewTask((prev: any) => ({
...prev,
dataset: datasetInit,
}));
setIsCreateDataset(e.target.value);
}}
>
<Radio value={true}>是</Radio>
<Radio value={false}>否</Radio>
</Radio.Group>
</Form.Item>
{isCreateDataset && (
{getPropertyCountSafe(selectedTemplate.templateContent?.parameter) > 0 ? (
<>
<Form.Item
label="数据集名称"
name={["dataset", "name"]}
required
>
<Input
placeholder="输入数据集名称"
onChange={(e) => {
setNewTask((prev: any) => ({
...prev,
dataset: {
...(prev.dataset || {}),
name: e.target.value,
},
}));
}}
/>
</Form.Item>
<Form.Item
label="数据集类型"
name={["dataset", "datasetType"]}
rules={[{ required: true, message: "请选择数据集类型" }]}
>
<RadioCard
options={datasetTypes}
value={newTask.dataset?.datasetType}
onChange={(type) => {
form.setFieldValue(["dataset", "datasetType"], type);
setNewTask((prev: any) => ({
...prev,
dataset: {
...(prev.dataset || {}),
datasetType: type as DatasetSubType,
},
}));
}}
/>
</Form.Item>
<h3 className="font-medium text-gray-900 pt-2 mb-2">
</h3>
{renderTemplateFields(
"parameter",
selectedTemplate.templateContent?.parameter as Record<string, TemplateFieldDef>
)}
</>
)}
): null}
{getPropertyCountSafe(selectedTemplate.templateContent?.reader) > 0 ? (
<>
<h3 className="font-medium text-gray-900 pt-2 mb-2">
</h3>
{renderTemplateFields(
"reader",
selectedTemplate.templateContent?.reader as Record<string, TemplateFieldDef>
)}
</>
) : null}
{getPropertyCountSafe(selectedTemplate.templateContent?.writer) > 0 ? (
<>
<h3 className="font-medium text-gray-900 pt-2 mb-2">
</h3>
{renderTemplateFields(
"writer",
selectedTemplate.templateContent?.writer as Record<string, TemplateFieldDef>
)}
</>
) : null}
</>
)}
) : null}
</Form>
</div>
<div className="flex gap-2 justify-end border-top p-6">
<div className="flex gap-2 justify-end border-top p-4">
<Button onClick={() => navigate("/data/collection")}></Button>
<Button type="primary" onClick={handleSubmit}>

View File

@@ -1,13 +1,27 @@
import { useState } from "react";
import { useEffect, useState } from "react";
import { Button, Tabs } from "antd";
import { PlusOutlined } from "@ant-design/icons";
import TaskManagement from "./TaskManagement";
import ExecutionLog from "./ExecutionLog";
import { useNavigate } from "react-router";
import Execution from "./Execution.tsx";
import TemplateManagement from "./TemplateManagement";
import { useLocation, useNavigate } from "react-router";
export default function DataCollection() {
const navigate = useNavigate();
const location = useLocation();
const [activeTab, setActiveTab] = useState("task-management");
const [taskId, setTaskId] = useState<string | undefined>(undefined);
useEffect(() => {
const params = new URLSearchParams(location.search);
const tab = params.get("tab") || undefined;
const nextTaskId = params.get("taskId") || undefined;
if (tab === "task-execution" || tab === "task-management" || tab === "task-template") {
setActiveTab(tab);
}
setTaskId(nextTaskId);
}, [location.search]);
return (
<div className="gap-4 h-full flex flex-col">
@@ -29,13 +43,20 @@ export default function DataCollection() {
activeKey={activeTab}
items={[
{ label: "任务管理", key: "task-management" },
// { label: "执行日志", key: "execution-log" },
{ label: "执行记录", key: "task-execution" },
{ label: "模板管理", key: "task-template" },
]}
onChange={(tab) => {
setActiveTab(tab);
setTaskId(undefined);
const params = new URLSearchParams();
params.set("tab", tab);
navigate({ pathname: location.pathname, search: params.toString() }, { replace: true });
}}
/>
{activeTab === "task-management" ? <TaskManagement /> : <ExecutionLog />}
{activeTab === "task-management" ? <TaskManagement /> : null}
{activeTab === "task-execution" ? <Execution taskId={taskId} /> : null}
{activeTab === "task-template" ? <TemplateManagement /> : null}
</div>
);
}

View File

@@ -0,0 +1,291 @@
import { Card, Button, Modal, Table, Tag } from "antd";
import type { ColumnsType } from "antd/es/table";
import { SearchControls } from "@/components/SearchControls";
import {
  queryExecutionLogUsingPost,
  queryExecutionLogFileByIdUsingGet,
} from "../collection.apis";
import useFetchData from "@/hooks/useFetchData";
import { useEffect, useState } from "react";
import { TaskExecution } from "@/pages/DataCollection/collection.model.ts";
import { mapTaskExecution } from "@/pages/DataCollection/collection.const.ts";
import { FileTextOutlined } from "@ant-design/icons";
const filterOptions = [
{
key: "status",
label: "状态筛选",
options: [
{ value: "all", label: "全部状态" },
{ value: "RUNNING", label: "运行中" },
{ value: "SUCCESS", label: "成功" },
{ value: "FAILED", label: "失败" },
{ value: "STOPPED", label: "停止" },
],
},
];
export default function Execution({ taskId }: { taskId?: string }) {
const [dateRange, setDateRange] = useState<[any, any] | null>(null);
const [logOpen, setLogOpen] = useState(false);
const [logLoading, setLogLoading] = useState(false);
const [logTitle, setLogTitle] = useState<string>("");
const [logContent, setLogContent] = useState<string>("");
const [logFilename, setLogFilename] = useState<string>("");
const [logBlobUrl, setLogBlobUrl] = useState<string>("");
const formatDuration = (seconds?: number) => {
if (seconds === undefined || seconds === null) return "-";
const total = Math.max(0, Math.floor(seconds));
if (total < 60) return `${total}s`;
const min = Math.floor(total / 60);
const sec = total % 60;
return `${min}min${sec}s`;
};
const handleReset = () => {
setSearchParams({
keyword: "",
filter: {
type: [],
status: [],
tags: [],
},
current: 1,
pageSize: 10,
});
setDateRange(null);
};
const {
loading,
tableData,
pagination,
searchParams,
setSearchParams,
handleFiltersChange,
handleKeywordChange,
} = useFetchData<TaskExecution>(
(params) => {
const { keyword, start_time, end_time, ...rest } = params || {};
return queryExecutionLogUsingPost({
...rest,
task_id: taskId || undefined,
task_name: keyword || undefined,
start_time,
end_time,
});
},
mapTaskExecution,
30000,
false,
[],
0
);
useEffect(() => {
setSearchParams((prev) => ({
...prev,
current: 1,
}));
}, [taskId, setSearchParams]);
const handleViewLog = async (record: TaskExecution) => {
setLogOpen(true);
setLogLoading(true);
setLogTitle(`${record.taskName} / ${record.id}`);
setLogContent("");
setLogFilename("");
if (logBlobUrl) {
URL.revokeObjectURL(logBlobUrl);
setLogBlobUrl("");
}
try {
const { blob, filename } = await queryExecutionLogFileByIdUsingGet(record.id);
setLogFilename(filename);
const url = URL.createObjectURL(blob);
setLogBlobUrl(url);
const text = await blob.text();
setLogContent(text);
} catch (e: any) {
setLogContent(e?.data?.detail || e?.message || "Failed to load log");
} finally {
setLogLoading(false);
}
};
const columns: ColumnsType<any> = [
{
title: "任务名称",
dataIndex: "taskName",
key: "taskName",
fixed: "left",
render: (text: string) => (
<span style={{ fontWeight: 500 }}>{text}</span>
),
},
{
title: "状态",
dataIndex: "status",
key: "status",
render: (status: any) => (
  <Tag color={status.color}>{status.label}</Tag>
),
},
{
title: "开始时间",
dataIndex: "startedAt",
key: "startedAt",
},
{
title: "结束时间",
dataIndex: "completedAt",
key: "completedAt",
},
{
title: "执行时长",
dataIndex: "durationSeconds",
key: "durationSeconds",
render: (v?: number) => formatDuration(v),
},
{
title: "错误信息",
dataIndex: "errorMessage",
key: "errorMessage",
render: (msg?: string) =>
msg ? (
<span style={{ color: "#f5222d" }} title={msg}>
{msg}
</span>
) : (
<span style={{ color: "#bbb" }}>-</span>
),
},
{
title: "操作",
key: "action",
fixed: "right",
width: 120,
render: (_: any, record: TaskExecution) => (
<Button
type="link"
icon={<FileTextOutlined />}
onClick={() => handleViewLog(record)}
>
查看日志
</Button>
),
},
];
return (
<div className="flex flex-col gap-4">
{/* Filter Controls */}
<div className="flex items-center justify-between gap-4">
<SearchControls
searchTerm={searchParams.keyword}
onSearchChange={handleKeywordChange}
filters={filterOptions}
onFiltersChange={handleFiltersChange}
showViewToggle={false}
onClearFilters={() =>
setSearchParams((prev) => ({
...prev,
filter: { ...prev.filter, status: [] },
current: 1,
}))
}
showDatePicker
dateRange={dateRange as any}
onDateChange={(date) => {
setDateRange(date as any);
const start = (date?.[0] as any)?.toISOString?.() || undefined;
const end = (date?.[1] as any)?.toISOString?.() || undefined;
setSearchParams((prev) => ({
...prev,
current: 1,
start_time: start,
end_time: end,
}));
}}
onReload={handleReset}
searchPlaceholder="搜索任务名称..."
className="flex-1"
/>
</div>
<Card>
<Table
loading={loading}
columns={columns}
dataSource={tableData}
rowKey="id"
pagination={pagination}
scroll={{ x: "max-content" }}
/>
</Card>
<Modal
title={logTitle || "执行日志"}
open={logOpen}
onCancel={() => {
setLogOpen(false);
if (logBlobUrl) {
URL.revokeObjectURL(logBlobUrl);
setLogBlobUrl("");
}
}}
footer={
<div style={{ display: "flex", justifyContent: "space-between", width: "100%" }}>
<div style={{ color: "#6b7280", fontSize: 12 }}>{logFilename || ""}</div>
<div style={{ display: "flex", gap: 8 }}>
{logBlobUrl ? (
<Button
onClick={() => {
const a = document.createElement("a");
a.href = logBlobUrl;
a.download = logFilename || "execution.log";
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
}}
>
下载
</Button>
) : null}
<Button
type="primary"
onClick={() => {
setLogOpen(false);
if (logBlobUrl) {
URL.revokeObjectURL(logBlobUrl);
setLogBlobUrl("");
}
}}
>
关闭
</Button>
</div>
</div>
}
width={900}
>
<div
style={{
background: "#0b1020",
color: "#e5e7eb",
borderRadius: 8,
padding: 12,
maxHeight: "60vh",
overflow: "auto",
fontFamily: "ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace",
fontSize: 12,
lineHeight: 1.5,
whiteSpace: "pre-wrap",
wordBreak: "break-word",
}}
>
{logLoading ? "Loading..." : (logContent || "(empty)")}
</div>
</Modal>
</div>
);
}

View File

@@ -1,149 +0,0 @@
import { Card, Badge, Table } from "antd";
import type { ColumnsType } from "antd/es/table";
import { SearchControls } from "@/components/SearchControls";
import type { CollectionLog } from "@/pages/DataCollection/collection.model";
import { queryExecutionLogUsingPost } from "../collection.apis";
import { LogStatusMap, LogTriggerTypeMap } from "../collection.const";
import useFetchData from "@/hooks/useFetchData";
const filterOptions = [
{
key: "status",
label: "状态筛选",
options: Object.values(LogStatusMap),
},
{
key: "triggerType",
label: "触发类型",
options: Object.values(LogTriggerTypeMap),
},
];
export default function ExecutionLog() {
const handleReset = () => {
setSearchParams({
keyword: "",
filters: {},
current: 1,
pageSize: 10,
dateRange: null,
});
};
const {
loading,
tableData,
pagination,
searchParams,
setSearchParams,
handleFiltersChange,
handleKeywordChange,
} = useFetchData(queryExecutionLogUsingPost);
const columns: ColumnsType<CollectionLog> = [
{
title: "任务名称",
dataIndex: "taskName",
key: "taskName",
fixed: "left",
render: (text: string) => <span style={{ fontWeight: 500 }}>{text}</span>,
},
{
title: "状态",
dataIndex: "status",
key: "status",
render: (status: string) => (
<Badge
text={LogStatusMap[status]?.label}
color={LogStatusMap[status]?.color}
/>
),
},
{
title: "触发类型",
dataIndex: "triggerType",
key: "triggerType",
render: (type: string) => LogTriggerTypeMap[type].label,
},
{
title: "开始时间",
dataIndex: "startTime",
key: "startTime",
},
{
title: "结束时间",
dataIndex: "endTime",
key: "endTime",
},
{
title: "执行时长",
dataIndex: "duration",
key: "duration",
},
{
title: "重试次数",
dataIndex: "retryCount",
key: "retryCount",
},
{
title: "进程ID",
dataIndex: "processId",
key: "processId",
render: (text: string) => (
<span style={{ fontFamily: "monospace" }}>{text}</span>
),
},
{
title: "错误信息",
dataIndex: "errorMessage",
key: "errorMessage",
render: (msg?: string) =>
msg ? (
<span style={{ color: "#f5222d" }} title={msg}>
{msg}
</span>
) : (
<span style={{ color: "#bbb" }}>-</span>
),
},
];
return (
<div className="flex flex-col gap-4">
{/* Filter Controls */}
<div className="flex items-center justify-between gap-4">
<SearchControls
searchTerm={searchParams.keyword}
onSearchChange={handleKeywordChange}
filters={filterOptions}
onFiltersChange={handleFiltersChange}
showViewToggle={false}
onClearFilters={() =>
setSearchParams((prev) => ({
...prev,
filters: {},
}))
}
showDatePicker
dateRange={searchParams.dateRange || [null, null]}
onDateChange={(date) =>
setSearchParams((prev) => ({ ...prev, dateRange: date }))
}
onReload={handleReset}
searchPlaceholder="搜索任务名称、进程ID或错误信息..."
className="flex-1"
/>
</div>
<Card>
<Table
loading={loading}
columns={columns}
dataSource={tableData}
rowKey="id"
pagination={pagination}
scroll={{ x: "max-content" }}
/>
</Card>
</div>
);
}

View File

@@ -1,34 +1,16 @@
import {
Card,
Button,
Badge,
Table,
Dropdown,
App,
Tooltip,
Popconfirm,
} from "antd";
import {
DeleteOutlined,
EditOutlined,
EllipsisOutlined,
PauseCircleOutlined,
PauseOutlined,
PlayCircleOutlined,
StopOutlined,
} from "@ant-design/icons";
import { SearchControls } from "@/components/SearchControls";
import {App, Button, Card, Popconfirm, Table, Tag, Tooltip,} from "antd";
import {DeleteOutlined, PauseCircleOutlined, PlayCircleOutlined, ProfileOutlined,} from "@ant-design/icons";
import {SearchControls} from "@/components/SearchControls";
import {
deleteTaskByIdUsingDelete,
executeTaskByIdUsingPost,
queryTasksUsingGet,
stopTaskByIdUsingPost,
} from "../collection.apis";
import { TaskStatus, type CollectionTask } from "../collection.model";
import { StatusMap, SyncModeMap } from "../collection.const";
import {type CollectionTask, TaskStatus} from "../collection.model";
import {mapCollectionTask, StatusMap} from "../collection.const";
import useFetchData from "@/hooks/useFetchData";
import { useNavigate } from "react-router";
import { mapCollectionTask } from "../collection.const";
import {useNavigate} from "react-router";
export default function TaskManagement() {
const { message } = App.useApp();
@@ -51,8 +33,20 @@ export default function TaskManagement() {
searchParams,
setSearchParams,
fetchData,
handleFiltersChange,
} = useFetchData(queryTasksUsingGet, mapCollectionTask);
} = useFetchData(
(params) => {
const { keyword, ...rest } = params || {};
return queryTasksUsingGet({
...rest,
name: keyword || undefined,
});
},
mapCollectionTask,
30000,
false,
[],
0
);
const handleStartTask = async (taskId: string) => {
await executeTaskByIdUsingPost(taskId);
@@ -86,21 +80,21 @@ export default function TaskManagement() {
icon: <PauseCircleOutlined />,
onClick: () => handleStopTask(record.id),
};
const items = [
// isStopped ? startButton : stopButton,
// {
// key: "edit",
// label: "编辑",
// icon: <EditOutlined />,
// onClick: () => {
// showEditTaskModal(record);
// },
// },
return [
{
key: "executions",
label: "执行记录",
icon: <ProfileOutlined/>,
onClick: () =>
navigate(
`/data/collection?tab=task-execution&taskId=${encodeURIComponent(record.id)}`
),
},
{
key: "delete",
label: "删除",
danger: true,
icon: <DeleteOutlined />,
icon: <DeleteOutlined/>,
confirm: {
title: "确定要删除该任务吗?此操作不可撤销。",
okText: "删除",
@@ -110,7 +104,6 @@ export default function TaskManagement() {
onClick: () => handleDeleteTask(record.id),
},
];
return items;
};
const columns = [
@@ -128,17 +121,49 @@ export default function TaskManagement() {
key: "status",
width: 150,
ellipsis: true,
render: (status: string) => (
<Badge text={status.label} color={status.color} />
render: (status: any) => (
<Tag color={status.color}>{status.label}</Tag>
),
},
{
title: "所用模板",
dataIndex: "templateName",
key: "templateName",
width: 180,
ellipsis: true,
render: (v?: string) => v || "-",
},
{
title: "同步方式",
dataIndex: "syncMode",
key: "syncMode",
width: 150,
ellipsis: true,
render: (text: string) => <span>{SyncModeMap[text]?.label}</span>,
render: (text: any) => (
<Tag color={text.color}>{text.label}</Tag>
),
},
{
title: "Cron调度表达式",
dataIndex: "scheduleExpression",
key: "scheduleExpression",
width: 200,
ellipsis: true,
},
{
title: "超时时间",
dataIndex: "timeoutSeconds",
key: "timeoutSeconds",
width: 140,
ellipsis: true,
render: (v?: number) => (v === undefined || v === null ? "-" : `${v}s`),
},
{
title: "描述",
dataIndex: "description",
key: "description",
ellipsis: true,
width: 200,
},
{
title: "创建时间",
@@ -154,20 +179,6 @@ export default function TaskManagement() {
width: 150,
ellipsis: true,
},
{
title: "最近执行ID",
dataIndex: "lastExecutionId",
key: "lastExecutionId",
width: 150,
ellipsis: true,
},
{
title: "描述",
dataIndex: "description",
key: "description",
ellipsis: true,
width: 200,
},
{
title: "操作",
key: "action",
@@ -180,7 +191,7 @@ export default function TaskManagement() {
type="text"
icon={op.icon}
danger={op?.danger}
onClick={() => op.onClick(record)}
onClick={() => op.onClick()}
/>
</Tooltip>
);
@@ -192,7 +203,7 @@ export default function TaskManagement() {
okText={op.confirm.okText}
cancelText={op.confirm.cancelText}
okType={op.danger ? "danger" : "primary"}
onConfirm={() => op.onClick(record)}
onConfirm={() => op.onClick()}
>
<Tooltip key={op.key} title={op.label}>
<Button type="text" icon={op.icon} danger={op?.danger} />
@@ -218,14 +229,15 @@ export default function TaskManagement() {
current: 1,
}))
}
searchPlaceholder="搜索任务名称或描述..."
searchPlaceholder="搜索任务名称..."
filters={filters}
onFiltersChange={handleFiltersChange}
onFiltersChange={() => {}}
showViewToggle={false}
onClearFilters={() =>
setSearchParams((prev) => ({
...prev,
filters: {},
filter: { ...prev.filter, status: [] },
current: 1,
}))
}
onReload={fetchData}

View File

@@ -0,0 +1,173 @@
import { App, Card, Table, Tag } from "antd";
import type { ColumnsType } from "antd/es/table";
import { SearchControls } from "@/components/SearchControls";
import useFetchData from "@/hooks/useFetchData";
import { queryDataXTemplatesUsingGet } from "../collection.apis";
import { formatDateTime } from "@/utils/unit";
type CollectionTemplate = {
id: string;
name: string;
description?: string;
sourceType: string;
sourceName: string;
targetType: string;
targetName: string;
builtIn?: boolean;
createdAt?: string;
updatedAt?: string;
};
export default function TemplateManagement() {
const { message } = App.useApp();
const filters = [
{
key: "builtIn",
label: "模板类型",
options: [
{ value: "all", label: "全部" },
{ value: "true", label: "内置" },
{ value: "false", label: "自定义" },
],
},
];
const {
loading,
tableData,
pagination,
searchParams,
setSearchParams,
fetchData,
handleFiltersChange,
} = useFetchData<CollectionTemplate>(
(params) => {
const { keyword, builtIn, ...rest } = params || {};
const builtInValue = Array.isArray(builtIn)
? builtIn?.[0]
: builtIn;
return queryDataXTemplatesUsingGet({
...rest,
name: keyword || undefined,
built_in:
builtInValue && builtInValue !== "all"
? builtInValue === "true"
: undefined,
});
},
(tpl) => ({
...tpl,
createdAt: tpl.createdAt ? formatDateTime(tpl.createdAt) : "-",
updatedAt: tpl.updatedAt ? formatDateTime(tpl.updatedAt) : "-",
}),
30000,
false,
[],
0
);
const columns: ColumnsType<CollectionTemplate> = [
{
title: "模板名称",
dataIndex: "name",
key: "name",
fixed: "left",
width: 200,
ellipsis: true,
},
{
title: "模板类型",
dataIndex: "builtIn",
key: "builtIn",
width: 120,
render: (v?: boolean) => (
<Tag color={v ? "blue" : "default"}>{v ? "内置" : "自定义"}</Tag>
),
},
{
title: "源端",
key: "source",
width: 220,
ellipsis: true,
render: (_: any, record: CollectionTemplate) => (
<span>{`${record.sourceType} / ${record.sourceName}`}</span>
),
},
{
title: "目标端",
key: "target",
width: 220,
ellipsis: true,
render: (_: any, record: CollectionTemplate) => (
<span>{`${record.targetType} / ${record.targetName}`}</span>
),
},
{
title: "描述",
dataIndex: "description",
key: "description",
width: 260,
ellipsis: true,
render: (v?: string) => v || "-",
},
{
title: "创建时间",
dataIndex: "createdAt",
key: "createdAt",
width: 160,
},
{
title: "更新时间",
dataIndex: "updatedAt",
key: "updatedAt",
width: 160,
},
];
return (
<div className="space-y-4">
<SearchControls
searchTerm={searchParams.keyword}
onSearchChange={(newSearchTerm) =>
setSearchParams((prev) => ({
...prev,
keyword: newSearchTerm,
current: 1,
}))
}
searchPlaceholder="搜索模板名称..."
filters={filters}
onFiltersChange={handleFiltersChange}
showViewToggle={false}
onClearFilters={() =>
setSearchParams((prev) => ({
...prev,
filter: { ...prev.filter, builtIn: [] },
current: 1,
}))
}
onReload={() => {
fetchData().catch(() => message.error("刷新失败"));
}}
/>
<Card>
<Table
columns={columns}
dataSource={tableData}
loading={loading}
rowKey="id"
pagination={{
...pagination,
current: searchParams.current,
pageSize: searchParams.pageSize,
total: pagination.total,
}}
scroll={{ x: "max-content", y: "calc(100vh - 25rem)" }}
/>
</Card>
</div>
);
}

View File

@@ -28,7 +28,7 @@ export function queryDataXTemplatesUsingGet(params?: any) {
return get("/api/data-collection/templates", params);
}
export function deleteTaskByIdUsingDelete(id: string | number) {
return del(`/api/data-collection/tasks/${id}`);
return del("/api/data-collection/tasks", { ids: [id] });
}
export function executeTaskByIdUsingPost(
@@ -47,13 +47,47 @@ export function stopTaskByIdUsingPost(
// 执行日志相关接口
export function queryExecutionLogUsingPost(params?: any) {
return post("/api/data-collection/executions", params);
return get("/api/data-collection/executions", params);
}
export function queryExecutionLogByIdUsingGet(id: string | number) {
return get(`/api/data-collection/executions/${id}`);
}
export function queryExecutionLogContentByIdUsingGet(id: string | number) {
return get(`/api/data-collection/executions/${id}/log`);
}
export async function queryExecutionLogFileByIdUsingGet(id: string | number) {
const token = localStorage.getItem("token") || sessionStorage.getItem("token");
const resp = await fetch(`/api/data-collection/executions/${id}/log`, {
method: "GET",
headers: {
...(token ? { Authorization: `Bearer ${token}` } : {}),
},
credentials: "include",
});
if (!resp.ok) {
let detail = "";
try {
detail = await resp.text();
} catch {
detail = resp.statusText;
}
const err: any = new Error(detail || `HTTP error ${resp.status}`);
err.status = resp.status;
err.data = detail;
throw err;
}
const contentDisposition = resp.headers.get("content-disposition") || "";
const filenameMatch = contentDisposition.match(/filename\*?=(?:UTF-8''|\")?([^;\"\n]+)/i);
const filename = filenameMatch?.[1] ? decodeURIComponent(filenameMatch[1].replace(/\"/g, "").trim()) : `execution_${id}.log`;
const blob = await resp.blob();
return { blob, filename };
}
// 监控统计相关接口
export function queryCollectionStatisticsUsingGet(params?: any) {
return get("/api/data-collection/monitor/statistics", params);

View File

@@ -1,9 +1,11 @@
import {
CollectionTask,
LogStatus,
SyncMode,
SyncMode, TaskExecution,
TaskStatus,
TriggerType,
} from "./collection.model";
import {formatDateTime} from "@/utils/unit.ts";
export const StatusMap: Record<
TaskStatus,
@@ -24,23 +26,27 @@ export const StatusMap: Record<
color: "red",
value: TaskStatus.FAILED,
},
[TaskStatus.SUCCESS]: {
[TaskStatus.COMPLETED]: {
label: "成功",
color: "green",
value: TaskStatus.SUCCESS,
value: TaskStatus.COMPLETED,
},
[TaskStatus.DRAFT]: {
label: "草稿",
color: "orange",
value: TaskStatus.DRAFT,
},
[TaskStatus.READY]: { label: "就绪", color: "cyan", value: TaskStatus.READY },
[TaskStatus.PENDING]: {
label: "就绪",
color: "cyan",
value: TaskStatus.PENDING
},
};
export const SyncModeMap: Record<SyncMode, { label: string; value: SyncMode }> =
export const SyncModeMap: Record<SyncMode, { label: string; value: SyncMode, color: string }> =
{
[SyncMode.ONCE]: { label: "立即同步", value: SyncMode.ONCE },
[SyncMode.SCHEDULED]: { label: "定时同步", value: SyncMode.SCHEDULED },
[SyncMode.ONCE]: { label: "立即同步", value: SyncMode.ONCE, color: "orange" },
[SyncMode.SCHEDULED]: { label: "定时同步", value: SyncMode.SCHEDULED, color: "blue" },
};
export const LogStatusMap: Record<
@@ -73,9 +79,21 @@ export const LogTriggerTypeMap: Record<
[TriggerType.API]: { label: "API", value: TriggerType.API },
};
export function mapCollectionTask(task: CollectionTask): CollectionTask {
export function mapCollectionTask(task: CollectionTask): any {
return {
...task,
status: StatusMap[task.status],
syncMode: SyncModeMap[task.syncMode],
createdAt: formatDateTime(task.createdAt),
updatedAt: formatDateTime(task.updatedAt)
};
}
export function mapTaskExecution(execution: TaskExecution): any {
return {
...execution,
status: StatusMap[execution.status],
startedAt: formatDateTime(execution.startedAt),
completedAt: formatDateTime(execution.completedAt)
};
}

View File

@@ -1,8 +1,8 @@
export enum TaskStatus {
DRAFT = "DRAFT",
READY = "READY",
PENDING = "PENDING",
RUNNING = "RUNNING",
SUCCESS = "SUCCESS",
COMPLETED = "COMPLETED",
FAILED = "FAILED",
STOPPED = "STOPPED",
}
@@ -19,12 +19,26 @@ export interface CollectionTask {
config: object; // 具体配置结构根据实际需求定义
status: TaskStatus;
syncMode: SyncMode;
templateName?: string;
scheduleExpression?: string; // 仅当 syncMode 为 SCHEDULED 时存在
timeoutSeconds?: number;
lastExecutionId: string;
createdAt: string; // ISO date string
updatedAt: string; // ISO date string
}
export interface TaskExecution {
id: string;
taskId: string;
taskName: string;
status: string;
logPath: string;
startedAt: string;
completedAt: string;
durationSeconds: number;
errorMessage: string;
}
export enum LogStatus {
RUNNING = "RUNNING",
SUCCESS = "SUCCESS",

View File

@@ -0,0 +1,66 @@
import uuid
from sqlalchemy import Column, String, Text, TIMESTAMP, Integer, BigInteger, Numeric, JSON, Boolean
from sqlalchemy.sql import func
from app.db.session import Base
class CollectionTemplate(Base):
"""归集模板表(UUID 主键) -> t_dc_collection_templates"""
__tablename__ = "t_dc_collection_templates"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="模板ID(UUID)")
name = Column(String(255), nullable=False, comment="模板名称")
description = Column(Text, nullable=True, comment="模板描述")
source_type = Column(String(64), nullable=False, comment="源数据源类型")
source_name = Column(String(64), nullable=False, comment="源数据源名称")
target_type = Column(String(64), nullable=False, comment="目标数据源类型")
target_name = Column(String(64), nullable=False, comment="目标数据源名称")
template_content = Column(JSON, nullable=False, comment="模板内容")
built_in = Column(Boolean, default=False, comment="是否系统内置模板")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
class CollectionTask(Base):
"""归集任务表(UUID 主键) -> t_dc_collection_tasks"""
__tablename__ = "t_dc_collection_tasks"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(255), nullable=False, comment="任务名称")
description = Column(Text, nullable=True, comment="任务描述")
sync_mode = Column(String(20), nullable=False, server_default="ONCE", comment="同步模式:ONCE/SCHEDULED")
template_id = Column(String(36), nullable=False, comment="归集模板ID")
template_name = Column(String(255), nullable=False, comment="归集模板名称")
target_path = Column(String(1000), nullable=True, server_default="", comment="目标存储路径")
config = Column(JSON, nullable=False, comment="归集配置(DataX配置),包含源端和目标端配置信息")
schedule_expression = Column(String(255), nullable=True, comment="Cron调度表达式")
status = Column(String(20), nullable=True, server_default="DRAFT", comment="任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED")
retry_count = Column(Integer, nullable=True, server_default="3", comment="重试次数")
timeout_seconds = Column(Integer, nullable=True, server_default="3600", comment="超时时间(秒)")
last_execution_id = Column(String(36), nullable=True, comment="最后执行ID(UUID)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
class TaskExecution(Base):
"""任务执行记录表(UUID 主键) -> t_dc_task_executions"""
__tablename__ = "t_dc_task_executions"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="执行记录ID(UUID)")
task_id = Column(String(36), nullable=False, comment="任务ID")
task_name = Column(String(255), nullable=False, comment="任务名称")
status = Column(String(20), nullable=True, server_default="RUNNING", comment="执行状态:RUNNING/SUCCESS/FAILED/STOPPED")
log_path = Column(String(1000), nullable=True, server_default="", comment="日志文件路径")
started_at = Column(TIMESTAMP, nullable=True, comment="开始时间")
completed_at = Column(TIMESTAMP, nullable=True, comment="完成时间")
duration_seconds = Column(Integer, nullable=True, server_default="0", comment="执行时长(秒)")
error_message = Column(Text, nullable=True, comment="错误信息")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
created_by = Column(String(255), nullable=True, comment="创建者")
updated_by = Column(String(255), nullable=True, comment="更新者")
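A self-contained smoke test of these models, assuming an in-memory SQLite engine rather than the service's real session factory; it shows the uuid4 default and the DRAFT server default taking effect:

from sqlalchemy import create_engine
from sqlalchemy.orm import Session

from app.db.session import Base
from app.db.models.data_collection import CollectionTask

engine = create_engine("sqlite+pysqlite:///:memory:")
Base.metadata.create_all(engine)

with Session(engine) as session:
    task = CollectionTask(
        name="nightly-import",
        sync_mode="SCHEDULED",
        template_id="tpl-1",
        template_name="MySQL to local",
        config={"parameter": {}, "reader": {}, "writer": {}},
        schedule_expression="0 0 2 * * ?",
    )
    session.add(task)
    session.commit()
    # id comes from the uuid4 default, status from the DRAFT server default
    print(task.id, task.status)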

View File

@@ -5,6 +5,7 @@ from .annotation.interface import router as annotation_router
from .ratio.interface import router as ratio_router
from .generation.interface import router as generation_router
from .evaluation.interface import router as evaluation_router
from .collection.interface import router as collection_route
router = APIRouter(
prefix="/api"
@@ -15,5 +16,6 @@ router.include_router(annotation_router)
router.include_router(ratio_router)
router.include_router(generation_router)
router.include_router(evaluation_router)
router.include_router(collection_route)
__all__ = ["router"]

View File

@@ -0,0 +1,200 @@
import json
import threading
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Dict, Any
from app.core.logging import get_logger
from app.db.models.data_collection import CollectionTask, TaskExecution, CollectionTemplate
from app.module.collection.schema.collection import CollectionConfig, SyncMode
from app.module.shared.schema import TaskStatus
logger = get_logger(__name__)
class DataxClient:
def __init__(self, task: CollectionTask, execution: TaskExecution):
self.execution = execution
self.task = task
self.config_file_path = f"/flow/data-collection/{task.id}/config.json"
self.python_path = "python"
self.datax_main = "/opt/datax/bin/datax.py"
Path(self.config_file_path).parent.mkdir(parents=True, exist_ok=True)
def validate_json_string(self) -> Dict[str, Any]:
"""
验证 JSON 字符串
Returns:
解析后的配置字典
"""
try:
    # config 可能是 JSON 字符串,也可能是 JSON 列返回的 dict
    raw = self.task.config
    config = raw if isinstance(raw, dict) else json.loads(raw)
# 基本验证
if 'job' not in config:
raise ValueError("JSON 必须包含 'job' 字段")
if 'content' not in config.get('job', {}):
raise ValueError("job 必须包含 'content' 字段")
logger.info("JSON 配置验证通过")
return config
except json.JSONDecodeError as e:
raise ValueError(f"JSON 格式错误: {e}")
except Exception as e:
raise ValueError(f"配置验证失败: {e}")
@staticmethod
def generate_datax_config(task_config: CollectionConfig, template: CollectionTemplate, target_path: str):
# 校验参数
reader_parameter = {
**(task_config.parameter if task_config.parameter else {}),
**(task_config.reader if task_config.reader else {})
}
writer_parameter = {
**(task_config.parameter if task_config.parameter else {}),
**(task_config.writer if task_config.writer else {}),
"destPath": target_path
}
# 生成任务运行配置
job_config = {
"content": [
{
"reader": {
"name": template.source_type,
"parameter": reader_parameter
},
"writer": {
"name": template.target_type,
"parameter": writer_parameter
}
}
],
"setting": {
"speed": {
"channel": 2
}
}
}
task_config.job = job_config
def create_config_file(self) -> str:
"""
创建配置文件
Returns:
临时文件路径
"""
# 验证 JSON
config = self.validate_json_string()
# 写入临时文件
with open(self.config_file_path, 'w', encoding='utf-8') as f:
json.dump(config, f, indent=2, ensure_ascii=False)
logger.debug(f"创建配置文件: {self.config_file_path}")
return self.config_file_path
def run_datax_job(self):
"""
启动 DataX 任务
Returns:
执行结果字典
"""
# 创建配置文件
self.create_config_file()
try:
# 构建命令
cmd = [self.python_path, str(self.datax_main), str(self.config_file_path)]
cmd_str = ' '.join(cmd)
logger.info(f"执行命令: {cmd_str}")
if not self.execution.started_at:
self.execution.started_at = datetime.now()
# 执行命令并写入日志
with open(self.execution.log_path, 'w', encoding='utf-8') as log_f:
# 写入头信息
self.write_header_log(cmd_str, log_f)
# 启动datax进程
exit_code = self._run_process(cmd, log_f)
# 记录结束时间
self.execution.completed_at = datetime.now()
self.execution.duration_seconds = (self.execution.completed_at - self.execution.started_at).total_seconds()
# 写入结束信息
self.write_tail_log(exit_code, log_f)
if exit_code == 0:
logger.info(f"DataX 任务执行成功: {self.execution.id}")
logger.info(f"执行耗时: {self.execution.duration_seconds:.2f}")
self.execution.status = TaskStatus.COMPLETED.name
else:
self.execution.error_message = self.execution.error_message or f"DataX 任务执行失败,退出码: {exit_code}"
self.execution.status = TaskStatus.FAILED.name
logger.error(self.execution.error_message)
except Exception as e:
self.execution.completed_at = datetime.now()
self.execution.duration_seconds = (self.execution.completed_at - self.execution.started_at).total_seconds()
self.execution.error_message = f"执行异常: {e}"
self.execution.status = TaskStatus.FAILED.name
logger.error(f"执行异常: {e}", exc_info=True)
if self.task.sync_mode == SyncMode.ONCE:
self.task.status = self.execution.status
def _run_process(self, cmd: list[str], log_f) -> int:
# 启动进程
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding='utf-8',
bufsize=1,
universal_newlines=True
)
# 创建读取线程
stdout_thread = threading.Thread(target=lambda stream=process.stdout: self.read_stream(stream, log_f))
stderr_thread = threading.Thread(target=lambda stream=process.stderr: self.read_stream(stream, log_f))
stdout_thread.start()
stderr_thread.start()
# 等待进程完成
try:
exit_code = process.wait(timeout=self.task.timeout_seconds)
except subprocess.TimeoutExpired:
process.kill()
exit_code = -1
self.execution.error_message = f"任务执行超时({self.task.timeout_seconds}秒)"
logger.error(f"任务执行超时({self.task.timeout_seconds}秒)")
# 等待线程完成
stdout_thread.join(timeout=5)
stderr_thread.join(timeout=5)
return exit_code
def write_tail_log(self, exit_code: int, log_f):
log_f.write("\n" + "=" * 100 + "\n")
log_f.write(f"End Time: {self.execution.completed_at}\n")
log_f.write(f"Execution Time: {self.execution.duration_seconds:.2f} seconds\n")
log_f.write(f"Exit Code: {exit_code}\n")
log_f.write(f"Status: {'SUCCESS' if exit_code == 0 else 'FAILED'}\n")
def write_header_log(self, cmd: str, log_f):
log_f.write(f"DataX Task Execution Log\n")
log_f.write(f"Job ID: {self.execution.id}\n")
log_f.write(f"Start Time: {self.execution.started_at}\n")
log_f.write(f"Config Source: JSON String\n")
log_f.write(f"Command: {cmd}\n")
log_f.write("=" * 100 + "\n\n")
@staticmethod
def read_stream(stream, log_f):
"""读取输出流"""
for line in stream:
line = line.rstrip('\n')
if line:
# 写入日志文件
log_f.write(f"{line}\n")
log_f.flush()
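generate_datax_config merges the shared parameter block into both sides and injects destPath into the writer. Reproduced standalone, the persisted config, under the job key that validate_json_string expects, would look roughly like this, with reader/writer names taken from the template's source_type/target_type (all values illustrative):

import json

parameter = {"encoding": "utf-8"}  # shared block, merged into both sides
reader = {"jdbcUrl": "jdbc:mysql://db:3306/app", "querySql": "select 1"}
writer = {}

config = {
    "job": {
        "content": [
            {
                "reader": {"name": "mysqlreader", "parameter": {**parameter, **reader}},
                "writer": {
                    "name": "localwriter",
                    "parameter": {**parameter, **writer, "destPath": "/dataset/local/<task-id>"},
                },
            }
        ],
        "setting": {"speed": {"channel": 2}},
    }
}

print(json.dumps(config, indent=2, ensure_ascii=False))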

View File

@@ -0,0 +1,15 @@
from fastapi import APIRouter
router = APIRouter(
prefix="/data-collection",
tags = ["data-collection"]
)
# Include sub-routers
from .collection import router as collection_router
from .execution import router as execution_router
from .template import router as template_router
router.include_router(collection_router)
router.include_router(execution_router)
router.include_router(template_router)

View File

@@ -0,0 +1,157 @@
import math
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models.data_collection import CollectionTask, TaskExecution, CollectionTemplate
from app.db.session import get_db
from app.module.collection.client.datax_client import DataxClient
from app.module.collection.schema.collection import CollectionTaskBase, CollectionTaskCreate, converter_to_response, \
convert_for_create
from app.module.collection.service.collection import CollectionTaskService
from app.module.shared.schema import StandardResponse, PaginatedData
router = APIRouter(
prefix="/tasks",
tags=["data-collection/tasks"],
)
logger = get_logger(__name__)
@router.post("", response_model=StandardResponse[CollectionTaskBase])
async def create_task(
request: CollectionTaskCreate,
db: AsyncSession = Depends(get_db)
):
"""创建归集任务"""
try:
template = await db.execute(select(CollectionTemplate).where(CollectionTemplate.id == request.template_id))
template = template.scalar_one_or_none()
if not template:
raise HTTPException(status_code=400, detail="Template not found")
task_id = str(uuid.uuid4())
DataxClient.generate_datax_config(request.config, template, f"/dataset/local/{task_id}")
task = convert_for_create(request, task_id)
task.template_name = template.name
task_service = CollectionTaskService(db)
task = await task_service.create_task(task)
task = await db.execute(select(CollectionTask).where(CollectionTask.id == task.id))
task = task.scalar_one_or_none()
await db.commit()
return StandardResponse(
code=200,
message="Success",
data=converter_to_response(task)
)
except HTTPException:
await db.rollback()
raise
except Exception as e:
await db.rollback()
logger.error(f"Failed to create collection task: {str(e)}", e)
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("", response_model=StandardResponse[PaginatedData[CollectionTaskBase]])
async def list_tasks(
page: int = 1,
size: int = 20,
name: Optional[str] = Query(None, description="任务名称模糊查询"),
db: AsyncSession = Depends(get_db)
):
"""分页查询归集任务"""
try:
# 构建查询条件
page = page if page > 0 else 1
size = size if size > 0 else 20
query = select(CollectionTask)
if name:
query = query.where(CollectionTask.name.ilike(f"%{name}%"))
# 获取总数
count_query = select(func.count()).select_from(query.subquery())
total = (await db.execute(count_query)).scalar_one()
# 分页查询
offset = (page - 1) * size
tasks = (await db.execute(
query.order_by(CollectionTask.created_at.desc())
.offset(offset)
.limit(size)
)).scalars().all()
# 转换为响应模型
items = [converter_to_response(task) for task in tasks]
total_pages = math.ceil(total / size) if total > 0 else 0
return StandardResponse(
code=200,
message="Success",
data=PaginatedData(
content=items,
total_elements=total,
total_pages=total_pages,
page=page,
size=size,
)
)
except Exception as e:
logger.error(f"Failed to list evaluation tasks: {str(e)}", e)
raise HTTPException(status_code=500, detail="Internal server error")
@router.delete("", response_model=StandardResponse[str], status_code=200)
async def delete_collection_tasks(
ids: list[str] = Query(..., description="要删除的任务ID列表"),
db: AsyncSession = Depends(get_db),
):
"""
删除归集任务
Args:
ids: 任务ID
db: 数据库会话
Returns:
StandardResponse[str]: 删除结果
"""
try:
# 逐个删除任务及其执行记录(接口定义接收 ID 列表)
for task_id in ids:
    task = await db.get(CollectionTask, task_id)
    if not task:
        raise HTTPException(status_code=404, detail=f"Collection task not found: {task_id}")
    # 删除任务执行记录
    await db.execute(
        TaskExecution.__table__.delete()
        .where(TaskExecution.task_id == task_id)
    )
    # 删除任务
    await db.delete(task)
await db.commit()
return StandardResponse(
code=200,
message="Collection task deleted successfully",
data="success"
)
except HTTPException:
await db.rollback()
raise
except Exception as e:
await db.rollback()
logger.error(f"Failed to delete collection task: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")

View File

@@ -0,0 +1,120 @@
import math
import os
from pathlib import Path
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models.data_collection import TaskExecution
from app.db.session import get_db
from app.module.collection.schema.collection import TaskExecutionBase, converter_execution_to_response
from app.module.shared.schema import StandardResponse, PaginatedData
router = APIRouter(
prefix="/executions",
tags=["data-collection/executions"],
)
logger = get_logger(__name__)
@router.get("", response_model=StandardResponse[PaginatedData[TaskExecutionBase]])
async def list_executions(
page: int = 1,
size: int = 20,
task_id: Optional[str] = Query(None, description="任务ID"),
task_name: Optional[str] = Query(None, description="任务名称模糊查询"),
start_time: Optional[datetime] = Query(None, description="开始执行时间范围-起(started_at >= start_time)"),
end_time: Optional[datetime] = Query(None, description="开始执行时间范围-止(started_at <= end_time)"),
db: AsyncSession = Depends(get_db)
):
"""分页查询归集任务执行记录"""
try:
query = select(TaskExecution)
if task_id:
query = query.where(TaskExecution.task_id == task_id)
if task_name:
query = query.where(TaskExecution.task_name.ilike(f"%{task_name}%"))
if start_time:
query = query.where(TaskExecution.started_at >= start_time)
if end_time:
query = query.where(TaskExecution.started_at <= end_time)
count_query = select(func.count()).select_from(query.subquery())
total = (await db.execute(count_query)).scalar_one()
offset = (page - 1) * size
executions = (await db.execute(
query.order_by(TaskExecution.created_at.desc())
.offset(offset)
.limit(size)
)).scalars().all()
items = [converter_execution_to_response(exe) for exe in executions]
total_pages = math.ceil(total / size) if total > 0 else 0
return StandardResponse(
code=200,
message="Success",
data=PaginatedData(
content=items,
total_elements=total,
total_pages=total_pages,
page=page,
size=size,
)
)
except Exception as e:
logger.error(f"Failed to list task executions: {str(e)}", e)
raise HTTPException(status_code=500, detail="Internal server error")
@router.get("/{execution_id}/log")
async def get_execution_log(
execution_id: str,
db: AsyncSession = Depends(get_db)
):
"""获取执行记录对应的日志文件内容"""
try:
execution = await db.get(TaskExecution, execution_id)
if not execution:
raise HTTPException(status_code=404, detail="Execution record not found")
log_path = getattr(execution, "log_path", None)
if not log_path:
raise HTTPException(status_code=404, detail="Log path not found")
path = Path(str(log_path))
if not path.is_absolute():
path = Path(os.getcwd()) / path
path = path.resolve()
if not path.exists() or not path.is_file():
raise HTTPException(status_code=404, detail="Log file not found")
filename = path.name
headers = {
"Content-Disposition": f'inline; filename="{filename}"'
}
return FileResponse(
path=str(path),
media_type="text/plain; charset=utf-8",
filename=filename,
headers=headers,
)
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get execution log: {str(e)}", e)
raise HTTPException(status_code=500, detail="Internal server error")
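A short usage sketch for the two endpoints above, paging execution records and then fetching one log. The host/port and the serialized key casing of StandardResponse/PaginatedData (defined in shared schema, not shown in this hunk) are assumptions:

    import httpx

    # Hypothetical base URL; the executions router is mounted under its parent app
    with httpx.Client(base_url="http://localhost:18000/api/data-collection") as client:
        page = client.get("/executions", params={"page": 1, "size": 10, "task_id": "task-id-1"}).json()
        records = page["data"]["content"]  # key casing assumed; depends on PaginatedData config
        for item in records:
            print(item["id"], item["status"])
        if records:
            # The log is served inline as text/plain via FileResponse
            print(client.get(f"/executions/{records[0]['id']}/log").text)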

View File

@@ -0,0 +1,67 @@
import math
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models.data_collection import CollectionTemplate
from app.db.session import get_db
from app.module.collection.schema.collection import CollectionTemplateBase, converter_template_to_response
from app.module.shared.schema import StandardResponse, PaginatedData
router = APIRouter(
prefix="/templates",
tags=["data-collection/templates"],
)
logger = get_logger(__name__)
@router.get("", response_model=StandardResponse[PaginatedData[CollectionTemplateBase]])
async def list_templates(
page: int = 1,
size: int = 20,
name: Optional[str] = Query(None, description="模板名称模糊查询"),
built_in: Optional[bool] = Query(None, description="是否系统内置模板"),
db: AsyncSession = Depends(get_db)
):
"""分页查询归集任务模板"""
try:
query = select(CollectionTemplate)
if name:
query = query.where(CollectionTemplate.name.ilike(f"%{name}%"))
if built_in is not None:
query = query.where(CollectionTemplate.built_in == built_in)
count_query = select(func.count()).select_from(query.subquery())
total = (await db.execute(count_query)).scalar_one()
offset = (page - 1) * size
templates = (await db.execute(
query.order_by(CollectionTemplate.created_at.desc())
.offset(offset)
.limit(size)
)).scalars().all()
items = [converter_template_to_response(tpl) for tpl in templates]
total_pages = math.ceil(total / size) if total > 0 else 0
return StandardResponse(
code=200,
message="Success",
data=PaginatedData(
content=items,
total_elements=total,
total_pages=total_pages,
page=page,
size=size,
)
)
except Exception as e:
logger.error(f"Failed to list collection templates: {str(e)}", e)
raise HTTPException(status_code=500, detail="Internal server error")

View File

@@ -0,0 +1,182 @@
import json
import uuid
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field, ConfigDict
from pydantic.alias_generators import to_camel
from app.db.models.data_collection import CollectionTask, TaskExecution, CollectionTemplate
from app.module.shared.schema import TaskStatus
class SyncMode(str, Enum):
ONCE = "ONCE"
SCHEDULED = "SCHEDULED"
class CollectionConfig(BaseModel):
parameter: Optional[dict] = Field(None, description="模板参数")
reader: Optional[dict] = Field(None, description="reader参数")
writer: Optional[dict] = Field(None, description="writer参数")
job: Optional[dict] = Field(None, description="任务配置")
class CollectionTaskBase(BaseModel):
id: str = Field(..., description="任务id")
name: str = Field(..., description="任务名称")
description: Optional[str] = Field(None, description="任务描述")
target_path: str = Field(..., description="目标存放路径")
config: CollectionConfig = Field(..., description="任务配置")
template_id: str = Field(..., description="模板ID")
template_name: Optional[str] = Field(None, description="模板名称")
status: TaskStatus = Field(..., description="任务状态")
sync_mode: SyncMode = Field(default=SyncMode.ONCE, description="同步方式")
schedule_expression: Optional[str] = Field(None, description="调度表达式(cron)")
retry_count: int = Field(default=3, description="重试次数")
timeout_seconds: int = Field(default=3600, description="超时时间")
last_execution_id: Optional[str] = Field(None, description="最后执行id")
created_at: Optional[datetime] = Field(None, description="创建时间")
updated_at: Optional[datetime] = Field(None, description="更新时间")
created_by: Optional[str] = Field(None, description="创建人")
updated_by: Optional[str] = Field(None, description="更新人")
model_config = ConfigDict(
alias_generator=to_camel,
populate_by_name=True
)
class CollectionTaskCreate(BaseModel):
name: str = Field(..., description="任务名称")
description: Optional[str] = Field(None, description="任务描述")
sync_mode: SyncMode = Field(default=SyncMode.ONCE, description="同步方式")
schedule_expression: Optional[str] = Field(None, description="调度表达式(cron)")
config: CollectionConfig = Field(..., description="任务配置")
template_id: str = Field(..., description="模板ID")
model_config = ConfigDict(
alias_generator=to_camel,
populate_by_name=True
)
def converter_to_response(task: CollectionTask) -> CollectionTaskBase:
return CollectionTaskBase(
id=task.id,
name=task.name,
description=task.description,
sync_mode=task.sync_mode,
template_id=task.template_id,
template_name=task.template_name,
target_path=task.target_path,
config=json.loads(task.config),
schedule_expression=task.schedule_expression,
status=task.status,
retry_count=task.retry_count,
timeout_seconds=task.timeout_seconds,
last_execution_id=task.last_execution_id,
created_at=task.created_at,
updated_at=task.updated_at,
created_by=task.created_by,
updated_by=task.updated_by,
)
def convert_for_create(task: CollectionTaskCreate, task_id: str) -> CollectionTask:
return CollectionTask(
id=task_id,
name=task.name,
description=task.description,
sync_mode=task.sync_mode,
template_id=task.template_id,
target_path=f"/dataset/local/{task_id}",
config=json.dumps(task.config.model_dump()),
schedule_expression=task.schedule_expression,
status=TaskStatus.PENDING.name
)
def create_execute_record(task: CollectionTask) -> TaskExecution:
execution_id = str(uuid.uuid4())
return TaskExecution(
id=execution_id,
task_id=task.id,
task_name=task.name,
status=TaskStatus.RUNNING.name,
started_at=datetime.now(),
log_path=f"/flow/data-collection/{task.id}/{execution_id}.log"
)
class TaskExecutionBase(BaseModel):
id: str = Field(..., description="执行记录ID")
task_id: str = Field(..., description="任务ID")
task_name: str = Field(..., description="任务名称")
status: Optional[str] = Field(None, description="执行状态")
log_path: Optional[str] = Field(None, description="日志文件路径")
started_at: Optional[datetime] = Field(None, description="开始时间")
completed_at: Optional[datetime] = Field(None, description="完成时间")
duration_seconds: Optional[int] = Field(None, description="执行时长(秒)")
error_message: Optional[str] = Field(None, description="错误信息")
created_at: Optional[datetime] = Field(None, description="创建时间")
updated_at: Optional[datetime] = Field(None, description="更新时间")
created_by: Optional[str] = Field(None, description="创建者")
updated_by: Optional[str] = Field(None, description="更新者")
model_config = ConfigDict(
alias_generator=to_camel,
populate_by_name=True
)
def converter_execution_to_response(execution: TaskExecution) -> TaskExecutionBase:
return TaskExecutionBase(
id=execution.id,
task_id=execution.task_id,
task_name=execution.task_name,
status=execution.status,
log_path=execution.log_path,
started_at=execution.started_at,
completed_at=execution.completed_at,
duration_seconds=execution.duration_seconds,
error_message=execution.error_message,
created_at=execution.created_at,
updated_at=execution.updated_at,
created_by=execution.created_by,
updated_by=execution.updated_by,
)
class CollectionTemplateBase(BaseModel):
id: str = Field(..., description="模板ID")
name: str = Field(..., description="模板名称")
description: Optional[str] = Field(None, description="模板描述")
source_type: str = Field(..., description="源数据源类型")
source_name: str = Field(..., description="源数据源名称")
target_type: str = Field(..., description="目标数据源类型")
target_name: str = Field(..., description="目标数据源名称")
template_content: dict = Field(..., description="模板内容")
built_in: Optional[bool] = Field(None, description="是否系统内置模板")
created_at: Optional[datetime] = Field(None, description="创建时间")
updated_at: Optional[datetime] = Field(None, description="更新时间")
created_by: Optional[str] = Field(None, description="创建者")
updated_by: Optional[str] = Field(None, description="更新者")
model_config = ConfigDict(
alias_generator=to_camel,
populate_by_name=True
)
def converter_template_to_response(template: CollectionTemplate) -> CollectionTemplateBase:
return CollectionTemplateBase(
id=template.id,
name=template.name,
description=template.description,
source_type=template.source_type,
source_name=template.source_name,
target_type=template.target_type,
target_name=template.target_name,
template_content=template.template_content,
built_in=template.built_in,
created_at=template.created_at,
updated_at=template.updated_at,
created_by=template.created_by,
updated_by=template.updated_by,
)
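Since every model above sets alias_generator=to_camel with populate_by_name=True, requests may use camelCase or snake_case keys interchangeably, and responses can be serialized back to camelCase. A minimal sketch (values invented):

    payload = {
        "name": "nas-sync",
        "syncMode": "ONCE",  # camelCase alias accepted on input
        "templateId": "1",
        "config": {"parameter": {}, "reader": {}, "writer": {}},
    }
    req = CollectionTaskCreate.model_validate(payload)
    assert req.template_id == "1"  # populate_by_name allows snake_case access
    print(req.model_dump(by_alias=True))  # emits camelCase keys again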

View File

@@ -0,0 +1,70 @@
import asyncio
from dataclasses import dataclass
from typing import Any, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models.data_collection import CollectionTask, CollectionTemplate
from app.db.session import AsyncSessionLocal
from app.module.collection.client.datax_client import DataxClient
from app.module.collection.schema.collection import SyncMode, create_execute_record
from app.module.shared.schema import TaskStatus
logger = get_logger(__name__)
@dataclass
class _RuntimeTask:
id: str
config: str
timeout_seconds: int
sync_mode: str
status: Optional[str] = None
@dataclass
class _RuntimeExecution:
id: str
log_path: str
started_at: Optional[Any] = None
completed_at: Optional[Any] = None
duration_seconds: Optional[float] = None
error_message: Optional[str] = None
status: Optional[str] = None
class CollectionTaskService:
def __init__(self, db: AsyncSession):
self.db = db
async def create_task(self, task: CollectionTask) -> CollectionTask:
self.db.add(task)
# If it's a one-time task, execute it immediately
if task.sync_mode == SyncMode.ONCE:
task.status = TaskStatus.RUNNING.name
await self.db.commit()
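# NOTE: fire-and-forget; asyncio keeps only a weak reference to tasks created
# this way, so retaining the returned Task (e.g. in a module-level set) would
# guard against premature garbage collection in a long-lived service.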
asyncio.create_task(CollectionTaskService.run_async(task.id))
return task
@staticmethod
async def run_async(task_id: str):
logger.info(f"start to execute task {task_id}")
async with AsyncSessionLocal() as session:
task = await session.execute(select(CollectionTask).where(CollectionTask.id == task_id))
task = task.scalar_one_or_none()
if not task:
logger.error(f"task {task_id} not exist")
return
template = await session.execute(select(CollectionTemplate).where(CollectionTemplate.id == task.template_id))
if not template:
logger.error(f"template {task.template_name} not exist")
return
task_execution = create_execute_record(task)
session.add(task_execution)
await session.commit()
await asyncio.to_thread(
DataxClient(execution=task_execution, task=task).run_datax_job
)
await session.commit()
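DataxClient is imported above but defined elsewhere; the sketch below shows only the interface this service depends on, assuming DataX is launched as a subprocess (the /opt/datax path matches the Dockerfile later in this change) and the execution row is mutated in place for the final commit. How the stored config maps to a DataX job file is also an assumption:

    import os
    import subprocess
    import tempfile
    from datetime import datetime

    class DataxClient:
        """Hypothetical sketch; the real client may render the template differently."""

        def __init__(self, execution, task):
            self.execution = execution
            self.task = task

        def run_datax_job(self) -> None:
            # Runs synchronously; the service wraps this call in asyncio.to_thread
            started = datetime.now()
            try:
                with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
                    f.write(self.task.config)  # config is stored as a JSON string
                    job_file = f.name
                os.makedirs(os.path.dirname(self.execution.log_path), exist_ok=True)
                with open(self.execution.log_path, "w") as log:
                    subprocess.run(
                        ["python", "/opt/datax/bin/datax.py", job_file],
                        stdout=log, stderr=subprocess.STDOUT,
                        timeout=self.task.timeout_seconds, check=True,
                    )
                self.execution.status = "SUCCESS"
            except Exception as e:  # record any failure on the execution row
                self.execution.status = "FAILED"
                self.execution.error_message = str(e)
            finally:
                self.execution.completed_at = datetime.now()
                self.execution.duration_seconds = int(
                    (self.execution.completed_at - started).total_seconds()
                )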

View File

@@ -236,13 +236,13 @@
<!-- </includes>-->
<!-- <outputDirectory>datax</outputDirectory>-->
<!-- </fileSet>-->
<!-- <fileSet>-->
<!-- <directory>starrocksreader/target/datax/</directory>-->
<!-- <includes>-->
<!-- <include>**/*.*</include>-->
<!-- </includes>-->
<!-- <outputDirectory>datax</outputDirectory>-->
<!-- </fileSet>-->
<fileSet>
<directory>starrocksreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<!-- <fileSet>-->
<!-- <directory>dorisreader/target/datax/</directory>-->
<!-- <includes>-->
@@ -287,13 +287,13 @@
<!-- </includes>-->
<!-- <outputDirectory>datax</outputDirectory>-->
<!-- </fileSet>-->
<!-- <fileSet>-->
<!-- <directory>starrockswriter/target/datax/</directory>-->
<!-- <includes>-->
<!-- <include>**/*.*</include>-->
<!-- </includes>-->
<!-- <outputDirectory>datax</outputDirectory>-->
<!-- </fileSet>-->
<fileSet>
<directory>starrockswriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<!-- <fileSet>-->
<!-- <directory>drdswriter/target/datax/</directory>-->
<!-- <includes>-->

View File

@@ -81,13 +81,13 @@
<!-- <module>opentsdbreader</module>-->
<!-- <module>loghubreader</module>-->
<!-- <module>datahubreader</module>-->
<!-- <module>starrocksreader</module>-->
<module>starrocksreader</module>
<!-- <module>sybasereader</module>-->
<!-- <module>dorisreader</module>-->
<module>nfsreader</module>
<!-- writer -->
<module>mysqlwriter</module>
<!-- <module>starrockswriter</module>-->
<module>starrockswriter</module>
<!-- <module>drdswriter</module>-->
<!-- <module>databendwriter</module>-->
<!-- <module>oraclewriter</module>-->

View File

@@ -10,35 +10,7 @@ USE datamate;
-- 删除现有表(支持重复执行 调测阶段使用)
DROP TABLE IF EXISTS t_dc_task_executions;
DROP TABLE IF EXISTS t_dc_collection_tasks;
DROP TABLE IF EXISTS t_dc_datax_templates;
CREATE TABLE t_dc_task_executions (
id VARCHAR(36) PRIMARY KEY COMMENT '执行记录ID(UUID)',
task_id VARCHAR(36) NOT NULL COMMENT '任务ID',
task_name VARCHAR(255) NOT NULL COMMENT '任务名称',
status VARCHAR(20) DEFAULT 'RUNNING' COMMENT '执行状态:RUNNING/SUCCESS/FAILED/STOPPED',
progress DECIMAL(5,2) DEFAULT 0.00 COMMENT '进度百分比',
records_total BIGINT DEFAULT 0 COMMENT '总记录数',
records_processed BIGINT DEFAULT 0 COMMENT '已处理记录数',
records_success BIGINT DEFAULT 0 COMMENT '成功记录数',
records_failed BIGINT DEFAULT 0 COMMENT '失败记录数',
throughput DECIMAL(10,2) DEFAULT 0.00 COMMENT '吞吐量(条/秒)',
data_size_bytes BIGINT DEFAULT 0 COMMENT '数据量(字节)',
started_at TIMESTAMP NULL COMMENT '开始时间',
completed_at TIMESTAMP NULL COMMENT '完成时间',
duration_seconds INT DEFAULT 0 COMMENT '执行时长(秒)',
config JSON COMMENT '执行配置',
error_message TEXT COMMENT '错误信息',
datax_job_id TEXT COMMENT 'datax任务ID',
result TEXT COMMENT '执行结果',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
created_by VARCHAR(255) COMMENT '创建者',
updated_by VARCHAR(255) COMMENT '更新者',
INDEX idx_task_id (task_id),
INDEX idx_status (status),
INDEX idx_started_at (started_at)
) COMMENT='任务执行明细表';
DROP TABLE IF EXISTS t_dc_collection_templates;
-- 数据归集任务表
CREATE TABLE t_dc_collection_tasks (
@@ -46,120 +18,60 @@ CREATE TABLE t_dc_collection_tasks (
name VARCHAR(255) NOT NULL COMMENT '任务名称',
description TEXT COMMENT '任务描述',
sync_mode VARCHAR(20) DEFAULT 'ONCE' COMMENT '同步模式:ONCE/SCHEDULED',
task_type VARCHAR(20) DEFAULT 'NAS' COMMENT '任务类型:NAS/OBS/MYSQL/CUSTOM',
template_id VARCHAR(36) NOT NULL COMMENT '归集模板ID',
template_name VARCHAR(255) NOT NULL COMMENT '归集模板名称',
target_path VARCHAR(1000) DEFAULT '' COMMENT '目标存储路径',
config TEXT NOT NULL COMMENT '归集配置(DataX配置),包含源端和目标端配置信息',
config JSON NOT NULL COMMENT '归集配置(DataX配置),包含源端和目标端配置信息',
schedule_expression VARCHAR(255) COMMENT 'Cron调度表达式',
status VARCHAR(20) DEFAULT 'DRAFT' COMMENT '任务状态:DRAFT/READY/RUNNING/SUCCESS/FAILED/STOPPED',
retry_count INT DEFAULT 3 COMMENT '重试次数',
timeout_seconds INT DEFAULT 3600 COMMENT '超时时间(秒)',
max_records BIGINT COMMENT '最大处理记录数',
sort_field VARCHAR(100) COMMENT '增量字段',
last_execution_id VARCHAR(36) COMMENT '最后执行ID(UUID)',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
created_by VARCHAR(255) COMMENT '创建者',
updated_by VARCHAR(255) COMMENT '更新者',
INDEX idx_status (status),
INDEX idx_created_at (created_at),
INDEX idx_schedule (schedule_expression)
INDEX idx_created_at (created_at)
) COMMENT='数据归集任务表';
-- 任务执行记录表
CREATE TABLE t_dc_task_log (
CREATE TABLE t_dc_task_executions (
id VARCHAR(36) PRIMARY KEY COMMENT '执行记录ID(UUID)',
task_id VARCHAR(36) NOT NULL COMMENT '任务ID',
task_name VARCHAR(255) NOT NULL COMMENT '任务名称',
sync_mode VARCHAR(20) DEFAULT 'FULL' COMMENT '同步模式:FULL/INCREMENTAL',
status VARCHAR(20) DEFAULT 'RUNNING' COMMENT '执行状态:RUNNING/SUCCESS/FAILED/STOPPED',
start_time TIMESTAMP NULL COMMENT '开始时间',
end_time TIMESTAMP NULL COMMENT '结束时间',
duration BIGINT COMMENT '执行时长(毫秒)',
process_id VARCHAR(50) COMMENT '进程ID',
log_path VARCHAR(500) COMMENT '日志文件路径',
error_msg LONGTEXT COMMENT '错误信息',
result LONGTEXT COMMENT '执行结果',
retry_times INT DEFAULT 0 COMMENT '重试次数',
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间'
) COMMENT='任务执行记录表';
-- DataX模板配置表
CREATE TABLE t_dc_datax_templates (
id VARCHAR(36) PRIMARY KEY COMMENT '模板ID(UUID)',
name VARCHAR(255) NOT NULL UNIQUE COMMENT '模板名称',
source_type VARCHAR(50) NOT NULL COMMENT '源数据源类型',
target_type VARCHAR(50) NOT NULL COMMENT '目标数据源类型',
template_content TEXT NOT NULL COMMENT '模板内容',
description TEXT COMMENT '模板描述',
version VARCHAR(20) DEFAULT '1.0.0' COMMENT '版本号',
is_system BOOLEAN DEFAULT FALSE COMMENT '是否系统模板',
log_path VARCHAR(1000) NOT NULL COMMENT '日志文件路径',
started_at TIMESTAMP NULL COMMENT '开始时间',
completed_at TIMESTAMP NULL COMMENT '完成时间',
duration_seconds INT DEFAULT 0 COMMENT '执行时长(秒)',
error_message TEXT COMMENT '错误信息',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
created_by VARCHAR(255) COMMENT '创建者',
INDEX idx_source_target (source_type, target_type),
INDEX idx_system (is_system)
) COMMENT='DataX模板配置表';
updated_by VARCHAR(255) COMMENT '更新者',
INDEX idx_task_id (task_id),
INDEX idx_status (status),
INDEX idx_started_at (started_at)
) COMMENT='任务执行明细表';
-- =====================================
-- DML语句 - 数据操作
-- =====================================
-- 插入默认的DataX模板
INSERT INTO t_dc_datax_templates (id, name, source_type, target_type, template_content, description, is_system, created_by) VALUES
-- MySQL to MySQL 模板
('e4272e51-d431-4681-a370-1b3d0b036cd0', 'MySQL到MySQL', 'MYSQL', 'MYSQL', JSON_OBJECT(
'job', JSON_OBJECT(
'setting', JSON_OBJECT(
'speed', JSON_OBJECT('channel', 3)
),
'content', JSON_ARRAY(
JSON_OBJECT(
'reader', JSON_OBJECT(
'name', 'mysqlreader',
'parameter', JSON_OBJECT(
'username', '${source.username}',
'password', '${source.password}',
'column', JSON_ARRAY('*'),
'splitPk', '${source.splitPk:id}',
'connection', JSON_ARRAY(
JSON_OBJECT(
'jdbcUrl', JSON_ARRAY('${source.jdbcUrl}'),
'table', JSON_ARRAY('${source.table}')
)
)
)
),
'writer', JSON_OBJECT(
'name', 'mysqlwriter',
'parameter', JSON_OBJECT(
'writeMode', 'insert',
'username', '${target.username}',
'password', '${target.password}',
'column', JSON_ARRAY('*'),
'session', JSON_ARRAY('set session sql_mode="PIPES_AS_CONCAT"'),
'preSql', JSON_ARRAY('${target.preSql:}'),
'connection', JSON_ARRAY(
JSON_OBJECT(
'jdbcUrl', '${target.jdbcUrl}',
'table', JSON_ARRAY('${target.table}')
)
)
)
)
)
)
)
), 'MySQL到MySQL数据同步模板', TRUE, 'system');
-- 插入任务执行记录模拟数据
INSERT INTO t_dc_task_executions (id, task_id, task_name, status, progress, records_total, records_processed, records_success, records_failed, throughput, data_size_bytes, started_at, completed_at, duration_seconds, config) VALUES
-- 成功执行记录
('12128059-a266-4d4f-b647-3cb8c24b8aad', '54cefc4d-3071-43d9-9fbf-baeb87932acd', '用户数据同步', 'SUCCESS', 100.00, 15000, 15000, 15000, 0, 125.50, 2048576,
DATE_SUB(NOW(), INTERVAL 1 DAY), DATE_SUB(NOW(), INTERVAL 1 DAY) + INTERVAL 2 MINUTE, 120,
JSON_OBJECT('batchSize', 1000, 'parallelism', 3)),
('9d418e0c-fa54-4f01-8633-3a5ad57f46a1', '3039a5c8-c894-42ab-ad49-5c2c5eccda31', '订单增量同步', 'SUCCESS', 100.00, 8500, 8500, 8500, 0, 94.44, 1536000,
DATE_SUB(NOW(), INTERVAL 12 HOUR), DATE_SUB(NOW(), INTERVAL 12 HOUR) + INTERVAL 90 SECOND, 90,
JSON_OBJECT('batchSize', 2000, 'parallelism', 2));
-- 数据归集模板配置表
CREATE TABLE t_dc_collection_templates (
id VARCHAR(36) PRIMARY KEY COMMENT '模板ID(UUID)',
name VARCHAR(255) NOT NULL UNIQUE COMMENT '模板名称',
description TEXT COMMENT '模板描述',
source_type VARCHAR(64) NOT NULL COMMENT '源数据源类型',
source_name VARCHAR(64) NOT NULL COMMENT '源数据源名称',
target_type VARCHAR(64) NOT NULL COMMENT '目标数据源类型',
target_name VARCHAR(64) NOT NULL COMMENT '目标数据源名称',
template_content JSON NOT NULL COMMENT '模板内容',
built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
created_by VARCHAR(255) COMMENT '创建者',
updated_by VARCHAR(255) COMMENT '更新者',
INDEX idx_source_target (source_type, target_type)
) COMMENT='数据归集模板配置表';
INSERT IGNORE INTO t_dc_collection_templates(id, name, description, source_type, source_name, target_type, target_name, template_content, built_in, created_by, updated_by)
VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {}, "reader": {}, "writer": {}}', True, 'system', 'system'),
('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input"},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input"},"accessKey": {"name": "访问密钥","description": "OBS访问密钥。","type": "input"},"secretKey": {"name": "密钥","description": "OBS密钥。","type": "input"},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input"}}, "reader": {}, "writer": {}}', True, 'system', 'system');
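The parameter block in the seeded OBS template is self-describing (name/description/type per field), so a frontend or validator can walk it to build a form. An illustrative sketch; the literal below is a trimmed copy of the template_content seeded for template id '2':

    import json

    # Hypothetical: fetched from t_dc_collection_templates.template_content for id '2'
    row_content = '{"parameter": {"endpoint": {"name": "服务地址", "description": "OBS的服务地址。", "type": "input"}}}'
    parameters = json.loads(row_content)["parameter"]
    for key, meta in parameters.items():
        print(f"{key}: {meta['name']} ({meta['type']}) - {meta['description']}")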

View File

@@ -1,9 +1,26 @@
FROM maven:3-eclipse-temurin-8 AS datax-builder
RUN apt-get update && \
apt-get install -y git && \
git clone https://github.com/alibaba/DataX.git
COPY runtime/datax/ DataX/
RUN cd DataX && \
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
mvn -U clean package assembly:assembly -Dmaven.test.skip=true
FROM python:3.12-slim
# Multi-stage image (DataX is built in the Maven stage above) with build cache optimization using BuildKit cache mounts.
# Note: to use the cache mount syntax you must build with BuildKit enabled:
# DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python
RUN apt-get update \
&& apt-get install -y --no-install-recommends openjdk-21-jre-headless \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
# Poetry configuration
@@ -12,7 +29,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
POETRY_VIRTUALENVS_CREATE=false \
POETRY_CACHE_DIR=/tmp/poetry_cache
ENV PATH="/root/.local/bin:$PATH"
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"
WORKDIR /app
@@ -22,6 +41,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
&& pip install --root-user-action=ignore pipx \
&& pipx install "poetry==$POETRY_VERSION"
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
# Copy only dependency files first (leverages layer caching when dependencies don't change)
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/

View File

@@ -1,28 +1,19 @@
FROM maven:3-eclipse-temurin-21 AS builder
RUN apt-get update && \
apt-get install -y git && \
git clone https://github.com/ModelEngine-Group/Terrabase.git && \
cd Terrabase && \
git -c core.quotepath=false -c log.showSignature=false checkout -b pyh/feat_terrabase_develop origin/pyh/feat_terrabase_develop -- && \
mvn -U clean package install -Dmaven.test.skip=true
COPY backend/ /opt/gateway
RUN cd /opt/gateway/api-gateway && \
mvn -U clean package -Dmaven.test.skip=true && \
ls /opt/gateway/api-gateway/target
mvn -U clean package -Dmaven.test.skip=true
FROM eclipse-temurin:21-jdk
RUN apt-get update && \
apt-get install -y vim wget curl python3 python3-pip python-is-python3 dos2unix && \
apt-get install -y vim wget curl dos2unix && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/gateway/api-gateway/target/gateway.jar /opt/gateway/gateway.jar
COPY --from=builder /Terrabase/enterprise-impl-commercial/target/*.jar /opt/terrabase/
COPY scripts/images/gateway/start.sh /opt/gateway/start.sh