You've already forked DataMate
feature:mysql数据库归集为csv文件 (#76)
* fix:配比任务需要能够跳转到目标数据集 * feature:增加配比任务详情接口 * fix:删除不存在的配比详情页面 * fix:使用正式的逻辑来展示标签 * fix:参数默认值去掉多余的- * fix:修复配比任务相关操作 * fix:去除不需要的日志打印和import * feature:数据归集创建时将obs、mysql归集也放出 * refactor:重构数据归集的代码 * refactor:重构数据归集的代码 * feature:增加实现mysql归集为csv文件
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
package com.datamate.collection.application;
|
||||
|
||||
import com.datamate.collection.common.enums.TemplateType;
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
import com.datamate.collection.domain.model.entity.TaskExecution;
|
||||
import com.datamate.collection.common.enums.TaskStatus;
|
||||
@@ -9,6 +10,7 @@ import com.datamate.collection.domain.repository.TaskExecutionRepository;
|
||||
import com.datamate.datamanagement.application.DatasetApplicationService;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
@@ -50,7 +52,9 @@ public class TaskExecutionService {
|
||||
executionRepository.completeExecution(executionId, TaskStatus.SUCCESS.name(), LocalDateTime.now(),
|
||||
0, 0L, 0L, 0L, null);
|
||||
collectionTaskRepository.updateStatus(task.getId(), TaskStatus.SUCCESS.name());
|
||||
if (StringUtils.isNotBlank(datasetId)) {
|
||||
datasetApplicationService.processDataSourceAsync(datasetId, task.getId());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("DataX execution failed", e);
|
||||
executionRepository.completeExecution(executionId, TaskStatus.FAILED.name(), LocalDateTime.now(),
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
// java
|
||||
package com.datamate.collection.infrastructure.datax;
|
||||
|
||||
import com.datamate.collection.common.enums.TemplateType;
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
import com.datamate.collection.domain.process.ProcessRunner;
|
||||
import com.datamate.collection.infrastructure.datax.config.MysqlConfig;
|
||||
import com.datamate.collection.infrastructure.datax.config.NasConfig;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.common.infrastructure.exception.SystemErrorCode;
|
||||
@@ -15,10 +17,10 @@ import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.*;
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@@ -30,7 +32,10 @@ public class DataxProcessRunner implements ProcessRunner {
|
||||
@Override
|
||||
public int runJob(CollectionTask task, String executionId, int timeoutSeconds) throws Exception {
|
||||
Path job = buildJobFile(task);
|
||||
return runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
|
||||
int code = runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
|
||||
// 任务成功后做后处理(仅针对 MYSQL 类型)
|
||||
postProcess(task);
|
||||
return code;
|
||||
}
|
||||
|
||||
private int runJob(File jobFile, String executionId, Duration timeout) throws Exception {
|
||||
@@ -90,11 +95,12 @@ public class DataxProcessRunner implements ProcessRunner {
|
||||
switch (templateType) {
|
||||
case NAS:
|
||||
// NAS 特殊处理
|
||||
// 移除 templateType 字段
|
||||
NasConfig nasConfig = objectMapper.readValue(task.getConfig(), NasConfig.class);
|
||||
return nasConfig.toJobConfig(objectMapper, task);
|
||||
case OBS:
|
||||
case MYSQL:
|
||||
MysqlConfig mysqlConfig = objectMapper.readValue(task.getConfig(), MysqlConfig.class);
|
||||
return mysqlConfig.toJobConfig(objectMapper, task);
|
||||
default:
|
||||
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR, "Unsupported template type: " + templateType);
|
||||
}
|
||||
@@ -103,4 +109,35 @@ public class DataxProcessRunner implements ProcessRunner {
|
||||
throw new RuntimeException("Failed to parse task config", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void postProcess(CollectionTask task) throws IOException {
|
||||
if (task.getTaskType() != TemplateType.MYSQL) {
|
||||
return;
|
||||
}
|
||||
String targetPath = task.getTargetPath();
|
||||
// 将targetPath下所有不以.csv结尾的文件修改为以.csv结尾
|
||||
Path dir = Paths.get(targetPath);
|
||||
if (!Files.exists(dir) || !Files.isDirectory(dir)) {
|
||||
log.info("Target path {} does not exist or is not a directory for task {}, skip post processing.", targetPath, task.getId());
|
||||
return;
|
||||
}
|
||||
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir)) {
|
||||
for (Path path : stream) {
|
||||
if (!Files.isRegularFile(path)) continue;
|
||||
String name = path.getFileName().toString();
|
||||
if (name.toLowerCase().endsWith(".csv")) continue;
|
||||
|
||||
Path target = dir.resolve(name + ".csv");
|
||||
try {
|
||||
Files.move(path, target, StandardCopyOption.REPLACE_EXISTING);
|
||||
log.info("Renamed file for task {}: {} -> {}", task.getId(), name, target.getFileName().toString());
|
||||
} catch (IOException ex) {
|
||||
log.warn("Failed to rename file {} for task {}: {}", path, task.getId(), ex.getMessage(), ex);
|
||||
}
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Error scanning target directory {} for task {}: {}", targetPath, task.getId(), ioe.getMessage(), ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
package com.datamate.collection.infrastructure.datax.config;
|
||||
|
||||
import com.datamate.collection.domain.model.entity.CollectionTask;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class MysqlConfig {
|
||||
private String jdbcUrl;
|
||||
|
||||
private String username;
|
||||
|
||||
private String password;
|
||||
|
||||
private String querySql;
|
||||
|
||||
private List<String> headers;
|
||||
|
||||
/**
|
||||
* 将当前 MYSQL 配置构造成 DataX 所需的 job JSON 字符串。
|
||||
*/
|
||||
public String toJobConfig(ObjectMapper objectMapper, CollectionTask task) throws Exception {
|
||||
Map<String, Object> mysqlParameter = new HashMap<>();
|
||||
Map<String, Object> connection = new HashMap<>();
|
||||
if (username != null) mysqlParameter.put("username", username);
|
||||
if (password != null) mysqlParameter.put("password", password);
|
||||
if (jdbcUrl != null) connection.put("jdbcUrl", Collections.singletonList(jdbcUrl));
|
||||
if (querySql != null) connection.put("querySql", Collections.singletonList(querySql));
|
||||
mysqlParameter.put("connection", Collections.singletonList(connection));
|
||||
|
||||
Map<String, Object> job = new HashMap<>();
|
||||
Map<String, Object> content = new HashMap<>();
|
||||
Map<String, Object> reader = new HashMap<>();
|
||||
reader.put("name", "mysqlreader");
|
||||
reader.put("parameter", mysqlParameter);
|
||||
content.put("reader", reader);
|
||||
|
||||
Map<String, Object> writer = new HashMap<>();
|
||||
Map<String, Object> writerParameter = new HashMap<>();
|
||||
writer.put("name", "txtfilewriter");
|
||||
if (CollectionUtils.isNotEmpty(headers)) {
|
||||
writerParameter.put("header", headers);
|
||||
}
|
||||
writerParameter.put("path", task.getTargetPath());
|
||||
writerParameter.put("fileName", "collectionResult");
|
||||
writerParameter.put("writeMode", "truncate");
|
||||
writerParameter.put("dateFormat", "yyyy-MM-dd HH:mm:ss");
|
||||
writerParameter.put("fileFormat", "csv");
|
||||
writerParameter.put("encoding", "UTF-8");
|
||||
writerParameter.put("fieldDelimiter", ",");
|
||||
writer.put("parameter", writerParameter);
|
||||
content.put("writer", writer);
|
||||
|
||||
job.put("content", List.of(content));
|
||||
Map<String, Object> setting = new HashMap<>();
|
||||
Map<String, Object> channel = new HashMap<>();
|
||||
channel.put("channel", 1);
|
||||
setting.put("speed", channel);
|
||||
job.put("setting", setting);
|
||||
|
||||
Map<String, Object> jobConfig = new HashMap<>();
|
||||
jobConfig.put("job", job);
|
||||
return objectMapper.writeValueAsString(jobConfig);
|
||||
}
|
||||
}
|
||||
@@ -41,8 +41,7 @@ public interface CollectionTaskConverter {
|
||||
default Map<String, Object> parseJsonToMap(String json) {
|
||||
try {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
return
|
||||
objectMapper.readValue(json, Map.class);
|
||||
return objectMapper.readValue(json, Map.class);
|
||||
} catch (Exception e) {
|
||||
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
|
||||
}
|
||||
|
||||
@@ -287,19 +287,4 @@ public class DatasetApplicationService {
|
||||
log.info("获取到归集任务详情: {}", taskDetail);
|
||||
return Collections.singletonList(taskDetail.getTargetPath());
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析任务配置
|
||||
*/
|
||||
private LocalCollectionConfig parseTaskConfig(Map<String, Object> configMap) {
|
||||
try {
|
||||
if (configMap == null || configMap.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return objectMapper.convertValue(configMap, LocalCollectionConfig.class);
|
||||
} catch (Exception e) {
|
||||
log.error("解析任务配置失败", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -321,21 +321,15 @@ export default function CollectionTaskCreate() {
|
||||
{selectedTemplate === TemplateType.MYSQL && (
|
||||
<div className="grid grid-cols-2 gap-3 px-2 bg-blue-50 rounded">
|
||||
<Form.Item
|
||||
name={["config", "host"]}
|
||||
rules={[{ required: true, message: "请输入MYSQL主机名" }]}
|
||||
label="MYSQL主机名"
|
||||
name={["config", "jdbcUrl"]}
|
||||
rules={[{ required: true, message: "请输入数据库链接" }]}
|
||||
label="数据库链接"
|
||||
className="col-span-2"
|
||||
>
|
||||
<Input placeholder="192.168.1.100" />
|
||||
<Input placeholder="jdbc:mysql://localhost:3306/mysql?useUnicode=true&characterEncoding=utf8" />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={["config", "port"]}
|
||||
rules={[{ required: true, message: "请输入端口号" }]}
|
||||
label="端口号"
|
||||
>
|
||||
<Input placeholder="3306" />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={["config", "user"]}
|
||||
name={["config", "username"]}
|
||||
rules={[{ required: true, message: "请输入用户名" }]}
|
||||
label="用户名"
|
||||
>
|
||||
@@ -346,22 +340,22 @@ export default function CollectionTaskCreate() {
|
||||
rules={[{ required: true, message: "请输入密码" }]}
|
||||
label="密码"
|
||||
>
|
||||
<Input placeholder="" />
|
||||
<Input type="password" className="h-8 text-xs" placeholder="Secret Key" />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={["config", "schema"]}
|
||||
rules={[{ required: true, message: "请输入数据库" }]}
|
||||
label="数据库"
|
||||
>
|
||||
<Input placeholder="public" />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={["config", "sql"]}
|
||||
name={["config", "querySql"]}
|
||||
rules={[{ required: true, message: "请输入查询语句" }]}
|
||||
label="查询语句"
|
||||
>
|
||||
<Input placeholder="select * from your_table" />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={["config", "headers"]}
|
||||
label="列名"
|
||||
className="col-span-2"
|
||||
>
|
||||
<Select placeholder="请输入列名" mode="tags" />
|
||||
</Form.Item>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
|
||||
@@ -315,13 +315,13 @@
|
||||
<!-- </includes>-->
|
||||
<!-- <outputDirectory>datax</outputDirectory>-->
|
||||
<!-- </fileSet>-->
|
||||
<!-- <fileSet>-->
|
||||
<!-- <directory>txtfilewriter/target/datax/</directory>-->
|
||||
<!-- <includes>-->
|
||||
<!-- <include>**/*.*</include>-->
|
||||
<!-- </includes>-->
|
||||
<!-- <outputDirectory>datax</outputDirectory>-->
|
||||
<!-- </fileSet>-->
|
||||
<fileSet>
|
||||
<directory>txtfilewriter/target/datax/</directory>
|
||||
<includes>
|
||||
<include>**/*.*</include>
|
||||
</includes>
|
||||
<outputDirectory>datax</outputDirectory>
|
||||
</fileSet>
|
||||
<!-- <fileSet>-->
|
||||
<!-- <directory>ftpwriter/target/datax/</directory>-->
|
||||
<!-- <includes>-->
|
||||
|
||||
@@ -111,7 +111,7 @@
|
||||
<!-- <module>kuduwriter</module>-->
|
||||
<!-- <module>ftpwriter</module>-->
|
||||
<!-- <module>hdfswriter</module>-->
|
||||
<!-- <module>txtfilewriter</module>-->
|
||||
<module>txtfilewriter</module>
|
||||
<!-- <module>streamwriter</module>-->
|
||||
|
||||
<!-- <module>elasticsearchwriter</module>-->
|
||||
|
||||
Reference in New Issue
Block a user