init datamate

This commit is contained in:
Dallas98
2025-10-21 23:00:48 +08:00
commit 1c97afed7d
692 changed files with 135442 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

View File

@@ -0,0 +1,87 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>data-mate-platform</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>data-cleaning-service</artifactId>
<name>Data Cleaning Service</name>
<description>数据清洗服务</description>
<dependencies>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.26.1</version>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct</artifactId>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
<scope>provided</scope> <!-- Needed at compile time only; not required at runtime -->
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-commons</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,22 @@
package com.datamate.cleaning;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Spring Boot configuration class for the data cleaning service.
 *
 * Enables asynchronous execution ({@code @EnableAsync}) and scheduling
 * ({@code @EnableScheduling}), and component-scans the {@code com.datamate.cleaning}
 * and {@code com.datamate.shared} packages.
 */
@SpringBootApplication
@EnableAsync
@EnableScheduling
@ComponentScan(basePackages = {
"com.datamate.cleaning",
"com.datamate.shared"
})
public class DataCleaningServiceConfiguration {
// Configuration class for JAR packaging - no main method needed
}

View File

@@ -0,0 +1,120 @@
package com.datamate.cleaning.application.httpclient;
import com.datamate.cleaning.domain.model.CreateDatasetRequest;
import com.datamate.cleaning.domain.model.DatasetResponse;
import com.datamate.cleaning.domain.model.PagedDatasetFileResponse;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.ErrorCodeImpl;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.PageRequest;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.text.MessageFormat;
import java.time.Duration;
import java.util.Map;
import java.util.stream.Collectors;
@Slf4j
public class DatasetClient {
private static final String BASE_URL = "http://localhost:8080/api";
private static final String CREATE_DATASET_URL = BASE_URL + "/data-management/datasets";
private static final String GET_DATASET_URL = BASE_URL + "/data-management/datasets/{0}";
private static final String GET_DATASET_FILE_URL = BASE_URL + "/data-management/datasets/{0}/files";
private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
static {
OBJECT_MAPPER.registerModule(new JavaTimeModule());
}
public static DatasetResponse createDataset(String name, String type) {
CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
createDatasetRequest.setName(name);
createDatasetRequest.setDatasetType(type);
String jsonPayload;
try {
jsonPayload = OBJECT_MAPPER.writeValueAsString(createDatasetRequest);
} catch (IOException e) {
log.error("Error occurred while converting the object.", e);
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(CREATE_DATASET_URL))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
.build();
return sendAndReturn(request, DatasetResponse.class);
}
public static DatasetResponse getDataset(String datasetId) {
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(MessageFormat.format(GET_DATASET_URL, datasetId)))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.GET()
.build();
return sendAndReturn(request, DatasetResponse.class);
}
public static PagedDatasetFileResponse getDatasetFile(String datasetId, PageRequest page) {
String url = buildQueryParams(MessageFormat.format(GET_DATASET_FILE_URL, datasetId),
Map.of("page", page.getPageNumber(), "size", page.getPageSize()));
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(url))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.GET()
.build();
return sendAndReturn(request, PagedDatasetFileResponse.class);
}
private static <T> T sendAndReturn(HttpRequest request, Class<T> clazz) {
try {
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
int statusCode = response.statusCode();
String responseBody = response.body();
JsonNode jsonNode = OBJECT_MAPPER.readTree(responseBody);
if (statusCode < 200 || statusCode >= 300) {
String code = jsonNode.get("code").asText();
String message = jsonNode.get("message").asText();
throw BusinessException.of(ErrorCodeImpl.of(code, message));
}
return OBJECT_MAPPER.treeToValue(jsonNode.get("data"), clazz);
} catch (IOException | InterruptedException e) {
log.error("Error occurred while making the request.", e);
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
}
private static String buildQueryParams(String baseUrl, Map<String, Object> params) {
if (params == null || params.isEmpty()) {
return baseUrl;
}
String queryString = params.entrySet().stream()
.map(entry -> entry.getKey() + entry.getValue().toString())
.collect(Collectors.joining("&"));
return baseUrl + (baseUrl.contains("?") ? "&" : "?") + queryString;
}
}

View File

@@ -0,0 +1,54 @@
package com.datamate.cleaning.application.httpclient;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.text.MessageFormat;
import java.time.Duration;
/**
 * Static HTTP client for the runtime service that executes cleaning tasks.
 *
 * <p>Both operations are body-less POST requests; only the HTTP status code is inspected.
 */
@Slf4j
public class RuntimeClient {
    private static final String BASE_URL = "http://runtime:8081/api";

    private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";

    private static final String STOP_TASK_URL = BASE_URL + "/task/{0}/stop";

    private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();

    /**
     * Asks the runtime to start the given task.
     *
     * @param taskId id of the task to submit
     * @throws BusinessException if the call fails or returns a non-2xx status
     */
    public static void submitTask(String taskId) {
        send(MessageFormat.format(CREATE_TASK_URL, taskId));
    }

    /**
     * Asks the runtime to stop the given task.
     *
     * @param taskId id of the task to stop
     * @throws BusinessException if the call fails or returns a non-2xx status
     */
    public static void stopTask(String taskId) {
        send(MessageFormat.format(STOP_TASK_URL, taskId));
    }

    /**
     * POSTs an empty body to {@code url}; a non-2xx status is reported as {@code SYSTEM_BUSY},
     * a transport failure or interruption as {@code UNKNOWN_ERROR}.
     */
    private static void send(String url) {
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(url))
                .timeout(Duration.ofSeconds(30))
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();

        try {
            HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
            int statusCode = response.statusCode();

            if (statusCode < 200 || statusCode >= 300) {
                log.error("Request failed with status code: {}", statusCode);
                throw BusinessException.of(SystemErrorCode.SYSTEM_BUSY);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so pool threads / callers can still react to it.
            Thread.currentThread().interrupt();
            log.error("Error occurred while making the request.", e);
            throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
        } catch (IOException e) {
            log.error("Error occurred while making the request.", e);
            throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
        }
    }
}

View File

@@ -0,0 +1,40 @@
package com.datamate.cleaning.application.scheduler;
import com.datamate.cleaning.application.httpclient.RuntimeClient;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@Service
@RequiredArgsConstructor
public class CleaningTaskScheduler {
private final CleaningTaskMapper cleaningTaskMapper;
private final ExecutorService taskExecutor = Executors.newFixedThreadPool(5);
public void executeTask(String taskId) {
taskExecutor.submit(() -> submitTask(taskId));
}
private void submitTask(String taskId) {
CleaningTask task = new CleaningTask();
task.setId(taskId);
task.setStatus(CleaningTask.StatusEnum.RUNNING);
task.setStartedAt(LocalDateTime.now());
cleaningTaskMapper.updateTask(task);
RuntimeClient.submitTask(taskId);
}
public void stopTask(String taskId) {
RuntimeClient.stopTask(taskId);
CleaningTask task = new CleaningTask();
task.setId(taskId);
task.setStatus(CleaningTask.StatusEnum.STOPPED);
cleaningTaskMapper.updateTask(task);
}
}

View File

@@ -0,0 +1,186 @@
package com.datamate.cleaning.application.service;
import com.datamate.cleaning.application.httpclient.DatasetClient;
import com.datamate.cleaning.application.scheduler.CleaningTaskScheduler;
import com.datamate.cleaning.domain.converter.OperatorInstanceConverter;
import com.datamate.cleaning.domain.model.DatasetResponse;
import com.datamate.cleaning.domain.model.ExecutorType;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import com.datamate.cleaning.domain.model.PagedDatasetFileResponse;
import com.datamate.cleaning.domain.model.TaskProcess;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.yaml.snakeyaml.DumperOptions;
import org.yaml.snakeyaml.Yaml;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
 * Application service for cleaning tasks: listing, creation, deletion, execution and stopping.
 *
 * <p>Creating a task writes two working files under {@code /flow/<taskId>/}:
 * {@code process.yaml} (the operator pipeline) and {@code dataset.jsonl} (one JSON object per
 * source file), then hands the task to the {@link CleaningTaskScheduler}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class CleaningTaskService {
    /** Root directory under which the export path of each destination dataset is built. */
    private static final String DATASET_PATH = "/dataset";

    /** Root directory holding the per-task working files. */
    private static final String FLOW_PATH = "/flow";

    /** Shared mapper for the JSONL lines; ObjectMapper is thread-safe once configured. */
    private static final ObjectMapper JSONL_MAPPER = new ObjectMapper();

    private final CleaningTaskMapper cleaningTaskMapper;

    private final OperatorInstanceMapper operatorInstanceMapper;

    private final CleaningResultMapper cleaningResultMapper;

    private final CleaningTaskScheduler taskScheduler;

    /**
     * Returns one page of tasks.
     *
     * @param status optional status filter
     * @param keywords optional keyword filter
     * @param page zero-based page index (offset is {@code page * size})
     * @param size page size
     * @return the tasks of the requested page
     */
    public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
        Integer offset = page * size;
        return cleaningTaskMapper.findTasks(status, keywords, size, offset);
    }

    /**
     * Counts the tasks matching the filters.
     *
     * <p>NOTE(review): this loads every matching row and counts in memory; a dedicated count
     * query in the mapper would be cheaper.
     */
    public int countTasks(String status, String keywords) {
        return cleaningTaskMapper.findTasks(status, keywords, null, null).size();
    }

    /**
     * Creates the destination dataset, persists the task and its operator instances, writes the
     * working files and queues the task for execution.
     *
     * <p>NOTE(review): the task is queued for asynchronous execution before this transaction
     * commits, so the scheduler's status update may race with the insert — confirm whether the
     * hand-off should happen after commit.
     *
     * @param request task definition
     * @return the persisted task (status PENDING at return time)
     */
    @Transactional
    public CleaningTask createTask(CreateCleaningTaskRequest request) {
        DatasetResponse destDataset = DatasetClient.createDataset(request.getDestDatasetName(),
                request.getDestDatasetType());

        DatasetResponse srcDataset = DatasetClient.getDataset(request.getSrcDatasetId());

        CleaningTask task = new CleaningTask();
        task.setName(request.getName());
        task.setDescription(request.getDescription());
        task.setStatus(CleaningTask.StatusEnum.PENDING);
        String taskId = UUID.randomUUID().toString();
        task.setId(taskId);
        task.setSrcDatasetId(request.getSrcDatasetId());
        task.setSrcDatasetName(request.getSrcDatasetName());
        task.setDestDatasetId(destDataset.getId());
        task.setDestDatasetName(destDataset.getName());
        task.setBeforeSize(srcDataset.getTotalSize());
        cleaningTaskMapper.insertTask(task);

        List<OperatorInstancePo> instancePos = request.getInstance().stream()
                .map(OperatorInstanceConverter.INSTANCE::operatorToDo).toList();
        operatorInstanceMapper.insertInstance(taskId, instancePos);

        prepareTask(task, request.getInstance());
        scanDataset(taskId, request.getSrcDatasetId());
        executeTask(taskId);
        return task;
    }

    /** Returns the task with the given id, as looked up by the mapper. */
    public CleaningTask getTask(String taskId) {
        return cleaningTaskMapper.findTaskById(taskId);
    }

    /** Deletes the task together with its operator instances and cleaning results. */
    @Transactional
    public void deleteTask(String taskId) {
        cleaningTaskMapper.deleteTask(taskId);
        operatorInstanceMapper.deleteByInstanceId(taskId);
        cleaningResultMapper.deleteByInstanceId(taskId);
    }

    /** Queues the task for asynchronous execution. */
    public void executeTask(String taskId) {
        taskScheduler.executeTask(taskId);
    }

    /**
     * Writes {@code /flow/<taskId>/process.yaml} describing the operator pipeline of the task.
     * Property names are emitted in snake_case.
     */
    private void prepareTask(CleaningTask task, List<OperatorInstance> instances) {
        TaskProcess process = new TaskProcess();
        process.setInstanceId(task.getId());
        process.setDatasetId(task.getDestDatasetId());
        process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
        process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
        process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
        process.setProcess(instances.stream()
                .map(instance -> Map.of(instance.getId(), instance.getOverrides()))
                .toList());

        // The mapper is only used to turn the bean into a snake_case Map; SnakeYAML does the dump.
        ObjectMapper jsonMapper = new ObjectMapper(new YAMLFactory());
        jsonMapper.setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE);
        JsonNode jsonNode = jsonMapper.valueToTree(process);

        DumperOptions options = new DumperOptions();
        options.setIndent(2);
        options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
        Yaml yaml = new Yaml(options);

        File file = new File(FLOW_PATH + "/" + process.getInstanceId() + "/process.yaml");
        file.getParentFile().mkdirs();

        // Explicit UTF-8: before Java 18 FileWriter falls back to the platform charset, which
        // can corrupt non-ASCII content.
        try (FileWriter writer = new FileWriter(file, java.nio.charset.StandardCharsets.UTF_8)) {
            yaml.dump(jsonMapper.treeToValue(jsonNode, Map.class), writer);
        } catch (IOException e) {
            log.error("Failed to prepare process.yaml.", e);
            throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
        }
    }

    /**
     * Pages through all files of the source dataset and writes them to
     * {@code /flow/<taskId>/dataset.jsonl}, one JSON object per line.
     */
    private void scanDataset(String taskId, String srcDatasetId) {
        int pageSize = 500;
        String jsonlPath = FLOW_PATH + "/" + taskId + "/dataset.jsonl";
        int pageNumber = 0;
        boolean hasMore = true;
        while (hasMore) {
            // Build the page request inside the loop: it used to be created once, so every
            // iteration re-fetched page 0.
            PagedDatasetFileResponse datasetFile =
                    DatasetClient.getDatasetFile(srcDatasetId, PageRequest.of(pageNumber, pageSize));
            if (datasetFile == null || datasetFile.getContent() == null
                    || datasetFile.getContent().isEmpty()) {
                break;
            }
            List<Map<String, Object>> files = datasetFile.getContent().stream()
                    .map(content -> Map.of("fileName", (Object) content.getFileName(),
                            "fileSize", content.getFileSize(),
                            "filePath", content.getFilePath(),
                            "fileType", content.getFileType(),
                            "fileId", content.getId()))
                    .toList();
            // First page truncates/creates the file, later pages append to it.
            writeListMapToJsonlFile(files, jsonlPath, pageNumber > 0);
            pageNumber += 1;
            Integer totalPages = datasetFile.getTotalPages();
            hasMore = totalPages != null && pageNumber < totalPages;
        }
    }

    /**
     * Writes each map as one JSON line. Lines are separated by a line break with no trailing
     * break; when {@code append} is true the new lines are added after the existing content.
     */
    private void writeListMapToJsonlFile(List<Map<String, Object>> mapList, String fileName, boolean append) {
        try (BufferedWriter writer = new BufferedWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8, append))) {
            // When appending, the previous page ended without a line break, so start with one.
            boolean needsSeparator = append;
            for (Map<String, Object> entry : mapList) {
                if (needsSeparator) {
                    writer.newLine();
                }
                writer.write(JSONL_MAPPER.writeValueAsString(entry));
                needsSeparator = true;
            }
        } catch (IOException e) {
            log.error("Failed to prepare dataset.jsonl.", e);
            throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
        }
    }

    /** Stops the task via the scheduler. */
    public void stopTask(String taskId) {
        taskScheduler.stopTask(taskId);
    }
}

View File

@@ -0,0 +1,95 @@
package com.datamate.cleaning.application.service;
import com.datamate.cleaning.domain.converter.OperatorInstanceConverter;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import com.datamate.cleaning.domain.model.TemplateWithInstance;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
import com.datamate.cleaning.interfaces.dto.OperatorResponse;
import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
@Service
@RequiredArgsConstructor
public class CleaningTemplateService {
private final CleaningTemplateMapper cleaningTemplateMapper;
private final OperatorInstanceMapper operatorInstanceMapper;
public List<CleaningTemplate> getTemplates(String keywords) {
List<OperatorResponse> allOperators = cleaningTemplateMapper.findAllOperators();
Map<String, OperatorResponse> operatorsMap = allOperators.stream()
.collect(Collectors.toMap(OperatorResponse::getId, Function.identity()));
List<TemplateWithInstance> allTemplates = cleaningTemplateMapper.findAllTemplates(keywords);
Map<String, List<TemplateWithInstance>> templatesMap = allTemplates.stream()
.collect(Collectors.groupingBy(TemplateWithInstance::getId));
return templatesMap.entrySet().stream().map(twi -> {
List<TemplateWithInstance> value = twi.getValue();
CleaningTemplate template = new CleaningTemplate();
template.setId(twi.getKey());
template.setName(value.get(0).getName());
template.setDescription(value.get(0).getDescription());
template.setInstance(value.stream().filter(v -> StringUtils.isNotBlank(v.getOperatorId()))
.sorted(Comparator.comparingInt(TemplateWithInstance::getOpIndex))
.map(v -> {
OperatorResponse operator = operatorsMap.get(v.getOperatorId());
if (StringUtils.isNotBlank(v.getSettingsOverride())) {
operator.setSettings(v.getSettingsOverride());
}
return operator;
}).toList());
template.setCreatedAt(value.get(0).getCreatedAt());
template.setUpdatedAt(value.get(0).getUpdatedAt());
return template;
}).toList();
}
@Transactional
public CleaningTemplate createTemplate(CreateCleaningTemplateRequest request) {
CleaningTemplate template = new CleaningTemplate();
String templateId = UUID.randomUUID().toString();
template.setId(templateId);
template.setName(request.getName());
template.setDescription(request.getDescription());
cleaningTemplateMapper.insertTemplate(template);
List<OperatorInstancePo> instancePos = request.getInstance().stream()
.map(OperatorInstanceConverter.INSTANCE::operatorToDo).toList();
operatorInstanceMapper.insertInstance(templateId, instancePos);
return template;
}
public CleaningTemplate getTemplate(String templateId) {
return cleaningTemplateMapper.findTemplateById(templateId);
}
@Transactional
public CleaningTemplate updateTemplate(String templateId, UpdateCleaningTemplateRequest request) {
CleaningTemplate template = cleaningTemplateMapper.findTemplateById(templateId);
if (template != null) {
template.setName(request.getName());
template.setDescription(request.getDescription());
cleaningTemplateMapper.updateTemplate(template);
}
return template;
}
@Transactional
public void deleteTemplate(String templateId) {
cleaningTemplateMapper.deleteTemplate(templateId);
operatorInstanceMapper.deleteByInstanceId(templateId);
}
}

View File

@@ -0,0 +1,33 @@
package com.datamate.cleaning.domain.converter;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;
import java.util.Map;
/**
 * MapStruct converter from the API-level {@link OperatorInstance} to the persistence-level
 * {@link OperatorInstancePo}. The {@code overrides} map is stored as a JSON string.
 */
@Mapper
public interface OperatorInstanceConverter {
    OperatorInstanceConverter INSTANCE = Mappers.getMapper(OperatorInstanceConverter.class);

    /**
     * Converts an operator instance, serializing its {@code overrides} through {@link #mapToJson}.
     */
    @Mapping(target = "overrides", source = "overrides", qualifiedByName = "mapToJson")
    OperatorInstancePo operatorToDo(OperatorInstance instance);

    /**
     * Serializes the given map to a JSON string.
     *
     * @param objects the map to serialize
     * @return the JSON representation of {@code objects}
     * @throws BusinessException with {@code UNKNOWN_ERROR} if serialization fails
     */
    @Named("mapToJson")
    static String mapToJson(Map<String, Object> objects) {
        final String json;
        try {
            json = new ObjectMapper().writeValueAsString(objects);
        } catch (JsonProcessingException serializationFailure) {
            throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
        }
        return json;
    }
}

View File

@@ -0,0 +1,26 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.List;
/**
 * Request payload for creating a dataset. DatasetClient.createDataset fills in
 * {@code name} and {@code datasetType} and serializes this object to JSON.
 */
@Getter
@Setter
@NoArgsConstructor
public class CreateDatasetRequest {
/** Dataset name. */
private String name;
/** Dataset description. */
private String description;
/** Dataset type. */
private String datasetType;
/** List of tags. */
private List<String> tags;
/** Data source. */
private String dataSource;
/** Target location. */
private String targetLocation;
}

View File

@@ -0,0 +1,36 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.time.LocalDateTime;
/**
 * One file entry of a dataset, used as the element type of
 * {@code PagedDatasetFileResponse.content}.
 */
@Getter
@Setter
@NoArgsConstructor
public class DatasetFileResponse {
/** File ID. */
private String id;
/** File name. */
private String fileName;
/** Original file name. */
private String originalName;
/** File type. */
private String fileType;
/** File size in bytes. */
private Long fileSize;
/** File status. */
private String status;
/** File description. */
private String description;
/** File path. */
private String filePath;
/** Upload time. */
private LocalDateTime uploadTime;
/** Last access time, going by the field name; the original comment said "last update time" — TODO confirm. */
private LocalDateTime lastAccessTime;
/** Uploader. */
private String uploadedBy;
}

View File

@@ -0,0 +1,44 @@
package com.datamate.cleaning.domain.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.time.LocalDateTime;
/**
 * Dataset entity (aligned with the database table t_dm_datasets).
 *
 * Unknown JSON properties are ignored during deserialization.
 */
@Getter
@Setter
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class DatasetResponse {
/** Dataset ID. */
private String id;
/** Dataset name. */
private String name;
/** Dataset description. */
private String description;
/** Dataset type. */
private String datasetType;
/** Dataset status. */
private String status;
/** Data source. */
private String dataSource;
/** Target location. */
private String targetLocation;
/** Number of files. */
private Integer fileCount;
/** Total size in bytes. */
private Long totalSize;
/** Completion rate (0-100). */
private Float completionRate;
/** Creation time. */
private LocalDateTime createdAt;
/** Last update time. */
private LocalDateTime updatedAt;
/** Creator. */
private String createdBy;
}

View File

@@ -0,0 +1,23 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
import java.util.List;
/**
 * Response DTO describing a dataset type.
 */
@Getter
@Setter
public class DatasetTypeResponse {
/** Type code. */
private String code;
/** Type name. */
private String name;
/** Type description. */
private String description;
/** Supported file formats. */
private List<String> supportedFormats;
/** Icon. */
private String icon;
}

View File

@@ -0,0 +1,25 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
/**
 * Executor backends a cleaning task can run on, each identified by its wire value.
 */
@Getter
public enum ExecutorType {
    DATA_PLATFORM("data_platform"),
    DATA_JUICER_RAY("ray"),
    DATA_JUICER_DEFAULT("default");

    /** Wire value of this executor type. */
    private final String value;

    ExecutorType(String value) {
        this.value = value;
    }

    /**
     * Looks up the constant whose wire value equals {@code value}.
     *
     * @param value the wire value to resolve
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has that value (including {@code null})
     */
    public static ExecutorType fromValue(String value) {
        ExecutorType[] candidates = ExecutorType.values();
        int index = 0;
        while (index < candidates.length) {
            ExecutorType candidate = candidates[index];
            if (candidate.value.equals(value)) {
                return candidate;
            }
            index++;
        }
        throw new IllegalArgumentException("Unexpected value '" + value + "'");
    }
}

View File

@@ -0,0 +1,13 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
/**
 * Persistence-side form of an operator instance, produced by
 * OperatorInstanceConverter.operatorToDo and passed to OperatorInstanceMapper.insertInstance.
 */
@Getter
@Setter
public class OperatorInstancePo {
// Operator id.
private String id;
// Operator setting overrides as a JSON string (the converter serializes the source map).
private String overrides;
}

View File

@@ -0,0 +1,28 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.List;
/**
 * One page of dataset files as returned by DatasetClient.getDatasetFile.
 */
@Getter
@Setter
@NoArgsConstructor
public class PagedDatasetFileResponse {
/** Files contained in this page. */
private List<DatasetFileResponse> content;
/** Current page number. */
private Integer page;
/** Page size. */
private Integer size;
/** Total number of elements. */
private Integer totalElements;
/** Total number of pages. */
private Integer totalPages;
/** Whether this is the first page. */
private Boolean first;
/** Whether this is the last page. */
private Boolean last;
}

View File

@@ -0,0 +1,24 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
import java.util.List;
import java.util.Map;
/**
 * Description of a cleaning run. CleaningTaskService.prepareTask fills it in and writes it,
 * with snake_case property names, to the task's process.yaml.
 */
@Getter
@Setter
public class TaskProcess {
// Set to the cleaning task id.
private String instanceId;
// Set to the destination dataset id.
private String datasetId;
// Path of the dataset.jsonl file listing the source files.
private String datasetPath;
// Path under which the destination dataset is exported.
private String exportPath;
// Wire value of the ExecutorType to run on.
private String executorType;
// Operator steps: each element maps an operator id to its overrides.
private List<Map<String, Map<String, Object>>> process;
}

View File

@@ -0,0 +1,30 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
import java.time.LocalDateTime;
/**
 * Row type returned by CleaningTemplateMapper.findAllTemplates. CleaningTemplateService groups
 * the rows by {@code id}, takes the template fields from the first row of each group and the
 * operator fields from every row that has a non-blank {@code operatorId}.
 */
@Getter
@Setter
public class TemplateWithInstance {
// Template id (grouping key).
private String id;
// Template name.
private String name;
// Template description.
private String description;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
// Id of the operator this row refers to; rows with a blank value are ignored by the service.
private String operatorId;
// Sort key used to order the operators within a template.
private Integer opIndex;
// Per-template settings that replace the operator's settings when non-blank.
private String settingsOverride;
}

View File

@@ -0,0 +1,19 @@
package com.datamate.cleaning.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Error codes specific to the cleaning service; each constant carries a code and a message.
 */
@Getter
@AllArgsConstructor
public enum CleanErrorCode implements ErrorCode {
/**
 * A cleaning task with the same name already exists.
 */
DUPLICATE_TASK_NAME("clean.0001", "清洗任务名称重复"),
/**
 * Creating the dataset failed.
 */
CREATE_DATASET_FAILED("clean.0002", "创建数据集失败");
private final String code;
private final String message;
}

View File

@@ -0,0 +1,9 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
/**
 * MyBatis mapper for cleaning results. The SQL is not visible here (presumably defined in an
 * XML mapper file — verify).
 */
@Mapper
public interface CleaningResultMapper {
// Deletes the results stored under the given instance id; CleaningTaskService.deleteTask passes the task id.
void deleteByInstanceId(@Param("instanceId") String instanceId);
}

View File

@@ -0,0 +1,21 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
/**
 * MyBatis mapper for cleaning tasks. The SQL is not visible here (presumably defined in an
 * XML mapper file — verify).
 */
@Mapper
public interface CleaningTaskMapper {
// Finds tasks by optional status/keywords. CleaningTaskService.countTasks passes null for
// size and offset and uses the size of the returned list as the total count.
List<CleaningTask> findTasks(@Param("status") String status, @Param("keywords") String keywords,
@Param("size") Integer size, @Param("offset") Integer offset);
// Looks up a single task by id.
CleaningTask findTaskById(@Param("taskId") String taskId);
// Inserts a new task.
void insertTask(CleaningTask task);
// Updates a task; the scheduler calls this with only id, status and (optionally) startedAt set.
void updateTask(CleaningTask task);
// Deletes the task with the given id.
void deleteTask(@Param("taskId") String taskId);
}

View File

@@ -0,0 +1,25 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import com.datamate.cleaning.domain.model.TemplateWithInstance;
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
import com.datamate.cleaning.interfaces.dto.OperatorResponse;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
/**
 * MyBatis mapper for cleaning templates and the operators they reference. The SQL is not
 * visible here (presumably defined in an XML mapper file — verify).
 */
@Mapper
public interface CleaningTemplateMapper {
// Returns template rows filtered by keywords; each row appears to pair a template with one of
// its operator instances (see TemplateWithInstance) — confirm against the SQL.
List<TemplateWithInstance> findAllTemplates(@Param("keywords") String keywords);
// Returns all operators; the service indexes them by id.
List<OperatorResponse> findAllOperators();
// Looks up a single template by id.
CleaningTemplate findTemplateById(@Param("templateId") String templateId);
// Inserts a new template.
void insertTemplate(CleaningTemplate template);
// Updates an existing template.
void updateTemplate(CleaningTemplate template);
// Deletes the template with the given id.
void deleteTemplate(@Param("templateId") String templateId);
}

View File

@@ -0,0 +1,17 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
/**
 * MyBatis mapper for operator instances. The owning "instance id" is a cleaning task id or a
 * cleaning template id, depending on the caller. The SQL is not visible here (presumably
 * defined in an XML mapper file — verify).
 */
@Mapper
public interface OperatorInstanceMapper {
// Stores the given operator instances under the owning id.
void insertInstance(@Param("instanceId") String instanceId,
@Param("instances") List<OperatorInstancePo> instances);
// Deletes every operator instance stored under the owning id.
void deleteByInstanceId(@Param("instanceId") String instanceId);
}

View File

@@ -0,0 +1,59 @@
package com.datamate.cleaning.interfaces.api;
import com.datamate.cleaning.application.service.CleaningTaskService;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.interfaces.PagedResponse;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoints for cleaning tasks, mounted at {@code /cleaning/tasks}.
 *
 * <p>Each handler delegates to {@code CleaningTaskService} and wraps the outcome in the
 * project's {@code Response} envelope with HTTP 200.
 */
@RestController
@RequestMapping("/cleaning/tasks")
@RequiredArgsConstructor
public class CleaningTaskController {
    private final CleaningTaskService cleaningTaskService;

    /**
     * Lists cleaning tasks one page at a time.
     *
     * @param page     requested page index, forwarded unchanged to the service
     *                 (NOTE(review): presumably zero-based, as in the template listing; confirm)
     * @param size     page size; a non-positive value yields {@code totalPages == 0}
     * @param status   optional status filter
     * @param keywords optional keyword filter
     * @return the page content together with the total match count and total page count
     */
    @GetMapping
    public ResponseEntity<Response<PagedResponse<CleaningTask>>> cleaningTasksGet(
            @RequestParam("page") Integer page,
            @RequestParam("size") Integer size,
            @RequestParam(value = "status", required = false) String status,
            @RequestParam(value = "keywords", required = false) String keywords) {
        List<CleaningTask> tasks = cleaningTaskService.getTasks(status, keywords, page, size);
        int count = cleaningTaskService.countTasks(status, keywords);
        // Ceiling division without the overflow risk of (count + size - 1) / size.
        // The previous formula, (count + size + 1) / size, reported one page too many
        // whenever count was a multiple of size, and divided by zero for size == 0.
        int totalPages = size > 0 ? count / size + (count % size == 0 ? 0 : 1) : 0;
        return ResponseEntity.ok(Response.ok(PagedResponse.of(tasks, page, count, totalPages)));
    }

    /**
     * Creates a cleaning task from the request body.
     *
     * @param request task definition supplied by the client
     * @return the task returned by the service
     */
    @PostMapping
    public ResponseEntity<Response<CleaningTask>> cleaningTasksPost(@RequestBody CreateCleaningTaskRequest request) {
        return ResponseEntity.ok(Response.ok(cleaningTaskService.createTask(request)));
    }

    /**
     * Asks the service to stop the given task.
     *
     * @param taskId identifier of the task
     * @return an empty success envelope
     */
    @PostMapping("/{taskId}/stop")
    public ResponseEntity<Response<Object>> cleaningTasksStop(@PathVariable("taskId") String taskId) {
        cleaningTaskService.stopTask(taskId);
        return ResponseEntity.ok(Response.ok(null));
    }

    /**
     * Asks the service to execute the given task.
     *
     * @param taskId identifier of the task
     * @return an empty success envelope
     */
    @PostMapping("/{taskId}/execute")
    public ResponseEntity<Response<Object>> cleaningTasksStart(@PathVariable("taskId") String taskId) {
        cleaningTaskService.executeTask(taskId);
        return ResponseEntity.ok(Response.ok(null));
    }

    /**
     * Fetches a single task.
     *
     * @param taskId identifier of the task
     * @return the task returned by the service
     */
    @GetMapping("/{taskId}")
    public ResponseEntity<Response<CleaningTask>> cleaningTasksTaskIdGet(@PathVariable("taskId") String taskId) {
        return ResponseEntity.ok(Response.ok(cleaningTaskService.getTask(taskId)));
    }

    /**
     * Deletes a single task.
     *
     * @param taskId identifier of the task
     * @return an empty success envelope
     */
    @DeleteMapping("/{taskId}")
    public ResponseEntity<Response<Object>> cleaningTasksTaskIdDelete(@PathVariable("taskId") String taskId) {
        cleaningTaskService.deleteTask(taskId);
        return ResponseEntity.ok(Response.ok(null));
    }
}

View File

@@ -0,0 +1,74 @@
package com.datamate.cleaning.interfaces.api;
import com.datamate.cleaning.application.service.CleaningTemplateService;
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.interfaces.PagedResponse;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.util.Comparator;
import java.util.List;
/**
 * REST endpoints for cleaning templates, mounted at {@code /cleaning/templates}.
 *
 * <p>Each handler delegates to {@code CleaningTemplateService}.
 */
@RestController
@RequestMapping("/cleaning/templates")
@RequiredArgsConstructor
public class CleaningTemplateController {
    private final CleaningTemplateService cleaningTemplateService;

    /**
     * Lists cleaning templates, newest first.
     *
     * <p>Paging is done in memory on the list returned by the service. When either
     * {@code page} or {@code size} is omitted, the complete sorted list is returned.
     *
     * @param page    optional zero-based page index ({@code page * size} elements are skipped)
     * @param size    optional page size; a value of zero yields an empty page and
     *                {@code totalPages == 0}
     * @param keyword optional keyword filter, passed to the service
     * @return all templates, or the requested page with total count and total page count
     */
    @GetMapping
    public ResponseEntity<Response<PagedResponse<CleaningTemplate>>> cleaningTemplatesGet(
            @RequestParam(value = "page", required = false) Integer page,
            @RequestParam(value = "size", required = false) Integer size,
            @RequestParam(value = "keywords", required = false) String keyword) {
        // Sort once up front; both the unpaged and the paged response use the same order.
        List<CleaningTemplate> sorted = cleaningTemplateService.getTemplates(keyword).stream()
                .sorted(Comparator.comparing(CleaningTemplate::getCreatedAt).reversed())
                .toList();
        if (page == null || size == null) {
            return ResponseEntity.ok(Response.ok(PagedResponse.of(sorted)));
        }
        int count = sorted.size();
        // Ceiling division without the overflow risk of (count + size - 1) / size.
        // The previous formula, (count + size + 1) / size, reported one page too many
        // whenever count was a multiple of size, and divided by zero for size == 0.
        int totalPages = size > 0 ? count / size + (count % size == 0 ? 0 : 1) : 0;
        List<CleaningTemplate> pageItems = sorted.stream()
                .skip((long) page * size)
                .limit(size)
                .toList();
        return ResponseEntity.ok(Response.ok(PagedResponse.of(pageItems, page, count, totalPages)));
    }

    /**
     * Creates a cleaning template from the request body.
     *
     * @param request template definition supplied by the client
     * @return the template returned by the service
     */
    @PostMapping
    public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesPost(
            @RequestBody CreateCleaningTemplateRequest request) {
        return ResponseEntity.ok(Response.ok(cleaningTemplateService.createTemplate(request)));
    }

    /**
     * Fetches a single template.
     *
     * @param templateId identifier of the template
     * @return the template returned by the service
     */
    @GetMapping("/{templateId}")
    public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesTemplateIdGet(
            @PathVariable("templateId") String templateId) {
        return ResponseEntity.ok(Response.ok(cleaningTemplateService.getTemplate(templateId)));
    }

    /**
     * Updates a single template.
     *
     * @param templateId identifier of the template
     * @param request    new template content
     * @return the template returned by the service
     */
    @PutMapping("/{templateId}")
    public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesTemplateIdPut(
            @PathVariable("templateId") String templateId, @RequestBody UpdateCleaningTemplateRequest request) {
        return ResponseEntity.ok(Response.ok(cleaningTemplateService.updateTemplate(templateId, request)));
    }

    /**
     * Deletes a single template.
     *
     * @param templateId identifier of the template
     * @return HTTP 204 with no body
     */
    @DeleteMapping("/{templateId}")
    public ResponseEntity<Response<Object>> cleaningTemplatesTemplateIdDelete(
            @PathVariable("templateId") String templateId) {
        cleaningTemplateService.deleteTemplate(templateId);
        return ResponseEntity.noContent().build();
    }
}

View File

@@ -0,0 +1,20 @@
package com.datamate.cleaning.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
/**
 * Progress figures of a cleaning task: one progress value plus two file counters.
 *
 * <p>Used as the type of {@code CleaningTask#progress}. Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class CleaningProcess {
/** Overall progress value. NOTE(review): scale (0-1 fraction vs. 0-100 percent) is not visible here; confirm. */
private Float process;
/** Presumably the total number of files covered by the task; verify against the code that fills this DTO. */
private Integer totalFileNum;
/** Presumably the number of files already processed; verify against the code that fills this DTO. */
private Integer finishedFileNum;
}

View File

@@ -0,0 +1,92 @@
package com.datamate.cleaning.interfaces.dto;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import java.time.LocalDateTime;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
/**
 * Cleaning task as exposed by the cleaning task API.
 *
 * <p>Also used as the result type of the task queries on {@code t_clean_task}.
 * Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class CleaningTask {
    /** Task identifier. */
    private String id;

    /** Task name. */
    private String name;

    /** Free-text description. */
    private String description;

    /** Identifier of the source dataset. */
    private String srcDatasetId;

    /** Name of the source dataset. */
    private String srcDatasetName;

    /** Identifier of the destination dataset. */
    private String destDatasetId;

    /** Name of the destination dataset. */
    private String destDatasetName;

    /** Size before cleaning. NOTE(review): unit is not visible here; confirm. */
    private long beforeSize;

    /** Size after cleaning. NOTE(review): unit is not visible here; confirm. */
    private long afterSize;

    /**
     * Current status of the task.
     *
     * <p>Serialized to and parsed from JSON through its string {@link #getValue() value}.
     */
    public enum StatusEnum {
        PENDING("PENDING"),
        RUNNING("RUNNING"),
        COMPLETED("COMPLETED"),
        STOPPED("STOPPED"),
        FAILED("FAILED");

        private final String value;

        StatusEnum(String value) {
            this.value = value;
        }

        /**
         * @return the string form written to JSON
         */
        @JsonValue
        public String getValue() {
            return value;
        }

        /**
         * Resolves a status from its string form.
         *
         * @param value string form to look up
         * @return the constant whose value equals {@code value}
         * @throws IllegalArgumentException if no constant matches
         */
        @JsonCreator
        public static StatusEnum fromValue(String value) {
            for (StatusEnum candidate : values()) {
                if (candidate.getValue().equals(value)) {
                    return candidate;
                }
            }
            throw new IllegalArgumentException("Unexpected value '" + value + "'");
        }
    }

    /** Current status of the task. */
    private StatusEnum status;

    /** Identifier of the associated template. */
    private String templateId;

    /** Operators attached to this task. */
    private List<OperatorResponse> instance;

    /** Progress figures of the task. */
    private CleaningProcess progress;

    /** Creation timestamp. */
    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime createdAt;

    /** Start timestamp. */
    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime startedAt;

    /** Finish timestamp. */
    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
    private LocalDateTime finishedAt;
}

View File

@@ -0,0 +1,33 @@
package com.datamate.cleaning.interfaces.dto;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
/**
 * Cleaning template as exposed by the cleaning template API.
 *
 * <p>Also used as the result type of the by-id query on {@code t_clean_template}.
 * Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class CleaningTemplate {
/** Template identifier. */
private String id;
/** Template name. */
private String name;
/** Free-text description. */
private String description;
/** Operators attached to this template; starts as an empty list rather than null. */
private List<OperatorResponse> instance = new ArrayList<>();
/** Creation timestamp. */
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
/** Last-update timestamp. */
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,32 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
 * Request body for creating a cleaning task.
 *
 * <p>Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class CreateCleaningTaskRequest {
/** Name of the task to create. */
private String name;
/** Free-text description. */
private String description;
/** Identifier of the source dataset. */
private String srcDatasetId;
/** Name of the source dataset. */
private String srcDatasetName;
/** Name of the destination dataset. */
private String destDatasetName;
/** Type of the destination dataset. NOTE(review): the accepted values are not visible here; confirm. */
private String destDatasetType;
/** Operator instances for the task; starts as an empty list rather than null. */
private List<OperatorInstance> instance = new ArrayList<>();
}

View File

@@ -0,0 +1,23 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
 * Request body for creating a cleaning template.
 *
 * <p>Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class CreateCleaningTemplateRequest {
/** Name of the template to create. */
private String name;
/** Free-text description. */
private String description;
/** Operator instances for the template; starts as an empty list rather than null. */
private List<OperatorInstance> instance = new ArrayList<>();
}

View File

@@ -0,0 +1,22 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
import lombok.Setter;
/**
 * One operator entry inside a create/update request, with optional per-use setting overrides.
 *
 * <p>Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class OperatorInstance {
/** Identifier of the operator. NOTE(review): appears to be stored as operator_id in t_operator_instance; confirm. */
private String id;
/** Override values keyed by name; starts as an empty map rather than null. */
private Map<String, Object> overrides = new HashMap<>();
}

View File

@@ -0,0 +1,41 @@
package com.datamate.cleaning.interfaces.dto;
import java.time.LocalDateTime;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
/**
 * Operator description returned as part of tasks and templates.
 *
 * <p>Also used as the result type of the operator query on {@code t_operator}.
 * Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class OperatorResponse {
/** Operator identifier. */
private String id;
/** Operator name. */
private String name;
/** Free-text description. */
private String description;
/** Operator version string. */
private String version;
/** Inputs of the operator, carried as a string. NOTE(review): encoding is not visible here; confirm. */
private String inputs;
/** Outputs of the operator, carried as a string. NOTE(review): encoding is not visible here; confirm. */
private String outputs;
/** Runtime information, carried as a string. NOTE(review): encoding is not visible here; confirm. */
private String runtime;
/** Operator settings, carried as a string. NOTE(review): encoding is not visible here; confirm. */
private String settings;
/** Star flag; may be null because it is a boxed Boolean. */
private Boolean isStar;
/** Creation timestamp. */
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
/** Last-update timestamp. */
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,26 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
 * Request body for updating a cleaning template.
 *
 * <p>Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class UpdateCleaningTemplateRequest {
/** Template identifier carried in the body. NOTE(review): the endpoint also takes the id from the path; confirm which one wins. */
private String id;
/** New template name. */
private String name;
/** New free-text description. */
private String description;
/** Operator instances for the template; starts as an empty list rather than null. */
private List<OperatorInstance> instance = new ArrayList<>();
}

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper">
<!-- Deletes every row of t_clean_result whose instance_id equals the given instanceId. -->
<delete id="deleteByInstanceId">
DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
</delete>
</mapper>

View File

@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
    <!-- Columns of t_clean_task shared by the task SELECT statements. -->
    <sql id="Base_Column_List">
        id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
        after_size, status, created_at, started_at, finished_at
    </sql>

    <!--
      Lists tasks, newest first, optionally filtered by exact status and by a substring of the name.
      Paging is applied only when both size and offset are supplied.
    -->
    <select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
        SELECT <include refid="Base_Column_List"/> FROM t_clean_task
        <where>
            <if test="status != null and status != ''">
                AND status = #{status}
            </if>
            <!-- The guard tests keywords itself; it previously tested status, so an empty status disabled the keyword filter. -->
            <if test="keywords != null and keywords != ''">
                AND name LIKE CONCAT('%', #{keywords}, '%')
            </if>
        </where>
        ORDER BY created_at DESC
        <if test="size != null and offset != null">
            <!-- Bound as statement parameters instead of being spliced into the SQL text. -->
            LIMIT #{size} OFFSET #{offset}
        </if>
    </select>

    <!-- Loads a single task by its id. -->
    <select id="findTaskById" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
        SELECT <include refid="Base_Column_List"/> FROM t_clean_task WHERE id = #{taskId}
    </select>

    <!-- Inserts a new task; created_at is set by the database. All values are bound parameters. -->
    <insert id="insertTask">
        INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
        dest_dataset_name, before_size, after_size, created_at)
        VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
        #{destDatasetName}, #{beforeSize}, #{afterSize}, NOW())
    </insert>

    <!--
      Updates status, started_at and finished_at, each only when the corresponding value is non-null.
      NOTE(review): when all three are null the SET clause is empty and the statement is invalid;
      callers are expected to supply at least one of them.
    -->
    <update id="updateTask">
        UPDATE t_clean_task
        <set>
            <if test="status != null">
                status = #{status.value},
            </if>
            <if test="startedAt != null">
                started_at = #{startedAt},
            </if>
            <if test="finishedAt != null">
                finished_at = #{finishedAt},
            </if>
        </set>
        WHERE id = #{id}
    </update>

    <!-- Deletes a single task by its id. -->
    <delete id="deleteTask">
        DELETE FROM t_clean_task WHERE id = #{taskId}
    </delete>
</mapper>

View File

@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper">
    <!--
      Lists templates joined with their operator instances (one row per template/operator pair,
      or a single row with null operator columns for a template without operators), newest first.
      An optional keywords value filters on a substring of the name.
    -->
    <select id="findAllTemplates" resultType="com.datamate.cleaning.domain.model.TemplateWithInstance">
        SELECT t.id AS id, name, description, created_at, updated_at, created_by, operator_id, op_index, settings_override
        FROM t_clean_template t LEFT JOIN t_operator_instance o ON t.id = o.instance_id
        <where>
            <!-- The guard tests keywords itself; it previously referenced status, which this statement uses nowhere else. -->
            <if test="keywords != null and keywords != ''">
                AND name LIKE CONCAT('%', #{keywords}, '%')
            </if>
        </where>
        ORDER BY created_at DESC
    </select>

    <!-- Loads every operator from t_operator. -->
    <select id="findAllOperators" resultType="com.datamate.cleaning.interfaces.dto.OperatorResponse">
        SELECT id, name, description, version, inputs, outputs, runtime, settings, is_star, created_at, updated_at
        FROM t_operator
    </select>

    <!-- Loads a single template row by its id. -->
    <select id="findTemplateById" resultType="com.datamate.cleaning.interfaces.dto.CleaningTemplate">
        SELECT * FROM t_clean_template WHERE id = #{templateId}
    </select>

    <!-- Inserts a new template; created_at is set by the database. -->
    <insert id="insertTemplate">
        INSERT INTO t_clean_template (id, name, description, created_at)
        VALUES (#{id}, #{name}, #{description}, NOW())
    </insert>

    <!-- Overwrites name and description and refreshes updated_at. -->
    <update id="updateTemplate">
        UPDATE t_clean_template SET name = #{name}, description = #{description}, updated_at = NOW() WHERE id = #{id}
    </update>

    <!-- Deletes a single template by its id. -->
    <delete id="deleteTemplate">
        DELETE FROM t_clean_template WHERE id = #{templateId}
    </delete>
</mapper>

View File

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper">
    <!--
      Inserts one row per element of instances in a single multi-row INSERT.
      op_index is the element's position in the collection plus one, so it starts at 1.
      NOTE(review): an empty instances collection produces an INSERT without any VALUES tuple;
      callers are expected to skip the call in that case.
    -->
    <insert id="insertInstance">
        INSERT INTO t_operator_instance(instance_id, operator_id, op_index, settings_override)
        VALUES
        <foreach collection="instances" item="operator" separator="," index="index">
            (#{instanceId}, #{operator.id}, #{index} + 1, #{operator.overrides})
        </foreach>
    </insert>

    <!-- Deletes every operator row that belongs to the given instanceId. No trailing semicolon, matching the other mappers. -->
    <delete id="deleteByInstanceId">
        DELETE FROM t_operator_instance
        WHERE instance_id = #{instanceId}
    </delete>
</mapper>