init datamate

This commit is contained in:
Dallas98
2025-10-21 23:00:48 +08:00
commit 1c97afed7d
692 changed files with 135442 additions and 0 deletions

View File

@@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>data-mate-platform</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>data-annotation-service</artifactId>
<name>Data Annotation Service</name>
<description>Data annotation service</description>
<dependencies>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-websocket</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.version}</version>
</dependency>
<!-- OpenAPI Dependencies -->
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
<version>2.0.4</version>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
<version>0.2.6</version>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<!-- OpenAPI Generator Plugin -->
<plugin>
<groupId>org.openapitools</groupId>
<artifactId>openapi-generator-maven-plugin</artifactId>
<version>6.6.0</version>
<executions>
<execution>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<inputSpec>${project.basedir}/../../openapi/specs/data-annotation.yaml</inputSpec>
<generatorName>spring</generatorName>
<output>${project.build.directory}/generated-sources/openapi</output>
<apiPackage>com.datamate.annotation.interfaces.api</apiPackage>
<modelPackage>com.datamate.annotation.interfaces.dto</modelPackage>
<configOptions>
<interfaceOnly>true</interfaceOnly>
<useTags>true</useTags>
<skipDefaultInterface>true</skipDefaultInterface>
<hideGenerationTimestamp>true</hideGenerationTimestamp>
<java8>true</java8>
<dateLibrary>java8</dateLibrary>
<useBeanValidation>true</useBeanValidation>
<performBeanValidation>true</performBeanValidation>
<useSpringBoot3>true</useSpringBoot3>
<documentationProvider>springdoc</documentationProvider>
</configOptions>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

View File

@@ -0,0 +1,87 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>data-mate-platform</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>data-cleaning-service</artifactId>
<name>Data Cleaning Service</name>
<description>Data cleaning service</description>
<dependencies>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.26.1</version>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct</artifactId>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
<scope>provided</scope> <!-- needed at compile time, not at runtime -->
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-commons</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,22 @@
package com.datamate.cleaning;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Data cleaning service configuration class.
 */
@SpringBootApplication
@EnableAsync
@EnableScheduling
@ComponentScan(basePackages = {
"com.datamate.cleaning",
"com.datamate.shared"
})
public class DataCleaningServiceConfiguration {
// Configuration class for JAR packaging - no main method needed
}

View File

@@ -0,0 +1,120 @@
package com.datamate.cleaning.application.httpclient;
import com.datamate.cleaning.domain.model.CreateDatasetRequest;
import com.datamate.cleaning.domain.model.DatasetResponse;
import com.datamate.cleaning.domain.model.PagedDatasetFileResponse;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.ErrorCodeImpl;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.PageRequest;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.text.MessageFormat;
import java.time.Duration;
import java.util.Map;
import java.util.stream.Collectors;
@Slf4j
public class DatasetClient {
private static final String BASE_URL = "http://localhost:8080/api";
private static final String CREATE_DATASET_URL = BASE_URL + "/data-management/datasets";
private static final String GET_DATASET_URL = BASE_URL + "/data-management/datasets/{0}";
private static final String GET_DATASET_FILE_URL = BASE_URL + "/data-management/datasets/{0}/files";
private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
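// Register JSR-310 support below so the LocalDateTime fields in the response DTOs deserialize correctly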
static {
OBJECT_MAPPER.registerModule(new JavaTimeModule());
}
public static DatasetResponse createDataset(String name, String type) {
CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
createDatasetRequest.setName(name);
createDatasetRequest.setDatasetType(type);
String jsonPayload;
try {
jsonPayload = OBJECT_MAPPER.writeValueAsString(createDatasetRequest);
} catch (IOException e) {
log.error("Error occurred while converting the object.", e);
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(CREATE_DATASET_URL))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
.build();
return sendAndReturn(request, DatasetResponse.class);
}
public static DatasetResponse getDataset(String datasetId) {
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(MessageFormat.format(GET_DATASET_URL, datasetId)))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.GET()
.build();
return sendAndReturn(request, DatasetResponse.class);
}
public static PagedDatasetFileResponse getDatasetFile(String datasetId, PageRequest page) {
String url = buildQueryParams(MessageFormat.format(GET_DATASET_FILE_URL, datasetId),
Map.of("page", page.getPageNumber(), "size", page.getPageSize()));
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(url))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.GET()
.build();
return sendAndReturn(request, PagedDatasetFileResponse.class);
}
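// The data-management endpoints are expected to wrap their payload in a {code, message, data} envelope; non-2xx responses carry the error code and message in the same shape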
private static <T> T sendAndReturn(HttpRequest request, Class<T> clazz) {
try {
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
int statusCode = response.statusCode();
String responseBody = response.body();
JsonNode jsonNode = OBJECT_MAPPER.readTree(responseBody);
if (statusCode < 200 || statusCode >= 300) {
String code = jsonNode.get("code").asText();
String message = jsonNode.get("message").asText();
throw BusinessException.of(ErrorCodeImpl.of(code, message));
}
return OBJECT_MAPPER.treeToValue(jsonNode.get("data"), clazz);
} catch (IOException | InterruptedException e) {
log.error("Error occurred while making the request.", e);
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
}
private static String buildQueryParams(String baseUrl, Map<String, Object> params) {
if (params == null || params.isEmpty()) {
return baseUrl;
}
String queryString = params.entrySet().stream()
.map(entry -> entry.getKey() + "=" + entry.getValue())
.collect(Collectors.joining("&"));
return baseUrl + (baseUrl.contains("?") ? "&" : "?") + queryString;
}
}

View File

@@ -0,0 +1,54 @@
package com.datamate.cleaning.application.httpclient;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.text.MessageFormat;
import java.time.Duration;
@Slf4j
public class RuntimeClient {
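// "runtime" is assumed to be the hostname of the cleaning runtime container (for example a compose/Kubernetes service name) reachable from this service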
private static final String BASE_URL = "http://runtime:8081/api";
private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";
private static final String STOP_TASK_URL = BASE_URL + "/task/{0}/stop";
private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
public static void submitTask(String taskId) {
send(MessageFormat.format(CREATE_TASK_URL, taskId));
}
public static void stopTask(String taskId) {
send(MessageFormat.format(STOP_TASK_URL, taskId));
}
private static void send(String url) {
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(url))
.timeout(Duration.ofSeconds(30))
.header("Content-Type", "application/json")
.POST(HttpRequest.BodyPublishers.noBody())
.build();
try {
HttpResponse<String> response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
int statusCode = response.statusCode();
if (statusCode < 200 || statusCode >= 300) {
log.error("Request failed with status code: {}", statusCode);
throw BusinessException.of(SystemErrorCode.SYSTEM_BUSY);
}
} catch (IOException | InterruptedException e) {
log.error("Error occurred while making the request.", e);
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
}
}

View File

@@ -0,0 +1,40 @@
package com.datamate.cleaning.application.scheduler;
import com.datamate.cleaning.application.httpclient.RuntimeClient;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@Service
@RequiredArgsConstructor
public class CleaningTaskScheduler {
private final CleaningTaskMapper cleaningTaskMapper;
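// Dedicated pool so task submission to the runtime is fire-and-forget from the caller's perspective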
private final ExecutorService taskExecutor = Executors.newFixedThreadPool(5);
public void executeTask(String taskId) {
taskExecutor.submit(() -> submitTask(taskId));
}
private void submitTask(String taskId) {
CleaningTask task = new CleaningTask();
task.setId(taskId);
task.setStatus(CleaningTask.StatusEnum.RUNNING);
task.setStartedAt(LocalDateTime.now());
cleaningTaskMapper.updateTask(task);
RuntimeClient.submitTask(taskId);
}
public void stopTask(String taskId) {
RuntimeClient.stopTask(taskId);
CleaningTask task = new CleaningTask();
task.setId(taskId);
task.setStatus(CleaningTask.StatusEnum.STOPPED);
cleaningTaskMapper.updateTask(task);
}
}

View File

@@ -0,0 +1,186 @@
package com.datamate.cleaning.application.service;
import com.datamate.cleaning.application.httpclient.DatasetClient;
import com.datamate.cleaning.application.scheduler.CleaningTaskScheduler;
import com.datamate.cleaning.domain.converter.OperatorInstanceConverter;
import com.datamate.cleaning.domain.model.DatasetResponse;
import com.datamate.cleaning.domain.model.ExecutorType;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import com.datamate.cleaning.domain.model.PagedDatasetFileResponse;
import com.datamate.cleaning.domain.model.TaskProcess;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.yaml.snakeyaml.DumperOptions;
import org.yaml.snakeyaml.Yaml;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.UUID;
@Slf4j
@Service
@RequiredArgsConstructor
public class CleaningTaskService {
private final CleaningTaskMapper cleaningTaskMapper;
private final OperatorInstanceMapper operatorInstanceMapper;
private final CleaningResultMapper cleaningResultMapper;
private final CleaningTaskScheduler taskScheduler;
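// DATASET_PATH and FLOW_PATH are assumed to be volumes shared with the cleaning runtime container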
private final String DATASET_PATH = "/dataset";
private final String FLOW_PATH = "/flow";
public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
Integer offset = page * size;
return cleaningTaskMapper.findTasks(status, keywords, size, offset);
}
public int countTasks(String status, String keywords) {
return cleaningTaskMapper.findTasks(status, keywords, null, null).size();
}
@Transactional
public CleaningTask createTask(CreateCleaningTaskRequest request) {
DatasetResponse destDataset = DatasetClient.createDataset(request.getDestDatasetName(),
request.getDestDatasetType());
DatasetResponse srcDataset = DatasetClient.getDataset(request.getSrcDatasetId());
CleaningTask task = new CleaningTask();
task.setName(request.getName());
task.setDescription(request.getDescription());
task.setStatus(CleaningTask.StatusEnum.PENDING);
String taskId = UUID.randomUUID().toString();
task.setId(taskId);
task.setSrcDatasetId(request.getSrcDatasetId());
task.setSrcDatasetName(request.getSrcDatasetName());
task.setDestDatasetId(destDataset.getId());
task.setDestDatasetName(destDataset.getName());
task.setBeforeSize(srcDataset.getTotalSize());
cleaningTaskMapper.insertTask(task);
List<OperatorInstancePo> instancePos = request.getInstance().stream()
.map(OperatorInstanceConverter.INSTANCE::operatorToDo).toList();
operatorInstanceMapper.insertInstance(taskId, instancePos);
prepareTask(task, request.getInstance());
scanDataset(taskId, request.getSrcDatasetId());
executeTask(taskId);
return task;
}
public CleaningTask getTask(String taskId) {
return cleaningTaskMapper.findTaskById(taskId);
}
@Transactional
public void deleteTask(String taskId) {
cleaningTaskMapper.deleteTask(taskId);
operatorInstanceMapper.deleteByInstanceId(taskId);
cleaningResultMapper.deleteByInstanceId(taskId);
}
public void executeTask(String taskId) {
taskScheduler.executeTask(taskId);
}
private void prepareTask(CleaningTask task, List<OperatorInstance> instances) {
TaskProcess process = new TaskProcess();
process.setInstanceId(task.getId());
process.setDatasetId(task.getDestDatasetId());
process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
process.setProcess(instances.stream()
.map(instance -> Map.of(instance.getId(), instance.getOverrides()))
.toList());
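// snake_case naming so the generated process.yaml uses keys like dataset_path and export_path, which is presumably what the runtime parser expects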
ObjectMapper jsonMapper = new ObjectMapper(new YAMLFactory());
jsonMapper.setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE);
JsonNode jsonNode = jsonMapper.valueToTree(process);
DumperOptions options = new DumperOptions();
options.setIndent(2);
options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
Yaml yaml = new Yaml(options);
File file = new File(FLOW_PATH + "/" + process.getInstanceId() + "/process.yaml");
file.getParentFile().mkdirs();
try (FileWriter writer = new FileWriter(file)) {
yaml.dump(jsonMapper.treeToValue(jsonNode, Map.class), writer);
} catch (IOException e) {
log.error("Failed to prepare process.yaml.", e);
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
}
}
private void scanDataset(String taskId, String srcDatasetId) {
int pageNumber = 0;
int pageSize = 500;
PagedDatasetFileResponse datasetFile;
do {
// Rebuild the page request on every iteration; otherwise the first page would be fetched repeatedly
datasetFile = DatasetClient.getDatasetFile(srcDatasetId, PageRequest.of(pageNumber, pageSize));
if (datasetFile.getContent() == null || datasetFile.getContent().isEmpty()) {
break;
}
List<Map<String, Object>> files = datasetFile.getContent().stream()
.map(content -> Map.of("fileName", (Object) content.getFileName(),
"fileSize", content.getFileSize(),
"filePath", content.getFilePath(),
"fileType", content.getFileType(),
"fileId", content.getId()))
.toList();
writeListMapToJsonlFile(files, FLOW_PATH + "/" + taskId + "/dataset.jsonl");
pageNumber += 1;
} while (pageNumber < datasetFile.getTotalPages());
}
private void writeListMapToJsonlFile(List<Map<String, Object>> mapList, String fileName) {
ObjectMapper objectMapper = new ObjectMapper();
// Open in append mode so successive pages of the same dataset accumulate in one JSONL file
try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName, true))) {
for (Map<String, Object> item : mapList) {
writer.write(objectMapper.writeValueAsString(item));
writer.newLine();
}
} catch (IOException e) {
log.error("Failed to prepare dataset.jsonl.", e);
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
}
}
public void stopTask(String taskId) {
taskScheduler.stopTask(taskId);
}
}

View File

@@ -0,0 +1,95 @@
package com.datamate.cleaning.application.service;
import com.datamate.cleaning.domain.converter.OperatorInstanceConverter;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import com.datamate.cleaning.domain.model.TemplateWithInstance;
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper;
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
import com.datamate.cleaning.interfaces.dto.OperatorResponse;
import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
@Service
@RequiredArgsConstructor
public class CleaningTemplateService {
private final CleaningTemplateMapper cleaningTemplateMapper;
private final OperatorInstanceMapper operatorInstanceMapper;
public List<CleaningTemplate> getTemplates(String keywords) {
List<OperatorResponse> allOperators = cleaningTemplateMapper.findAllOperators();
Map<String, OperatorResponse> operatorsMap = allOperators.stream()
.collect(Collectors.toMap(OperatorResponse::getId, Function.identity()));
List<TemplateWithInstance> allTemplates = cleaningTemplateMapper.findAllTemplates(keywords);
Map<String, List<TemplateWithInstance>> templatesMap = allTemplates.stream()
.collect(Collectors.groupingBy(TemplateWithInstance::getId));
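// findAllTemplates returns one row per template/operator pair (LEFT JOIN in the mapper), so rows are regrouped by template id here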
return templatesMap.entrySet().stream().map(twi -> {
List<TemplateWithInstance> value = twi.getValue();
CleaningTemplate template = new CleaningTemplate();
template.setId(twi.getKey());
template.setName(value.get(0).getName());
template.setDescription(value.get(0).getDescription());
template.setInstance(value.stream().filter(v -> StringUtils.isNotBlank(v.getOperatorId()))
.sorted(Comparator.comparingInt(TemplateWithInstance::getOpIndex))
.map(v -> {
OperatorResponse operator = operatorsMap.get(v.getOperatorId());
if (StringUtils.isNotBlank(v.getSettingsOverride())) {
operator.setSettings(v.getSettingsOverride());
}
return operator;
}).toList());
template.setCreatedAt(value.get(0).getCreatedAt());
template.setUpdatedAt(value.get(0).getUpdatedAt());
return template;
}).toList();
}
@Transactional
public CleaningTemplate createTemplate(CreateCleaningTemplateRequest request) {
CleaningTemplate template = new CleaningTemplate();
String templateId = UUID.randomUUID().toString();
template.setId(templateId);
template.setName(request.getName());
template.setDescription(request.getDescription());
cleaningTemplateMapper.insertTemplate(template);
List<OperatorInstancePo> instancePos = request.getInstance().stream()
.map(OperatorInstanceConverter.INSTANCE::operatorToDo).toList();
operatorInstanceMapper.insertInstance(templateId, instancePos);
return template;
}
public CleaningTemplate getTemplate(String templateId) {
return cleaningTemplateMapper.findTemplateById(templateId);
}
@Transactional
public CleaningTemplate updateTemplate(String templateId, UpdateCleaningTemplateRequest request) {
CleaningTemplate template = cleaningTemplateMapper.findTemplateById(templateId);
if (template != null) {
template.setName(request.getName());
template.setDescription(request.getDescription());
cleaningTemplateMapper.updateTemplate(template);
}
return template;
}
@Transactional
public void deleteTemplate(String templateId) {
cleaningTemplateMapper.deleteTemplate(templateId);
operatorInstanceMapper.deleteByInstanceId(templateId);
}
}

View File

@@ -0,0 +1,33 @@
package com.datamate.cleaning.domain.converter;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;
import java.util.Map;
@Mapper
public interface OperatorInstanceConverter {
OperatorInstanceConverter INSTANCE = Mappers.getMapper(OperatorInstanceConverter.class);
@Mapping(target = "overrides", source = "overrides", qualifiedByName = "mapToJson")
OperatorInstancePo operatorToDo(OperatorInstance instance);
@Named("mapToJson")
static String mapToJson(Map<String, Object> objects) {
ObjectMapper objectMapper = new ObjectMapper();
try {
return objectMapper.writeValueAsString(objects);
} catch (JsonProcessingException e) {
throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
}
}
}

View File

@@ -0,0 +1,26 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.List;
@Getter
@Setter
@NoArgsConstructor
public class CreateDatasetRequest {
/** Dataset name */
private String name;
/** Dataset description */
private String description;
/** Dataset type */
private String datasetType;
/** Tag list */
private List<String> tags;
/** Data source */
private String dataSource;
/** Target location */
private String targetLocation;
}

View File

@@ -0,0 +1,36 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.time.LocalDateTime;
@Getter
@Setter
@NoArgsConstructor
public class DatasetFileResponse {
/** File ID */
private String id;
/** File name */
private String fileName;
/** Original file name */
private String originalName;
/** File type */
private String fileType;
/** File size in bytes */
private Long fileSize;
/** File status */
private String status;
/** File description */
private String description;
/** File path */
private String filePath;
/** Upload time */
private LocalDateTime uploadTime;
/** Last access time */
private LocalDateTime lastAccessTime;
/** Uploaded by */
private String uploadedBy;
}

View File

@@ -0,0 +1,44 @@
package com.datamate.cleaning.domain.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.time.LocalDateTime;
/**
 * Dataset entity (aligned with database table t_dm_datasets)
 */
@Getter
@Setter
@NoArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class DatasetResponse {
/** Dataset ID */
private String id;
/** Dataset name */
private String name;
/** Dataset description */
private String description;
/** Dataset type */
private String datasetType;
/** Dataset status */
private String status;
/** Data source */
private String dataSource;
/** Target location */
private String targetLocation;
/** Number of files */
private Integer fileCount;
/** Total size in bytes */
private Long totalSize;
/** Completion rate (0-100) */
private Float completionRate;
/** Creation time */
private LocalDateTime createdAt;
/** Update time */
private LocalDateTime updatedAt;
/** Created by */
private String createdBy;
}

View File

@@ -0,0 +1,23 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
import java.util.List;
/**
 * Dataset type response DTO
 */
@Getter
@Setter
public class DatasetTypeResponse {
/** Type code */
private String code;
/** Type name */
private String name;
/** Type description */
private String description;
/** Supported file formats */
private List<String> supportedFormats;
/** Icon */
private String icon;
}

View File

@@ -0,0 +1,25 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
@Getter
public enum ExecutorType {
DATA_PLATFORM("data_platform"),
DATA_JUICER_RAY("ray"),
DATA_JUICER_DEFAULT("default");
private final String value;
ExecutorType(String value) {
this.value = value;
}
public static ExecutorType fromValue(String value) {
for (ExecutorType type : ExecutorType.values()) {
if (type.value.equals(value)) {
return type;
}
}
throw new IllegalArgumentException("Unexpected value '" + value + "'");
}
}

View File

@@ -0,0 +1,13 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
@Getter
@Setter
public class OperatorInstancePo {
private String id;
private String overrides;
}

View File

@@ -0,0 +1,28 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.List;
@Getter
@Setter
@NoArgsConstructor
public class PagedDatasetFileResponse {
/** File content list */
private List<DatasetFileResponse> content;
/** Current page number */
private Integer page;
/** Page size */
private Integer size;
/** Total number of elements */
private Integer totalElements;
/** Total number of pages */
private Integer totalPages;
/** Whether this is the first page */
private Boolean first;
/** Whether this is the last page */
private Boolean last;
}

View File

@@ -0,0 +1,24 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
import java.util.List;
import java.util.Map;
@Getter
@Setter
public class TaskProcess {
private String instanceId;
private String datasetId;
private String datasetPath;
private String exportPath;
private String executorType;
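// Each entry maps an operator id to its settings overrides; this list becomes the "process" section of the generated process.yaml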
private List<Map<String, Map<String, Object>>> process;
}

View File

@@ -0,0 +1,30 @@
package com.datamate.cleaning.domain.model;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
import java.time.LocalDateTime;
@Getter
@Setter
public class TemplateWithInstance {
private String id;
private String name;
private String description;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
private String operatorId;
private Integer opIndex;
private String settingsOverride;
}

View File

@@ -0,0 +1,19 @@
package com.datamate.cleaning.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor;
import lombok.Getter;
@Getter
@AllArgsConstructor
public enum CleanErrorCode implements ErrorCode {
/**
 * Duplicate cleaning task name
 */
DUPLICATE_TASK_NAME("clean.0001", "清洗任务名称重复"),
CREATE_DATASET_FAILED("clean.0002", "创建数据集失败");
private final String code;
private final String message;
}

View File

@@ -0,0 +1,9 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
@Mapper
public interface CleaningResultMapper {
void deleteByInstanceId(@Param("instanceId") String instanceId);
}

View File

@@ -0,0 +1,21 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
@Mapper
public interface CleaningTaskMapper {
List<CleaningTask> findTasks(@Param("status") String status, @Param("keywords") String keywords,
@Param("size") Integer size, @Param("offset") Integer offset);
CleaningTask findTaskById(@Param("taskId") String taskId);
void insertTask(CleaningTask task);
void updateTask(CleaningTask task);
void deleteTask(@Param("taskId") String taskId);
}

View File

@@ -0,0 +1,25 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import com.datamate.cleaning.domain.model.TemplateWithInstance;
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
import com.datamate.cleaning.interfaces.dto.OperatorResponse;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
@Mapper
public interface CleaningTemplateMapper {
List<TemplateWithInstance> findAllTemplates(@Param("keywords") String keywords);
List<OperatorResponse> findAllOperators();
CleaningTemplate findTemplateById(@Param("templateId") String templateId);
void insertTemplate(CleaningTemplate template);
void updateTemplate(CleaningTemplate template);
void deleteTemplate(@Param("templateId") String templateId);
}

View File

@@ -0,0 +1,17 @@
package com.datamate.cleaning.infrastructure.persistence.mapper;
import com.datamate.cleaning.domain.model.OperatorInstancePo;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
@Mapper
public interface OperatorInstanceMapper {
void insertInstance(@Param("instanceId") String instanceId,
@Param("instances") List<OperatorInstancePo> instances);
void deleteByInstanceId(@Param("instanceId") String instanceId);
}

View File

@@ -0,0 +1,59 @@
package com.datamate.cleaning.interfaces.api;
import com.datamate.cleaning.application.service.CleaningTaskService;
import com.datamate.cleaning.interfaces.dto.CleaningTask;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.interfaces.PagedResponse;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
@RestController
@RequestMapping("/cleaning/tasks")
@RequiredArgsConstructor
public class CleaningTaskController {
private final CleaningTaskService cleaningTaskService;
@GetMapping
public ResponseEntity<Response<PagedResponse<CleaningTask>>> cleaningTasksGet(
@RequestParam("page") Integer page,
@RequestParam("size") Integer size, @RequestParam(value = "status", required = false) String status,
@RequestParam(value = "keywords", required = false) String keywords) {
List<CleaningTask> tasks = cleaningTaskService.getTasks(status, keywords, page, size);
int count = cleaningTaskService.countTasks(status, keywords);
int totalPages = (count + size - 1) / size;
return ResponseEntity.ok(Response.ok(PagedResponse.of(tasks, page, count, totalPages)));
}
@PostMapping
public ResponseEntity<Response<CleaningTask>> cleaningTasksPost(@RequestBody CreateCleaningTaskRequest request) {
return ResponseEntity.ok(Response.ok(cleaningTaskService.createTask(request)));
}
@PostMapping("/{taskId}/stop")
public ResponseEntity<Response<Object>> cleaningTasksStop(@PathVariable("taskId") String taskId) {
cleaningTaskService.stopTask(taskId);
return ResponseEntity.ok(Response.ok(null));
}
@PostMapping("/{taskId}/execute")
public ResponseEntity<Response<Object>> cleaningTasksStart(@PathVariable("taskId") String taskId) {
cleaningTaskService.executeTask(taskId);
return ResponseEntity.ok(Response.ok(null));
}
@GetMapping("/{taskId}")
public ResponseEntity<Response<CleaningTask>> cleaningTasksTaskIdGet(@PathVariable("taskId") String taskId) {
return ResponseEntity.ok(Response.ok(cleaningTaskService.getTask(taskId)));
}
@DeleteMapping("/{taskId}")
public ResponseEntity<Response<Object>> cleaningTasksTaskIdDelete(@PathVariable("taskId") String taskId) {
cleaningTaskService.deleteTask(taskId);
return ResponseEntity.ok(Response.ok(null));
}
}

View File

@@ -0,0 +1,74 @@
package com.datamate.cleaning.interfaces.api;
import com.datamate.cleaning.application.service.CleaningTemplateService;
import com.datamate.cleaning.interfaces.dto.CleaningTemplate;
import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.interfaces.PagedResponse;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.DeleteMapping;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.util.Comparator;
import java.util.List;
@RestController
@RequestMapping("/cleaning/templates")
@RequiredArgsConstructor
public class CleaningTemplateController {
private final CleaningTemplateService cleaningTemplateService;
@GetMapping
public ResponseEntity<Response<PagedResponse<CleaningTemplate>>> cleaningTemplatesGet(
@RequestParam(value = "page", required = false) Integer page,
@RequestParam(value = "size", required = false) Integer size,
@RequestParam(value = "keywords", required = false) String keyword) {
List<CleaningTemplate> templates = cleaningTemplateService.getTemplates(keyword);
if (page == null || size == null) {
return ResponseEntity.ok(Response.ok(PagedResponse.of(templates.stream()
.sorted(Comparator.comparing(CleaningTemplate::getCreatedAt).reversed()).toList())));
}
int count = templates.size();
int totalPages = (count + size - 1) / size;
List<CleaningTemplate> limitTemplates = templates.stream()
.sorted(Comparator.comparing(CleaningTemplate::getCreatedAt).reversed())
.skip((long) page * size)
.limit(size).toList();
return ResponseEntity.ok(Response.ok(PagedResponse.of(limitTemplates, page, count, totalPages)));
}
@PostMapping
public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesPost(
@RequestBody CreateCleaningTemplateRequest request) {
return ResponseEntity.ok(Response.ok(cleaningTemplateService.createTemplate(request)));
}
@GetMapping("/{templateId}")
public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesTemplateIdGet(
@PathVariable("templateId") String templateId) {
return ResponseEntity.ok(Response.ok(cleaningTemplateService.getTemplate(templateId)));
}
@PutMapping("/{templateId}")
public ResponseEntity<Response<CleaningTemplate>> cleaningTemplatesTemplateIdPut(
@PathVariable("templateId") String templateId, @RequestBody UpdateCleaningTemplateRequest request) {
return ResponseEntity.ok(Response.ok(cleaningTemplateService.updateTemplate(templateId, request)));
}
@DeleteMapping("/{templateId}")
public ResponseEntity<Response<Object>> cleaningTemplatesTemplateIdDelete(
@PathVariable("templateId") String templateId) {
cleaningTemplateService.deleteTemplate(templateId);
return ResponseEntity.noContent().build();
}
}

View File

@@ -0,0 +1,20 @@
package com.datamate.cleaning.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
/**
* CleaningProcess
*/
@Getter
@Setter
public class CleaningProcess {
private Float process;
private Integer totalFileNum;
private Integer finishedFileNum;
}

View File

@@ -0,0 +1,92 @@
package com.datamate.cleaning.interfaces.dto;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import java.time.LocalDateTime;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
/**
* CleaningTask
*/
@Getter
@Setter
public class CleaningTask {
private String id;
private String name;
private String description;
private String srcDatasetId;
private String srcDatasetName;
private String destDatasetId;
private String destDatasetName;
private long beforeSize;
private long afterSize;
/**
 * Current status of the task
 */
public enum StatusEnum {
PENDING("PENDING"),
RUNNING("RUNNING"),
COMPLETED("COMPLETED"),
STOPPED("STOPPED"),
FAILED("FAILED");
private final String value;
StatusEnum(String value) {
this.value = value;
}
@JsonValue
public String getValue() {
return value;
}
@JsonCreator
public static StatusEnum fromValue(String value) {
for (StatusEnum b : StatusEnum.values()) {
if (b.value.equals(value)) {
return b;
}
}
throw new IllegalArgumentException("Unexpected value '" + value + "'");
}
}
private StatusEnum status;
private String templateId;
private List<OperatorResponse> instance;
private CleaningProcess progress;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime startedAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime finishedAt;
}

View File

@@ -0,0 +1,33 @@
package com.datamate.cleaning.interfaces.dto;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
/**
* CleaningTemplate
*/
@Getter
@Setter
public class CleaningTemplate {
private String id;
private String name;
private String description;
private List<OperatorResponse> instance = new ArrayList<>();
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,32 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
* CreateCleaningTaskRequest
*/
@Getter
@Setter
public class CreateCleaningTaskRequest {
private String name;
private String description;
private String srcDatasetId;
private String srcDatasetName;
private String destDatasetName;
private String destDatasetType;
private List<OperatorInstance> instance = new ArrayList<>();
}

View File

@@ -0,0 +1,23 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
* CreateCleaningTemplateRequest
*/
@Getter
@Setter
public class CreateCleaningTemplateRequest {
private String name;
private String description;
private List<OperatorInstance> instance = new ArrayList<>();
}

View File

@@ -0,0 +1,22 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
import lombok.Setter;
/**
* OperatorInstance
*/
@Getter
@Setter
public class OperatorInstance {
private String id;
private Map<String, Object> overrides = new HashMap<>();
}

View File

@@ -0,0 +1,41 @@
package com.datamate.cleaning.interfaces.dto;
import java.time.LocalDateTime;
import lombok.Getter;
import lombok.Setter;
import org.springframework.format.annotation.DateTimeFormat;
/**
* OperatorResponse
*/
@Getter
@Setter
public class OperatorResponse {
private String id;
private String name;
private String description;
private String version;
private String inputs;
private String outputs;
private String runtime;
private String settings;
private Boolean isStar;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime createdAt;
@DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,26 @@
package com.datamate.cleaning.interfaces.dto;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
/**
* UpdateCleaningTemplateRequest
*/
@Getter
@Setter
public class UpdateCleaningTemplateRequest {
private String id;
private String name;
private String description;
private List<OperatorInstance> instance = new ArrayList<>();
}

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper">
<delete id="deleteByInstanceId">
DELETE FROM t_clean_result WHERE instance_id = #{instanceId}
</delete>
</mapper>

View File

@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper">
<sql id="Base_Column_List">
id, name, description, src_dataset_id, src_dataset_name, dest_dataset_id, dest_dataset_name, before_size,
after_size, status, created_at, started_at, finished_at
</sql>
<select id="findTasks" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
SELECT <include refid="Base_Column_List"/> FROM t_clean_task
<where>
<if test="status != null and status != ''">
AND status = #{status}
</if>
<if test="keywords != null and status != ''">
AND name LIKE CONCAT('%', #{keywords}, '%')
</if>
</where>
ORDER BY created_at DESC
<if test="size != null and offset != null">
LIMIT #{size} OFFSET #{offset}
</if>
</select>
<select id="findTaskById" resultType="com.datamate.cleaning.interfaces.dto.CleaningTask">
SELECT <include refid="Base_Column_List"/> FROM t_clean_task WHERE id = #{taskId}
</select>
<insert id="insertTask">
INSERT INTO t_clean_task (id, name, description, status, src_dataset_id, src_dataset_name, dest_dataset_id,
dest_dataset_name, before_size, after_size, created_at)
VALUES (#{id}, #{name}, #{description}, #{status}, #{srcDatasetId}, #{srcDatasetName}, #{destDatasetId},
#{destDatasetName}, #{beforeSize}, #{afterSize}, NOW())
</insert>
<update id="updateTask">
UPDATE t_clean_task
<set>
<if test="status != null">
status = #{status.value},
</if>
<if test="startedAt != null">
started_at = #{startedAt},
</if>
<if test="finishedAt != null">
finished_at = #{finishedAt},
</if>
</set>
WHERE id = #{id}
</update>
<delete id="deleteTask">
DELETE FROM t_clean_task WHERE id = #{taskId}
</delete>
</mapper>

View File

@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper">
<select id="findAllTemplates" resultType="com.datamate.cleaning.domain.model.TemplateWithInstance">
SELECT t.id AS id, name, description, created_at, updated_at, created_by, operator_id, op_index, settings_override
FROM t_clean_template t LEFT JOIN t_operator_instance o ON t.id = o.instance_id
<where>
<if test="keywords != null and status != ''">
AND name LIKE CONCAT('%', #{keywords}, '%')
</if>
</where>
ORDER BY created_at DESC
</select>
<select id="findAllOperators" resultType="com.datamate.cleaning.interfaces.dto.OperatorResponse">
SELECT id, name, description, version, inputs, outputs, runtime, settings, is_star, created_at, updated_at
FROM t_operator
</select>
<select id="findTemplateById" resultType="com.datamate.cleaning.interfaces.dto.CleaningTemplate">
SELECT * FROM t_clean_template WHERE id = #{templateId}
</select>
<insert id="insertTemplate">
INSERT INTO t_clean_template (id, name, description, created_at)
VALUES (#{id}, #{name}, #{description}, NOW())
</insert>
<update id="updateTemplate">
UPDATE t_clean_template SET name = #{name}, description = #{description}, updated_at = NOW() WHERE id = #{id}
</update>
<delete id="deleteTemplate">
DELETE FROM t_clean_template WHERE id = #{templateId}
</delete>
</mapper>

View File

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper">
<insert id="insertInstance">
INSERT INTO t_operator_instance(instance_id, operator_id, op_index, settings_override)
VALUES
<foreach collection="instances" item="operator" separator="," index="index">
(#{instanceId}, #{operator.id}, #{index} + 1, #{operator.overrides})
</foreach>
</insert>
<delete id="deleteByInstanceId">
DELETE FROM t_operator_instance
WHERE instance_id = #{instanceId};
</delete>
</mapper>

View File

@@ -0,0 +1,229 @@
# Data Collection Service
DataX-based data collection and synchronization service that moves data between multiple data sources.
## Features
- 🔗 **Multiple data sources**: supports mainstream databases such as MySQL, PostgreSQL, Oracle and SQL Server
- 📊 **Task management**: create, configure, execute and monitor data synchronization tasks
- **Scheduling**: cron-expression based scheduled tasks
- 📈 **Real-time monitoring**: task progress, status and performance metrics
- 📝 **Execution logs**: detailed logs for every task run
- 🔌 **Pluggable**: DataX Reader/Writer plugin integration
## Technical Architecture
- **Framework**: Spring Boot 3.x
- **Database**: MySQL + MyBatis
- **Sync engine**: DataX
- **API**: generated from OpenAPI 3.0 specs
- **Architecture style**: DDD (domain-driven design)
## Project Structure
```
src/main/java/com/datamate/collection/
├── DataCollectionApplication.java # Application entry point
├── domain/ # Domain layer
│ ├── model/ # Domain models
│ │ ├── DataSource.java # Data source entity
│ │ ├── CollectionTask.java # Collection task entity
│ │ ├── TaskExecution.java # Task execution record
│ │ └── ExecutionLog.java # Execution log
│ └── service/ # Domain services
│ ├── DataSourceService.java
│ ├── CollectionTaskService.java
│ ├── TaskExecutionService.java
│ └── impl/ # Service implementations
├── infrastructure/ # Infrastructure layer
│ ├── config/ # Configuration classes
│ ├── datax/ # DataX execution engine
│ │ └── DataXExecutionEngine.java
│ └── persistence/ # Persistence
│ ├── mapper/ # MyBatis mappers
│ └── typehandler/ # Type handlers
└── interfaces/ # Interface layer
├── api/ # OpenAPI-generated interfaces
├── dto/ # OpenAPI-generated DTOs
└── rest/ # REST controllers
├── DataSourceController.java
├── CollectionTaskController.java
├── TaskExecutionController.java
└── exception/ # Exception handling
src/main/resources/
├── mappers/ # MyBatis XML mapping files
├── application.properties # Application configuration
└── ...
```
## Requirements
- Java 17+
- Maven 3.6+
- MySQL 8.0+
- DataX 3.0+
- Redis (optional, for caching)
## Configuration
### Application configuration (application.properties)
```properties
# Server port
server.port=8090
# Database configuration
spring.datasource.url=jdbc:mysql://localhost:3306/knowledge_base
spring.datasource.username=root
spring.datasource.password=123456
# DataX configuration
datax.home=/runtime/datax
datax.python.path=/runtime/datax/bin/datax.py
datax.job.timeout=7200
datax.job.memory=2g
```
### DataX setup
Make sure DataX is installed and configured correctly:
1. Download DataX into the `/runtime/datax` directory
2. Configure the required Reader/Writer plugins
3. Make sure a Python environment is available (a rough invocation sketch follows below)
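For a rough sense of how a job run can be launched, here is a minimal sketch that shells out to `datax.py` with Apache Commons Exec (already a dependency of this module). It is only a sketch: the `DataxInvocationSketch` class and `runDataxJob` helper are hypothetical and not the actual `DataXExecutionEngine` implementation; the paths and timeout mirror `datax.python.path` and `datax.job.timeout` from the configuration above.
```java
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteWatchdog;
import org.apache.commons.exec.PumpStreamHandler;

import java.io.ByteArrayOutputStream;
import java.time.Duration;

public final class DataxInvocationSketch {
    /** Runs one DataX job file and returns the process output; pythonPath mirrors datax.python.path above. */
    public static String runDataxJob(String pythonPath, String jobJsonPath, Duration timeout) throws Exception {
        CommandLine cmd = new CommandLine("python");
        cmd.addArgument(pythonPath);   // e.g. /runtime/datax/bin/datax.py
        cmd.addArgument(jobJsonPath);  // job JSON generated for one collection task

        ByteArrayOutputStream stdout = new ByteArrayOutputStream();
        DefaultExecutor executor = new DefaultExecutor();
        executor.setStreamHandler(new PumpStreamHandler(stdout));
        executor.setWatchdog(new ExecuteWatchdog(timeout.toMillis())); // enforce datax.job.timeout

        executor.execute(cmd); // throws ExecuteException when DataX exits with a non-zero code
        return stdout.toString();
    }
}
```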
## Database Initialization
Run the database initialization script:
```bash
mysql -u root -p knowledge_base < scripts/db/data-collection-init.sql
```
## Build and Run
### 1. Compile
```bash
cd backend/services/data-collection-service
mvn clean compile
```
This triggers OpenAPI code generation.
### 2. Package
```bash
mvn clean package -DskipTests
```
### 3. Run
Run as a standalone service:
```bash
java -jar target/data-collection-service-1.0.0-SNAPSHOT.jar
```
Or start everything through main-application:
```bash
cd backend/services/main-application
mvn spring-boot:run
```
## API Documentation
Once the service is running, the API documentation is available at:
- Swagger UI: http://localhost:8090/swagger-ui.html
- OpenAPI JSON: http://localhost:8090/v3/api-docs
## Main API Endpoints
A minimal calling example follows the endpoint list below.
### Data source management
- `GET /api/v1/collection/datasources` - list data sources
- `POST /api/v1/collection/datasources` - create a data source
- `GET /api/v1/collection/datasources/{id}` - get data source details
- `PUT /api/v1/collection/datasources/{id}` - update a data source
- `DELETE /api/v1/collection/datasources/{id}` - delete a data source
- `POST /api/v1/collection/datasources/{id}/test` - test a connection
### Collection task management
- `GET /api/v1/collection/tasks` - list tasks
- `POST /api/v1/collection/tasks` - create a task
- `GET /api/v1/collection/tasks/{id}` - get task details
- `PUT /api/v1/collection/tasks/{id}` - update a task
- `DELETE /api/v1/collection/tasks/{id}` - delete a task
### Task execution management
- `POST /api/v1/collection/tasks/{id}/execute` - execute a task
- `POST /api/v1/collection/tasks/{id}/stop` - stop a task
- `GET /api/v1/collection/executions` - list execution history
- `GET /api/v1/collection/executions/{executionId}` - get execution details
- `GET /api/v1/collection/executions/{executionId}/logs` - get execution logs
### Monitoring
- `GET /api/v1/collection/monitor/statistics` - get statistics
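As a quick smoke test of the execution endpoints, a minimal `java.net.http` call (the same HTTP client style used elsewhere in the platform) might look like the sketch below; the port comes from `server.port=8090` above and the task id is a placeholder.
```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;

public final class TriggerTaskExample {
    public static void main(String[] args) throws Exception {
        String taskId = "replace-with-a-real-task-id"; // hypothetical task id
        HttpClient client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8090/api/v1/collection/tasks/" + taskId + "/execute"))
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```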
## Development Guide
### Adding a new data source type
1. Add the new type to the `DataSource.DataSourceType` enum
2. Add the corresponding Reader/Writer mapping in `DataXExecutionEngine`
3. Update the database schema and seed data
### Custom DataX plugins
1. Place the plugin under the `/runtime/datax/plugin` directory
2. Configure the plugin mapping in `DataXExecutionEngine`
3. Adjust the configuration template to match the plugin's requirements
### Extending monitoring metrics
1. Add the new statistics logic in `StatisticsService`
2. Update the `CollectionStatistics` DTO
3. Add the corresponding statistics tables or columns in the database
## Troubleshooting
### Common issues
1. **DataX execution fails**
   - Check the DataX installation path and the Python environment
   - Verify the data source connection configuration
   - Inspect the execution logs for the detailed error
2. **Database connection fails**
   - Check the database configuration and network connectivity
   - Verify the database user's permissions
3. **API calls fail**
   - Check the request parameter format
   - Inspect the application logs for the detailed error
### Viewing logs
```bash
# Application log
tail -f logs/data-collection-service.log
# Task execution logs
curl http://localhost:8090/api/v1/collection/executions/{executionId}/logs
```
## Contributing
1. Fork the project
2. Create a feature branch: `git checkout -b feature/new-feature`
3. Commit your changes: `git commit -am 'Add new feature'`
4. Push the branch: `git push origin feature/new-feature`
5. Open a Pull Request
## License
MIT License

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

View File

@@ -0,0 +1,200 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>data-mate-platform</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>data-collection-service</artifactId>
<packaging>jar</packaging>
<name>Data Collection Service</name>
<description>DataX-based data collection and aggregation service</description>
<dependencies>
<!-- Spring Boot Dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- MyBatis Dependencies -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
</dependency>
<!-- Database -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.33</version>
<scope>runtime</scope>
</dependency>
<!-- Redis -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- DataX Dependencies (DataX plugin integration) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>1.3</version>
</dependency>
<!-- Connection Pool -->
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
</dependency>
<!-- Oracle JDBC Driver -->
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>21.5.0.0</version>
</dependency>
<!-- PostgreSQL JDBC Driver -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<!-- JSON Processing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<!-- Shared Domain -->
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<!-- OpenAPI Dependencies -->
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
<!-- Lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- Test Dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- OpenAPI Generator Plugin -->
<plugin>
<groupId>org.openapitools</groupId>
<artifactId>openapi-generator-maven-plugin</artifactId>
<version>6.6.0</version>
<executions>
<execution>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<inputSpec>${project.basedir}/../../openapi/specs/data-collection.yaml</inputSpec>
<generatorName>spring</generatorName>
<output>${project.build.directory}/generated-sources/openapi</output>
<apiPackage>com.datamate.collection.interfaces.api</apiPackage>
<modelPackage>com.datamate.collection.interfaces.dto</modelPackage>
<configOptions>
<interfaceOnly>true</interfaceOnly>
<useTags>true</useTags>
<useSpringBoot3>true</useSpringBoot3>
<documentationProvider>springdoc</documentationProvider>
<dateLibrary>java8-localdatetime</dateLibrary>
<java8>true</java8>
</configOptions>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<skip>true</skip>
<classifier>exec</classifier>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<annotationProcessorPaths>
<!-- The order matters -->
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok-mapstruct-binding</artifactId>
<version>${lombok-mapstruct-binding.version}</version>
</path>
<path>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
</path>
</annotationProcessorPaths>
<compilerArgs>
<arg>-parameters</arg>
<arg>-Amapstruct.defaultComponentModel=spring</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,24 @@
package com.datamate.collection;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;
/**
 * Data collection service configuration class.
 *
 * DataX-based data collection and synchronization service supporting ingestion from multiple source types.
 */
@SpringBootApplication
@EnableAsync
@EnableScheduling
@EnableTransactionManagement
@ComponentScan(basePackages = {
"com.datamate.collection",
"com.datamate.shared"
})
public class DataCollectionServiceConfiguration {
// Configuration class for JAR packaging - no main method needed
}

View File

@@ -0,0 +1,66 @@
package com.datamate.collection.application.scheduler;
import com.datamate.collection.application.service.DataxExecutionService;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.TaskStatus;
import com.datamate.collection.domain.model.TaskExecution;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.scheduling.support.CronExpression;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import java.time.LocalDateTime;
import java.util.List;
@Slf4j
@Component
@RequiredArgsConstructor
public class TaskSchedulerInitializer {
private final CollectionTaskMapper taskMapper;
private final TaskExecutionMapper executionMapper;
private final DataxExecutionService dataxExecutionService;
// Periodically scan active collection tasks and use their cron expressions to decide whether a run is due
@Scheduled(fixedDelayString = "${datamate.data-collection.scheduler.scan-interval-ms:10000}")
public void scanAndTrigger() {
List<CollectionTask> tasks = taskMapper.selectActiveTasks();
if (tasks == null || tasks.isEmpty()) {
return;
}
LocalDateTime now = LocalDateTime.now();
for (CollectionTask task : tasks) {
String cronExpr = task.getScheduleExpression();
if (!StringUtils.hasText(cronExpr)) {
continue;
}
try {
// Skip if the latest execution is still running
TaskExecution latest = executionMapper.selectLatestByTaskId(task.getId());
if (latest != null && latest.getStatus() == TaskStatus.RUNNING) {
continue;
}
CronExpression cron = CronExpression.parse(cronExpr);
LocalDateTime base = latest != null && latest.getStartedAt() != null
? latest.getStartedAt()
: now.minusYears(1); // no execution history yet: push the baseline far back so the first run is due immediately
LocalDateTime nextTime = cron.next(base);
if (nextTime != null && !nextTime.isAfter(now)) {
// Due: trigger one execution
TaskExecution exec = dataxExecutionService.createExecution(task);
int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
dataxExecutionService.runAsync(task, exec.getId(), timeout);
log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), now, exec.getId());
}
} catch (Exception ex) {
log.warn("Skip task {} due to invalid cron or scheduling error: {}", task.getId(), ex.getMessage());
}
}
}
}
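
A minimal, self-contained sketch of the due-check logic used in scanAndTrigger() above (the class name, cron string and timestamps are invented for illustration; only spring-context is needed for CronExpression):

import org.springframework.scheduling.support.CronExpression;
import java.time.LocalDateTime;

public class CronDueCheckDemo {
    public static void main(String[] args) {
        // A run is due when the next fire time after the last start is not in the future.
        CronExpression cron = CronExpression.parse("0 */5 * * * *"); // every 5 minutes (example)
        LocalDateTime lastStartedAt = LocalDateTime.now().minusMinutes(7); // pretend the last run started 7 minutes ago
        LocalDateTime now = LocalDateTime.now();
        LocalDateTime next = cron.next(lastStartedAt);
        boolean due = next != null && !next.isAfter(now);
        System.out.println("next=" + next + ", due=" + due); // expected: due=true
    }
}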

View File

@@ -0,0 +1,85 @@
package com.datamate.collection.application.service;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.TaskExecution;
import com.datamate.collection.domain.model.TaskStatus;
import com.datamate.collection.domain.model.DataxTemplate;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import com.datamate.collection.interfaces.dto.SyncMode;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@Slf4j
@Service
@RequiredArgsConstructor
public class CollectionTaskService {
private final CollectionTaskMapper taskMapper;
private final TaskExecutionMapper executionMapper;
private final DataxExecutionService dataxExecutionService;
@Transactional
public CollectionTask create(CollectionTask task) {
task.setStatus(TaskStatus.READY);
task.setCreatedAt(LocalDateTime.now());
task.setUpdatedAt(LocalDateTime.now());
taskMapper.insert(task);
executeTaskNow(task);
return task;
}
private void executeTaskNow(CollectionTask task) {
if (Objects.equals(task.getSyncMode(), SyncMode.ONCE.getValue())) {
TaskExecution exec = dataxExecutionService.createExecution(task);
int timeout = task.getTimeoutSeconds() == null ? 3600 : task.getTimeoutSeconds();
dataxExecutionService.runAsync(task, exec.getId(), timeout);
log.info("Triggered DataX execution for task {} at {}, execId={}", task.getId(), LocalDateTime.now(), exec.getId());
}
}
@Transactional
public CollectionTask update(CollectionTask task) {
task.setUpdatedAt(LocalDateTime.now());
taskMapper.update(task);
return task;
}
@Transactional
public void delete(String id) { taskMapper.deleteById(id); }
public CollectionTask get(String id) { return taskMapper.selectById(id); }
public List<CollectionTask> list(Integer page, Integer size, String status, String name) {
Map<String, Object> p = new HashMap<>();
p.put("status", status);
p.put("name", name);
if (page != null && size != null) {
p.put("offset", page * size);
p.put("limit", size);
}
return taskMapper.selectAll(p);
}
@Transactional
public TaskExecution startExecution(CollectionTask task) {
return dataxExecutionService.createExecution(task);
}
// ---- Template related merged methods ----
public List<DataxTemplate> listTemplates(String sourceType, String targetType, int page, int size) {
int offset = page * size;
return taskMapper.selectList(sourceType, targetType, offset, size);
}
public int countTemplates(String sourceType, String targetType) {
return taskMapper.countTemplates(sourceType, targetType);
}
}
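
A usage fragment (not a complete program) for the service above; it assumes collectionTaskService is an injected bean and relies on the behaviour shown in create(): only ONCE tasks are executed immediately, while SCHEDULED tasks wait for TaskSchedulerInitializer.

// Hypothetical caller, e.g. a controller or integration test:
CollectionTask task = new CollectionTask();
task.setId(UUID.randomUUID().toString());
task.setName("orders-snapshot"); // made-up task name
task.setSyncMode(SyncMode.ONCE.getValue()); // ONCE -> create() triggers a DataX run right away
task.setConfig("{\"srcPath\":\"/data/in\"}"); // made-up DataX parameter block
collectionTaskService.create(task);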

View File

@@ -0,0 +1,60 @@
package com.datamate.collection.application.service;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.TaskExecution;
import com.datamate.collection.domain.model.TaskStatus;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import com.datamate.collection.infrastructure.runtime.datax.DataxJobBuilder;
import com.datamate.collection.infrastructure.runtime.datax.DataxProcessRunner;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.nio.file.Path;
import java.time.Duration;
import java.time.LocalDateTime;
@Slf4j
@Service
@RequiredArgsConstructor
public class DataxExecutionService {
private final DataxJobBuilder jobBuilder;
private final DataxProcessRunner processRunner;
private final TaskExecutionMapper executionMapper;
private final CollectionTaskMapper taskMapper;
@Transactional
public TaskExecution createExecution(CollectionTask task) {
TaskExecution exec = TaskExecution.initTaskExecution();
exec.setTaskId(task.getId());
exec.setTaskName(task.getName());
executionMapper.insert(exec);
taskMapper.updateLastExecution(task.getId(), exec.getId());
taskMapper.updateStatus(task.getId(), TaskStatus.RUNNING.name());
return exec;
}
@Async
public void runAsync(CollectionTask task, String executionId, int timeoutSeconds) {
try {
Path job = jobBuilder.buildJobFile(task);
int code = processRunner.runJob(job.toFile(), executionId, Duration.ofSeconds(timeoutSeconds));
log.info("DataX finished with code {} for execution {}", code, executionId);
// Simplified: a normal process exit is treated as success
executionMapper.completeExecution(executionId, TaskStatus.SUCCESS.name(), LocalDateTime.now(),
0, 0L, 0L, 0L, null, null);
taskMapper.updateStatus(task.getId(), TaskStatus.SUCCESS.name());
} catch (Exception e) {
log.error("DataX execution failed", e);
executionMapper.completeExecution(executionId, TaskStatus.FAILED.name(), LocalDateTime.now(),
0, 0L, 0L, 0L, e.getMessage(), null);
taskMapper.updateStatus(task.getId(), TaskStatus.FAILED.name());
}
}
}

View File

@@ -0,0 +1,83 @@
package com.datamate.collection.application.service;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.TaskExecution;
import com.datamate.collection.domain.model.TaskStatus;
import com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper;
import com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Service
@RequiredArgsConstructor
public class TaskExecutionService {
private final TaskExecutionMapper executionMapper;
private final CollectionTaskMapper taskMapper;
public List<TaskExecution> list(String taskId, String status, LocalDateTime startDate,
LocalDateTime endDate, Integer page, Integer size) {
Map<String, Object> p = new HashMap<>();
p.put("taskId", taskId);
p.put("status", status);
p.put("startDate", startDate);
p.put("endDate", endDate);
if (page != null && size != null) {
p.put("offset", page * size);
p.put("limit", size);
}
return executionMapper.selectAll(p);
}
public long count(String taskId, String status, LocalDateTime startDate, LocalDateTime endDate) {
Map<String, Object> p = new HashMap<>();
p.put("taskId", taskId);
p.put("status", status);
p.put("startDate", startDate);
p.put("endDate", endDate);
return executionMapper.count(p);
}
// --- Added convenience methods ---
public TaskExecution get(String id) { return executionMapper.selectById(id); }
public TaskExecution getLatestByTaskId(String taskId) { return executionMapper.selectLatestByTaskId(taskId); }
@Transactional
public void complete(String executionId, boolean success, long successCount, long failedCount,
long dataSizeBytes, String errorMessage, String resultJson) {
LocalDateTime now = LocalDateTime.now();
TaskExecution exec = executionMapper.selectById(executionId);
if (exec == null) { return; }
int duration = (int) Duration.between(exec.getStartedAt(), now).getSeconds();
executionMapper.completeExecution(executionId, success ? TaskStatus.SUCCESS.name() : TaskStatus.FAILED.name(),
now, duration, successCount, failedCount, dataSizeBytes, errorMessage, resultJson);
CollectionTask task = taskMapper.selectById(exec.getTaskId());
if (task != null) {
taskMapper.updateStatus(task.getId(), success ? TaskStatus.SUCCESS.name() : TaskStatus.FAILED.name());
}
}
@Transactional
public void stop(String executionId) {
TaskExecution exec = executionMapper.selectById(executionId);
if (exec == null || exec.getStatus() != TaskStatus.RUNNING) { return; }
LocalDateTime now = LocalDateTime.now();
int duration = (int) Duration.between(exec.getStartedAt(), now).getSeconds();
// Reuse completeExecution to persist STOPPED status and timing info
executionMapper.completeExecution(exec.getId(), TaskStatus.STOPPED.name(), now, duration,
exec.getRecordsSuccess(), exec.getRecordsFailed(), exec.getDataSizeBytes(), null, exec.getResult());
taskMapper.updateStatus(exec.getTaskId(), TaskStatus.STOPPED.name());
}
@Transactional
public void stopLatestByTaskId(String taskId) {
TaskExecution latest = executionMapper.selectLatestByTaskId(taskId);
if (latest != null) { stop(latest.getId()); }
}
}

View File

@@ -0,0 +1,45 @@
package com.datamate.collection.domain.model;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.Data;
import java.time.LocalDateTime;
import java.util.Collections;
import java.util.Map;
@Data
public class CollectionTask {
private String id;
private String name;
private String description;
private String config; // DataX job JSON configuration, containing both source and target settings
private TaskStatus status;
private String syncMode; // ONCE / SCHEDULED
private String scheduleExpression;
private Integer retryCount;
private Integer timeoutSeconds;
private Long maxRecords;
private String sortField;
private String lastExecutionId;
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
private String createdBy;
private String updatedBy;
public void addPath() {
try {
ObjectMapper objectMapper = new ObjectMapper();
Map<String, Object> parameter = objectMapper.readValue(
config,
new TypeReference<>() {}
);
parameter.put("destPath", "/dataset/local/" + id);
parameter.put("filePaths", Collections.singletonList(parameter.get("destPath")));
config = objectMapper.writeValueAsString(parameter);
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
}
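
A small illustration of what addPath() does to the config JSON; the id, the source parameter, and the demo class name are invented:

import com.datamate.collection.domain.model.CollectionTask;

public class AddPathDemo {
    public static void main(String[] args) {
        CollectionTask task = new CollectionTask();
        task.setId("42"); // made-up id
        task.setConfig("{\"srcPath\":\"/data/in\"}"); // made-up source parameter
        task.addPath();
        // Prints roughly:
        // {"srcPath":"/data/in","destPath":"/dataset/local/42","filePaths":["/dataset/local/42"]}
        System.out.println(task.getConfig());
    }
}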

View File

@@ -0,0 +1,71 @@
package com.datamate.collection.domain.model;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.time.LocalDateTime;
@Data
@EqualsAndHashCode(callSuper = false)
public class DataxTemplate {
/**
 * Template ID (UUID)
 */
private String id;
/**
 * Template name
 */
private String name;
/**
 * Source data source type
 */
private String sourceType;
/**
 * Target data source type
 */
private String targetType;
/**
 * Template content (JSON)
 */
private String templateContent;
/**
 * Template description
 */
private String description;
/**
 * Version number
 */
private String version;
/**
 * Whether this is a built-in (system) template
 */
private Boolean isSystem;
/**
 * Creation time
 */
private LocalDateTime createdAt;
/**
 * Last update time
 */
private LocalDateTime updatedAt;
/**
 * Created by
 */
private String createdBy;
/**
 * Updated by
 */
private String updatedBy;
}

View File

@@ -0,0 +1,39 @@
package com.datamate.collection.domain.model;
import lombok.Data;
import java.time.LocalDateTime;
import java.util.UUID;
@Data
public class TaskExecution {
private String id;
private String taskId;
private String taskName;
private TaskStatus status;
private Double progress;
private Long recordsTotal;
private Long recordsProcessed;
private Long recordsSuccess;
private Long recordsFailed;
private Double throughput;
private Long dataSizeBytes;
private LocalDateTime startedAt;
private LocalDateTime completedAt;
private Integer durationSeconds;
private String errorMessage;
private String dataxJobId;
private String config;
private String result;
private LocalDateTime createdAt;
public static TaskExecution initTaskExecution() {
TaskExecution exec = new TaskExecution();
exec.setId(UUID.randomUUID().toString());
exec.setStatus(TaskStatus.RUNNING);
exec.setProgress(0.0);
exec.setStartedAt(LocalDateTime.now());
exec.setCreatedAt(LocalDateTime.now());
return exec;
}
}

View File

@@ -0,0 +1,21 @@
package com.datamate.collection.domain.model;
/**
 * Unified status enum shared by tasks and executions.
 *
 * @author Data Mate Platform Team
 */
public enum TaskStatus {
/** Draft */
DRAFT,
/** Ready */
READY,
/** Running */
RUNNING,
/** Succeeded (formerly COMPLETED) */
SUCCESS,
/** Failed */
FAILED,
/** Stopped */
STOPPED
}

View File

@@ -0,0 +1,47 @@
package com.datamate.collection.infrastructure.persistence.mapper;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.DataxTemplate;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
import java.util.Map;
@Mapper
public interface CollectionTaskMapper {
int insert(CollectionTask entity);
int update(CollectionTask entity);
int deleteById(@Param("id") String id);
CollectionTask selectById(@Param("id") String id);
CollectionTask selectByName(@Param("name") String name);
List<CollectionTask> selectByStatus(@Param("status") String status);
List<CollectionTask> selectAll(Map<String, Object> params);
int updateStatus(@Param("id") String id, @Param("status") String status);
int updateLastExecution(@Param("id") String id, @Param("lastExecutionId") String lastExecutionId);
List<CollectionTask> selectActiveTasks();
/**
 * Query the DataX template list.
 *
 * @param sourceType source data source type (optional)
 * @param targetType target data source type (optional)
 * @param offset pagination offset
 * @param limit maximum number of rows to return
 * @return list of templates
 */
List<DataxTemplate> selectList(@Param("sourceType") String sourceType,
@Param("targetType") String targetType,
@Param("offset") int offset,
@Param("limit") int limit);
/**
 * Count DataX templates.
 *
 * @param sourceType source data source type (optional)
 * @param targetType target data source type (optional)
 * @return total number of templates
 */
int countTemplates(@Param("sourceType") String sourceType,
@Param("targetType") String targetType);
}

View File

@@ -0,0 +1,38 @@
package com.datamate.collection.infrastructure.persistence.mapper;
import com.datamate.collection.domain.model.TaskExecution;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
@Mapper
public interface TaskExecutionMapper {
int insert(TaskExecution entity);
int update(TaskExecution entity);
int deleteById(@Param("id") String id);
TaskExecution selectById(@Param("id") String id);
List<TaskExecution> selectByTaskId(@Param("taskId") String taskId, @Param("limit") Integer limit);
List<TaskExecution> selectByStatus(@Param("status") String status);
List<TaskExecution> selectAll(Map<String, Object> params);
long count(Map<String, Object> params);
int updateProgress(@Param("id") String id,
@Param("status") String status,
@Param("progress") Double progress,
@Param("recordsProcessed") Long recordsProcessed,
@Param("throughput") Double throughput);
int completeExecution(@Param("id") String id,
@Param("status") String status,
@Param("completedAt") LocalDateTime completedAt,
@Param("durationSeconds") Integer durationSeconds,
@Param("recordsSuccess") Long recordsSuccess,
@Param("recordsFailed") Long recordsFailed,
@Param("dataSizeBytes") Long dataSizeBytes,
@Param("errorMessage") String errorMessage,
@Param("result") String result);
List<TaskExecution> selectRunningExecutions();
TaskExecution selectLatestByTaskId(@Param("taskId") String taskId);
int deleteOldExecutions(@Param("beforeDate") LocalDateTime beforeDate);
}

View File

@@ -0,0 +1,83 @@
package com.datamate.collection.infrastructure.runtime.datax;
import com.datamate.collection.domain.model.CollectionTask;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Builds the DataX job JSON file from the task configuration
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class DataxJobBuilder {
private final DataxProperties props;
public Path buildJobFile(CollectionTask task) throws IOException {
Files.createDirectories(Paths.get(props.getJobConfigPath()));
String fileName = String.format("datax-job-%s.json", task.getId());
Path path = Paths.get(props.getJobConfigPath(), fileName);
// Simplified: derive the DataX job JSON directly from the task's config field
try (FileWriter fw = new FileWriter(path.toFile())) {
String json = StringUtils.isNotBlank(task.getConfig())
? getJobConfig(task)
: defaultJobJson();
log.info("Job config: {}", json);
fw.write(json);
}
return path;
}
private String getJobConfig(CollectionTask task) {
try {
ObjectMapper objectMapper = new ObjectMapper();
Map<String, Object> parameter = objectMapper.readValue(
task.getConfig(),
new TypeReference<>() {}
);
Map<String, Object> job = new HashMap<>();
Map<String, Object> content = new HashMap<>();
Map<String, Object> reader = new HashMap<>();
reader.put("name", "nfsreader");
reader.put("parameter", parameter);
content.put("reader", reader);
Map<String, Object> writer = new HashMap<>();
writer.put("name", "nfswriter");
writer.put("parameter", parameter);
content.put("writer", writer);
job.put("content", List.of(content));
Map<String, Object> setting = new HashMap<>();
Map<String, Object> channel = new HashMap<>();
channel.put("channel", 2);
setting.put("speed", channel);
job.put("setting", setting);
Map<String, Object> jobConfig = new HashMap<>();
jobConfig.put("job", job);
return objectMapper.writeValueAsString(jobConfig);
} catch (Exception e) {
log.error("Failed to parse task config", e);
throw new RuntimeException("Failed to parse task config", e);
}
}
private String defaultJobJson() {
// Minimal runnable empty job; a real task overrides it with its own config
return "{\n \"job\": {\n \"setting\": {\n \"speed\": {\n \"channel\": 1\n }\n },\n \"content\": []\n }\n}";
}
}
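
A rough usage sketch of the builder and of the job file it produces; the paths, the parameter block, and the demo class name are invented, while the nfsreader/nfswriter names come from getJobConfig() above:

import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.infrastructure.runtime.datax.DataxJobBuilder;
import com.datamate.collection.infrastructure.runtime.datax.DataxProperties;
import java.nio.file.Path;

public class DataxJobBuilderDemo {
    public static void main(String[] args) throws Exception {
        DataxProperties props = new DataxProperties();
        props.setJobConfigPath("./data/temp/datax/jobs"); // made-up local directory
        DataxJobBuilder builder = new DataxJobBuilder(props);

        CollectionTask task = new CollectionTask();
        task.setId("demo-task"); // made-up id
        task.setConfig("{\"srcPath\":\"/data/in\"}"); // made-up parameter block shared by reader and writer

        Path job = builder.buildJobFile(task);
        // The generated file looks roughly like:
        // {"job":{"setting":{"speed":{"channel":2}},
        //         "content":[{"reader":{"name":"nfsreader","parameter":{...}},
        //                     "writer":{"name":"nfswriter","parameter":{...}}}]}}
        System.out.println("Wrote " + job.toAbsolutePath());
    }
}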

View File

@@ -0,0 +1,46 @@
package com.datamate.collection.infrastructure.runtime.datax;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.exec.*;
import org.springframework.stereotype.Component;
import java.io.File;
import java.time.Duration;
@Slf4j
@Component
@RequiredArgsConstructor
public class DataxProcessRunner {
private final DataxProperties props;
public int runJob(File jobFile, String executionId, Duration timeout) throws Exception {
File logFile = new File(props.getLogPath(), String.format("datax-%s.log", executionId));
String python = props.getPythonPath();
String dataxPy = props.getHomePath() + File.separator + "bin" + File.separator + "datax.py";
String cmd = String.format("%s %s %s", python, dataxPy, jobFile.getAbsolutePath());
log.info("Execute DataX: {}", cmd);
CommandLine cl = CommandLine.parse(cmd);
DefaultExecutor executor = new DefaultExecutor();
// Append process output to the log file and tee it to the console
File parent = logFile.getParentFile();
if (!parent.exists()) parent.mkdirs();
ExecuteStreamHandler streamHandler = new PumpStreamHandler(
new org.apache.commons.io.output.TeeOutputStream(
new java.io.FileOutputStream(logFile, true), System.out),
new org.apache.commons.io.output.TeeOutputStream(
new java.io.FileOutputStream(logFile, true), System.err)
);
executor.setStreamHandler(streamHandler);
ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout.toMillis());
executor.setWatchdog(watchdog);
return executor.execute(cl);
}
}
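
The runner shells out to python <DATAX_HOME>/bin/datax.py <job.json>, tees stdout/stderr into datax-<executionId>.log, and lets the watchdog kill the process once the timeout elapses; note that commons-exec's DefaultExecutor.execute throws ExecuteException on a non-zero exit code, which runAsync above then records as a failure. A hedged usage sketch (paths and the demo class name are invented and assume a local DataX installation):

import com.datamate.collection.infrastructure.runtime.datax.DataxProcessRunner;
import com.datamate.collection.infrastructure.runtime.datax.DataxProperties;
import java.io.File;
import java.time.Duration;

public class DataxProcessRunnerDemo {
    public static void main(String[] args) throws Exception {
        DataxProperties props = new DataxProperties();
        props.setHomePath("/opt/datax"); // made-up DATAX_HOME
        props.setPythonPath("python3");
        props.setLogPath("./logs/datax");

        DataxProcessRunner runner = new DataxProcessRunner(props);
        int exitCode = runner.runJob(
                new File("./data/temp/datax/jobs/datax-job-demo-task.json"), // made-up job file
                "demo-exec-1", Duration.ofMinutes(30));
        System.out.println("DataX exit code: " + exitCode); // non-zero exits surface as ExecuteException instead
    }
}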

View File

@@ -0,0 +1,17 @@
package com.datamate.collection.infrastructure.runtime.datax;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
@Data
@Configuration
@ConfigurationProperties(prefix = "datamate.data-collection.datax")
public class DataxProperties {
private String homePath; // DATAX_HOME
private String pythonPath; // python executable
private String jobConfigPath; // directory for generated job files
private String logPath; // directory for execution logs
private Integer maxMemory = 2048;
private Integer channelCount = 5;
}

View File

@@ -0,0 +1,52 @@
package com.datamate.collection.interfaces.converter;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.DataxTemplate;
import com.datamate.collection.interfaces.dto.*;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.Named;
import org.mapstruct.factory.Mappers;
import java.util.Map;
@Mapper
public interface CollectionTaskConverter {
CollectionTaskConverter INSTANCE = Mappers.getMapper(CollectionTaskConverter.class);
@Mapping(source = "config", target = "config", qualifiedByName = "parseJsonToMap")
CollectionTaskResponse toResponse(CollectionTask task);
CollectionTaskSummary toSummary(CollectionTask task);
DataxTemplateSummary toTemplateSummary(DataxTemplate template);
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
CollectionTask toCollectionTask(CreateCollectionTaskRequest request);
@Mapping(source = "config", target = "config", qualifiedByName = "mapToJsonString")
CollectionTask toCollectionTask(UpdateCollectionTaskRequest request);
@Named("parseJsonToMap")
default Map<String, Object> parseJsonToMap(String json) {
try {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(json, Map.class);
} catch (Exception e) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
}
}
@Named("mapToJsonString")
default String mapToJsonString(Map<String, Object> map) {
try {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(map != null ? map : Map.of());
} catch (Exception e) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
}
}
}
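
The two @Named helpers above are just a Jackson Map-to-JSON round trip; a minimal, self-contained sketch of the same conversion (values and the demo class name are invented):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Map;

public class ConfigJsonRoundTripDemo {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        Map<String, Object> config = Map.of("srcPath", "/data/in", "channel", 2); // made-up DataX parameters
        String json = mapper.writeValueAsString(config); // mapToJsonString equivalent
        Map<String, Object> back = mapper.readValue(json, new TypeReference<Map<String, Object>>() {}); // parseJsonToMap equivalent
        System.out.println(json + " -> " + back);
    }
}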

View File

@@ -0,0 +1,83 @@
package com.datamate.collection.interfaces.rest;
import com.datamate.collection.application.service.CollectionTaskService;
import com.datamate.collection.domain.model.CollectionTask;
import com.datamate.collection.domain.model.DataxTemplate;
import com.datamate.collection.interfaces.api.CollectionTaskApi;
import com.datamate.collection.interfaces.converter.CollectionTaskConverter;
import com.datamate.collection.interfaces.dto.*;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.ResponseEntity;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.RestController;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
@RestController
@RequiredArgsConstructor
@Validated
public class CollectionTaskController implements CollectionTaskApi {
private final CollectionTaskService taskService;
@Override
public ResponseEntity<CollectionTaskResponse> createTask(CreateCollectionTaskRequest request) {
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
task.setId(UUID.randomUUID().toString());
task.addPath();
return ResponseEntity.ok().body(CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task)));
}
@Override
public ResponseEntity<CollectionTaskResponse> updateTask(String id, UpdateCollectionTaskRequest request) {
if (taskService.get(id) == null) {
return ResponseEntity.notFound().build();
}
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
task.setId(id);
return ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(taskService.update(task)));
}
@Override
public ResponseEntity<Void> deleteTask(String id) {
taskService.delete(id);
return ResponseEntity.ok().build();
}
@Override
public ResponseEntity<CollectionTaskResponse> getTaskDetail(String id) {
CollectionTask task = taskService.get(id);
return task == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(CollectionTaskConverter.INSTANCE.toResponse(task));
}
@Override
public ResponseEntity<PagedCollectionTaskSummary> getTasks(Integer page, Integer size, TaskStatus status, String name) {
var list = taskService.list(page, size, status == null ? null : status.getValue(), name);
PagedCollectionTaskSummary response = new PagedCollectionTaskSummary();
response.setContent(list.stream().map(CollectionTaskConverter.INSTANCE::toSummary).collect(Collectors.toList()));
response.setNumber(page);
response.setSize(size);
response.setTotalElements(list.size()); // simplified; a real implementation should run a dedicated count query
response.setTotalPages(size == null || size == 0 ? 1 : (int) Math.ceil(list.size() * 1.0 / size));
return ResponseEntity.ok(response);
}
@Override
public ResponseEntity<PagedDataxTemplates> templatesGet(String sourceType, String targetType,
Integer page, Integer size) {
int pageNum = page != null ? page : 0;
int pageSize = size != null ? size : 20;
List<DataxTemplate> templates = taskService.listTemplates(sourceType, targetType, pageNum, pageSize);
int totalElements = taskService.countTemplates(sourceType, targetType);
PagedDataxTemplates response = new PagedDataxTemplates();
response.setContent(templates.stream().map(CollectionTaskConverter.INSTANCE::toTemplateSummary).collect(Collectors.toList()));
response.setNumber(pageNum);
response.setSize(pageSize);
response.setTotalElements(totalElements);
response.setTotalPages(pageSize > 0 ? (int) Math.ceil(totalElements * 1.0 / pageSize) : 1);
return ResponseEntity.ok(response);
}
}

View File

@@ -0,0 +1,101 @@
package com.datamate.collection.interfaces.rest;
import com.datamate.collection.application.service.CollectionTaskService;
import com.datamate.collection.application.service.TaskExecutionService;
import com.datamate.collection.domain.model.TaskExecution;
import com.datamate.collection.interfaces.api.TaskExecutionApi;
import com.datamate.collection.interfaces.dto.PagedTaskExecutions;
import com.datamate.collection.interfaces.dto.TaskExecutionDetail;
import com.datamate.collection.interfaces.dto.TaskExecutionResponse;
import com.datamate.collection.interfaces.dto.TaskStatus; // DTO enum
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.RestController;
import java.util.stream.Collectors;
@RestController
@RequiredArgsConstructor
@Validated
public class TaskExecutionController implements TaskExecutionApi {
private final TaskExecutionService executionService;
private final CollectionTaskService taskService;
private TaskExecutionDetail toDetail(TaskExecution e) {
TaskExecutionDetail d = new TaskExecutionDetail();
d.setId(e.getId());
d.setTaskId(e.getTaskId());
d.setTaskName(e.getTaskName());
if (e.getStatus() != null) { d.setStatus(TaskStatus.fromValue(e.getStatus().name())); }
d.setProgress(e.getProgress());
d.setRecordsTotal(e.getRecordsTotal() != null ? e.getRecordsTotal().intValue() : null);
d.setRecordsProcessed(e.getRecordsProcessed() != null ? e.getRecordsProcessed().intValue() : null);
d.setRecordsSuccess(e.getRecordsSuccess() != null ? e.getRecordsSuccess().intValue() : null);
d.setRecordsFailed(e.getRecordsFailed() != null ? e.getRecordsFailed().intValue() : null);
d.setThroughput(e.getThroughput());
d.setDataSizeBytes(e.getDataSizeBytes() != null ? e.getDataSizeBytes().intValue() : null);
d.setStartedAt(e.getStartedAt());
d.setCompletedAt(e.getCompletedAt());
d.setDurationSeconds(e.getDurationSeconds());
d.setErrorMessage(e.getErrorMessage());
return d;
}
// GET /executions/{id}
@Override
public ResponseEntity<TaskExecutionDetail> executionsIdGet(String id) {
var exec = executionService.get(id);
return exec == null ? ResponseEntity.notFound().build() : ResponseEntity.ok(toDetail(exec));
}
// DELETE /executions/{id}
@Override
public ResponseEntity<Void> executionsIdDelete(String id) {
executionService.stop(id); // idempotent: the service checks the current status internally
return ResponseEntity.noContent().build();
}
// POST /tasks/{id}/execute -> 201
@Override
public ResponseEntity<TaskExecutionResponse> tasksIdExecutePost(String id) {
var task = taskService.get(id);
if (task == null) { return ResponseEntity.notFound().build(); }
var latestExec = executionService.getLatestByTaskId(id);
if (latestExec != null && latestExec.getStatus() == com.datamate.collection.domain.model.TaskStatus.RUNNING) {
TaskExecutionResponse r = new TaskExecutionResponse();
r.setId(latestExec.getId());
r.setTaskId(latestExec.getTaskId());
r.setTaskName(latestExec.getTaskName());
r.setStatus(TaskStatus.fromValue(latestExec.getStatus().name()));
r.setStartedAt(latestExec.getStartedAt());
return ResponseEntity.status(HttpStatus.CREATED).body(r); // return the already-running execution
}
var exec = taskService.startExecution(task);
TaskExecutionResponse r = new TaskExecutionResponse();
r.setId(exec.getId());
r.setTaskId(exec.getTaskId());
r.setTaskName(exec.getTaskName());
r.setStatus(TaskStatus.fromValue(exec.getStatus().name()));
r.setStartedAt(exec.getStartedAt());
return ResponseEntity.status(HttpStatus.CREATED).body(r);
}
// GET /tasks/{id}/executions -> paginated
@Override
public ResponseEntity<PagedTaskExecutions> tasksIdExecutionsGet(String id, Integer page, Integer size) {
if (page == null || page < 0) { page = 0; }
if (size == null || size <= 0) { size = 20; }
var list = executionService.list(id, null, null, null, page, size);
long total = executionService.count(id, null, null, null);
PagedTaskExecutions p = new PagedTaskExecutions();
p.setContent(list.stream().map(this::toDetail).collect(Collectors.toList()));
p.setNumber(page);
p.setSize(size);
p.setTotalElements((int) total);
p.setTotalPages(size == 0 ? 1 : (int) Math.ceil(total * 1.0 / size));
return ResponseEntity.ok(p);
}
}

View File

@@ -0,0 +1,23 @@
datamate:
data-collection:
# DataX configuration
datax:
home-path: ${DATAX_HOME:D:/datax}
python-path: ${DATAX_PYTHON_PATH:python3}
job-config-path: ${DATAX_JOB_PATH:./data/temp/datax/jobs}
log-path: ${DATAX_LOG_PATH:./logs/datax}
max-memory: ${DATAX_MAX_MEMORY:2048}
channel-count: ${DATAX_CHANNEL_COUNT:5}
# Execution configuration
execution:
max-concurrent-tasks: ${DATA_COLLECTION_MAX_CONCURRENT_TASKS:10}
task-timeout-minutes: ${DATA_COLLECTION_TASK_TIMEOUT:120}
retry-count: ${DATA_COLLECTION_RETRY_COUNT:3}
retry-interval-seconds: ${DATA_COLLECTION_RETRY_INTERVAL:30}
# Monitoring configuration
monitoring:
status-check-interval-seconds: ${DATA_COLLECTION_STATUS_CHECK_INTERVAL:30}
log-retention-days: ${DATA_COLLECTION_LOG_RETENTION:30}
enable-metrics: ${DATA_COLLECTION_ENABLE_METRICS:true}

View File

@@ -0,0 +1,188 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.CollectionTaskMapper">
<!-- Result Map -->
<resultMap id="CollectionTaskResultMap" type="com.datamate.collection.domain.model.CollectionTask">
<id property="id" column="id"/>
<result property="name" column="name"/>
<result property="description" column="description"/>
<result property="config" column="config"/>
<result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
<result property="syncMode" column="sync_mode"/>
<result property="scheduleExpression" column="schedule_expression"/>
<result property="retryCount" column="retry_count"/>
<result property="timeoutSeconds" column="timeout_seconds"/>
<result property="maxRecords" column="max_records"/>
<result property="sortField" column="sort_field"/>
<result property="lastExecutionId" column="last_execution_id"/>
<result property="createdAt" column="created_at"/>
<result property="updatedAt" column="updated_at"/>
<result property="createdBy" column="created_by"/>
<result property="updatedBy" column="updated_by"/>
</resultMap>
<!-- Result Map (templates) -->
<resultMap id="DataxTemplateResultMap" type="com.datamate.collection.domain.model.DataxTemplate">
<id column="id" property="id" jdbcType="VARCHAR"/>
<result column="name" property="name" jdbcType="VARCHAR"/>
<result column="source_type" property="sourceType" jdbcType="VARCHAR"/>
<result column="target_type" property="targetType" jdbcType="VARCHAR"/>
<result column="template_content" property="templateContent" jdbcType="VARCHAR"/>
<result column="description" property="description" jdbcType="VARCHAR"/>
<result column="version" property="version" jdbcType="VARCHAR"/>
<result column="is_system" property="isSystem" jdbcType="BOOLEAN"/>
<result column="created_at" property="createdAt" jdbcType="TIMESTAMP"/>
<result column="updated_at" property="updatedAt" jdbcType="TIMESTAMP"/>
<result column="created_by" property="createdBy" jdbcType="VARCHAR"/>
</resultMap>
<!-- Base Column List (tasks) -->
<sql id="Base_Column_List">
id,
name, description, config, status, sync_mode,
schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
last_execution_id, created_at, updated_at, created_by, updated_by
</sql>
<!-- Template Column List -->
<sql id="Template_Column_List">
id, name, source_type, target_type, template_content, description, version, is_system, created_at, updated_at, created_by
</sql>
<!-- Insert -->
<insert id="insert" parameterType="com.datamate.collection.domain.model.CollectionTask">
INSERT INTO t_dc_collection_tasks (id, name, description, config, status, sync_mode,
schedule_expression, retry_count, timeout_seconds, max_records, sort_field,
last_execution_id, created_at, updated_at, created_by, updated_by)
VALUES (#{id}, #{name}, #{description}, #{config}, #{status}, #{syncMode},
#{scheduleExpression}, #{retryCount}, #{timeoutSeconds}, #{maxRecords}, #{sortField},
#{lastExecutionId}, #{createdAt}, #{updatedAt}, #{createdBy}, #{updatedBy})
</insert>
<!-- Update -->
<update id="update" parameterType="com.datamate.collection.domain.model.CollectionTask">
UPDATE t_dc_collection_tasks
SET name = #{name},
description = #{description},
config = #{config},
status = #{status},
sync_mode = #{syncMode},
schedule_expression = #{scheduleExpression},
retry_count = #{retryCount},
timeout_seconds = #{timeoutSeconds},
max_records = #{maxRecords},
sort_field = #{sortField},
last_execution_id = #{lastExecutionId},
updated_at = #{updatedAt},
updated_by = #{updatedBy}
WHERE id = #{id}
</update>
<!-- Delete by ID -->
<delete id="deleteById" parameterType="java.lang.String">
DELETE FROM t_dc_collection_tasks WHERE id = #{id}
</delete>
<!-- Select by ID -->
<select id="selectById" parameterType="java.lang.String" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks WHERE id = #{id}
</select>
<!-- Select by Name -->
<select id="selectByName" parameterType="java.lang.String" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks WHERE name = #{name}
</select>
<!-- Select by Status -->
<select id="selectByStatus" parameterType="java.lang.String" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks WHERE status = #{status} ORDER BY created_at DESC
</select>
<!-- Select All with Pagination -->
<select id="selectAll" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
<where>
<if test="status != null and status != ''">
AND status = #{status}
</if>
<if test="name != null and name != ''">
AND name LIKE CONCAT('%', #{name}, '%')
</if>
</where>
ORDER BY created_at DESC
<if test="offset != null and limit != null">
LIMIT #{offset}, #{limit}
</if>
</select>
<!-- Count Total -->
<select id="count" resultType="java.lang.Long">
SELECT COUNT(*) FROM t_dc_collection_tasks
<where>
<if test="status != null and status != ''">
AND status = #{status}
</if>
<if test="name != null and name != ''">
AND name LIKE CONCAT('%', #{name}, '%')
</if>
<if test="sourceDataSourceId != null and sourceDataSourceId != ''">
AND source_datasource_id = #{sourceDataSourceId}
</if>
<if test="targetDataSourceId != null and targetDataSourceId != ''">
AND target_datasource_id = #{targetDataSourceId}
</if>
</where>
</select>
<!-- Update Status -->
<update id="updateStatus">
UPDATE t_dc_collection_tasks SET status = #{status}, updated_at = NOW() WHERE id = #{id}
</update>
<!-- Update Last Execution -->
<update id="updateLastExecution">
UPDATE t_dc_collection_tasks SET last_execution_id = #{lastExecutionId}, updated_at = NOW() WHERE id = #{id}
</update>
<!-- Select Active Tasks for Scheduling -->
<select id="selectActiveTasks" resultMap="CollectionTaskResultMap">
SELECT <include refid="Base_Column_List"/> FROM t_dc_collection_tasks
WHERE status IN ('READY', 'RUNNING')
AND schedule_expression IS NOT NULL
ORDER BY created_at DESC
</select>
<!-- Query template list -->
<select id="selectList" resultMap="DataxTemplateResultMap">
SELECT <include refid="Template_Column_List"/> FROM t_dc_datax_templates
<where>
<if test="sourceType != null and sourceType != ''">
AND source_type = #{sourceType}
</if>
<if test="targetType != null and targetType != ''">
AND target_type = #{targetType}
</if>
</where>
ORDER BY is_system DESC, created_at DESC
<if test="limit > 0">
LIMIT #{offset}, #{limit}
</if>
</select>
<!-- Count templates -->
<select id="countTemplates" resultType="java.lang.Integer">
SELECT COUNT(1) FROM t_dc_datax_templates
<where>
<if test="sourceType != null and sourceType != ''">
AND source_type = #{sourceType}
</if>
<if test="targetType != null and targetType != ''">
AND target_type = #{targetType}
</if>
</where>
</select>
</mapper>

View File

@@ -0,0 +1,191 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.collection.infrastructure.persistence.mapper.TaskExecutionMapper">
<!-- Result Map -->
<resultMap id="TaskExecutionResultMap" type="com.datamate.collection.domain.model.TaskExecution">
<id property="id" column="id"/>
<result property="taskId" column="task_id"/>
<result property="taskName" column="task_name"/>
<result property="status" column="status" typeHandler="org.apache.ibatis.type.EnumTypeHandler"/>
<result property="progress" column="progress"/>
<result property="recordsTotal" column="records_total"/>
<result property="recordsProcessed" column="records_processed"/>
<result property="recordsSuccess" column="records_success"/>
<result property="recordsFailed" column="records_failed"/>
<result property="throughput" column="throughput"/>
<result property="dataSizeBytes" column="data_size_bytes"/>
<result property="startedAt" column="started_at"/>
<result property="completedAt" column="completed_at"/>
<result property="durationSeconds" column="duration_seconds"/>
<result property="errorMessage" column="error_message"/>
<result property="dataxJobId" column="datax_job_id"/>
<result property="config" column="config"/>
<result property="result" column="result"/>
<result property="createdAt" column="created_at"/>
</resultMap>
<!-- Base Column List -->
<sql id="Base_Column_List">
id, task_id, task_name, status, progress, records_total, records_processed,
records_success, records_failed, throughput, data_size_bytes, started_at,
completed_at, duration_seconds, error_message, datax_job_id, config, result, created_at
</sql>
<!-- Insert -->
<insert id="insert" parameterType="com.datamate.collection.domain.model.TaskExecution">
INSERT INTO t_dc_task_executions (
id, task_id, task_name, status, progress, records_total, records_processed,
records_success, records_failed, throughput, data_size_bytes, started_at,
completed_at, duration_seconds, error_message, datax_job_id, config, result, created_at
) VALUES (
#{id}, #{taskId}, #{taskName}, #{status}, #{progress}, #{recordsTotal}, #{recordsProcessed},
#{recordsSuccess}, #{recordsFailed}, #{throughput}, #{dataSizeBytes}, #{startedAt},
#{completedAt}, #{durationSeconds}, #{errorMessage}, #{dataxJobId}, #{config}, #{result}, #{createdAt}
)
</insert>
<!-- Update -->
<update id="update" parameterType="com.datamate.collection.domain.model.TaskExecution">
UPDATE t_dc_task_executions
SET status = #{status},
progress = #{progress},
records_total = #{recordsTotal},
records_processed = #{recordsProcessed},
records_success = #{recordsSuccess},
records_failed = #{recordsFailed},
throughput = #{throughput},
data_size_bytes = #{dataSizeBytes},
completed_at = #{completedAt},
duration_seconds = #{durationSeconds},
error_message = #{errorMessage},
result = #{result}
WHERE id = #{id}
</update>
<!-- Delete by ID -->
<delete id="deleteById" parameterType="java.lang.String">
DELETE FROM t_dc_task_executions WHERE id = #{id}
</delete>
<!-- Select by ID -->
<select id="selectById" parameterType="java.lang.String" resultMap="TaskExecutionResultMap">
SELECT <include refid="Base_Column_List"/>
FROM t_dc_task_executions
WHERE id = #{id}
</select>
<!-- Select by Task ID -->
<select id="selectByTaskId" resultMap="TaskExecutionResultMap">
SELECT <include refid="Base_Column_List"/>
FROM t_dc_task_executions
WHERE task_id = #{taskId}
ORDER BY started_at DESC
<if test="limit != null">
LIMIT #{limit}
</if>
</select>
<!-- Select by Status -->
<select id="selectByStatus" parameterType="java.lang.String" resultMap="TaskExecutionResultMap">
SELECT <include refid="Base_Column_List"/>
FROM t_dc_task_executions
WHERE status = #{status}
ORDER BY started_at DESC
</select>
<!-- Select All with Pagination -->
<select id="selectAll" resultMap="TaskExecutionResultMap">
SELECT <include refid="Base_Column_List"/>
FROM t_dc_task_executions
<where>
<if test="taskId != null and taskId != ''">
AND task_id = #{taskId}
</if>
<if test="status != null and status != ''">
AND status = #{status}
</if>
<if test="startDate != null">
AND started_at >= #{startDate}
</if>
<if test="endDate != null">
AND started_at &lt;= #{endDate}
</if>
</where>
ORDER BY started_at DESC
<if test="offset != null and limit != null">
LIMIT #{offset}, #{limit}
</if>
</select>
<!-- Count Total -->
<select id="count" resultType="java.lang.Long">
SELECT COUNT(*)
FROM t_dc_task_executions
<where>
<if test="taskId != null and taskId != ''">
AND task_id = #{taskId}
</if>
<if test="status != null and status != ''">
AND status = #{status}
</if>
<if test="startDate != null">
AND started_at >= #{startDate}
</if>
<if test="endDate != null">
AND started_at &lt;= #{endDate}
</if>
</where>
</select>
<!-- Update Status and Progress -->
<update id="updateProgress">
UPDATE t_dc_task_executions
SET status = #{status},
progress = #{progress},
records_processed = #{recordsProcessed},
throughput = #{throughput}
WHERE id = #{id}
</update>
<!-- Complete Execution -->
<update id="completeExecution">
UPDATE t_dc_task_executions
SET status = #{status},
progress = 100.00,
completed_at = #{completedAt},
duration_seconds = #{durationSeconds},
records_success = #{recordsSuccess},
records_failed = #{recordsFailed},
data_size_bytes = #{dataSizeBytes},
error_message = #{errorMessage},
result = #{result}
WHERE id = #{id}
</update>
<!-- Select Running Executions -->
<select id="selectRunningExecutions" resultMap="TaskExecutionResultMap">
SELECT <include refid="Base_Column_List"/>
FROM t_dc_task_executions
WHERE status = 'RUNNING'
ORDER BY started_at ASC
</select>
<!-- Select Latest Execution by Task -->
<select id="selectLatestByTaskId" parameterType="java.lang.String" resultMap="TaskExecutionResultMap">
SELECT <include refid="Base_Column_List"/>
FROM t_dc_task_executions
WHERE task_id = #{taskId}
ORDER BY started_at DESC
LIMIT 1
</select>
<!-- Delete Old Executions -->
<delete id="deleteOldExecutions">
DELETE FROM t_dc_task_executions
WHERE started_at &lt; #{beforeDate}
</delete>
</mapper>

View File

@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>data-mate-platform</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>data-evaluation-service</artifactId>
<name>Data Evaluation Service</name>
<description>Data evaluation service</description>
<dependencies>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.openapitools</groupId>
<artifactId>openapi-generator-maven-plugin</artifactId>
<version>6.6.0</version>
<executions>
<execution>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<inputSpec>${project.basedir}/../../openapi/specs/data-evaluation.yaml</inputSpec>
<generatorName>spring</generatorName>
<output>${project.build.directory}/generated-sources/openapi</output>
<apiPackage>com.datamate.evaluation.interfaces.api</apiPackage>
<modelPackage>com.datamate.evaluation.interfaces.dto</modelPackage>
<configOptions>
<interfaceOnly>true</interfaceOnly>
<useTags>true</useTags>
<useSpringBoot3>true</useSpringBoot3>
<documentationProvider>springdoc</documentationProvider>
</configOptions>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>data-mate-platform</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>data-management-service</artifactId>
<name>Data Management Service</name>
<description>Data management service</description>
<dependencies>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-commons</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<arguments>true</arguments>
<classifier>exec</classifier>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<annotationProcessorPaths>
<!-- Order matters: Lombok must run before the MapStruct processor -->
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok-mapstruct-binding</artifactId>
<version>${lombok-mapstruct-binding.version}</version>
</path>
<path>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
</path>
</annotationProcessorPaths>
<compilerArgs>
<arg>-parameters</arg>
<arg>-Amapstruct.defaultComponentModel=spring</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,22 @@
package com.datamate.datamanagement;
import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
/**
 * Data Management Service Configuration
 * Configuration class for the data management service: multi-source ingestion, metadata, and lineage governance
 */
@Configuration
@EnableFeignClients(basePackages = "com.datamate.datamanagement.infrastructure.client")
@EnableAsync
@ComponentScan(basePackages = {
"com.datamate.datamanagement",
"com.datamate.shared"
})
public class DataManagementServiceConfiguration {
// Service configuration class: this module is packaged and consumed as a JAR
}

View File

@@ -0,0 +1,288 @@
package com.datamate.datamanagement.application;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.datamanagement.interfaces.dto.*;
import com.datamate.common.infrastructure.exception.BusinessAssert;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.domain.model.dataset.Tag;
import com.datamate.datamanagement.infrastructure.client.CollectionTaskClient;
import com.datamate.datamanagement.infrastructure.client.dto.CollectionTaskDetailResponse;
import com.datamate.datamanagement.infrastructure.client.dto.LocalCollectionConfig;
import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Dataset application service (aligned with the DB schema, using UUID string primary keys)
 */
@Slf4j
@Service
@Transactional
@RequiredArgsConstructor
public class DatasetApplicationService {
private final DatasetRepository datasetRepository;
private final TagMapper tagMapper;
private final DatasetFileRepository datasetFileRepository;
private final CollectionTaskClient collectionTaskClient;
private final FileMetadataService fileMetadataService;
private final ObjectMapper objectMapper;
@Value("${dataset.base.path:/dataset}")
private String datasetBasePath;
/**
 * Create a dataset
 */
@Transactional
public Dataset createDataset(CreateDatasetRequest createDatasetRequest) {
BusinessAssert.isTrue(datasetRepository.findByName(createDatasetRequest.getName()) == null, DataManagementErrorCode.DATASET_ALREADY_EXISTS);
// Build the dataset entity
Dataset dataset = DatasetConverter.INSTANCE.convertToDataset(createDatasetRequest);
dataset.initCreateParam(datasetBasePath);
// Process tags
Set<Tag> processedTags = Optional.ofNullable(createDatasetRequest.getTags())
.filter(CollectionUtils::isNotEmpty)
.map(this::processTagNames)
.orElseGet(HashSet::new);
dataset.setTags(processedTags);
datasetRepository.save(dataset);
// TODO: decouple this logic
if (StringUtils.hasText(createDatasetRequest.getDataSource())) {
// A data source id was supplied: scan its files asynchronously and persist them
processDataSourceAsync(dataset.getId(), createDatasetRequest.getDataSource());
}
return dataset;
}
public Dataset updateDataset(String datasetId, UpdateDatasetRequest updateDatasetRequest) {
Dataset dataset = datasetRepository.getById(datasetId);
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
if (StringUtils.hasText(updateDatasetRequest.getName())) {
dataset.setName(updateDatasetRequest.getName());
}
if (StringUtils.hasText(updateDatasetRequest.getDescription())) {
dataset.setDescription(updateDatasetRequest.getDescription());
}
if (CollectionUtils.isNotEmpty(updateDatasetRequest.getTags())) {
dataset.setTags(processTagNames(updateDatasetRequest.getTags()));
}
if (Objects.nonNull(updateDatasetRequest.getStatus())) {
dataset.setStatus(updateDatasetRequest.getStatus());
}
if (StringUtils.hasText(updateDatasetRequest.getDataSource())) {
// A data source id is present: scan its files asynchronously and persist their metadata
processDataSourceAsync(dataset.getId(), updateDatasetRequest.getDataSource());
}
datasetRepository.updateById(dataset);
return dataset;
}
/**
* Delete a dataset
*/
public void deleteDataset(String datasetId) {
datasetRepository.removeById(datasetId);
}
/**
* Get dataset details
*/
@Transactional(readOnly = true)
public Dataset getDataset(String datasetId) {
Dataset dataset = datasetRepository.getById(datasetId);
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
return dataset;
}
/**
* Query datasets with pagination
*/
@Transactional(readOnly = true)
public PagedResponse<DatasetResponse> getDatasets(DatasetPagingQuery query) {
IPage<Dataset> page = new Page<>(query.getPage(), query.getSize());
page = datasetRepository.findByCriteria(page, query);
return PagedResponse.of(DatasetConverter.INSTANCE.convertToResponse(page.getRecords()), page.getCurrent(), page.getTotal(), page.getPages());
}
/**
* Resolve tag names, creating tags that do not exist yet and updating usage counts
*/
private Set<Tag> processTagNames(List<String> tagNames) {
Set<Tag> tags = new HashSet<>();
for (String tagName : tagNames) {
Tag tag = tagMapper.findByName(tagName);
if (tag == null) {
Tag newTag = new Tag(tagName, null, null, "#007bff");
newTag.setUsageCount(0L);
newTag.setId(UUID.randomUUID().toString());
tagMapper.insert(newTag);
tag = newTag;
}
tag.setUsageCount(tag.getUsageCount() == null ? 1L : tag.getUsageCount() + 1);
tagMapper.updateUsageCount(tag.getId(), tag.getUsageCount());
tags.add(tag);
}
return tags;
}
/**
* Get statistics for a single dataset
*/
@Transactional(readOnly = true)
public Map<String, Object> getDatasetStatistics(String datasetId) {
Dataset dataset = datasetRepository.getById(datasetId);
if (dataset == null) {
throw new IllegalArgumentException("Dataset not found: " + datasetId);
}
Map<String, Object> statistics = new HashMap<>();
// Basic counters
Long totalFiles = datasetFileRepository.countByDatasetId(datasetId);
Long completedFiles = datasetFileRepository.countCompletedByDatasetId(datasetId);
Long totalSize = datasetFileRepository.sumSizeByDatasetId(datasetId);
statistics.put("totalFiles", totalFiles != null ? totalFiles.intValue() : 0);
statistics.put("completedFiles", completedFiles != null ? completedFiles.intValue() : 0);
statistics.put("totalSize", totalSize != null ? totalSize : 0L);
// Completion rate
float completionRate = 0.0f;
if (totalFiles != null && totalFiles > 0) {
completionRate = (completedFiles != null ? completedFiles.floatValue() : 0.0f) / totalFiles.floatValue() * 100.0f;
}
statistics.put("completionRate", completionRate);
// Distribution by file type
Map<String, Integer> fileTypeDistribution = new HashMap<>();
List<DatasetFile> allFiles = datasetFileRepository.findAllByDatasetId(datasetId);
if (allFiles != null) {
for (DatasetFile file : allFiles) {
String fileType = file.getFileType() != null ? file.getFileType() : "unknown";
fileTypeDistribution.put(fileType, fileTypeDistribution.getOrDefault(fileType, 0) + 1);
}
}
statistics.put("fileTypeDistribution", fileTypeDistribution);
// Distribution by status
Map<String, Integer> statusDistribution = new HashMap<>();
if (allFiles != null) {
for (DatasetFile file : allFiles) {
String status = file.getStatus() != null ? file.getStatus() : "unknown";
statusDistribution.put(status, statusDistribution.getOrDefault(status, 0) + 1);
}
}
statistics.put("statusDistribution", statusDistribution);
return statistics;
}
/**
* Get aggregate statistics across all datasets
*/
public AllDatasetStatisticsResponse getAllDatasetStatistics() {
return datasetRepository.getAllDatasetStatistics();
}
/**
* Asynchronously scan the files of a data source and persist their metadata
*
* @param datasetId    dataset id
* @param dataSourceId data source id (collection task id)
*/
@Async
public void processDataSourceAsync(String datasetId, String dataSourceId) {
try {
log.info("开始处理数据源文件扫描,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId);
// 1. Fetch the collection task detail from the data collection service
CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
if (taskDetail == null) {
log.error("获取归集任务详情失败,任务ID: {}", dataSourceId);
return;
}
log.info("获取到归集任务详情: {}", taskDetail);
// 2. Parse the task configuration
LocalCollectionConfig config = parseTaskConfig(taskDetail.getConfig());
if (config == null) {
log.error("解析任务配置失败,任务ID: {}", dataSourceId);
return;
}
// 3. Get the list of file paths
List<String> filePaths = config.getFilePaths();
if (CollectionUtils.isEmpty(filePaths)) {
log.warn("文件路径列表为空,任务ID: {}", dataSourceId);
return;
}
log.info("开始扫描文件,共 {} 个文件路径", filePaths.size());
// 4. Scan file metadata
List<DatasetFile> datasetFiles = fileMetadataService.scanFiles(filePaths, datasetId);
// Look up files that already exist in the dataset
List<DatasetFile> existDatasetFileList = datasetFileRepository.findAllByDatasetId(datasetId);
Map<String, DatasetFile> existDatasetFilePathMap = existDatasetFileList.stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
Dataset dataset = datasetRepository.getById(datasetId);
// 5. Insert new dataset file records or update existing ones
if (CollectionUtils.isNotEmpty(datasetFiles)) {
for (DatasetFile datasetFile : datasetFiles) {
if (existDatasetFilePathMap.containsKey(datasetFile.getFilePath())) {
DatasetFile existDatasetFile = existDatasetFilePathMap.get(datasetFile.getFilePath());
dataset.removeFile(existDatasetFile);
existDatasetFile.setFileSize(datasetFile.getFileSize());
dataset.addFile(existDatasetFile);
datasetFileRepository.updateById(existDatasetFile);
} else {
dataset.addFile(datasetFile);
datasetFileRepository.save(datasetFile);
}
}
log.info("文件元数据写入完成,共写入 {} 条记录", datasetFiles.size());
} else {
log.warn("未扫描到有效文件");
}
datasetRepository.updateById(dataset);
} catch (Exception e) {
log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e);
}
}
/**
* Parse the collection task configuration
*/
private LocalCollectionConfig parseTaskConfig(Map<String, Object> configMap) {
try {
if (configMap == null || configMap.isEmpty()) {
return null;
}
return objectMapper.convertValue(configMap, LocalCollectionConfig.class);
} catch (Exception e) {
log.error("解析任务配置失败", e);
return null;
}
}
}

View File

@@ -0,0 +1,306 @@
package com.datamate.datamanagement.application;
import com.datamate.common.domain.model.ChunkUploadPreRequest;
import com.datamate.common.domain.model.FileUploadResult;
import com.datamate.common.domain.service.FileService;
import com.datamate.common.domain.utils.AnalyzerUtils;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.datamanagement.domain.contants.DatasetConstant;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo;
import com.datamate.datamanagement.domain.model.dataset.StatusConstants;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import org.apache.ibatis.session.RowBounds;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.Pageable;
import org.springframework.http.HttpHeaders;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* Dataset file application service
*/
@Slf4j
@Service
@Transactional
public class DatasetFileApplicationService {
private final DatasetFileRepository datasetFileRepository;
private final DatasetRepository datasetRepository;
private final Path fileStorageLocation;
private final FileService fileService;
@Value("${dataset.base.path:/dataset}")
private String datasetBasePath;
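/**
 * Creates the service and makes sure the upload directory exists.
 */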
@Autowired
public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
DatasetRepository datasetRepository, FileService fileService,
@Value("${app.file.upload-dir:./dataset}") String uploadDir) {
this.datasetFileRepository = datasetFileRepository;
this.datasetRepository = datasetRepository;
this.fileStorageLocation = Paths.get(uploadDir).toAbsolutePath().normalize();
this.fileService = fileService;
try {
Files.createDirectories(this.fileStorageLocation);
} catch (Exception ex) {
throw new RuntimeException("Could not create the directory where the uploaded files will be stored.", ex);
}
}
/**
* Upload a file into a dataset
*/
public DatasetFile uploadFile(String datasetId, MultipartFile file) {
Dataset dataset = datasetRepository.getById(datasetId);
if (dataset == null) {
throw new IllegalArgumentException("Dataset not found: " + datasetId);
}
String originalFilename = file.getOriginalFilename();
String fileName = originalFilename != null ? originalFilename : "file";
try {
// Persist the file to disk
Path targetLocation = this.fileStorageLocation.resolve(datasetId + File.separator + fileName);
// Make sure the parent directory exists before copying
Files.createDirectories(targetLocation.getParent());
Files.copy(file.getInputStream(), targetLocation, StandardCopyOption.REPLACE_EXISTING);
// Create the file entity (UUID primary key)
DatasetFile datasetFile = new DatasetFile();
datasetFile.setId(UUID.randomUUID().toString());
datasetFile.setDatasetId(datasetId);
datasetFile.setFileName(fileName);
datasetFile.setFilePath(targetLocation.toString());
datasetFile.setFileType(getFileExtension(originalFilename));
datasetFile.setFileSize(file.getSize());
datasetFile.setUploadTime(LocalDateTime.now());
datasetFile.setStatus(StatusConstants.DatasetFileStatuses.COMPLETED);
// Persist to the database
datasetFileRepository.save(datasetFile);
// Update dataset statistics
dataset.addFile(datasetFile);
datasetRepository.updateById(dataset);
return datasetFileRepository.findByDatasetIdAndFileName(datasetId, fileName);
} catch (IOException ex) {
log.error("Could not store file {}", fileName, ex);
throw new RuntimeException("Could not store file " + fileName, ex);
}
}
/**
* List the files of a dataset
*/
@Transactional(readOnly = true)
public Page<DatasetFile> getDatasetFiles(String datasetId, String fileType,
String status, Pageable pageable) {
RowBounds bounds = new RowBounds(pageable.getPageNumber() * pageable.getPageSize(), pageable.getPageSize());
List<DatasetFile> content = datasetFileRepository.findByCriteria(datasetId, fileType, status, bounds);
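// Approximate total for the page; an exact value would require a dedicated count query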
long total = content.size() < pageable.getPageSize() && pageable.getPageNumber() == 0 ? content.size() : content.size() + (long) pageable.getPageNumber() * pageable.getPageSize();
return new PageImpl<>(content, pageable, total);
}
/**
* Get file details
*/
@Transactional(readOnly = true)
public DatasetFile getDatasetFile(String datasetId, String fileId) {
DatasetFile file = datasetFileRepository.getById(fileId);
if (file == null) {
throw new IllegalArgumentException("File not found: " + fileId);
}
if (!file.getDatasetId().equals(datasetId)) {
throw new IllegalArgumentException("File does not belong to the specified dataset");
}
return file;
}
/**
* Delete a file
*/
public void deleteDatasetFile(String datasetId, String fileId) {
DatasetFile file = getDatasetFile(datasetId, fileId);
try {
Path filePath = Paths.get(file.getFilePath());
Files.deleteIfExists(filePath);
} catch (IOException ex) {
// Best-effort deletion of the physical file; ignore failures
}
datasetFileRepository.removeById(fileId);
Dataset dataset = datasetRepository.getById(datasetId);
// Roughly refresh the statistics (exact values could be recomputed from the DB)
dataset.setFileCount(Math.max(0, dataset.getFileCount() - 1));
dataset.setSizeBytes(Math.max(0, dataset.getSizeBytes() - (file.getFileSize() != null ? file.getFileSize() : 0)));
datasetRepository.updateById(dataset);
}
/**
* Download a single file
*/
@Transactional(readOnly = true)
public Resource downloadFile(String datasetId, String fileId) {
DatasetFile file = getDatasetFile(datasetId, fileId);
try {
Path filePath = Paths.get(file.getFilePath()).normalize();
Resource resource = new UrlResource(filePath.toUri());
if (resource.exists()) {
return resource;
} else {
throw new RuntimeException("File not found: " + file.getFileName());
}
} catch (MalformedURLException ex) {
throw new RuntimeException("File not found: " + file.getFileName(), ex);
}
}
/**
* Download all files of a dataset as a ZIP archive
*/
@Transactional(readOnly = true)
public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) {
List<DatasetFile> allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId);
response.setContentType("application/zip");
String zipName = String.format("dataset_%s.zip",
LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")));
response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName);
try (ZipOutputStream zos = new ZipOutputStream(response.getOutputStream())) {
for (DatasetFile file : allByDatasetId) {
addToZipFile(file, zos);
}
} catch (IOException e) {
log.error("Failed to download files in batches.", e);
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
}
}
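/**
 * Write a single dataset file into the ZIP stream; files missing on disk are skipped.
 */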
private void addToZipFile(DatasetFile file, ZipOutputStream zos) throws IOException {
if (file.getFilePath() == null || !Files.exists(Paths.get(file.getFilePath()))) {
log.warn("The file hasn't been found on filesystem, id: {}", file.getId());
return;
}
try (InputStream fis = Files.newInputStream(Paths.get(file.getFilePath()));
BufferedInputStream bis = new BufferedInputStream(fis)) {
ZipEntry zipEntry = new ZipEntry(file.getFileName());
zos.putNextEntry(zipEntry);
byte[] buffer = new byte[8192];
int length;
while ((length = bis.read(buffer)) >= 0) {
zos.write(buffer, 0, length);
}
zos.closeEntry();
}
}
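/**
 * Extract the file extension (without the dot), or null if there is none.
 */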
private String getFileExtension(String fileName) {
if (fileName == null || fileName.isEmpty()) {
return null;
}
int lastDotIndex = fileName.lastIndexOf(".");
if (lastDotIndex == -1) {
return null;
}
return fileName.substring(lastDotIndex + 1);
}
/**
* Pre-upload: register an upload session for chunked file uploads.
*
* @param chunkUploadRequest upload request
* @param datasetId          dataset id
* @return upload request id
*/
@Transactional
public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) {
ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build();
request.setUploadPath(datasetBasePath + File.separator + datasetId);
request.setTotalFileNum(chunkUploadRequest.getTotalFileNum());
request.setServiceId(DatasetConstant.SERVICE_ID);
DatasetFileUploadCheckInfo checkInfo = new DatasetFileUploadCheckInfo();
checkInfo.setDatasetId(datasetId);
checkInfo.setHasArchive(chunkUploadRequest.isHasArchive());
try {
ObjectMapper objectMapper = new ObjectMapper();
String checkInfoJson = objectMapper.writeValueAsString(checkInfo);
request.setCheckInfo(checkInfoJson);
} catch (JsonProcessingException e) {
throw new IllegalArgumentException("Failed to serialize checkInfo to JSON", e);
}
return fileService.preUpload(request);
}
/**
* Chunked upload of a single file part.
*
* @param uploadFileRequest upload request
*/
@Transactional
public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) {
FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest));
saveFileInfoToDb(uploadResult, uploadFileRequest, datasetId);
if (uploadResult.isAllFilesUploaded()) {
// All file chunks uploaded; parsing, metadata validation and handling of semi-structured files can be added here later if required
}
}
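/**
 * Persist the metadata of a fully uploaded file and refresh the dataset statistics.
 */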
private void saveFileInfoToDb(FileUploadResult fileUploadResult, UploadFileRequest uploadFile, String datasetId) {
if (Objects.isNull(fileUploadResult.getSavedFile())) {
// The chunked upload has not completed yet; nothing to persist
return;
}
Dataset dataset = datasetRepository.getById(datasetId);
File savedFile = fileUploadResult.getSavedFile();
LocalDateTime currentTime = LocalDateTime.now();
DatasetFile datasetFile = DatasetFile.builder()
.id(UUID.randomUUID().toString())
.datasetId(datasetId)
.fileSize(savedFile.length())
.uploadTime(currentTime)
.lastAccessTime(currentTime)
.fileName(uploadFile.getFileName())
.filePath(savedFile.getPath())
.fileType(AnalyzerUtils.getExtension(uploadFile.getFileName()))
.build();
datasetFileRepository.save(datasetFile);
dataset.addFile(datasetFile);
datasetRepository.updateById(dataset);
}
}

View File

@@ -0,0 +1,127 @@
package com.datamate.datamanagement.application;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
* File metadata scanning service
*/
@Slf4j
@Service
public class FileMetadataService {
/**
* Scan a list of file paths and extract file metadata.
*
* @param filePaths file paths to scan
* @param datasetId dataset id
* @return list of dataset files
*/
public List<DatasetFile> scanFiles(List<String> filePaths, String datasetId) {
List<DatasetFile> datasetFiles = new ArrayList<>();
if (filePaths == null || filePaths.isEmpty()) {
log.warn("文件路径列表为空,跳过扫描");
return datasetFiles;
}
for (String filePath : filePaths) {
try {
Path path = Paths.get(filePath);
if (!Files.exists(path)) {
log.warn("路径不存在: {}", filePath);
continue;
}
if (Files.isDirectory(path)) {
scanDirectory(datasetId, filePath, path, datasetFiles);
} else {
// Regular file: process it directly
DatasetFile datasetFile = extractFileMetadata(filePath, datasetId);
if (datasetFile != null) {
datasetFiles.add(datasetFile);
}
}
} catch (Exception e) {
log.error("扫描路径失败: {}, 错误: {}", filePath, e.getMessage(), e);
}
}
log.info("文件扫描完成,共扫描 {} 个文件", datasetFiles.size());
return datasetFiles;
}
private void scanDirectory(String datasetId, String filePath, Path path,
List<DatasetFile> datasetFiles) throws IOException {
// Directory: scan the files directly under it (non-recursive); close the stream to avoid leaking a file handle
List<Path> filesInDir;
try (var pathStream = Files.list(path)) {
filesInDir = pathStream.filter(Files::isRegularFile).toList();
}
for (Path file : filesInDir) {
try {
DatasetFile datasetFile = extractFileMetadata(file.toString(), datasetId);
if (datasetFile != null) {
datasetFiles.add(datasetFile);
}
} catch (Exception e) {
log.error("处理目录中的文件失败: {}, 错误: {}", file, e.getMessage(), e);
}
}
log.info("已扫描目录 {} 下的 {} 个文件", filePath, filesInDir.size());
}
/**
* Extract metadata for a single regular file.
*
* @param filePath  file path
* @param datasetId dataset id
* @return dataset file entity, or null if the path is not a regular file
*/
private DatasetFile extractFileMetadata(String filePath, String datasetId) throws IOException {
Path path = Paths.get(filePath);
if (!Files.exists(path)) {
log.warn("文件不存在: {}", filePath);
return null;
}
if (!Files.isRegularFile(path)) {
log.warn("路径不是文件: {}", filePath);
return null;
}
String fileName = path.getFileName().toString();
long fileSize = Files.size(path);
String fileType = getFileExtension(fileName);
return DatasetFile.builder()
.id(UUID.randomUUID().toString())
.datasetId(datasetId)
.fileName(fileName)
.filePath(filePath)
.fileSize(fileSize)
.fileType(fileType)
.uploadTime(LocalDateTime.now())
.lastAccessTime(LocalDateTime.now())
.status("UPLOADED")
.build();
}
/**
* Get the lower-case file extension, or "unknown" if there is none
*/
private String getFileExtension(String fileName) {
int lastDotIndex = fileName.lastIndexOf('.');
if (lastDotIndex > 0 && lastDotIndex < fileName.length() - 1) {
return fileName.substring(lastDotIndex + 1).toLowerCase();
}
return "unknown";
}
}

View File

@@ -0,0 +1,116 @@
package com.datamate.datamanagement.application;
import com.datamate.datamanagement.domain.model.dataset.Tag;
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.UUID;
/**
* Tag application service (UUID primary keys)
*/
@Service
@Transactional
public class TagApplicationService {
private final TagMapper tagMapper;
@Autowired
public TagApplicationService(TagMapper tagMapper) {
this.tagMapper = tagMapper;
}
/**
* Create a tag
*/
public Tag createTag(String name, String color, String description) {
// Make sure the name is not already taken
if (tagMapper.findByName(name) != null) {
throw new IllegalArgumentException("Tag with name '" + name + "' already exists");
}
Tag tag = new Tag(name, description, null, color);
tag.setUsageCount(0L);
tag.setId(UUID.randomUUID().toString());
tagMapper.insert(tag);
return tagMapper.findById(tag.getId());
}
/**
* Update a tag.
*
* @param tag tag entity to update; must carry a valid id
* @return the updated tag
*/
@Transactional
public Tag updateTag(Tag tag) {
Tag existingTag = tagMapper.findById(tag.getId());
if (existingTag == null) {
throw new IllegalArgumentException("Tag not found: " + tag.getId());
}
existingTag.setName(tag.getName());
existingTag.setColor(tag.getColor());
existingTag.setDescription(tag.getDescription());
tagMapper.update(existingTag);
return tagMapper.findById(existingTag.getId());
}
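/**
 * Delete tags by id; tags that are still in use cannot be deleted.
 *
 * @param tagIds ids of the tags to delete
 */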
@Transactional
public void deleteTag(List<String> tagIds) {
List<Tag> tags = tagMapper.findByIdIn(tagIds);
if (tags.stream().anyMatch(tag -> tag.getUsageCount() != null && tag.getUsageCount() > 0)) {
throw new IllegalArgumentException("Cannot delete tags that are in use");
}
if (CollectionUtils.isEmpty(tags)) {
return;
}
tagMapper.deleteTagsById(tags.stream().map(Tag::getId).toList());
}
/**
* Get all tags ordered by usage count
*/
@Transactional(readOnly = true)
public List<Tag> getAllTags() {
return tagMapper.findAllByOrderByUsageCountDesc();
}
/**
* Search tags by keyword
*/
@Transactional(readOnly = true)
public List<Tag> searchTags(String keyword) {
if (keyword == null || keyword.trim().isEmpty()) {
return getAllTags();
}
return tagMapper.findByKeyword(keyword.trim());
}
/**
* Get tag details
*/
@Transactional(readOnly = true)
public Tag getTag(String tagId) {
Tag tag = tagMapper.findById(tagId);
if (tag == null) {
throw new IllegalArgumentException("Tag not found: " + tagId);
}
return tag;
}
/**
* Get a tag by name
*/
@Transactional(readOnly = true)
public Tag getTagByName(String name) {
Tag tag = tagMapper.findByName(name);
if (tag == null) {
throw new IllegalArgumentException("Tag not found: " + name);
}
return tag;
}
}

View File

@@ -0,0 +1,41 @@
package com.datamate.datamanagement.common.enums;
/**
* Dataset status types.
* <p>A dataset can be in one of the following states:
* <p>DRAFT: the dataset is being created and is not yet complete.
* <p>ACTIVE: the dataset is active; it can be queried and used, and it can also be updated and deleted.
* <p>PROCESSING: the dataset is being processed; this may take some time, after which it becomes ACTIVE.
* <p>ARCHIVED: the dataset is archived; its files cannot be updated, but it can be unlocked back to ACTIVE.
* <p>PUBLISHED: the dataset is published and available externally; external users can query and use it.
* <p>DEPRECATED: the dataset is deprecated and should no longer be used.
*
* @author dallas
* @since 2025-10-17
*/
public enum DatasetStatusType {
/**
* Draft status
*/
DRAFT,
/**
* Active status
*/
ACTIVE,
/**
* Processing status
*/
PROCESSING,
/**
* Archived status
*/
ARCHIVED,
/**
* Published status
*/
PUBLISHED,
/**
* Deprecated status
*/
DEPRECATED
}

View File

@@ -0,0 +1,28 @@
package com.datamate.datamanagement.common.enums;
import lombok.Getter;
/**
* Dataset type value object
*
* @author DataMate
* @since 2025-10-15
*/
public enum DatasetType {
TEXT("text", "文本数据集"),
IMAGE("image", "图像数据集"),
AUDIO("audio", "音频数据集"),
VIDEO("video", "视频数据集"),
OTHER("other", "其他数据集");
@Getter
private final String code;
@Getter
private final String description;
DatasetType(String code, String description) {
this.code = code;
this.description = description;
}
}

View File

@@ -0,0 +1,11 @@
package com.datamate.datamanagement.domain.contants;
/**
* Dataset constants
*/
public interface DatasetConstant {
/**
* Service id
*/
String SERVICE_ID = "DATA_MANAGEMENT";
}

View File

@@ -0,0 +1,146 @@
package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
import com.datamate.common.domain.model.base.BaseEntity;
import com.datamate.datamanagement.common.enums.DatasetStatusType;
import com.datamate.datamanagement.common.enums.DatasetType;
import lombok.Getter;
import lombok.Setter;
import java.io.File;
import java.time.LocalDateTime;
import java.util.*;
/**
* Dataset entity (mapped to table t_dm_datasets)
*/
@Getter
@Setter
@TableName(value = "t_dm_datasets", autoResultMap = true)
public class Dataset extends BaseEntity<String> {
/**
* Dataset name
*/
private String name;
/**
* Dataset description
*/
private String description;
/**
* Dataset type
*/
private DatasetType datasetType;
/**
* Dataset category
*/
private String category;
/**
* Dataset path
*/
private String path;
/**
* Dataset format
*/
private String format;
/**
* Schema info in JSON format, used to interpret the file structure of this dataset
*/
private String schemaInfo;
/**
* Dataset size in bytes
*/
private Long sizeBytes = 0L;
/**
* Number of files
*/
private Long fileCount = 0L;
/**
* Number of records
*/
private Long recordCount = 0L;
/**
* Retention period in days
*/
private Integer retentionDays = 0;
/**
* Tag list, stored as JSON
*/
@TableField(typeHandler = JacksonTypeHandler.class)
private Collection<Tag> tags = new HashSet<>();
/**
* Extra metadata in JSON format
*/
private String metadata;
/**
* Dataset status
*/
private DatasetStatusType status;
/**
* Whether the dataset is public
*/
private Boolean isPublic = false;
/**
* Whether the dataset is featured
*/
private Boolean isFeatured = false;
/**
* Dataset version number
*/
private Long version = 0L;
@TableField(exist = false)
private List<DatasetFile> files = new ArrayList<>();
public Dataset() {
}
public Dataset(String name, String description, DatasetType datasetType, String category, String path,
String format, DatasetStatusType status, String createdBy) {
this.name = name;
this.description = description;
this.datasetType = datasetType;
this.category = category;
this.path = path;
this.format = format;
this.status = status;
this.createdBy = createdBy;
this.createdAt = LocalDateTime.now();
this.updatedAt = LocalDateTime.now();
}
public void initCreateParam(String datasetBasePath) {
this.id = UUID.randomUUID().toString();
this.path = datasetBasePath + File.separator + this.id;
this.status = DatasetStatusType.DRAFT;
}
public void updateBasicInfo(String name, String description, String category) {
if (name != null && !name.isEmpty()) this.name = name;
if (description != null) this.description = description;
if (category != null) this.category = category;
this.updatedAt = LocalDateTime.now();
}
public void updateStatus(DatasetStatusType status, String updatedBy) {
this.status = status;
this.updatedBy = updatedBy;
this.updatedAt = LocalDateTime.now();
}
public void addFile(DatasetFile file) {
this.files.add(file);
this.fileCount = this.fileCount + 1;
this.sizeBytes = this.sizeBytes + (file.getFileSize() != null ? file.getFileSize() : 0L);
this.updatedAt = LocalDateTime.now();
}
public void removeFile(DatasetFile file) {
if (this.files.remove(file)) {
this.fileCount = Math.max(0, this.fileCount - 1);
this.sizeBytes = Math.max(0, this.sizeBytes - (file.getFileSize() != null ? file.getFileSize() : 0L));
this.updatedAt = LocalDateTime.now();
}
}
}

View File

@@ -0,0 +1,35 @@
package com.datamate.datamanagement.domain.model.dataset;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.*;
import java.time.LocalDateTime;
import java.util.List;
/**
* Dataset file entity (mapped to table t_dm_dataset_files)
*/
@Getter
@Setter
@Builder
@NoArgsConstructor
@AllArgsConstructor
@TableName("t_dm_dataset_files")
public class DatasetFile {
@TableId
private String id; // UUID
private String datasetId; // UUID
private String fileName;
private String filePath;
private String fileType; // JPG/PNG/DCM/TXT
private Long fileSize; // bytes
private String checkSum;
private List<String> tags;
private String metadata;
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
private LocalDateTime uploadTime;
private LocalDateTime lastAccessTime;
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,18 @@
package com.datamate.datamanagement.domain.model.dataset;
import com.datamate.common.domain.model.UploadCheckInfo;
import lombok.Getter;
import lombok.Setter;
/**
* Upload pre-check info for dataset files
*/
@Getter
@Setter
public class DatasetFileUploadCheckInfo extends UploadCheckInfo {
/** Dataset id */
private String datasetId;
/** Whether the upload is an archive */
private boolean hasArchive;
}

View File

@@ -0,0 +1,33 @@
package com.datamate.datamanagement.domain.model.dataset;
/**
* Status constants: central definition of all status values
*/
public final class StatusConstants {
/**
* Dataset statuses
*/
public static final class DatasetStatuses {
public static final String DRAFT = "DRAFT";
public static final String ACTIVE = "ACTIVE";
public static final String ARCHIVED = "ARCHIVED";
public static final String PROCESSING = "PROCESSING";
private DatasetStatuses() {}
}
/**
* Dataset file statuses
*/
public static final class DatasetFileStatuses {
public static final String UPLOADED = "UPLOADED";
public static final String PROCESSING = "PROCESSING";
public static final String COMPLETED = "COMPLETED";
public static final String ERROR = "ERROR";
private DatasetFileStatuses() {}
}
private StatusConstants() {}
}

View File

@@ -0,0 +1,33 @@
package com.datamate.datamanagement.domain.model.dataset;
import com.datamate.common.domain.model.base.BaseEntity;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
* Tag entity (mapped to table t_dm_tags)
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class Tag extends BaseEntity<String> {
private String name;
private String description;
private String category;
private String color;
private Long usageCount = 0L;
public Tag(String name, String description, String category, String color) {
this.name = name;
this.description = description;
this.category = category;
this.color = color;
}
public void decrementUsage() {
if (this.usageCount != null && this.usageCount > 0) this.usageCount--;
}
}

View File

@@ -0,0 +1,22 @@
package com.datamate.datamanagement.infrastructure.client;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.datamanagement.infrastructure.client.dto.CollectionTaskDetailResponse;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
/**
* Feign client for the data collection service
*/
@FeignClient(name = "collection-service", url = "${collection.service.url:http://localhost:8080}")
public interface CollectionTaskClient {
/**
* Get the detail of a collection task.
* @param taskId task id
* @return task detail
*/
@GetMapping("/api/data-collection/tasks/{id}")
Response<CollectionTaskDetailResponse> getTaskDetail(@PathVariable("id") String taskId);
}

View File

@@ -0,0 +1,23 @@
package com.datamate.datamanagement.infrastructure.client.dto;
import lombok.Data;
import java.time.LocalDateTime;
import java.util.Map;
/**
* Collection task detail response
*/
@Data
public class CollectionTaskDetailResponse {
private String id;
private String name;
private String description;
private Map<String, Object> config;
private String status;
private String syncMode;
private String scheduleExpression;
private String lastExecutionId;
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,21 @@
package com.datamate.datamanagement.infrastructure.client.dto;
import lombok.Data;
import java.util.List;
/**
* Local collection task configuration
*/
@Data
public class LocalCollectionConfig {
/**
* Collection type
*/
private String type;
/**
* File paths
*/
private List<String> filePaths;
}

View File

@@ -0,0 +1,37 @@
package com.datamate.datamanagement.infrastructure.config;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.cache.CacheManager;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cache.concurrent.ConcurrentMapCacheManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.transaction.annotation.EnableTransactionManagement;
import org.springframework.web.multipart.support.StandardServletMultipartResolver;
/**
* Data management service configuration
*/
@Configuration
@EnableTransactionManagement
@EnableCaching
@EnableConfigurationProperties(DataManagementProperties.class)
public class DataManagementConfig {
/**
* Cache manager
*/
@Bean
public CacheManager cacheManager() {
return new ConcurrentMapCacheManager("datasets", "datasetFiles", "tags");
}
/**
* Multipart resolver for file uploads
*/
@Bean
public StandardServletMultipartResolver multipartResolver() {
return new StandardServletMultipartResolver();
}
}

View File

@@ -0,0 +1,82 @@
package com.datamate.datamanagement.infrastructure.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
* Configuration properties for the data management service
*/
@Configuration
@ConfigurationProperties(prefix = "datamanagement")
public class DataManagementProperties {
private FileStorage fileStorage = new FileStorage();
private Cache cache = new Cache();
public FileStorage getFileStorage() {
return fileStorage;
}
public void setFileStorage(FileStorage fileStorage) {
this.fileStorage = fileStorage;
}
public Cache getCache() {
return cache;
}
public void setCache(Cache cache) {
this.cache = cache;
}
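/**
 * File storage settings.
 */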
public static class FileStorage {
private String uploadDir = "./uploads";
private long maxFileSize = 10485760; // 10MB
private long maxRequestSize = 52428800; // 50MB
public String getUploadDir() {
return uploadDir;
}
public void setUploadDir(String uploadDir) {
this.uploadDir = uploadDir;
}
public long getMaxFileSize() {
return maxFileSize;
}
public void setMaxFileSize(long maxFileSize) {
this.maxFileSize = maxFileSize;
}
public long getMaxRequestSize() {
return maxRequestSize;
}
public void setMaxRequestSize(long maxRequestSize) {
this.maxRequestSize = maxRequestSize;
}
}
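/**
 * Cache settings.
 */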
public static class Cache {
private int ttl = 3600; // 1 hour
private int maxSize = 1000;
public int getTtl() {
return ttl;
}
public void setTtl(int ttl) {
this.ttl = ttl;
}
public int getMaxSize() {
return maxSize;
}
public void setMaxSize(int maxSize) {
this.maxSize = maxSize;
}
}
}

View File

@@ -0,0 +1,39 @@
package com.datamate.datamanagement.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
* Error codes of the data management module
*
* @author dallas
* @since 2025-10-20
*/
@Getter
@AllArgsConstructor
public enum DataManagementErrorCode implements ErrorCode {
/**
* Dataset not found
*/
DATASET_NOT_FOUND("data_management.0001", "数据集不存在"),
/**
* Dataset already exists
*/
DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"),
/**
* Invalid dataset status
*/
DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"),
/**
* Dataset tag not found
*/
DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"),
/**
* Dataset tag already exists
*/
DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在");
private final String code;
private final String message;
}

View File

@@ -0,0 +1,30 @@
package com.datamate.datamanagement.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.session.RowBounds;
import java.util.List;
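/**
 * MyBatis mapper for dataset file records (table t_dm_dataset_files).
 */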
@Mapper
public interface DatasetFileMapper extends BaseMapper<DatasetFile> {
DatasetFile findById(@Param("id") String id);
List<DatasetFile> findByDatasetId(@Param("datasetId") String datasetId, RowBounds rowBounds);
List<DatasetFile> findByDatasetIdAndStatus(@Param("datasetId") String datasetId, @Param("status") String status, RowBounds rowBounds);
List<DatasetFile> findByDatasetIdAndFileType(@Param("datasetId") String datasetId, @Param("fileType") String fileType, RowBounds rowBounds);
Long countByDatasetId(@Param("datasetId") String datasetId);
Long countCompletedByDatasetId(@Param("datasetId") String datasetId);
Long sumSizeByDatasetId(@Param("datasetId") String datasetId);
DatasetFile findByDatasetIdAndFileName(@Param("datasetId") String datasetId, @Param("fileName") String fileName);
List<DatasetFile> findAllByDatasetId(@Param("datasetId") String datasetId);
List<DatasetFile> findByCriteria(@Param("datasetId") String datasetId,
@Param("fileType") String fileType,
@Param("status") String status,
RowBounds rowBounds);
int insert(DatasetFile file);
int update(DatasetFile file);
int deleteById(@Param("id") String id);
}

View File

@@ -0,0 +1,33 @@
package com.datamate.datamanagement.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.interfaces.dto.AllDatasetStatisticsResponse;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.session.RowBounds;
import java.util.List;
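/**
 * MyBatis mapper for dataset records (table t_dm_datasets).
 */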
@Mapper
public interface DatasetMapper extends BaseMapper<Dataset> {
Dataset findById(@Param("id") String id);
Dataset findByName(@Param("name") String name);
List<Dataset> findByStatus(@Param("status") String status);
List<Dataset> findByCreatedBy(@Param("createdBy") String createdBy, RowBounds rowBounds);
List<Dataset> findByTypeCode(@Param("typeCode") String typeCode, RowBounds rowBounds);
List<Dataset> findByTagNames(@Param("tagNames") List<String> tagNames, RowBounds rowBounds);
List<Dataset> findByKeyword(@Param("keyword") String keyword, RowBounds rowBounds);
List<Dataset> findByCriteria(@Param("typeCode") String typeCode,
@Param("status") String status,
@Param("keyword") String keyword,
@Param("tagNames") List<String> tagNames,
RowBounds rowBounds);
long countByCriteria(@Param("typeCode") String typeCode,
@Param("status") String status,
@Param("keyword") String keyword,
@Param("tagNames") List<String> tagNames);
int deleteById(@Param("id") String id);
AllDatasetStatisticsResponse getAllDatasetStatistics();
}

View File

@@ -0,0 +1,27 @@
package com.datamate.datamanagement.infrastructure.persistence.mapper;
import com.datamate.datamanagement.domain.model.dataset.Tag;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.List;
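/**
 * MyBatis mapper for tag records and dataset-tag relations (table t_dm_tags).
 */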
@Mapper
public interface TagMapper {
Tag findById(@Param("id") String id);
Tag findByName(@Param("name") String name);
List<Tag> findByNameIn(@Param("list") List<String> names);
List<Tag> findByIdIn(@Param("ids") List<String> ids);
List<Tag> findByKeyword(@Param("keyword") String keyword);
List<Tag> findAllByOrderByUsageCountDesc();
int insert(Tag tag);
int update(Tag tag);
int updateUsageCount(@Param("id") String id, @Param("usageCount") Long usageCount);
// Relations with dataset
int insertDatasetTag(@Param("datasetId") String datasetId, @Param("tagId") String tagId);
int deleteDatasetTagsByDatasetId(@Param("datasetId") String datasetId);
List<Tag> findByDatasetId(@Param("datasetId") String datasetId);
void deleteTagsById(@Param("ids") List<String> ids);
}

View File

@@ -0,0 +1,27 @@
package com.datamate.datamanagement.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import org.apache.ibatis.session.RowBounds;
import java.util.List;
/**
* Dataset file repository interface
*
* @author dallas
* @since 2025-10-15
*/
public interface DatasetFileRepository extends IRepository<DatasetFile> {
Long countByDatasetId(String datasetId);
Long countCompletedByDatasetId(String datasetId);
Long sumSizeByDatasetId(String datasetId);
List<DatasetFile> findAllByDatasetId(String datasetId);
DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName);
List<DatasetFile> findByCriteria(String datasetId, String fileType, String status, RowBounds bounds);
}

View File

@@ -0,0 +1,29 @@
package com.datamate.datamanagement.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.interfaces.dto.AllDatasetStatisticsResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetPagingQuery;
import org.apache.ibatis.session.RowBounds;
import java.util.List;
/**
* Dataset repository interface
*
* @author dallas
* @since 2025-10-15
*/
public interface DatasetRepository extends IRepository<Dataset> {
Dataset findByName(String name);
List<Dataset> findByCriteria(String type, String status, String keyword, List<String> tagList, RowBounds bounds);
long countByCriteria(String type, String status, String keyword, List<String> tagList);
AllDatasetStatisticsResponse getAllDatasetStatistics();
IPage<Dataset> findByCriteria(IPage<Dataset> page, DatasetPagingQuery query);
}

View File

@@ -0,0 +1,54 @@
package com.datamate.datamanagement.infrastructure.persistence.repository.impl;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import lombok.RequiredArgsConstructor;
import org.apache.ibatis.session.RowBounds;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
* Dataset file repository implementation
*
* @author dallas
* @since 2025-10-15
*/
@Repository
@RequiredArgsConstructor
public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper, DatasetFile> implements DatasetFileRepository {
private final DatasetFileMapper datasetFileMapper;
@Override
public Long countByDatasetId(String datasetId) {
return datasetFileMapper.selectCount(new LambdaQueryWrapper<DatasetFile>().eq(DatasetFile::getDatasetId, datasetId));
}
@Override
public Long countCompletedByDatasetId(String datasetId) {
return datasetFileMapper.countCompletedByDatasetId(datasetId);
}
@Override
public Long sumSizeByDatasetId(String datasetId) {
return datasetFileMapper.sumSizeByDatasetId(datasetId);
}
@Override
public List<DatasetFile> findAllByDatasetId(String datasetId) {
return datasetFileMapper.findAllByDatasetId(datasetId);
}
@Override
public DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName) {
return datasetFileMapper.findByDatasetIdAndFileName(datasetId, fileName);
}
@Override
public List<DatasetFile> findByCriteria(String datasetId, String fileType, String status, RowBounds bounds) {
return datasetFileMapper.findByCriteria(datasetId, fileType, status, bounds);
}
}

View File

@@ -0,0 +1,73 @@
package com.datamate.datamanagement.infrastructure.persistence.repository.impl;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetMapper;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
import com.datamate.datamanagement.interfaces.dto.AllDatasetStatisticsResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetPagingQuery;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.apache.ibatis.session.RowBounds;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
* Dataset repository implementation
*
* @author dallas
* @since 2025-10-15
*/
@Repository
@RequiredArgsConstructor
public class DatasetRepositoryImpl extends CrudRepository<DatasetMapper, Dataset> implements DatasetRepository {
private final DatasetMapper datasetMapper;
@Override
public Dataset findByName(String name) {
return datasetMapper.selectOne(new LambdaQueryWrapper<Dataset>().eq(Dataset::getName, name));
}
@Override
public List<Dataset> findByCriteria(String type, String status, String keyword, List<String> tagList,
RowBounds bounds) {
return datasetMapper.findByCriteria(type, status, keyword, tagList, bounds);
}
@Override
public long countByCriteria(String type, String status, String keyword, List<String> tagList) {
return datasetMapper.countByCriteria(type, status, keyword, tagList);
}
@Override
public AllDatasetStatisticsResponse getAllDatasetStatistics() {
return datasetMapper.getAllDatasetStatistics();
}
@Override
public IPage<Dataset> findByCriteria(IPage<Dataset> page, DatasetPagingQuery query) {
LambdaQueryWrapper<Dataset> wrapper = new LambdaQueryWrapper<Dataset>()
.eq(query.getType() != null, Dataset::getDatasetType, query.getType())
.eq(query.getStatus() != null, Dataset::getStatus, query.getStatus())
// The keyword matches either the name or the description
.and(StringUtils.isNotBlank(query.getKeyword()), w -> w
.like(Dataset::getName, query.getKeyword())
.or()
.like(Dataset::getDescription, query.getKeyword()));
/*
Tag filter: every requested tag name must be present in the JSON tags column, see {@link Tag}
*/
if (query.getTags() != null) {
for (String tagName : query.getTags()) {
wrapper.and(w ->
w.apply("tags IS NOT NULL " +
"AND JSON_VALID(tags) = 1 " +
"AND JSON_LENGTH(tags) > 0 " +
"AND JSON_SEARCH(tags, 'one', {0}, NULL, '$[*].name') IS NOT NULL", tagName)
);
}
}
wrapper.orderByDesc(Dataset::getCreatedAt);
return datasetMapper.selectPage(page, wrapper);
}
}

View File

@@ -0,0 +1,53 @@
package com.datamate.datamanagement.interfaces.converter;
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.common.domain.model.ChunkUploadRequest;
import com.datamate.datamanagement.domain.model.dataset.Dataset;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
import org.mapstruct.factory.Mappers;
import java.util.List;
/**
* Dataset converter (datasets and dataset files)
*/
@Mapper
public interface DatasetConverter {
/** Singleton instance */
DatasetConverter INSTANCE = Mappers.getMapper(DatasetConverter.class);
/**
* Convert a dataset to a response
*/
@Mapping(source = "sizeBytes", target = "totalSize")
@Mapping(source = "path", target = "targetLocation")
DatasetResponse convertToResponse(Dataset dataset);
/**
* Convert a create-dataset request to a dataset entity
*/
@Mapping(target = "tags", ignore = true)
Dataset convertToDataset(CreateDatasetRequest createDatasetRequest);
/**
* Convert an upload-file request to a chunk upload request
*/
ChunkUploadRequest toChunkUploadRequest(UploadFileRequest uploadFileRequest);
/**
* Convert a list of datasets to responses
*/
List<DatasetResponse> convertToResponse(List<Dataset> datasets);
/**
* Convert a dataset file to a response
*/
DatasetFileResponse convertToResponse(DatasetFile datasetFile);
}

View File

@@ -0,0 +1,30 @@
package com.datamate.datamanagement.interfaces.converter;
import com.datamate.datamanagement.domain.model.dataset.Tag;
import com.datamate.datamanagement.interfaces.dto.TagResponse;
import com.datamate.datamanagement.interfaces.dto.UpdateTagRequest;
import org.mapstruct.Mapper;
import org.mapstruct.factory.Mappers;
/**
* Tag converter
*/
@Mapper
public interface TagConverter {
/** Singleton instance */
TagConverter INSTANCE = Mappers.getMapper(TagConverter.class);
/**
* Convert an UpdateTagRequest to a Tag entity.
* @param request update-tag request DTO
* @return tag entity
*/
Tag updateRequestToTag(UpdateTagRequest request);
/**
* Convert a Tag entity to a TagResponse DTO.
* @param tag tag entity
* @return tag response DTO
*/
TagResponse convertToResponse(Tag tag);
}

View File

@@ -0,0 +1,20 @@
package com.datamate.datamanagement.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
/**
* Aggregate statistics response DTO across all datasets
*/
@Getter
@Setter
public class AllDatasetStatisticsResponse {
/** Total number of datasets */
private Integer totalDatasets;
/** Total size in bytes */
private Long totalSize;
/** Total number of files */
private Long totalFiles;
}

View File

@@ -0,0 +1,35 @@
package com.datamate.datamanagement.interfaces.dto;
import com.datamate.datamanagement.common.enums.DatasetType;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.List;
/**
* Create-dataset request DTO
*/
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class CreateDatasetRequest {
/** Dataset name */
@NotBlank(message = "数据集名称不能为空")
private String name;
/** Dataset description */
private String description;
/** Dataset type */
@NotNull(message = "数据集类型不能为空")
private DatasetType datasetType;
/** Tag names */
private List<String> tags;
/** Data source (collection task id) */
private String dataSource;
/** Target location */
private String targetLocation;
}

View File

@@ -0,0 +1,18 @@
package com.datamate.datamanagement.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
/**
* Create-tag request DTO
*/
@Getter
@Setter
public class CreateTagRequest {
/** Tag name */
private String name;
/** Tag color */
private String color;
/** Tag description */
private String description;
}

Some files were not shown because too many files have changed in this diff.