You've already forked DataMate
优化清洗重试机制,优化清洗进度展示,修复模板无法展示参数 (#113)
* bugfix: 模板无法展示参数 * bugfix: 优化清洗进度展示 * bugfix: 优化清洗重试机制
This commit is contained in:
@@ -41,6 +41,7 @@ import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@Slf4j
|
||||
@@ -116,7 +117,7 @@ public class CleaningTaskService {
|
||||
|
||||
prepareTask(task, request.getInstance());
|
||||
scanDataset(taskId, request.getSrcDatasetId());
|
||||
executeTask(taskId);
|
||||
taskScheduler.executeTask(taskId);
|
||||
return task;
|
||||
}
|
||||
|
||||
@@ -170,6 +171,11 @@ public class CleaningTaskService {
|
||||
}
|
||||
|
||||
public void executeTask(String taskId) {
|
||||
List<CleaningResultDto> failed = cleaningResultRepo.findByInstanceId(taskId, "FAILED");
|
||||
Set<String> failedSet = failed.stream().map(CleaningResultDto::getSrcFileId).collect(Collectors.toSet());
|
||||
CleaningTaskDto task = cleaningTaskRepo.findTaskById(taskId);
|
||||
scanDataset(taskId, task.getSrcDatasetId(), failedSet);
|
||||
cleaningResultRepo.deleteByInstanceId(taskId, "FAILED");
|
||||
taskScheduler.executeTask(taskId);
|
||||
}
|
||||
|
||||
@@ -226,6 +232,29 @@ public class CleaningTaskService {
|
||||
} while (pageNumber < datasetFiles.getTotalPages());
|
||||
}
|
||||
|
||||
private void scanDataset(String taskId, String srcDatasetId, Set<String> failedFiles) {
|
||||
int pageNumber = 0;
|
||||
int pageSize = 500;
|
||||
PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize);
|
||||
PagedResponse<DatasetFile> datasetFiles;
|
||||
do {
|
||||
datasetFiles = datasetFileService.getDatasetFiles(srcDatasetId, null, null,null, pageRequest);
|
||||
if (datasetFiles.getContent().isEmpty()) {
|
||||
break;
|
||||
}
|
||||
List<Map<String, Object>> files = datasetFiles.getContent().stream()
|
||||
.filter(content -> failedFiles.contains(content.getId()))
|
||||
.map(content -> Map.of("fileName", (Object) content.getFileName(),
|
||||
"fileSize", content.getFileSize(),
|
||||
"filePath", content.getFilePath(),
|
||||
"fileType", content.getFileType(),
|
||||
"fileId", content.getId()))
|
||||
.toList();
|
||||
writeListMapToJsonlFile(files, FLOW_PATH + "/" + taskId + "/dataset.jsonl");
|
||||
pageNumber += 1;
|
||||
} while (pageNumber < datasetFiles.getTotalPages());
|
||||
}
|
||||
|
||||
private void writeListMapToJsonlFile(List<Map<String, Object>> mapList, String fileName) {
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
|
||||
@@ -6,8 +6,13 @@ import com.datamate.cleaning.domain.repository.OperatorInstanceRepository;
|
||||
import com.datamate.cleaning.infrastructure.validator.CleanTaskValidator;
|
||||
import com.datamate.cleaning.interfaces.dto.*;
|
||||
import com.datamate.cleaning.domain.model.entity.TemplateWithInstance;
|
||||
import com.datamate.common.infrastructure.exception.BusinessException;
|
||||
import com.datamate.operator.application.OperatorService;
|
||||
import com.datamate.operator.domain.repository.OperatorViewRepository;
|
||||
import com.datamate.operator.infrastructure.exception.OperatorErrorCode;
|
||||
import com.datamate.operator.interfaces.dto.OperatorDto;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
@@ -31,6 +36,10 @@ public class CleaningTemplateService {
|
||||
|
||||
private final CleanTaskValidator cleanTaskValidator;
|
||||
|
||||
private final OperatorService operatorService;
|
||||
|
||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
public List<CleaningTemplateDto> getTemplates(String keywords) {
|
||||
List<OperatorDto> allOperators =
|
||||
operatorViewRepo.findOperatorsByCriteria(null, null, null, null, null);
|
||||
@@ -50,7 +59,12 @@ public class CleaningTemplateService {
|
||||
.map(v -> {
|
||||
OperatorDto operator = operatorsMap.get(v.getOperatorId());
|
||||
if (StringUtils.isNotBlank(v.getSettingsOverride())) {
|
||||
operator.setSettings(v.getSettingsOverride());
|
||||
try {
|
||||
operator.setOverrides(objectMapper.readValue(v.getSettingsOverride(), Map.class));
|
||||
} catch (JsonProcessingException e) {
|
||||
throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage());
|
||||
}
|
||||
operatorService.overrideSettings(operator);
|
||||
}
|
||||
return operator;
|
||||
}).toList());
|
||||
|
||||
@@ -10,7 +10,11 @@ import java.util.List;
|
||||
public interface CleaningResultRepository extends IRepository<CleaningResult> {
|
||||
void deleteByInstanceId(String instanceId);
|
||||
|
||||
void deleteByInstanceId(String instanceId, String status);
|
||||
|
||||
int[] countByInstanceId(String instanceId);
|
||||
|
||||
List<CleaningResultDto> findByInstanceId(String instanceId);
|
||||
|
||||
List<CleaningResultDto> findByInstanceId(String instanceId, String status);
|
||||
}
|
||||
|
||||
@@ -22,8 +22,14 @@ public class CleaningResultRepositoryImpl extends CrudRepository<CleaningResultM
|
||||
|
||||
@Override
|
||||
public void deleteByInstanceId(String instanceId) {
|
||||
deleteByInstanceId(instanceId, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteByInstanceId(String instanceId, String status) {
|
||||
LambdaQueryWrapper<CleaningResult> queryWrapper = new LambdaQueryWrapper<>();
|
||||
queryWrapper.eq(CleaningResult::getInstanceId, instanceId);
|
||||
queryWrapper.eq(CleaningResult::getInstanceId, instanceId)
|
||||
.eq(StringUtils.isNotBlank(status), CleaningResult::getStatus, status);
|
||||
mapper.delete(queryWrapper);
|
||||
}
|
||||
|
||||
@@ -40,8 +46,13 @@ public class CleaningResultRepositoryImpl extends CrudRepository<CleaningResultM
|
||||
}
|
||||
|
||||
public List<CleaningResultDto> findByInstanceId(String instanceId) {
|
||||
return findByInstanceId(instanceId, null);
|
||||
}
|
||||
|
||||
public List<CleaningResultDto> findByInstanceId(String instanceId, String status) {
|
||||
LambdaQueryWrapper<CleaningResult> queryWrapper = new LambdaQueryWrapper<>();
|
||||
queryWrapper.eq(CleaningResult::getInstanceId, instanceId);
|
||||
queryWrapper.eq(CleaningResult::getInstanceId, instanceId)
|
||||
.eq(StringUtils.isNotBlank(status), CleaningResult::getStatus, status);
|
||||
return CleaningResultConverter.INSTANCE.convertEntityToDto(mapper.selectList(queryWrapper));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,7 +128,7 @@ public class OperatorService {
|
||||
return operatorBasePath + File.separator + "extract" + File.separator + fileName;
|
||||
}
|
||||
|
||||
private void overrideSettings(OperatorDto operatorDto) {
|
||||
public void overrideSettings(OperatorDto operatorDto) {
|
||||
if (StringUtils.isBlank(operatorDto.getSettings()) || MapUtils.isEmpty(operatorDto.getOverrides())) {
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user