feat: support NPU autoscaling (#197)

* feat: dynamic NPU scheduling
* feat: dataset pagination optimization
* feat: support NPU autoscaling
* feat: support NPU autoscaling
* feat: support NPU autoscaling
* feat: clean code
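The core of the change is that operators can now request Ray's custom "npu" logical resource, while the chart enables KubeRay's in-tree autoscaler and adds an NPU worker group that scales from 0 to 8 replicas. As a rough, hypothetical sketch (not part of the patch; the Identity class is illustrative), a Ray Data actor pool that asks for the "npu" resource is what makes the autoscaler bring npuGroup workers up on demand:

    # Hypothetical sketch: requesting the custom "npu" resource so the KubeRay
    # autoscaler scales the npuGroup workers (which advertise '{"npu": 1}') up from 0.
    import ray
    import ray.data as rd

    ray.init()  # assumes RAY_ADDRESS points at the cluster head started by the chart

    class Identity:  # illustrative stand-in for an operator actor
        def __call__(self, batch):
            return batch

    ds = rd.range(8).map_batches(
        Identity,
        resources={"npu": 1},  # same mechanism the runtime uses in _run_single_op
        num_cpus=1,
        compute=rd.ActorPoolStrategy(min_size=1, max_size=8),
    )
    ds.materialize()  # pending "npu" demand makes the autoscaler add NPU workers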
Makefile
@@ -181,7 +181,7 @@ build-%: %-docker-build
 	@:
 
 .PHONY: build
-build: database-docker-build backend-docker-build frontend-docker-build runtime-docker-build backend-python-docker-build
+build: database-docker-build gateway-docker-build backend-docker-build frontend-docker-build runtime-docker-build backend-python-docker-build
 
 # ========== Utility Targets ==========
 
@@ -20,12 +20,19 @@ import com.datamate.datamanagement.common.enums.DatasetType;
 import com.datamate.datamanagement.domain.model.dataset.Dataset;
 import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
 import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
+import com.datamate.operator.domain.repository.OperatorRepository;
+import com.datamate.operator.infrastructure.exception.OperatorErrorCode;
+import com.datamate.operator.interfaces.dto.OperatorDto;
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.PropertyNamingStrategies;
 import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 import org.yaml.snakeyaml.DumperOptions;
@@ -39,6 +46,8 @@ import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.*;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -52,6 +61,8 @@ public class CleaningTaskService {
 
     private final OperatorInstanceRepository operatorInstanceRepo;
 
+    private final OperatorRepository operatorRepo;
+
     private final CleaningResultRepository cleaningResultRepo;
 
     private final CleaningTaskScheduler taskScheduler;
@@ -66,11 +77,16 @@ public class CleaningTaskService {
 
     private final String FLOW_PATH = "/flow";
 
-    private final Pattern LEVEL_PATTERN = Pattern.compile(
-            "\\b(TRACE|DEBUG|INFO|WARN|WARNING|ERROR|FATAL)\\b",
-            Pattern.CASE_INSENSITIVE
+    private static final Pattern STANDARD_LEVEL_PATTERN = Pattern.compile(
+            "\\b(DEBUG|Debug|INFO|Info|WARN|Warn|WARNING|Warning|ERROR|Error|FATAL|Fatal)\\b"
     );
 
+    private static final Pattern EXCEPTION_SUFFIX_PATTERN = Pattern.compile(
+            "\\b\\w+(Warning|Error|Exception)\\b"
+    );
+
+    private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
     public List<CleaningTaskDto> getTasks(String status, String keywords, Integer page, Integer size) {
         List<CleaningTaskDto> tasks = cleaningTaskRepo.findTasks(status, keywords, page, size);
         tasks.forEach(this::setProcess);
@@ -133,6 +149,7 @@ public class CleaningTaskService {
     }
 
     public List<CleaningTaskLog> getTaskLog(String taskId) {
+        cleanTaskValidator.checkTaskId(taskId);
         String logPath = FLOW_PATH + "/" + taskId + "/output.log";
         try (Stream<String> lines = Files.lines(Paths.get(logPath))) {
             List<CleaningTaskLog> logs = new ArrayList<>();
@@ -156,18 +173,31 @@ public class CleaningTaskService {
             return defaultLevel;
         }
 
-        Matcher matcher = LEVEL_PATTERN.matcher(logLine);
-        if (matcher.find()) {
-            return matcher.group(1).toUpperCase();
+        Matcher stdMatcher = STANDARD_LEVEL_PATTERN.matcher(logLine);
+        if (stdMatcher.find()) {
+            return stdMatcher.group(1).toUpperCase();
+        }
+
+        Matcher exMatcher = EXCEPTION_SUFFIX_PATTERN.matcher(logLine);
+        if (exMatcher.find()) {
+            String match = exMatcher.group(1).toUpperCase();
+            if ("WARNING".equals(match)) return "WARN";
+            if ("ERROR".equals(match) || "EXCEPTION".equals(match)) return "ERROR";
         }
         return defaultLevel;
     }
 
     @Transactional
     public void deleteTask(String taskId) {
+        cleanTaskValidator.checkTaskId(taskId);
         cleaningTaskRepo.deleteTaskById(taskId);
         operatorInstanceRepo.deleteByInstanceId(taskId);
         cleaningResultRepo.deleteByInstanceId(taskId);
+        try {
+            FileUtils.deleteDirectory(new File(FLOW_PATH + "/" + taskId));
+        } catch (IOException e) {
+            log.warn("Can't delete flow path with task id: {}.", taskId, e);
+        }
     }
 
     public void executeTask(String taskId) {
@@ -180,6 +210,11 @@ public class CleaningTaskService {
     }
 
     private void prepareTask(CleaningTaskDto task, List<OperatorInstanceDto> instances) {
+        List<OperatorDto> allOperators = operatorRepo.findAllOperators();
+        Map<String, OperatorDto> defaultSettings = allOperators.stream()
+                .filter(operatorDto -> StringUtils.isNotBlank(operatorDto.getSettings()))
+                .collect(Collectors.toMap(OperatorDto::getId, Function.identity()));
+
         TaskProcess process = new TaskProcess();
         process.setInstanceId(task.getId());
         process.setDatasetId(task.getDestDatasetId());
@@ -187,7 +222,14 @@ public class CleaningTaskService {
         process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
         process.setExecutorType(ExecutorType.DATAMATE.getValue());
         process.setProcess(instances.stream()
-                .map(instance -> Map.of(instance.getId(), instance.getOverrides()))
+                .map(instance -> {
+                    OperatorDto operatorDto = defaultSettings.get(instance.getId());
+                    Map<String, Object> stringObjectMap = getDefaultValue(operatorDto);
+                    stringObjectMap.putAll(instance.getOverrides());
+                    Map<String, Object> runtime = getRuntime(operatorDto);
+                    stringObjectMap.putAll(runtime);
+                    return Map.of(instance.getId(), stringObjectMap);
+                })
                 .toList());
 
         ObjectMapper jsonMapper = new ObjectMapper(new YAMLFactory());
@@ -210,67 +252,113 @@ public class CleaningTaskService {
         }
     }
 
-    private void scanDataset(String taskId, String srcDatasetId) {
-        int pageNumber = 0;
-        int pageSize = 500;
-        PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize);
-        PagedResponse<DatasetFile> datasetFiles;
-        do {
-            datasetFiles = datasetFileService.getDatasetFiles(srcDatasetId, null, null,null, pageRequest);
-            if (datasetFiles.getContent().isEmpty()) {
-                break;
+    private Map<String, Object> getDefaultValue(OperatorDto operatorDto) {
+        if (StringUtils.isBlank(operatorDto.getSettings())) {
+            return new HashMap<>();
         }
-            List<Map<String, Object>> files = datasetFiles.getContent().stream()
-                    .map(content -> Map.of("fileName", (Object) content.getFileName(),
-                            "fileSize", content.getFileSize(),
-                            "filePath", content.getFilePath(),
-                            "fileType", content.getFileType(),
-                            "fileId", content.getId()))
-                    .toList();
-            writeListMapToJsonlFile(files, FLOW_PATH + "/" + taskId + "/dataset.jsonl");
-            pageNumber += 1;
-        } while (pageNumber < datasetFiles.getTotalPages());
+        Map<String, Object> defaultSettings = new HashMap<>();
+        try {
+            Map<String, Map<String, Object>> settings = OBJECT_MAPPER.readValue(operatorDto.getSettings(), Map.class);
+            for (Map.Entry<String, Map<String, Object>> entry : settings.entrySet()) {
+                String key = entry.getKey();
+                Map<String, Object> setting = entry.getValue();
+                String type = setting.get("type").toString();
+                switch (type) {
+                    case "slider":
+                    case "switch":
+                    case "select":
+                    case "input":
+                    case "radio":
+                    case "checkbox":
+                        if (setting.containsKey("defaultVal")) {
+                            defaultSettings.put(key, setting.get("defaultVal"));
+                        }
+                        break;
+                    case "range":
+                        List<Object> rangeDefault = getRangeDefault(setting);
+                        if (CollectionUtils.isNotEmpty(rangeDefault)) {
+                            defaultSettings.put(key, rangeDefault);
+                        }
+                        break;
+                    default:
+                }
+            }
+            return defaultSettings;
+        } catch (JsonProcessingException e) {
+            throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage());
+        }
+    }
+
+    private List<Object> getRangeDefault(Map<String, Object> setting) {
+        List<Object> defaultValue = new ArrayList<>();
+        Object properties = setting.get("properties");
+        if (properties instanceof List<?> list) {
+            for (Object o : list) {
+                Map<String, Object> map = OBJECT_MAPPER.convertValue(o, Map.class);
+                if (map.containsKey("defaultVal")) {
+                    defaultValue.add(map.get("defaultVal"));
+                }
+            }
+        }
+        return defaultValue;
+    }
+
+    private Map<String, Object> getRuntime(OperatorDto operatorDto) {
+        if (StringUtils.isBlank(operatorDto.getRuntime())) {
+            return new HashMap<>();
+        }
+        try {
+            return OBJECT_MAPPER.readValue(operatorDto.getRuntime(), Map.class);
+        } catch (JsonProcessingException e) {
+            throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage());
+        }
+    }
+
+    private void scanDataset(String taskId, String srcDatasetId) {
+        doScan(taskId, srcDatasetId, file -> true);
     }
 
     private void scanDataset(String taskId, String srcDatasetId, Set<String> succeedFiles) {
+        doScan(taskId, srcDatasetId, file -> !succeedFiles.contains(file.getId()));
+    }
+
+    private void doScan(String taskId, String srcDatasetId, Predicate<DatasetFile> filterCondition) {
+        cleanTaskValidator.checkTaskId(taskId);
+        String targetFilePath = FLOW_PATH + "/" + taskId + "/dataset.jsonl";
+        File targetFile = new File(targetFilePath);
+        if (targetFile.getParentFile() != null && !targetFile.getParentFile().exists()) {
+            targetFile.getParentFile().mkdirs();
+        }
+
         int pageNumber = 0;
         int pageSize = 500;
-        PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize);
+        try (BufferedWriter writer = new BufferedWriter(new FileWriter(targetFile))) {
             PagedResponse<DatasetFile> datasetFiles;
             do {
+                PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize);
                 datasetFiles = datasetFileService.getDatasetFiles(srcDatasetId, null, null, null, pageRequest);
                 if (datasetFiles.getContent().isEmpty()) {
                     break;
                 }
-            List<Map<String, Object>> files = datasetFiles.getContent().stream()
-                    .filter(content -> !succeedFiles.contains(content.getId()))
-                    .map(content -> Map.of("fileName", (Object) content.getFileName(),
+                for (DatasetFile content : datasetFiles.getContent()) {
+                    if (!filterCondition.test(content)) {
+                        continue;
+                    }
+                    Map<String, Object> fileMap = Map.of(
+                            "fileName", content.getFileName(),
                             "fileSize", content.getFileSize(),
                             "filePath", content.getFilePath(),
                             "fileType", content.getFileType(),
-                            "fileId", content.getId()))
-                    .toList();
-            writeListMapToJsonlFile(files, FLOW_PATH + "/" + taskId + "/dataset.jsonl");
-            pageNumber += 1;
-        } while (pageNumber < datasetFiles.getTotalPages());
-    }
-
-    private void writeListMapToJsonlFile(List<Map<String, Object>> mapList, String fileName) {
-        ObjectMapper objectMapper = new ObjectMapper();
-
-        try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
-            if (!mapList.isEmpty()) { // check that the list is not empty to avoid an exception
-                String jsonString = objectMapper.writeValueAsString(mapList.getFirst());
-                writer.write(jsonString);
-
-                for (int i = 1; i < mapList.size(); i++) {
+                            "fileId", content.getId()
+                    );
+                    writer.write(OBJECT_MAPPER.writeValueAsString(fileMap));
                     writer.newLine();
-                    jsonString = objectMapper.writeValueAsString(mapList.get(i));
-                    writer.write(jsonString);
                 }
-            }
+                pageNumber++;
+            } while (pageNumber < datasetFiles.getTotalPages());
         } catch (IOException e) {
-            log.error("Failed to prepare dataset.jsonl.", e);
+            log.error("Failed to write dataset.jsonl for taskId: {}", taskId, e);
             throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
         }
     }
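To make the merge order in prepareTask concrete — operator defaults from settings, then per-instance overrides, then the operator's runtime resource hints — here is a small illustrative sketch (the "threshold" key is made up; "npu", "cpu" and "arch" are the fields the Ray runtime later reads from init_kwargs):

    # Illustrative only: mirrors getDefaultValue() -> putAll(overrides) -> putAll(getRuntime())
    import json

    default_settings = {"threshold": 0.5}   # hypothetical defaultVal from the operator's settings
    overrides = {"threshold": 0.8}          # per-instance user overrides win over defaults
    runtime = json.loads('{"npu": 1, "cpu": 1, "arch": "arm"}')  # assumed runtime hints per operator

    merged = {**default_settings, **overrides, **runtime}
    print(merged)  # {'threshold': 0.8, 'npu': 1, 'cpu': 1, 'arch': 'arm'}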
@@ -4,12 +4,14 @@ import com.datamate.cleaning.common.exception.CleanErrorCode;
 import com.datamate.cleaning.domain.repository.CleaningTaskRepository;
 import com.datamate.cleaning.interfaces.dto.OperatorInstanceDto;
 import com.datamate.common.infrastructure.exception.BusinessException;
+import com.datamate.common.infrastructure.exception.SystemErrorCode;
 import lombok.RequiredArgsConstructor;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.stereotype.Component;
 
 import java.util.List;
 import java.util.Locale;
+import java.util.regex.Pattern;
 
 
 @Component
@@ -17,6 +19,10 @@ import java.util.Locale;
 public class CleanTaskValidator {
     private final CleaningTaskRepository cleaningTaskRepo;
 
+    private final Pattern UUID_PATTERN = Pattern.compile(
+            "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
+    );
+
     public void checkNameDuplication(String name) {
         if (cleaningTaskRepo.isNameExist(name)) {
             throw BusinessException.of(CleanErrorCode.DUPLICATE_TASK_NAME);
@@ -39,4 +45,10 @@ public class CleanTaskValidator {
                     front.getName(), back.getName()));
         }
     }
+
+    public void checkTaskId(String id) {
+        if (id == null || !UUID_PATTERN.matcher(id).matches()) {
+            throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
+        }
+    }
 }
@@ -126,7 +126,7 @@ worker:
   groupName: workergroup
   replicas: 1
   minReplicas: 1
-  maxReplicas: 3
+  maxReplicas: 1
   labels: {}
   serviceAccountName: ""
   restartPolicy: ""
@@ -189,8 +189,20 @@ runtime:
 ray-cluster:
   enabled: true
   head:
+    enableInTreeAutoscaling: true
+    autoscalerOptions:
+      upscalingMode: Default
+      idleTimeoutSeconds: 60
+      imagePullPolicy: IfNotPresent
+      resources:
+        limits:
+          cpu: "500m"
+          memory: "512Mi"
+        requests:
+          cpu: "500m"
+          memory: "512Mi"
     rayStartParams:
-      num-cpus: '0'
+      num-cpus: "0"
     containerEnv:
       - name: RAY_DEDUP_LOGS
         value: "0"
@@ -206,6 +218,8 @@ ray-cluster:
         value: *dbPass
       - name: MYSQL_DATABASE
         value: "datamate"
+      - name: RAY_enable_autoscaler_v2
+        value: "1"
     resources:
       limits:
         cpu: "4"
@@ -283,3 +297,58 @@ ray-cluster:
       - mountPath: /usr/local/lib/ops/site-packages
         name: operator-volume
         subPath: site-packages
+  additionalWorkerGroups:
+    npuGroup:
+      disabled: false
+      replicas: 0
+      minReplicas: 0
+      maxReplicas: 8
+      rayStartParams:
+        resources: '"{\"npu\": 1}"'
+      containerEnv:
+        - name: RAY_DEDUP_LOGS
+          value: "0"
+        - name: RAY_TQDM_PATCH_PRINT
+          value: "0"
+        - name: MYSQL_HOST
+          value: "datamate-database"
+        - name: MYSQL_PORT
+          value: "3306"
+        - name: MYSQL_USER
+          value: "root"
+        - name: MYSQL_PASSWORD
+          value: *dbPass
+        - name: MYSQL_DATABASE
+          value: "datamate"
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+      resources:
+        limits:
+          cpu: "8"
+          memory: "64G"
+          huawei.com/Ascend910: 1
+        requests:
+          cpu: "1"
+          memory: "2G"
+          huawei.com/Ascend910: 1
+      volumes:
+        - *datasetVolume
+        - *flowVolume
+        - *logVolume
+        - *operatorVolume
+      volumeMounts:
+        - mountPath: /tmp/ray
+          name: log-volume
+          subPathExpr: ray/$(POD_NAME)
+        - mountPath: /dataset
+          name: dataset-volume
+        - mountPath: /flow
+          name: flow-volume
+        - mountPath: /opt/runtime/datamate/ops/user
+          name: operator-volume
+          subPath: extract
+        - mountPath: /usr/local/lib/ops/site-packages
+          name: operator-volume
+          subPath: site-packages
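With the autoscaler enabled and npuGroup defaulting to zero replicas, NPU workers should only appear when a job actually demands the custom "npu" resource. A quick, hypothetical way to confirm the group advertises its resource once scaled up (run from a pod inside the cluster; reaching the head via "auto" address resolution is an assumption):

    # Minimal verification sketch, not part of the patch.
    import ray

    ray.init(address="auto")           # assumes the pod can reach the Ray head service
    print(ray.cluster_resources())     # expect an "npu" entry once npuGroup workers join
    print(ray.available_resources())   # shows how much of it is still unclaimed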
@@ -22,17 +22,6 @@ from core.base_op import Filter as RELATIVE_Filter, Mapper as RELATIVE_Mapper, S
 rd.DataContext.get_current().enable_progress_bars = False
 
 
-def is_valid_path(item, dataset_dir):
-    full_path = os.path.abspath(os.path.join(dataset_dir, item))
-    return os.path.exists(full_path)
-
-
-def new_get_num_npus(init_kwargs):
-    if init_kwargs.get("accelerator", "cpu") != "npu":
-        return 0.0
-    return 0.1
-
-
 class Formatters(Enum):
     """
     Enumeration of extraction operators and disk-dump (export) operators
@@ -163,22 +152,19 @@ class RayDataset(BasicDataset):
         return res
 
     def _run_single_op(self, operators_cls, init_kwargs, **kwargs):
-        num_npus = new_get_num_npus(init_kwargs)
         max_actor_nums = os.getenv("MAX_ACTOR_NUMS", "20")
 
-        # Determine whether this is an ONNX operator; if so, cap the number of concurrent actors
-        if self._use_onnx_model(init_kwargs['op_name']):
-            max_actor_nums = 4
-
         resources = {}
 
-        if num_npus > 0:
-            resources["node_npu"] = 0.1
+        if init_kwargs.get("npu", 0) > 0:
+            resources["npu"] = init_kwargs.get("npu")
 
         if init_kwargs.get("arch", "arm").startswith("x86"):
             resources["arch"] = "x86"
 
+        cpu = init_kwargs.get("cpu", 0.05)
+        memory = init_kwargs.get("memory", None)
+
         kwargs.update({"ext_params": {}, "failed_reason": {}, "target_type": None})
         try:
             if issubclass(operators_cls, (Mapper, RELATIVE_Mapper)):
@@ -186,7 +172,8 @@ class RayDataset(BasicDataset):
                                          fn_constructor_kwargs=init_kwargs,
                                          fn_kwargs=kwargs,
                                          resources=resources,
-                                         num_cpus=0.05,
+                                         num_cpus=cpu,
+                                         memory=memory,
                                          compute=rd.ActorPoolStrategy(min_size=1,
                                                                       max_size=int(max_actor_nums)))
 
@@ -195,7 +182,8 @@ class RayDataset(BasicDataset):
                                          fn_constructor_kwargs=init_kwargs,
                                          fn_kwargs=kwargs,
                                          resources=resources,
-                                         num_cpus=0.05,
+                                         num_cpus=cpu,
+                                         memory=memory,
                                          compute=rd.ActorPoolStrategy(min_size=1,
                                                                       max_size=int(max_actor_nums)))
 
@@ -204,7 +192,8 @@ class RayDataset(BasicDataset):
                                          fn_constructor_kwargs=init_kwargs,
                                          fn_kwargs=kwargs,
                                          resources=resources,
-                                         num_cpus=0.05,
+                                         num_cpus=cpu,
+                                         memory=memory,
                                          compute=rd.ActorPoolStrategy(min_size=1,
                                                                       max_size=int(max_actor_nums)))
             else:
@@ -214,13 +203,3 @@ class RayDataset(BasicDataset):
         except Exception as e:
             logger.error(e)
             raise Exception("Error! Ops Details:") from e
-
-    def _use_onnx_model(self, ops_name):
-        if ops_name in self.onnx_ops_name:
-            return True
-        return False
-
-    def _use_npu_model(self, ops_name):
-        if ops_name in self.npu_ops_name:
-            return True
-        return False
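One detail worth hedging: Ray's `memory` remote argument is expressed in bytes, so if an operator's runtime hints are meant to reserve, say, 2 GiB per actor, the value passed through `init_kwargs` would presumably need to be the byte count:

    # Illustrative assumption about units; the patch passes memory through unchanged.
    memory = 2 * 1024 ** 3   # 2 GiB expressed in bytes, as Ray's `memory` option expects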