bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子

* bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子
This commit is contained in:
hhhhsc701
2025-11-27 17:34:53 +08:00
committed by GitHub
parent 91390cace0
commit f1bffdcd61
15 changed files with 46 additions and 15 deletions

View File

@@ -244,11 +244,9 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi
cd deployment/docker/datamate && export REGISTRY=$(REGISTRY) && docker compose up -d datamate-mineru; \
elif [ "$*" = "datamate" ]; then \
if docker compose ls --filter name=deer-flow | grep -q deer-flow; then \
(cd deployment/docker/datamate && NGINX_CONF="./backend-with-deer-flow.conf" REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d) && \
$(MAKE) label-studio-docker-install; \
(cd deployment/docker/datamate && NGINX_CONF="./backend-with-deer-flow.conf" REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d); \
else \
(cd deployment/docker/datamate && REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d) && \
$(MAKE) label-studio-docker-install; \
(cd deployment/docker/datamate && REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d); \
fi; \
elif [ "$*" = "deer-flow" ]; then \
cd deployment/docker/datamate && export NGINX_CONF="./backend-with-deer-flow.conf" && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d; \

View File

@@ -93,6 +93,7 @@ public class CleaningTaskService {
CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
createDatasetRequest.setName(request.getDestDatasetName());
createDatasetRequest.setDatasetType(DatasetType.valueOf(request.getDestDatasetType()));
createDatasetRequest.setStatus("ACTIVE");
Dataset destDataset = datasetService.createDataset(createDatasetRequest);
Dataset srcDataset = datasetService.getDataset(request.getSrcDatasetId());

View File

@@ -113,7 +113,9 @@ public class Dataset extends BaseEntity<String> {
/**
 * Initializes the fields that are assigned when a dataset is created.
 *
 * <p>Generates a random UUID as the id, derives the storage path by appending
 * that id to {@code datasetBasePath}, and defaults the status to
 * {@code DRAFT} only when no status has been set yet.
 *
 * @param datasetBasePath base directory under which the dataset directory is placed
 */
public void initCreateParam(String datasetBasePath) {
    this.id = UUID.randomUUID().toString();
    this.path = datasetBasePath + File.separator + this.id;
    // Do not overwrite a status that was set before this call; an unconditional
    // assignment here would make the null check below dead code.
    if (this.status == null) {
        this.status = DatasetStatusType.DRAFT;
    }
}
public void updateBasicInfo(String name, String description, String category) {

View File

@@ -35,4 +35,6 @@ public class CreateDatasetRequest {
private String dataSource;
/** 保留天数 */
private Integer retentionDays;
/** 数据集状态 */
private String status;
}

View File

@@ -88,6 +88,9 @@ public class OperatorService {
/**
 * Deletes an operator and the relation rows keyed by its id.
 *
 * <p>The deletion is rejected with {@code OperatorErrorCode.OPERATOR_IN_INSTANCE}
 * when the repository reports that the operator is still in use.
 *
 * @param id identifier of the operator to delete
 */
@Transactional
public void deleteOperator(String id) {
    final boolean stillReferenced = operatorRepo.operatorInTemplateOrRunning(id);
    if (stillReferenced) {
        throw BusinessException.of(OperatorErrorCode.OPERATOR_IN_INSTANCE);
    }

    // Remove the operator itself first, then the relation rows that point at it.
    operatorRepo.deleteOperator(id);
    relationRepo.deleteByOperatorId(id);
}

View File

@@ -16,4 +16,6 @@ public interface OperatorRepository extends IRepository<Operator> {
void deleteOperator(String id);
int countOperatorByStar(boolean isStar);
boolean operatorInTemplateOrRunning(String operatorId);
}

View File

@@ -16,7 +16,9 @@ public enum OperatorErrorCode implements ErrorCode {
FIELD_NOT_FOUND("op.0003", "缺少必要的字段"),
SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败");
SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败"),
OPERATOR_IN_INSTANCE("op.0005", "算子已被编排在模板或未完成的任务中");
private final String code;
private final String message;

View File

@@ -43,4 +43,9 @@ public class OperatorRepositoryImpl extends CrudRepository<OperatorMapper, Opera
queryWrapper.eq(Operator::getIsStar, isStar);
return Math.toIntExact(mapper.selectCount(queryWrapper));
}
/**
 * Reports whether the operator is still referenced by a cleaning template or by
 * a task counted by {@code operatorInUnstopTask}.
 *
 * @param operatorId identifier of the operator to check
 * @return {@code true} when at least one of the two mapper counts is positive
 */
@Override
public boolean operatorInTemplateOrRunning(String operatorId) {
    // Either kind of reference is enough to report the operator as in use, so the
    // two checks are OR-ed. Short-circuiting skips the second query when the
    // first count is already positive.
    return mapper.operatorInTemplate(operatorId) > 0
        || mapper.operatorInUnstopTask(operatorId) > 0;
}
}

View File

@@ -3,7 +3,16 @@ package com.datamate.operator.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.operator.domain.model.Operator;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Select;
/**
 * MyBatis mapper for {@link Operator}; adds two count queries on top of the
 * operations inherited from {@link BaseMapper}.
 */
@Mapper
public interface OperatorMapper extends BaseMapper<Operator> {
/**
 * Counts {@code t_operator_instance} rows for the given operator whose
 * {@code instance_id} matches the {@code id} of a row in {@code t_clean_template}.
 *
 * @param operatorId operator id bound to {@code #{operatorId}} in the query
 * @return number of matching rows
 */
@Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_template t ON oi.instance_id = t.id " +
"WHERE oi.operator_id = #{operatorId}")
int operatorInTemplate(String operatorId);
/**
 * Counts {@code t_operator_instance} rows for the given operator whose
 * {@code instance_id} matches the {@code id} of a {@code t_clean_task} row
 * with a status other than {@code 'COMPLETED'}.
 *
 * <p>NOTE(review): every non-COMPLETED status is counted, which presumably
 * includes failed or stopped tasks as well — confirm this is intended.
 *
 * @param operatorId operator id bound to {@code #{operatorId}} in the query
 * @return number of matching rows
 */
@Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_task t ON oi.instance_id = t.id " +
"WHERE oi.operator_id = #{operatorId} AND t.status != 'COMPLETED'")
int operatorInUnstopTask(String operatorId);
}

View File

@@ -58,7 +58,10 @@ const OperatorList: React.FC<OperatorListProps> = ({
</div>
<span
className="cursor-pointer"
onClick={() => handleStar(operator, toggleFavorite)}
onClick={(event) => {
event.stopPropagation();
handleStar(operator, toggleFavorite);
}}
>
{favorites.has(operator.id) ? (
<StarFilled style={{ color: "#FFD700" }} />

View File

@@ -1,4 +1,4 @@
import {Button, Modal, Table, Badge, Input} from "antd";
import {Button, Modal, Table, Badge, Input, Popover} from "antd";
import { Download } from "lucide-react";
import {useEffect, useState} from "react";
import {useParams} from "react-router";
@@ -259,9 +259,9 @@ export default function FileTable({result, fetchTaskResult}) {
</Button>
)}
<Button type="link" size="small">
</Button>
<Popover content="暂未开放">
<Button type="link" size="small" disabled></Button>
</Popover>
</div>
),
},

View File

@@ -186,7 +186,7 @@ export default function TaskList() {
title: "已处理文件数",
dataIndex: "finishedFileNum",
key: "finishedFileNum",
width: 150,
width: 120,
align: "right",
ellipsis: true,
},
@@ -194,7 +194,7 @@ export default function TaskList() {
title: "总文件数",
dataIndex: "totalFileNum",
key: "totalFileNum",
width: 150,
width: 100,
align: "right",
ellipsis: true,
},
@@ -202,7 +202,7 @@ export default function TaskList() {
title: "执行耗时",
dataIndex: "duration",
key: "duration",
width: 180,
width: 100,
ellipsis: true,
},
{

View File

@@ -3,4 +3,4 @@
from datamate.core.base_op import OPERATORS
OPERATORS.register_module(module_name='MineruFormatter',
module_path="ops.formatter.external_pdf_formatter.process")
module_path="ops.formatter.mineru_formatter.process")

View File

@@ -25,6 +25,8 @@ class MineruFormatter(Mapper):
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
start = time.time()
filename = sample[self.filename_key]
if not filename.lower().endswith(".pdf"):
return sample
try:
data = {"source_path": sample[self.filepath_key], "export_path": sample[self.export_path_key]}
response = http_request(method="POST", url=self.pdf_extract_url, data=data)

View File

@@ -25,6 +25,8 @@ class UnstructuredFormatter(Mapper):
start = time.time()
filepath = sample.get(self.filepath_key)
filename = sample.get(self.filename_key)
if not filename.lower().endswith((".ppt", ".pptx", "docx", "xlsx", ".csv")):
return sample
try:
elements = partition(filename=filepath)
sample[self.text_key] = "\n\n".join([str(el) for el in elements])