You've already forked DataMate
bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子
* bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子
This commit is contained in:
6
Makefile
6
Makefile
@@ -244,11 +244,9 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi
|
|||||||
cd deployment/docker/datamate && export REGISTRY=$(REGISTRY) && docker compose up -d datamate-mineru; \
|
cd deployment/docker/datamate && export REGISTRY=$(REGISTRY) && docker compose up -d datamate-mineru; \
|
||||||
elif [ "$*" = "datamate" ]; then \
|
elif [ "$*" = "datamate" ]; then \
|
||||||
if docker compose ls --filter name=deer-flow | grep -q deer-flow; then \
|
if docker compose ls --filter name=deer-flow | grep -q deer-flow; then \
|
||||||
(cd deployment/docker/datamate && NGINX_CONF="./backend-with-deer-flow.conf" REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d) && \
|
(cd deployment/docker/datamate && NGINX_CONF="./backend-with-deer-flow.conf" REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d); \
|
||||||
$(MAKE) label-studio-docker-install; \
|
|
||||||
else \
|
else \
|
||||||
(cd deployment/docker/datamate && REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d) && \
|
(cd deployment/docker/datamate && REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d); \
|
||||||
$(MAKE) label-studio-docker-install; \
|
|
||||||
fi; \
|
fi; \
|
||||||
elif [ "$*" = "deer-flow" ]; then \
|
elif [ "$*" = "deer-flow" ]; then \
|
||||||
cd deployment/docker/datamate && export NGINX_CONF="./backend-with-deer-flow.conf" && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d; \
|
cd deployment/docker/datamate && export NGINX_CONF="./backend-with-deer-flow.conf" && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d; \
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ public class CleaningTaskService {
|
|||||||
CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
|
CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
|
||||||
createDatasetRequest.setName(request.getDestDatasetName());
|
createDatasetRequest.setName(request.getDestDatasetName());
|
||||||
createDatasetRequest.setDatasetType(DatasetType.valueOf(request.getDestDatasetType()));
|
createDatasetRequest.setDatasetType(DatasetType.valueOf(request.getDestDatasetType()));
|
||||||
|
createDatasetRequest.setStatus("ACTIVE");
|
||||||
Dataset destDataset = datasetService.createDataset(createDatasetRequest);
|
Dataset destDataset = datasetService.createDataset(createDatasetRequest);
|
||||||
|
|
||||||
Dataset srcDataset = datasetService.getDataset(request.getSrcDatasetId());
|
Dataset srcDataset = datasetService.getDataset(request.getSrcDatasetId());
|
||||||
|
|||||||
@@ -113,7 +113,9 @@ public class Dataset extends BaseEntity<String> {
|
|||||||
public void initCreateParam(String datasetBasePath) {
|
public void initCreateParam(String datasetBasePath) {
|
||||||
this.id = UUID.randomUUID().toString();
|
this.id = UUID.randomUUID().toString();
|
||||||
this.path = datasetBasePath + File.separator + this.id;
|
this.path = datasetBasePath + File.separator + this.id;
|
||||||
this.status = DatasetStatusType.DRAFT;
|
if (this.status == null) {
|
||||||
|
this.status = DatasetStatusType.DRAFT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateBasicInfo(String name, String description, String category) {
|
public void updateBasicInfo(String name, String description, String category) {
|
||||||
|
|||||||
@@ -35,4 +35,6 @@ public class CreateDatasetRequest {
|
|||||||
private String dataSource;
|
private String dataSource;
|
||||||
/** 保留天数 */
|
/** 保留天数 */
|
||||||
private Integer retentionDays;
|
private Integer retentionDays;
|
||||||
|
/** 数据集状态 */
|
||||||
|
private String status;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -88,6 +88,9 @@ public class OperatorService {
|
|||||||
|
|
||||||
@Transactional
|
@Transactional
|
||||||
public void deleteOperator(String id) {
|
public void deleteOperator(String id) {
|
||||||
|
if (operatorRepo.operatorInTemplateOrRunning(id)) {
|
||||||
|
throw BusinessException.of(OperatorErrorCode.OPERATOR_IN_INSTANCE);
|
||||||
|
}
|
||||||
operatorRepo.deleteOperator(id);
|
operatorRepo.deleteOperator(id);
|
||||||
relationRepo.deleteByOperatorId(id);
|
relationRepo.deleteByOperatorId(id);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,4 +16,6 @@ public interface OperatorRepository extends IRepository<Operator> {
|
|||||||
void deleteOperator(String id);
|
void deleteOperator(String id);
|
||||||
|
|
||||||
int countOperatorByStar(boolean isStar);
|
int countOperatorByStar(boolean isStar);
|
||||||
|
|
||||||
|
boolean operatorInTemplateOrRunning(String operatorId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,9 @@ public enum OperatorErrorCode implements ErrorCode {
|
|||||||
|
|
||||||
FIELD_NOT_FOUND("op.0003", "缺少必要的字段"),
|
FIELD_NOT_FOUND("op.0003", "缺少必要的字段"),
|
||||||
|
|
||||||
SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败");
|
SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败"),
|
||||||
|
|
||||||
|
OPERATOR_IN_INSTANCE("op.0005", "算子已被编排在模板或未完成的任务中");
|
||||||
|
|
||||||
private final String code;
|
private final String code;
|
||||||
private final String message;
|
private final String message;
|
||||||
|
|||||||
@@ -43,4 +43,9 @@ public class OperatorRepositoryImpl extends CrudRepository<OperatorMapper, Opera
|
|||||||
queryWrapper.eq(Operator::getIsStar, isStar);
|
queryWrapper.eq(Operator::getIsStar, isStar);
|
||||||
return Math.toIntExact(mapper.selectCount(queryWrapper));
|
return Math.toIntExact(mapper.selectCount(queryWrapper));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean operatorInTemplateOrRunning(String operatorId) {
|
||||||
|
return mapper.operatorInTemplate(operatorId) > 0 && mapper.operatorInUnstopTask(operatorId) > 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,16 @@ package com.datamate.operator.infrastructure.persistence.mapper;
|
|||||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||||
import com.datamate.operator.domain.model.Operator;
|
import com.datamate.operator.domain.model.Operator;
|
||||||
import org.apache.ibatis.annotations.Mapper;
|
import org.apache.ibatis.annotations.Mapper;
|
||||||
|
import org.apache.ibatis.annotations.Select;
|
||||||
|
|
||||||
@Mapper
|
@Mapper
|
||||||
public interface OperatorMapper extends BaseMapper<Operator> {
|
public interface OperatorMapper extends BaseMapper<Operator> {
|
||||||
|
|
||||||
|
@Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_template t ON oi.instance_id = t.id " +
|
||||||
|
"WHERE oi.operator_id = #{operatorId}")
|
||||||
|
int operatorInTemplate(String operatorId);
|
||||||
|
|
||||||
|
@Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_task t ON oi.instance_id = t.id " +
|
||||||
|
"WHERE oi.operator_id = #{operatorId} AND t.status != 'COMPLETED'")
|
||||||
|
int operatorInUnstopTask(String operatorId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,7 +58,10 @@ const OperatorList: React.FC<OperatorListProps> = ({
|
|||||||
</div>
|
</div>
|
||||||
<span
|
<span
|
||||||
className="cursor-pointer"
|
className="cursor-pointer"
|
||||||
onClick={() => handleStar(operator, toggleFavorite)}
|
onClick={(event) => {
|
||||||
|
event.stopPropagation();
|
||||||
|
handleStar(operator, toggleFavorite);
|
||||||
|
}}
|
||||||
>
|
>
|
||||||
{favorites.has(operator.id) ? (
|
{favorites.has(operator.id) ? (
|
||||||
<StarFilled style={{ color: "#FFD700" }} />
|
<StarFilled style={{ color: "#FFD700" }} />
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import {Button, Modal, Table, Badge, Input} from "antd";
|
import {Button, Modal, Table, Badge, Input, Popover} from "antd";
|
||||||
import { Download } from "lucide-react";
|
import { Download } from "lucide-react";
|
||||||
import {useEffect, useState} from "react";
|
import {useEffect, useState} from "react";
|
||||||
import {useParams} from "react-router";
|
import {useParams} from "react-router";
|
||||||
@@ -259,9 +259,9 @@ export default function FileTable({result, fetchTaskResult}) {
|
|||||||
对比
|
对比
|
||||||
</Button>
|
</Button>
|
||||||
)}
|
)}
|
||||||
<Button type="link" size="small">
|
<Popover content="暂未开放">
|
||||||
下载
|
<Button type="link" size="small" disabled>下载</Button>
|
||||||
</Button>
|
</Popover>
|
||||||
</div>
|
</div>
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -186,7 +186,7 @@ export default function TaskList() {
|
|||||||
title: "已处理文件数",
|
title: "已处理文件数",
|
||||||
dataIndex: "finishedFileNum",
|
dataIndex: "finishedFileNum",
|
||||||
key: "finishedFileNum",
|
key: "finishedFileNum",
|
||||||
width: 150,
|
width: 120,
|
||||||
align: "right",
|
align: "right",
|
||||||
ellipsis: true,
|
ellipsis: true,
|
||||||
},
|
},
|
||||||
@@ -194,7 +194,7 @@ export default function TaskList() {
|
|||||||
title: "总文件数",
|
title: "总文件数",
|
||||||
dataIndex: "totalFileNum",
|
dataIndex: "totalFileNum",
|
||||||
key: "totalFileNum",
|
key: "totalFileNum",
|
||||||
width: 150,
|
width: 100,
|
||||||
align: "right",
|
align: "right",
|
||||||
ellipsis: true,
|
ellipsis: true,
|
||||||
},
|
},
|
||||||
@@ -202,7 +202,7 @@ export default function TaskList() {
|
|||||||
title: "执行耗时",
|
title: "执行耗时",
|
||||||
dataIndex: "duration",
|
dataIndex: "duration",
|
||||||
key: "duration",
|
key: "duration",
|
||||||
width: 180,
|
width: 100,
|
||||||
ellipsis: true,
|
ellipsis: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -3,4 +3,4 @@
|
|||||||
from datamate.core.base_op import OPERATORS
|
from datamate.core.base_op import OPERATORS
|
||||||
|
|
||||||
OPERATORS.register_module(module_name='MineruFormatter',
|
OPERATORS.register_module(module_name='MineruFormatter',
|
||||||
module_path="ops.formatter.external_pdf_formatter.process")
|
module_path="ops.formatter.mineru_formatter.process")
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ class MineruFormatter(Mapper):
|
|||||||
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
|
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
filename = sample[self.filename_key]
|
filename = sample[self.filename_key]
|
||||||
|
if not filename.lower().endswith(".pdf"):
|
||||||
|
return sample
|
||||||
try:
|
try:
|
||||||
data = {"source_path": sample[self.filepath_key], "export_path": sample[self.export_path_key]}
|
data = {"source_path": sample[self.filepath_key], "export_path": sample[self.export_path_key]}
|
||||||
response = http_request(method="POST", url=self.pdf_extract_url, data=data)
|
response = http_request(method="POST", url=self.pdf_extract_url, data=data)
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ class UnstructuredFormatter(Mapper):
|
|||||||
start = time.time()
|
start = time.time()
|
||||||
filepath = sample.get(self.filepath_key)
|
filepath = sample.get(self.filepath_key)
|
||||||
filename = sample.get(self.filename_key)
|
filename = sample.get(self.filename_key)
|
||||||
|
if not filename.lower().endswith((".ppt", ".pptx", "docx", "xlsx", ".csv")):
|
||||||
|
return sample
|
||||||
try:
|
try:
|
||||||
elements = partition(filename=filepath)
|
elements = partition(filename=filepath)
|
||||||
sample[self.text_key] = "\n\n".join([str(el) for el in elements])
|
sample[self.text_key] = "\n\n".join([str(el) for el in elements])
|
||||||
|
|||||||
Reference in New Issue
Block a user