From f1bffdcd6186f00ae0be5fda4412885339577738 Mon Sep 17 00:00:00 2001 From: hhhhsc701 <56435672+hhhhsc701@users.noreply.github.com> Date: Thu, 27 Nov 2025 17:34:53 +0800 Subject: [PATCH] =?UTF-8?q?bugfix:=20=E5=88=9B=E5=BB=BA=E6=B8=85=E6=B4=97?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E6=97=B6=E4=BF=AE=E6=94=B9=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=9B=86=E7=8A=B6=E6=80=81=EF=BC=9B=E6=97=A0=E6=B3=95=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=B7=B2=E5=9C=A8=E6=A8=A1=E6=9D=BF/=E8=BF=90?= =?UTF-8?q?=E8=A1=8C=E4=BB=BB=E5=8A=A1=E7=9A=84=E7=AE=97=E5=AD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子 --- Makefile | 6 ++---- .../cleaning/application/CleaningTaskService.java | 1 + .../datamanagement/domain/model/dataset/Dataset.java | 4 +++- .../interfaces/dto/CreateDatasetRequest.java | 2 ++ .../datamate/operator/application/OperatorService.java | 3 +++ .../operator/domain/repository/OperatorRepository.java | 2 ++ .../infrastructure/exception/OperatorErrorCode.java | 4 +++- .../persistence/Impl/OperatorRepositoryImpl.java | 5 +++++ .../persistence/mapper/OperatorMapper.java | 9 +++++++++ .../DataCleansing/Create/components/OperatorLibrary.tsx | 5 ++++- .../pages/DataCleansing/Detail/components/FileTable.tsx | 8 ++++---- .../src/pages/DataCleansing/Home/components/TaskList.tsx | 6 +++--- runtime/ops/formatter/mineru_formatter/__init__.py | 2 +- runtime/ops/formatter/mineru_formatter/process.py | 2 ++ runtime/ops/formatter/unstructured_formatter/process.py | 2 ++ 15 files changed, 46 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 3829682..0da83b6 100644 --- a/Makefile +++ b/Makefile @@ -244,11 +244,9 @@ VALID_SERVICE_TARGETS := datamate backend frontend runtime mineru "deer-flow" mi cd deployment/docker/datamate && export REGISTRY=$(REGISTRY) && docker compose up -d datamate-mineru; \ elif [ "$*" = "datamate" ]; then \ if docker compose ls --filter name=deer-flow | grep -q deer-flow; then 
\ - (cd deployment/docker/datamate && NGINX_CONF="./backend-with-deer-flow.conf" REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d) && \ - $(MAKE) label-studio-docker-install; \ + (cd deployment/docker/datamate && NGINX_CONF="./backend-with-deer-flow.conf" REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d); \ else \ - (cd deployment/docker/datamate && REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d) && \ - $(MAKE) label-studio-docker-install; \ + (cd deployment/docker/datamate && REGISTRY=$(REGISTRY) docker compose -f docker-compose.yml up -d); \ fi; \ elif [ "$*" = "deer-flow" ]; then \ cd deployment/docker/datamate && export NGINX_CONF="./backend-with-deer-flow.conf" && export REGISTRY=$(REGISTRY) && docker compose -f docker-compose.yml up -d; \ diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java index 3eba706..58c417d 100644 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java +++ b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java @@ -93,6 +93,7 @@ public class CleaningTaskService { CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest(); createDatasetRequest.setName(request.getDestDatasetName()); createDatasetRequest.setDatasetType(DatasetType.valueOf(request.getDestDatasetType())); + createDatasetRequest.setStatus("ACTIVE"); Dataset destDataset = datasetService.createDataset(createDatasetRequest); Dataset srcDataset = datasetService.getDataset(request.getSrcDatasetId()); diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/Dataset.java 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/Dataset.java index 3f1522c..24988a4 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/Dataset.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/Dataset.java @@ -113,7 +113,9 @@ public class Dataset extends BaseEntity { public void initCreateParam(String datasetBasePath) { this.id = UUID.randomUUID().toString(); this.path = datasetBasePath + File.separator + this.id; - this.status = DatasetStatusType.DRAFT; + if (this.status == null) { + this.status = DatasetStatusType.DRAFT; + } } public void updateBasicInfo(String name, String description, String category) { diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java index 498f69c..4f58677 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java @@ -35,4 +35,6 @@ public class CreateDatasetRequest { private String dataSource; /** 保留天数 */ private Integer retentionDays; + /** 数据集状态 */ + private String status; } diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java index da0a883..a43e723 100644 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java +++ 
b/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java @@ -88,6 +88,9 @@ public class OperatorService { @Transactional public void deleteOperator(String id) { + if (operatorRepo.operatorInTemplateOrRunning(id)) { + throw BusinessException.of(OperatorErrorCode.OPERATOR_IN_INSTANCE); + } operatorRepo.deleteOperator(id); relationRepo.deleteByOperatorId(id); } diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorRepository.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorRepository.java index d99c1d0..aadc105 100644 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorRepository.java +++ b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorRepository.java @@ -16,4 +16,6 @@ public interface OperatorRepository extends IRepository { void deleteOperator(String id); int countOperatorByStar(boolean isStar); + + boolean operatorInTemplateOrRunning(String operatorId); } diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java index f8c88d8..f94511b 100644 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java +++ b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java @@ -16,7 +16,9 @@ public enum OperatorErrorCode implements ErrorCode { FIELD_NOT_FOUND("op.0003", "缺少必要的字段"), - SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败"); + SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败"), + + OPERATOR_IN_INSTANCE("op.0005", "算子已被编排在模板或未完成的任务中"); 
private final String code; private final String message; diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java index 505038e..482c35c 100644 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java +++ b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java @@ -43,4 +43,9 @@ public class OperatorRepositoryImpl extends CrudRepository 0 || mapper.operatorInUnstopTask(operatorId) > 0; + } } diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java index f5b9459..7e91385 100644 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java +++ b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java @@ -3,7 +3,16 @@ package com.datamate.operator.infrastructure.persistence.mapper; import com.baomidou.mybatisplus.core.mapper.BaseMapper; import com.datamate.operator.domain.model.Operator; import org.apache.ibatis.annotations.Mapper; +import org.apache.ibatis.annotations.Select; @Mapper public interface OperatorMapper extends BaseMapper { + + @Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_template t ON oi.instance_id = t.id " + + "WHERE oi.operator_id = #{operatorId}") + int operatorInTemplate(String operatorId); + + @Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_task t ON
oi.instance_id = t.id " + + "WHERE oi.operator_id = #{operatorId} AND t.status != 'COMPLETED'") + int operatorInUnstopTask(String operatorId); } diff --git a/frontend/src/pages/DataCleansing/Create/components/OperatorLibrary.tsx b/frontend/src/pages/DataCleansing/Create/components/OperatorLibrary.tsx index e4e5a75..0792e9a 100644 --- a/frontend/src/pages/DataCleansing/Create/components/OperatorLibrary.tsx +++ b/frontend/src/pages/DataCleansing/Create/components/OperatorLibrary.tsx @@ -58,7 +58,10 @@ const OperatorList: React.FC = ({ handleStar(operator, toggleFavorite)} + onClick={(event) => { + event.stopPropagation(); + handleStar(operator, toggleFavorite); + }} > {favorites.has(operator.id) ? ( diff --git a/frontend/src/pages/DataCleansing/Detail/components/FileTable.tsx b/frontend/src/pages/DataCleansing/Detail/components/FileTable.tsx index a13ff6a..c879046 100644 --- a/frontend/src/pages/DataCleansing/Detail/components/FileTable.tsx +++ b/frontend/src/pages/DataCleansing/Detail/components/FileTable.tsx @@ -1,4 +1,4 @@ -import {Button, Modal, Table, Badge, Input} from "antd"; +import {Button, Modal, Table, Badge, Input, Popover} from "antd"; import { Download } from "lucide-react"; import {useEffect, useState} from "react"; import {useParams} from "react-router"; @@ -259,9 +259,9 @@ export default function FileTable({result, fetchTaskResult}) { 对比 )} - + + + ), }, diff --git a/frontend/src/pages/DataCleansing/Home/components/TaskList.tsx b/frontend/src/pages/DataCleansing/Home/components/TaskList.tsx index a0d4fae..deeed9a 100644 --- a/frontend/src/pages/DataCleansing/Home/components/TaskList.tsx +++ b/frontend/src/pages/DataCleansing/Home/components/TaskList.tsx @@ -186,7 +186,7 @@ export default function TaskList() { title: "已处理文件数", dataIndex: "finishedFileNum", key: "finishedFileNum", - width: 150, + width: 120, align: "right", ellipsis: true, }, @@ -194,7 +194,7 @@ export default function TaskList() { title: "总文件数", dataIndex: "totalFileNum", key:
"totalFileNum", - width: 150, + width: 100, align: "right", ellipsis: true, }, @@ -202,7 +202,7 @@ export default function TaskList() { title: "执行耗时", dataIndex: "duration", key: "duration", - width: 180, + width: 100, ellipsis: true, }, { diff --git a/runtime/ops/formatter/mineru_formatter/__init__.py b/runtime/ops/formatter/mineru_formatter/__init__.py index 698b608..43e2120 100644 --- a/runtime/ops/formatter/mineru_formatter/__init__.py +++ b/runtime/ops/formatter/mineru_formatter/__init__.py @@ -3,4 +3,4 @@ from datamate.core.base_op import OPERATORS OPERATORS.register_module(module_name='MineruFormatter', - module_path="ops.formatter.external_pdf_formatter.process") + module_path="ops.formatter.mineru_formatter.process") diff --git a/runtime/ops/formatter/mineru_formatter/process.py b/runtime/ops/formatter/mineru_formatter/process.py index afd7dd7..181b870 100644 --- a/runtime/ops/formatter/mineru_formatter/process.py +++ b/runtime/ops/formatter/mineru_formatter/process.py @@ -25,6 +25,8 @@ class MineruFormatter(Mapper): def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: start = time.time() filename = sample[self.filename_key] + if not filename.lower().endswith(".pdf"): + return sample try: data = {"source_path": sample[self.filepath_key], "export_path": sample[self.export_path_key]} response = http_request(method="POST", url=self.pdf_extract_url, data=data) diff --git a/runtime/ops/formatter/unstructured_formatter/process.py b/runtime/ops/formatter/unstructured_formatter/process.py index 3107f8b..db173a8 100644 --- a/runtime/ops/formatter/unstructured_formatter/process.py +++ b/runtime/ops/formatter/unstructured_formatter/process.py @@ -25,6 +25,8 @@ class UnstructuredFormatter(Mapper): start = time.time() filepath = sample.get(self.filepath_key) filename = sample.get(self.filename_key) + if not filename.lower().endswith((".ppt", ".pptx", ".docx", ".xlsx", ".csv")): + return sample try: elements = partition(filename=filepath) sample[self.text_key] =
"\n\n".join([str(el) for el in elements])