From ccfb84c0340a2208a55957de828e7499300f8bd0 Mon Sep 17 00:00:00 2001 From: hefanli <76611805+hefanli@users.noreply.github.com> Date: Sun, 4 Jan 2026 19:05:08 +0800 Subject: [PATCH] feature: add mysql collection and starrocks collection (#222) * fix: fix the path for backend-python image building * feature: add mysql collection and starrocks collection * feature: add mysql collection and starrocks collection * fix: change the permission of files collected from nfs to 754 * fix: delete collected files, config files and log files while deleting collection task * fix: add the collection task detail api * fix: change the log of collecting for dataset * fix: add collection task selecting while creating and updating dataset * fix: set the umask value to 0022 for java process --- .../workflows/docker-image-backend-python.yml | 4 +- .../DatasetApplicationService.java | 2 +- .../DataCollection/Create/CreateTask.tsx | 182 ++++++++++-------- .../Create/components/BasicInformation.tsx | 22 +++ .../module/collection/client/datax_client.py | 34 +++- .../module/collection/interface/collection.py | 35 ++++ .../module/collection/service/collection.py | 3 +- .../plugin/reader/nfsreader/NfsReader.java | 3 + .../plugin/writer/nfswriter/NfsWriter.java | 2 +- scripts/db/data-collection-init.sql | 6 +- scripts/images/backend-python/Dockerfile | 10 +- scripts/images/backend/Dockerfile | 16 +- scripts/images/backend/start.sh | 4 +- 13 files changed, 208 insertions(+), 115 deletions(-) diff --git a/.github/workflows/docker-image-backend-python.yml b/.github/workflows/docker-image-backend-python.yml index 65d9755..28fea0c 100644 --- a/.github/workflows/docker-image-backend-python.yml +++ b/.github/workflows/docker-image-backend-python.yml @@ -4,14 +4,14 @@ on: push: branches: [ "main" ] paths: - - 'scripts/images/datamate-python/**' + - 'scripts/images/backend-python/**' - 'runtime/datamate-python/**' - '.github/workflows/docker-image-backend-python.yml' - 
'.github/workflows/docker-images-reusable.yml' pull_request: branches: [ "main" ] paths: - - 'scripts/images/datamate-python/**' + - 'scripts/images/backend-python/**' - 'runtime/datamate-python/**' - '.github/workflows/docker-image-backend-python.yml' - '.github/workflows/docker-images-reusable.yml' diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java index e170a51..e5c8d03 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java @@ -237,8 +237,8 @@ public class DatasetApplicationService { if (CollectionUtils.isEmpty(filePaths)) { return; } - log.info("Starting file scan, total files: {}", filePaths.size()); datasetFileApplicationService.copyFilesToDatasetDir(datasetId, new CopyFilesRequest(filePaths)); + log.info("Success file scan, total files: {}", filePaths.size()); } catch (Exception e) { log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e); } diff --git a/frontend/src/pages/DataCollection/Create/CreateTask.tsx b/frontend/src/pages/DataCollection/Create/CreateTask.tsx index a2cbbef..313f8e9 100644 --- a/frontend/src/pages/DataCollection/Create/CreateTask.tsx +++ b/frontend/src/pages/DataCollection/Create/CreateTask.tsx @@ -81,19 +81,7 @@ export default function CollectionTaskCreate() { const handleSubmit = async () => { try { await form.validateFields(); - - const values = form.getFieldsValue(true); - const payload = { - name: values.name, - description: values.description, - syncMode: values.syncMode, - scheduleExpression: values.scheduleExpression, - timeoutSeconds: values.timeoutSeconds, - templateId: 
values.templateId, - config: values.config, - }; - - await createTaskUsingPost(payload); + await createTaskUsingPost(newTask); message.success("任务创建成功"); navigate("/data/collection"); } catch (error) { @@ -104,88 +92,108 @@ export default function CollectionTaskCreate() { const selectedTemplate = templates.find((t) => t.id === selectedTemplateId); const renderTemplateFields = ( - section: "parameter" | "reader" | "writer", + section: any[], defs: Record | undefined ) => { if (!defs || typeof defs !== "object") return null; + let items_ = [] - const items = Object.entries(defs).map(([key, def]) => { + Object.entries(defs).sort(([key1, def1], [key2, def2]) => { + const def1Order = def1?.index || 0; + const def2Order = def2?.index || 0; + return def1Order - def2Order; + }).forEach(([key, def]) => { const label = def?.name || key; const description = def?.description; const fieldType = (def?.type || "input").toLowerCase(); const required = def?.required !== false; - const rules = required ? [{ required: true, message: `请输入${label}` }] : undefined; + const name = section.concat(key) - if (fieldType === "password") { - return ( - - - - ); + switch (fieldType) { + case "password": + items_.push(( + + + + )); + break; + case "selecttag": + items_.push(( + + + + )); + break; + case "multiple": + const itemsMultiple = renderTemplateFields(name, def?.properties) + items_.push(itemsMultiple) + break; + case "multiplelist": + const realName = name.concat(0) + const itemsMultipleList = renderTemplateFields(realName, def?.properties) + items_.push(itemsMultipleList) + break; + case "inputlist": + items_.push(( + + + + )); + break; + default: + items_.push(( + + + + )); } + }) - if (fieldType === "textarea") { - return ( - -