feature: add mysql collection and starrocks collection (#222)
* fix: fix the path for backend-python image building
* feature: add mysql collection and starrocks collection
* fix: change the permission of the files collected from nfs to 754
* fix: delete collected files, config files and log files when deleting a collection task
* fix: add the collection task detail api
* fix: change the log of collecting for dataset
* fix: add collection task selection while creating and updating a dataset
* fix: set the umask value to 0022 for the java process
@@ -4,14 +4,14 @@ on:
   push:
     branches: [ "main" ]
     paths:
-      - 'scripts/images/datamate-python/**'
+      - 'scripts/images/backend-python/**'
       - 'runtime/datamate-python/**'
      - '.github/workflows/docker-image-backend-python.yml'
       - '.github/workflows/docker-images-reusable.yml'
   pull_request:
     branches: [ "main" ]
     paths:
-      - 'scripts/images/datamate-python/**'
+      - 'scripts/images/backend-python/**'
       - 'runtime/datamate-python/**'
       - '.github/workflows/docker-image-backend-python.yml'
       - '.github/workflows/docker-images-reusable.yml'
@@ -237,8 +237,8 @@ public class DatasetApplicationService {
            if (CollectionUtils.isEmpty(filePaths)) {
                return;
            }
-           log.info("Starting file scan, total files: {}", filePaths.size());
            datasetFileApplicationService.copyFilesToDatasetDir(datasetId, new CopyFilesRequest(filePaths));
+           log.info("Success file scan, total files: {}", filePaths.size());
        } catch (Exception e) {
            log.error("处理数据源文件扫描失败,数据集ID: {}, 数据源ID: {}", datasetId, dataSourceId, e);
        }
@@ -81,19 +81,7 @@ export default function CollectionTaskCreate() {
   const handleSubmit = async () => {
     try {
       await form.validateFields();
-
-      const values = form.getFieldsValue(true);
-      const payload = {
-        name: values.name,
-        description: values.description,
-        syncMode: values.syncMode,
-        scheduleExpression: values.scheduleExpression,
-        timeoutSeconds: values.timeoutSeconds,
-        templateId: values.templateId,
-        config: values.config,
-      };
-
-      await createTaskUsingPost(payload);
+      await createTaskUsingPost(newTask);
       message.success("任务创建成功");
       navigate("/data/collection");
     } catch (error) {
@@ -104,88 +92,108 @@ export default function CollectionTaskCreate() {
   const selectedTemplate = templates.find((t) => t.id === selectedTemplateId);

   const renderTemplateFields = (
-    section: "parameter" | "reader" | "writer",
+    section: any[],
     defs: Record<string, TemplateFieldDef> | undefined
   ) => {
     if (!defs || typeof defs !== "object") return null;
+    let items_ = []

-    const items = Object.entries(defs).map(([key, def]) => {
+    Object.entries(defs).sort(([key1, def1], [key2, def2]) => {
+      const def1Order = def1?.index || 0;
+      const def2Order = def2?.index || 0;
+      return def1Order - def2Order;
+    }).forEach(([key, def]) => {
       const label = def?.name || key;
       const description = def?.description;
       const fieldType = (def?.type || "input").toLowerCase();
       const required = def?.required !== false;

       const rules = required
         ? [{ required: true, message: `请输入${label}` }]
         : undefined;
+      const name = section.concat(key)

-      if (fieldType === "password") {
-        return (
-          <Form.Item
-            key={`${section}.${key}`}
-            name={["config", section, key]}
-            label={label}
-            tooltip={description}
-            rules={rules}
-          >
-            <Input.Password placeholder={description || `请输入${label}`} />
-          </Form.Item>
-        );
+      switch (fieldType) {
+        case "password":
+          items_.push((
+            <Form.Item
+              key={`${section}.${key}`}
+              name={name}
+              label={label}
+              tooltip={description}
+              rules={rules}
+            >
+              <Input.Password placeholder={description || `请输入${label}`} />
+            </Form.Item>
+          ));
+          break;
+        case "selecttag":
+          items_.push((
+            <Form.Item
+              name={name}
+              label={label}
+              rules={rules}
+            >
+              <Select placeholder={description || `请输入${label}`} mode="tags" />
+            </Form.Item>
+          ));
+          break;
+        case "select":
+          const options = (def?.options || []).map((opt: any) => {
+            if (typeof opt === "string" || typeof opt === "number") {
+              return { label: String(opt), value: opt };
+            }
+            return { label: opt?.label ?? String(opt?.value), value: opt?.value };
+          });
+          items_.push((
+            <Form.Item
+              key={`${section}.${key}`}
+              name={name}
+              label={label}
+              tooltip={description}
+              rules={rules}
+            >
+              <Select placeholder={description || `请选择${label}`} options={options} />
+            </Form.Item>
+          ));
+          break;
+        case "multiple":
+          const itemsMultiple = renderTemplateFields(name, def?.properties)
+          items_.push(itemsMultiple)
+          break;
+        case "multiplelist":
+          const realName = name.concat(0)
+          const itemsMultipleList = renderTemplateFields(realName, def?.properties)
+          items_.push(itemsMultipleList)
+          break;
+        case "inputlist":
+          items_.push((
+            <Form.Item
+              key={`${section}.${key}`}
+              name={name.concat(0)}
+              label={label}
+              tooltip={description}
+              rules={rules}
+            >
+              <Input placeholder={description || `请输入${label}`} />
+            </Form.Item>
+          ));
+          break;
+        default:
+          items_.push((
+            <Form.Item
+              key={`${section}.${key}`}
+              name={name}
+              label={label}
+              tooltip={description}
+              rules={rules}
+            >
+              <Input placeholder={description || `请输入${label}`} />
+            </Form.Item>
+          ));
       }
+    })

-      if (fieldType === "textarea") {
-        return (
-          <Form.Item
-            key={`${section}.${key}`}
-            name={["config", section, key]}
-            label={label}
-            tooltip={description}
-            rules={rules}
-            className="md:col-span-2"
-          >
-            <TextArea rows={4} placeholder={description || `请输入${label}`} />
-          </Form.Item>
-        );
-      }
-
-      if (fieldType === "select") {
-        const options = (def?.options || []).map((opt: any) => {
-          if (typeof opt === "string" || typeof opt === "number") {
-            return { label: String(opt), value: opt };
-          }
-          return { label: opt?.label ?? String(opt?.value), value: opt?.value };
-        });
-        return (
-          <Form.Item
-            key={`${section}.${key}`}
-            name={["config", section, key]}
-            label={label}
-            tooltip={description}
-            rules={rules}
-          >
-            <Select placeholder={description || `请选择${label}`} options={options} />
-          </Form.Item>
-        );
-      }
-
-      return (
-        <Form.Item
-          key={`${section}.${key}`}
-          name={["config", section, key]}
-          label={label}
-          tooltip={description}
-          rules={rules}
-        >
-          <Input placeholder={description || `请输入${label}`} />
-        </Form.Item>
-      );
-    });
-
-    return (
-      <div className="grid grid-cols-1 md:grid-cols-2 gap-x-4 gap-y-2">
-        {items}
-      </div>
-    );
+    return items_
   };

   const getPropertyCountSafe = (obj: any) => {
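Note: the rewritten renderTemplateFields now receives an antd name path (e.g. ["config", "reader"]) instead of a section string, and each field extends it with section.concat(key) — multipleList/inputList entries also append index 0 — so antd itself nests the submitted values and handleSubmit can post the form values directly. Below is a rough Python sketch (not part of the commit) of how such mixed string/index paths fold into the config object; the jdbcUrl/querySql names come from the MySQL template added later in this diff, while the host and SQL are made up.

```python
def set_path(form_values, path, value):
    """Nest `value` under a mixed str/int antd name path (last element assumed to be a string key)."""
    node = form_values
    for key, nxt in zip(path, path[1:]):
        if isinstance(node, list):
            while len(node) <= key:  # grow the list up to the requested index
                node.append([] if isinstance(nxt, int) else {})
            node = node[key]
        else:
            node = node.setdefault(key, [] if isinstance(nxt, int) else {})
    node[path[-1]] = value
    return form_values

values = {}
set_path(values, ["config", "reader", "username"], "root")
set_path(values, ["config", "reader", "connection", 0, "jdbcUrl"], "jdbc:mysql://db:3306/demo")
set_path(values, ["config", "reader", "connection", 0, "querySql"], "SELECT id, name FROM t_user")
print(values)
# {'config': {'reader': {'username': 'root',
#                        'connection': [{'jdbcUrl': 'jdbc:mysql://db:3306/demo',
#                                        'querySql': 'SELECT id, name FROM t_user'}]}}}
```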
@@ -342,10 +350,12 @@ export default function CollectionTaskCreate() {
            <h3 className="font-medium text-gray-900 pt-2 mb-2">
              模板参数
            </h3>
+           <div className="grid grid-cols-1 md:grid-cols-2 gap-x-4 gap-y-2">
            {renderTemplateFields(
-             "parameter",
+             ["config", "parameter"],
              selectedTemplate.templateContent?.parameter as Record<string, TemplateFieldDef>
            )}
+           </div>
          </>
        ): null}

@@ -354,10 +364,12 @@ export default function CollectionTaskCreate() {
            <h3 className="font-medium text-gray-900 pt-2 mb-2">
              源端参数
            </h3>
+           <div className="grid grid-cols-1 md:grid-cols-2 gap-x-4 gap-y-2">
            {renderTemplateFields(
-             "reader",
+             ["config", "reader"],
              selectedTemplate.templateContent?.reader as Record<string, TemplateFieldDef>
            )}
+           </div>
          </>
        ) : null}

@@ -366,10 +378,12 @@ export default function CollectionTaskCreate() {
            <h3 className="font-medium text-gray-900 pt-2 mb-2">
              目标端参数
            </h3>
+           <div className="grid grid-cols-1 md:grid-cols-2 gap-x-4 gap-y-2">
            {renderTemplateFields(
-             "writer",
+             ["config", "writer"],
              selectedTemplate.templateContent?.writer as Record<string, TemplateFieldDef>
            )}
+           </div>
          </>
        ) : null}
      </>
@@ -3,6 +3,7 @@ import { Input, Select, Form } from "antd";
 import { datasetTypes } from "../../dataset.const";
 import { useEffect, useState } from "react";
 import { queryDatasetTagsUsingGet } from "../../dataset.api";
+import {queryTasksUsingGet} from "@/pages/DataCollection/collection.apis.ts";

 export default function BasicInformation({
   data,
@@ -20,6 +21,7 @@ export default function BasicInformation({
       options: { label: JSX.Element; value: string }[];
     }[]
   >([]);
+  const [collectionOptions, setCollectionOptions] = useState([]);

   // 获取标签
   const fetchTags = async () => {
@@ -36,8 +38,23 @@ export default function BasicInformation({
     }
   };

+  // 获取归集任务
+  const fetchCollectionTasks = async () => {
+    try {
+      const res = await queryTasksUsingGet({ page: 0, size: 100 });
+      const options = res.data.content.map((task: any) => ({
+        label: task.name,
+        value: task.id,
+      }));
+      setCollectionOptions(options);
+    } catch (error) {
+      console.error("Error fetching collection tasks:", error);
+    }
+  };
+
   useEffect(() => {
     fetchTags();
+    fetchCollectionTasks();
   }, []);
   return (
     <>
@@ -78,6 +95,11 @@ export default function BasicInformation({
        />
      </Form.Item>
      )}
+     {!hidden.includes("dataSource") && (
+       <Form.Item name="dataSource" label="关联归集任务">
+         <Select placeholder="请选择归集任务" options={collectionOptions} />
+       </Form.Item>
+     )}
    </>
  );
}
@@ -13,9 +13,10 @@ from app.module.shared.schema import TaskStatus
 logger = get_logger(__name__)

 class DataxClient:
-    def __init__(self, task: CollectionTask, execution: TaskExecution):
+    def __init__(self, task: CollectionTask, execution: TaskExecution, template: CollectionTemplate):
         self.execution = execution
         self.task = task
+        self.template = template
         self.config_file_path = f"/flow/data-collection/{task.id}/config.json"
         self.python_path = "python"
         self.datax_main = "/opt/datax/bin/datax.py"
@@ -53,10 +54,21 @@ class DataxClient:
            **(task_config.parameter if task_config.parameter else {}),
            **(task_config.reader if task_config.reader else {})
        }
+       dest_parameter = {}
+       if template.target_type == "txtfilewriter":
+           dest_parameter = {
+               "path": target_path,
+               "fileName": "collection_result",
+               "writeMode": "truncate"
+           }
+       elif template.target_type == "nfswriter" or template.target_type == "obswriter":
+           dest_parameter = {
+               "destPath": target_path
+           }
        writer_parameter = {
            **(task_config.parameter if task_config.parameter else {}),
            **(task_config.writer if task_config.writer else {}),
-           "destPath": target_path
+           **dest_parameter
        }
        # 生成任务运行配置
        job_config = {
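Note: the destination block of the DataX job is now derived from the template's target_type — txtfilewriter gets a path/fileName/writeMode block, while nfswriter/obswriter keep the destPath behaviour — and the result is merged over the task's own writer settings. A simplified, standalone Python sketch of that selection and merge follows (not the DataxClient code itself; the target path is made up).

```python
def build_writer_parameter(target_type: str, target_path: str, task_writer: dict | None) -> dict:
    """Simplified sketch of how the destination block is chosen per target_type and merged."""
    if target_type == "txtfilewriter":
        dest = {"path": target_path, "fileName": "collection_result", "writeMode": "truncate"}
    elif target_type in ("nfswriter", "obswriter"):
        dest = {"destPath": target_path}
    else:
        dest = {}
    return {**(task_writer or {}), **dest}

print(build_writer_parameter("txtfilewriter", "/dataset/local/task-1", {"header": ["id", "name"]}))
# {'header': ['id', 'name'], 'path': '/dataset/local/task-1',
#  'fileName': 'collection_result', 'writeMode': 'truncate'}
```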
@@ -128,6 +140,7 @@ class DataxClient:
            logger.info(f"DataX 任务执行成功: {self.execution.id}")
            logger.info(f"执行耗时: {self.execution.duration_seconds:.2f} 秒")
            self.execution.status = TaskStatus.COMPLETED.name
+           self.rename_collection_result()
        else:
            self.execution.error_message = self.execution.error_message or f"DataX 任务执行失败,退出码: {exit_code}"
            self.execution.status = TaskStatus.FAILED.name
@@ -141,6 +154,23 @@ class DataxClient:
        if self.task.sync_mode == SyncMode.ONCE:
            self.task.status = self.execution.status

+   def rename_collection_result(self):
+       if self.template.target_type != "txtfilewriter":
+           return
+       target_path = Path(self.task.target_path)
+       if not target_path.exists():
+           logger.warning(f"Target path does not exist: {target_path}")
+           return
+       # If it's a directory, find all files without extensions
+       for file_path in target_path.iterdir():
+           if file_path.is_file() and not file_path.suffix:
+               new_path = file_path.with_suffix('.csv')
+               try:
+                   file_path.rename(new_path)
+                   logger.info(f"Renamed {file_path} to {new_path}")
+               except Exception as e:
+                   logger.error(f"Failed to rename {file_path} to {new_path}: {str(e)}")
+
    def _run_process(self, cmd: list[str], log_f) -> int:
        # 启动进程
        process = subprocess.Popen(
@@ -1,5 +1,7 @@
 import math
 import uuid
+import shutil
+import os
 from typing import Optional

 from fastapi import APIRouter, Depends, HTTPException, Query
@@ -138,6 +140,13 @@ async def delete_collection_tasks(
            .where(TaskExecution.task_id == task_id)
        )

+       target_path = f"/dataset/local/{task_id}"
+       if os.path.exists(target_path):
+           shutil.rmtree(target_path)
+       job_path = f"/flow/data-collection/{task_id}"
+       if os.path.exists(job_path):
+           shutil.rmtree(job_path)
+
        # 删除任务
        await db.delete(task)
        await db.commit()
@@ -155,3 +164,29 @@ async def delete_collection_tasks(
        await db.rollback()
        logger.error(f"Failed to delete collection task: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
+
+@router.get("/{task_id}", response_model=StandardResponse[CollectionTaskBase])
+async def get_task(
+    task_id: str,
+    db: AsyncSession = Depends(get_db)
+):
+    """获取归集任务详情"""
+    try:
+        # Query the task by ID
+        task = await db.get(CollectionTask, task_id)
+        if not task:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Task with ID {task_id} not found"
+            )
+
+        return StandardResponse(
+            code=200,
+            message="Success",
+            data=converter_to_response(task)
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to get task {task_id}: {str(e)}", e)
+        raise HTTPException(status_code=500, detail="Internal server error")
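Note: the new detail route can be exercised like the other task endpoints; below is a hedged usage sketch with httpx, where BASE_URL is an assumption since the router prefix is not visible in this diff. The code/message/data envelope matches the StandardResponse returned above.

```python
import httpx

# Illustration only: BASE_URL is an assumption; the collection-task router prefix is not shown in this diff.
BASE_URL = "http://localhost:8000/api/collection/tasks"

def get_task_detail(task_id: str) -> dict:
    resp = httpx.get(f"{BASE_URL}/{task_id}", timeout=10)
    resp.raise_for_status()            # unknown task ids come back as 404
    body = resp.json()                 # StandardResponse envelope: code / message / data
    return body["data"]
```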
@@ -58,6 +58,7 @@ class CollectionTaskService:
            logger.error(f"task {task_id} not exist")
            return
        template = await session.execute(select(CollectionTemplate).where(CollectionTemplate.id == task.template_id))
+       template = template.scalar_one_or_none()
        if not template:
            logger.error(f"template {task.template_name} not exist")
            return
@@ -65,6 +66,6 @@ class CollectionTaskService:
        session.add(task_execution)
        await session.commit()
        await asyncio.to_thread(
-           DataxClient(execution=task_execution, task=task).run_datax_job
+           DataxClient(execution=task_execution, task=task, template=template).run_datax_job
        )
        await session.commit()
@@ -69,12 +69,14 @@ public class NfsReader extends Reader {
        private Configuration jobConfig;
        private String mountPoint;
        private Set<String> fileType;
+       private List<String> files;

        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            this.mountPoint = this.jobConfig.getString("mountPoint");
            this.fileType = new HashSet<>(this.jobConfig.getList("fileType", Collections.emptyList(), String.class));
+           this.files = this.jobConfig.getList("files", Collections.emptyList(), String.class);
        }

        @Override
@@ -83,6 +85,7 @@ public class NfsReader extends Reader {
                List<String> files = stream.filter(Files::isRegularFile)
                        .filter(file -> fileType.isEmpty() || fileType.contains(getFileSuffix(file)))
                        .map(path -> path.getFileName().toString())
+                       .filter(fileName -> this.files.isEmpty() || this.files.contains(fileName))
                        .collect(Collectors.toList());
                files.forEach(filePath -> {
                    Record record = recordSender.createRecord();
@@ -85,7 +85,7 @@ public class NfsWriter extends Writer {
                }

                String filePath = this.mountPoint + "/" + fileName;
-               ShellUtil.runCommand("rsync", Arrays.asList("--no-links", "--chmod=750", "--", filePath,
+               ShellUtil.runCommand("rsync", Arrays.asList("--no-links", "--chmod=754", "--", filePath,
                        this.destPath + "/" + fileName));
            }
        } catch (Exception e) {
@@ -73,5 +73,7 @@ CREATE TABLE t_dc_collection_templates (
 ) COMMENT='数据归集模板配置表';

 INSERT IGNORE INTO t_dc_collection_templates(id, name, description, source_type, source_name, target_type, target_name, template_content, built_in, created_by, updated_by)
-VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {}, "reader": {}, "writer": {}}', True, 'system', 'system'),
-       ('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input"},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input"},"accessKey": {"name": "访问密钥","description": "OBS访问密钥。","type": "input"},"secretKey": {"name": "密钥","description": "OBS密钥。","type": "input"},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input"}}, "reader": {}, "writer": {}}', True, 'system', 'system');
+VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {"ip": {"name": "NAS地址","description": "NAS服务的地址,可以为IP或者域名。","type": "input", "required": true, "index": 1}, "path": {"name": "共享路径","description": "NAS服务的共享路径。","type": "input", "required": true, "index": 2}, "files": {"name": "文件列表","description": "指定文件列表进行归集。","type": "selectTag", "required": false, "index": 3}}, "reader": {}, "writer": {}}', True, 'system', 'system'),
+       ('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input", "required": true, "index": 1},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input", "required": true, "index": 2},"accessKey": {"name": "AK","description": "OBS访问密钥。","type": "input", "required": true, "index": 3},"secretKey": {"name": "SK","description": "OBS密钥。","type": "password", "required": true, "index": 4},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input", "required": true, "index": 5}}, "reader": {}, "writer": {}}', True, 'system', 'system'),
+       ('3', 'MYSQL归集模板', '将MYSQL数据库中的数据以csv文件的形式归集到DataMate平台上。', 'mysqlreader', 'mysqlreader', 'txtfilewriter', 'txtfilewriter', '{"parameter": {}, "reader": {"username": {"name": "用户名","description": "数据库的用户名。","type": "input", "required": true, "index": 2}, "password": {"name": "密码","description": "数据库的密码。","type": "password", "required": true, "index": 3}, "connection": {"name": "数据库连接信息", "description": "数据库连接信息。", "type": "multipleList", "size": 1, "index": 1, "properties": {"jdbcUrl": {"type": "inputList", "name": "数据库连接", "description": "数据库连接url。", "required": true, "index": 1}, "querySql": {"type": "inputList", "name": "查询sql", "description": "输入符合语法的sql查询语句。", "required": true, "index": 2}}}}, "writer": {"header": {"name": "列名","description": "查询结果的列名,最终会体现为csv文件的表头。","type": "selectTag", "required": false}}}', True, 'system', 'system'),
+       ('4', 'StarRocks归集模板', '将StarRocks中的数据以csv文件的形式归集到DataMate平台上。', 'starrocksreader', 'starrocksreader', 'txtfilewriter', 'txtfilewriter', '{"parameter": {}, "reader": {"username": {"name": "用户名","description": "数据库的用户名。","type": "input", "required": true, "index": 2}, "password": {"name": "密码","description": "数据库的密码。","type": "password", "required": true, "index": 3}, "connection": {"name": "数据库连接信息", "description": "数据库连接信息。", "type": "multipleList", "size": 1, "index": 1, "properties": {"jdbcUrl": {"type": "inputList", "name": "数据库连接", "description": "数据库连接url。", "required": true, "index": 1}, "querySql": {"type": "inputList", "name": "查询sql", "description": "输入符合语法的sql查询语句。", "required": true, "index": 2}}}}, "writer": {"header": {"name": "列名","description": "查询结果的列名,最终会体现为csv文件的表头。","type": "selectTag", "required": false}}}', True, 'system', 'system');
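Note: the template_content JSON drives the dynamic form on the frontend — every field declares a type (input, password, selectTag, inputList, multipleList), a required flag and an index used for ordering. A small sketch follows (assuming only the structure visible in the INSERT above) that walks a trimmed copy of the MySQL template's reader section and lists the fields in render order.

```python
import json

# Trimmed copy of the reader section from the MYSQL template above, for illustration.
reader = json.loads("""
{
  "username":   {"name": "用户名", "type": "input",    "required": true, "index": 2},
  "password":   {"name": "密码",   "type": "password", "required": true, "index": 3},
  "connection": {"name": "数据库连接信息", "type": "multipleList", "index": 1,
                 "properties": {
                   "jdbcUrl":  {"name": "数据库连接", "type": "inputList", "required": true, "index": 1},
                   "querySql": {"name": "查询sql",   "type": "inputList", "required": true, "index": 2}}}
}
""")

def walk(defs: dict, path: list):
    """Yield (form name path, field type) pairs in index order, recursing into multipleList properties."""
    for key, field in sorted(defs.items(), key=lambda kv: kv[1].get("index", 0)):
        name = path + [key]
        if field.get("type") in ("multiple", "multipleList"):
            # multipleList entries are edited at list index 0, mirroring name.concat(0) in the TSX
            child_path = (name + [0]) if field["type"] == "multipleList" else name
            yield from walk(field.get("properties", {}), child_path)
        else:
            yield name, field.get("type", "input")

for name, ftype in walk(reader, ["config", "reader"]):
    print(name, ftype)
# ['config', 'reader', 'connection', 0, 'jdbcUrl'] inputList
# ['config', 'reader', 'connection', 0, 'querySql'] inputList
# ['config', 'reader', 'username'] input
# ['config', 'reader', 'password'] password
```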
@@ -17,19 +17,18 @@ FROM python:3.12-slim
 # Note: to use the cache mount syntax you must build with BuildKit enabled:
 # DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python

-RUN apt-get update \
-    && apt-get install -y --no-install-recommends openjdk-21-jre-headless \
-    && rm -rf /var/lib/apt/lists/*
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common rsync && \
+    rm -rf /var/lib/apt/lists/*

 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
-    # Poetry configuration
     POETRY_VERSION=2.2.1 \
     POETRY_NO_INTERACTION=1 \
     POETRY_VIRTUALENVS_CREATE=false \
     POETRY_CACHE_DIR=/tmp/poetry_cache

-ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64
+ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk

 ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"

@@ -42,6 +41,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     && pipx install "poetry==$POETRY_VERSION"

 COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
+RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/

 # Copy only dependency files first (leverages layer caching when dependencies don't change)
 COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/
@@ -1,16 +1,3 @@
-FROM maven:3-eclipse-temurin-8 AS datax-builder
-
-RUN apt-get update && \
-    apt-get install -y git && \
-    git clone https://github.com/alibaba/DataX.git
-
-COPY runtime/datax/ DataX/
-
-RUN cd DataX && \
-    sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
-        plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
-    mvn -U clean package assembly:assembly -Dmaven.test.skip=true
-
 FROM maven:3-eclipse-temurin-21 AS builder

 COPY backend/ /opt/backend
@@ -22,12 +9,11 @@ RUN cd /opt/backend/services && \
 FROM eclipse-temurin:21-jdk

 RUN apt-get update && \
-    apt-get install -y vim wget curl nfs-common rsync python3 python3-pip python-is-python3 dos2unix && \
+    apt-get install -y vim wget curl rsync python3 python3-pip python-is-python3 dos2unix && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

 COPY --from=builder /opt/backend/services/main-application/target/datamate.jar /opt/backend/datamate.jar
-COPY --from=datax-builder /DataX/target/datax/datax /opt/datax

 COPY scripts/images/backend/start.sh /opt/backend/start.sh

@@ -2,7 +2,7 @@

 set -e

-rpcbind
+umask 0022

 echo "Starting main application..."
 exec "$@"
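Note: start.sh no longer launches rpcbind (the Java image also drops nfs-common in this commit) and instead sets umask 0022, so files the Java process creates come out group/world readable. A quick Python sketch of what that umask yields for new files and directories:

```python
import os
import stat
import tempfile
import pathlib

# Illustration: with umask 0022 (as now set in start.sh), a requested directory mode of
# 0o777 becomes 0o755 and a requested file mode of 0o666 becomes 0o644.
previous = os.umask(0o022)
try:
    parent = pathlib.Path(tempfile.mkdtemp())
    new_dir = parent / "data"
    os.mkdir(new_dir)                                              # 0o777 & ~0o022 -> 0o755
    fd = os.open(parent / "example.txt", os.O_CREAT | os.O_WRONLY, 0o666)
    os.close(fd)                                                   # 0o666 & ~0o022 -> 0o644
    print(oct(stat.S_IMODE(os.stat(new_dir).st_mode)))             # 0o755
    print(oct(stat.S_IMODE(os.stat(parent / "example.txt").st_mode)))  # 0o644
finally:
    os.umask(previous)
```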