Add GlusterFS reader support

This commit is contained in:
2026-01-09 13:49:18 +08:00
parent fa755faf72
commit 010ffceab5
17 changed files with 820 additions and 2 deletions

View File

@@ -0,0 +1,80 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven module for the glusterfsreader plugin. It inherits from the datax-all parent POM and is packaged as a jar. -->
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>glusterfsreader</artifactId>
<name>glusterfsreader</name>
<packaging>jar</packaging>
<dependencies>
<!-- DataX core and common; the version comes from the datax-project-version property, which is not defined in this file. -->
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-core</artifactId>
<version>${datax-project-version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
</dependency>
<!-- Logging API and backend. No version is given here; presumably it is managed by the parent POM (TODO confirm). -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<!-- NOTE(review): GlusterfsReader imports org.apache.commons.lang3.StringUtils, which is not declared here; presumably it arrives transitively through datax-common (TODO confirm). -->
</dependencies>
<build>
<!-- Only *.properties files located under src/main/java are copied as resources by this declaration. -->
<resources>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.properties</include>
</includes>
</resource>
</resources>
<plugins>
<!-- compiler plugin: source level, target level and encoding are taken from properties not defined in this file -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin: runs the "single" goal in the package phase with the descriptor src/main/assembly/package.xml; finalName is "datax" -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<!-- Assembly descriptor for the glusterfsreader plugin: lays the plugin out as a directory tree under plugin/reader/glusterfsreader. -->
<!-- The assembly id is intentionally left empty in this descriptor. -->
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<!-- Plugin descriptor and job template, copied from src/main/resources. -->
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/reader/glusterfsreader</outputDirectory>
</fileSet>
<!-- The plugin jar itself. The file name is hard-coded, so it has to be kept in sync with the module's artifactId and version. -->
<fileSet>
<directory>target/</directory>
<includes>
<include>glusterfsreader-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/reader/glusterfsreader</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<!-- Runtime-scope dependencies go to the libs sub-directory; useProjectArtifact=false keeps this module's own artifact out of that set. -->
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/reader/glusterfsreader/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@@ -0,0 +1,119 @@
package com.datamate.plugin.reader.glusterfsreader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.DirectoryNotEmptyException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
/**
 * Utility for mounting and unmounting GlusterFS volumes.
 *
 * <p>Mounting is delegated to the system command {@code mount -t glusterfs}; whether a path is
 * currently mounted is decided by reading {@code /proc/mounts}.
 */
public final class GlusterfsMountUtil {
    private static final Logger LOG = LoggerFactory.getLogger(GlusterfsMountUtil.class);

    private GlusterfsMountUtil() {
    }

    /**
     * Mounts a GlusterFS volume on a local directory.
     *
     * <p>The mount point directory is created when it does not exist. If the mount does not
     * succeed, a directory created by this call is removed again so failed attempts do not
     * leave empty directories behind.
     *
     * @param remote     remote address, format: {@code ip:/volume}
     * @param mountPoint local mount point
     * @param subPath    sub-path inside the volume (optional); it is not used by the mount
     *                   itself and is only kept for callers that read below the mount point
     * @throws RuntimeException if the mount point is already mounted, the command cannot be
     *                          started, the command exits with a non-zero code, or the calling
     *                          thread is interrupted while waiting
     */
    public static void mount(String remote, String mountPoint, String subPath) {
        Path mp = null;
        boolean createdDir = false;
        boolean mounted = false;
        try {
            mp = Paths.get(mountPoint);
            if (isMounted(mountPoint)) {
                throw new IOException("Already mounted: " + mountPoint);
            }
            // Remember whether the directory is ours so that cleanup never removes a
            // directory that existed before this call.
            createdDir = !Files.exists(mp);
            Files.createDirectories(mp);
            ProcessBuilder pb = new ProcessBuilder("mount", "-t", "glusterfs", remote, mountPoint);
            LOG.info("Mounting GlusterFS: {}", pb.command());
            runCommand(pb, "mount");
            mounted = true;
            LOG.info("GlusterFS mounted successfully: {} -> {}", remote, mountPoint);
        } catch (IOException e) {
            throw new RuntimeException("Failed to mount GlusterFS: " + remote, e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Failed to mount GlusterFS: " + remote, e);
        } finally {
            if (!mounted && createdDir) {
                deleteQuietly(mp);
            }
        }
    }

    /**
     * Unmounts a mount point. Does nothing when the path is not currently mounted.
     *
     * <p>After a successful {@code umount -l} the mount point directory is deleted if it is
     * empty; a non-empty directory is left in place.
     *
     * @param mountPoint mount point path
     * @throws IOException          if {@code /proc/mounts} cannot be read, the command cannot
     *                              be started, or the directory cannot be deleted
     * @throws InterruptedException if the thread is interrupted while waiting for the command
     * @throws RuntimeException     if the command exits with a non-zero code
     */
    public static void umount(String mountPoint) throws IOException, InterruptedException {
        if (!isMounted(mountPoint)) {
            return;
        }
        runCommand(new ProcessBuilder("umount", "-l", mountPoint), "umount");
        // Remove the now-unused mount directory, but only when it is empty.
        try {
            Files.deleteIfExists(Paths.get(mountPoint));
        } catch (DirectoryNotEmptyException ignored) {
            // Directory is not empty: keep it.
        }
        LOG.info("GlusterFS unmounted: {}", mountPoint);
    }

    /**
     * Tells whether a path is listed as a mount point in {@code /proc/mounts}.
     *
     * <p>The argument is trimmed and trailing slashes are ignored. Octal escapes in the
     * {@code /proc/mounts} entry (for example {@code \040} for a space) are decoded before
     * the comparison.
     *
     * @param mountPoint mount point path
     * @return {@code true} if the path is mounted
     * @throws IOException if {@code /proc/mounts} does not exist or cannot be read
     */
    public static boolean isMounted(String mountPoint) throws IOException {
        Path procMounts = Paths.get("/proc/mounts");
        if (!Files.exists(procMounts)) {
            throw new IOException("/proc/mounts not found");
        }
        String expected = stripTrailingSlashes(mountPoint.trim());
        List<String> lines = Files.readAllLines(procMounts);
        for (String line : lines) {
            String[] fields = line.split("\\s+");
            // The second whitespace-separated field of each entry is the mount directory.
            if (fields.length >= 2 && unescapeMountField(fields[1]).equals(expected)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs a command with stderr merged into stdout, drains its output and waits for it.
     *
     * @param pb     prepared command
     * @param action short name used in the failure message ("mount" or "umount")
     * @throws IOException          if the process cannot be started or its output cannot be read
     * @throws InterruptedException if interrupted while waiting for the process
     * @throws RuntimeException     if the process exits with a non-zero code
     */
    private static void runCommand(ProcessBuilder pb, String action)
            throws IOException, InterruptedException {
        pb.redirectErrorStream(true);
        Process p = pb.start();
        StringBuilder output = new StringBuilder();
        // Drain the output before waiting so the child cannot block on a full pipe.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line).append(System.lineSeparator());
            }
        }
        int rc = p.waitFor();
        if (rc != 0) {
            throw new RuntimeException("GlusterFS " + action + " failed, exit=" + rc + ", output: " + output);
        }
    }

    /** Best-effort removal of a directory; failures are logged and never thrown. */
    private static void deleteQuietly(Path dir) {
        try {
            Files.deleteIfExists(dir);
        } catch (IOException e) {
            LOG.warn("Could not remove mount directory {}: {}", dir, e.toString());
        }
    }

    /** Drops trailing '/' characters, keeping a lone "/" intact. */
    private static String stripTrailingSlashes(String path) {
        int end = path.length();
        while (end > 1 && path.charAt(end - 1) == '/') {
            end--;
        }
        return path.substring(0, end);
    }

    /** Decodes three-digit octal escapes such as {@code \040} into the character they encode. */
    private static String unescapeMountField(String field) {
        if (field.indexOf('\\') < 0) {
            return field;
        }
        StringBuilder decoded = new StringBuilder(field.length());
        int i = 0;
        while (i < field.length()) {
            char c = field.charAt(i);
            if (c == '\\' && i + 3 < field.length()
                    && isOctalDigit(field.charAt(i + 1))
                    && isOctalDigit(field.charAt(i + 2))
                    && isOctalDigit(field.charAt(i + 3))) {
                decoded.append((char) Integer.parseInt(field.substring(i + 1, i + 4), 8));
                i += 4;
            } else {
                decoded.append(c);
                i++;
            }
        }
        return decoded.toString();
    }

    private static boolean isOctalDigit(char c) {
        return c >= '0' && c <= '7';
    }
}

View File

@@ -0,0 +1,133 @@
package com.datamate.plugin.reader.glusterfsreader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * GlusterFS reader plugin.
 *
 * <p>The job mounts a GlusterFS volume through {@link GlusterfsMountUtil}; the task lists the
 * regular files of one directory below the mount point and sends each file name to the
 * writer as a single-column record.
 */
public class GlusterfsReader extends Reader {
    private static final Logger LOG = LoggerFactory.getLogger(GlusterfsReader.class);

    /** Job side: mounts the volume in {@link #prepare()} and unmounts it afterwards. */
    public static class Job extends Reader.Job {
        private Configuration jobConfig = null;
        private String mountPoint;

        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
        }

        /**
         * Reads {@code ip}, {@code volume} and {@code path} from the job configuration, picks a
         * fresh mount point under {@code /dataset/mount/}, stores it in the configuration under
         * {@code mountPoint} and mounts the volume there.
         *
         * @throws IllegalArgumentException if {@code ip} or {@code volume} is missing or blank
         */
        @Override
        public void prepare() {
            String ip = StringUtils.trimToEmpty(this.jobConfig.getString("ip"));
            // Leading slashes are dropped so that "/vol" and "vol" both yield "ip:/vol".
            String volume = StringUtils.trimToEmpty(this.jobConfig.getString("volume"))
                    .replaceFirst("^/+", "");
            if (ip.isEmpty() || volume.isEmpty()) {
                throw new IllegalArgumentException(
                        "glusterfsreader requires non-blank 'ip' and 'volume' parameters");
            }
            String subPath = this.jobConfig.getString("path", "");

            this.mountPoint = "/dataset/mount/" + UUID.randomUUID();
            this.jobConfig.set("mountPoint", this.mountPoint);

            // GlusterFS mount format: mount -t glusterfs ip:/volume /mountpoint
            String remote = ip + ":/" + volume;
            GlusterfsMountUtil.mount(remote, mountPoint, subPath);
        }

        /** Always returns the job configuration as a single task configuration. */
        @Override
        public List<Configuration> split(int adviceNumber) {
            return Collections.singletonList(this.jobConfig);
        }

        /**
         * Unmounts the volume and schedules the mount directory for deletion at JVM exit.
         *
         * @throws RuntimeException if unmounting fails or the thread is interrupted
         */
        @Override
        public void post() {
            try {
                GlusterfsMountUtil.umount(this.mountPoint);
                new File(this.mountPoint).deleteOnExit();
            } catch (IOException e) {
                throw new RuntimeException(e);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }

        /**
         * Best-effort unmount for the case where {@link #post()} did not run or did not finish.
         * Unmounting an already unmounted path is a no-op, so calling this after a successful
         * {@code post()} is harmless. Failures are logged and never thrown.
         */
        @Override
        public void destroy() {
            // NOTE(review): assumes the framework calls destroy() even when the job fails
            // before post() - confirm against the DataX job lifecycle.
            if (this.mountPoint == null) {
                return;
            }
            try {
                GlusterfsMountUtil.umount(this.mountPoint);
            } catch (IOException | RuntimeException e) {
                LOG.warn("Failed to unmount GlusterFS mount point {} during destroy", this.mountPoint, e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                LOG.warn("Interrupted while unmounting GlusterFS mount point {}", this.mountPoint, e);
            }
        }
    }

    /** Task side: lists matching files below the mount point and emits their names. */
    public static class Task extends Reader.Task {
        private Configuration jobConfig;
        private String mountPoint;
        private String subPath;
        private Set<String> fileType;
        private List<String> files;

        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            this.mountPoint = this.jobConfig.getString("mountPoint");
            this.subPath = this.jobConfig.getString("path", "");
            this.fileType = new HashSet<>(this.jobConfig.getList("fileType", Collections.emptyList(), String.class));
            this.files = this.jobConfig.getList("files", Collections.emptyList(), String.class);
        }

        /**
         * Lists the regular files directly inside the read directory (no recursion) and sends
         * one record per file, holding the file name as its only column.
         *
         * <p>An empty {@code fileType} accepts every suffix and an empty {@code files} accepts
         * every name; otherwise a file must match both filters.
         *
         * @param recordSender sink for the produced records
         * @throws RuntimeException if the directory cannot be listed
         */
        @Override
        public void startRead(RecordSender recordSender) {
            String readPath = this.mountPoint;
            if (StringUtils.isNotBlank(this.subPath)) {
                // Strip leading slashes so the sub-path is always taken relative to the mount point.
                readPath = this.mountPoint + "/" + this.subPath.replaceFirst("^/+", "");
            }
            // A hash set makes the name filter a constant-time lookup per directory entry.
            Set<String> wantedNames = new HashSet<>(this.files);
            try (Stream<Path> stream = Files.list(Paths.get(readPath))) {
                List<String> fileList = stream.filter(Files::isRegularFile)
                        .filter(file -> fileType.isEmpty() || fileType.contains(getFileSuffix(file)))
                        .map(path -> path.getFileName().toString())
                        .filter(fileName -> wantedNames.isEmpty() || wantedNames.contains(fileName))
                        .collect(Collectors.toList());
                for (String fileName : fileList) {
                    Record record = recordSender.createRecord();
                    record.addColumn(new StringColumn(fileName));
                    recordSender.sendToWriter(record);
                }
                this.jobConfig.set("columnNumber", 1);
            } catch (IOException e) {
                LOG.error("Error reading files from GlusterFS mount point: {}", readPath, e);
                throw new RuntimeException(e);
            }
        }

        /**
         * Returns the text after the last dot of the file name, or an empty string when the
         * name has no dot or ends with one.
         */
        private String getFileSuffix(Path path) {
            String fileName = path.getFileName().toString();
            int lastDotIndex = fileName.lastIndexOf('.');
            if (lastDotIndex == -1 || lastDotIndex == fileName.length() - 1) {
                return "";
            }
            return fileName.substring(lastDotIndex + 1);
        }

        @Override
        public void destroy() {
        }
    }
}

View File

@@ -0,0 +1,6 @@
{
"name": "glusterfsreader",
"class": "com.datamate.plugin.reader.glusterfsreader.GlusterfsReader",
"description": "read file list from GlusterFS distributed file system",
"developer": "datamate"
}

View File

@@ -0,0 +1,10 @@
{
"name": "glusterfsreader",
"parameter": {
"ip": "",
"volume": "",
"path": "",
"fileType": [],
"files": []
}
}