You've already forked DataMate
localfs support
This commit is contained in:
75
runtime/datax/localreader/pom.xml
Normal file
75
runtime/datax/localreader/pom.xml
Normal file
@@ -0,0 +1,75 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-all</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>localreader</artifactId>
|
||||
<name>localreader</name>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-core</artifactId>
|
||||
<version>${datax-project-version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-common</artifactId>
|
||||
<version>${datax-project-version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>ch.qos.logback</groupId>
|
||||
<artifactId>logback-classic</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/java</directory>
|
||||
<includes>
|
||||
<include>**/*.properties</include>
|
||||
</includes>
|
||||
</resource>
|
||||
</resources>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>${jdk-version}</source>
|
||||
<target>${jdk-version}</target>
|
||||
<encoding>${project-sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||
</descriptors>
|
||||
<finalName>datax</finalName>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>dwzip</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
35
runtime/datax/localreader/src/main/assembly/package.xml
Normal file
35
runtime/datax/localreader/src/main/assembly/package.xml
Normal file
@@ -0,0 +1,35 @@
|
||||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id></id>
|
||||
<formats>
|
||||
<format>dir</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>src/main/resources</directory>
|
||||
<includes>
|
||||
<include>plugin.json</include>
|
||||
<include>plugin_job_template.json</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/reader/localreader</outputDirectory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>target/</directory>
|
||||
<includes>
|
||||
<include>localreader-0.0.1-SNAPSHOT.jar</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/reader/localreader</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<outputDirectory>plugin/reader/localreader/libs</outputDirectory>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
||||
@@ -0,0 +1,116 @@
|
||||
package com.datamate.plugin.reader.localreader;
|
||||
|
||||
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* 本地文件夹读取器
|
||||
* 从本地文件系统的指定目录读取文件列表
|
||||
*/
|
||||
public class LocalReader extends Reader {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(LocalReader.class);
|
||||
|
||||
public static class Job extends Reader.Job {
|
||||
private Configuration jobConfig = null;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.jobConfig = super.getPluginJobConf();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
String path = this.jobConfig.getString("path");
|
||||
if (path == null || path.isEmpty()) {
|
||||
throw new RuntimeException("path is required for localreader");
|
||||
}
|
||||
Path dirPath = Paths.get(path);
|
||||
if (!Files.exists(dirPath)) {
|
||||
throw new RuntimeException("path does not exist: " + path);
|
||||
}
|
||||
if (!Files.isDirectory(dirPath)) {
|
||||
throw new RuntimeException("path is not a directory: " + path);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Configuration> split(int adviceNumber) {
|
||||
return Collections.singletonList(this.jobConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
}
|
||||
|
||||
public static class Task extends Reader.Task {
|
||||
|
||||
private Configuration jobConfig;
|
||||
private String path;
|
||||
private Set<String> fileType;
|
||||
private List<String> files;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.jobConfig = super.getPluginJobConf();
|
||||
this.path = this.jobConfig.getString("path");
|
||||
this.fileType = new HashSet<>(this.jobConfig.getList("fileType", Collections.emptyList(), String.class));
|
||||
this.files = this.jobConfig.getList("files", Collections.emptyList(), String.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startRead(RecordSender recordSender) {
|
||||
try (Stream<Path> stream = Files.list(Paths.get(this.path))) {
|
||||
List<String> fileList = stream.filter(Files::isRegularFile)
|
||||
.filter(file -> fileType.isEmpty() || fileType.contains(getFileSuffix(file)))
|
||||
.map(p -> p.getFileName().toString())
|
||||
.filter(fileName -> this.files.isEmpty() || this.files.contains(fileName))
|
||||
.collect(Collectors.toList());
|
||||
fileList.forEach(filePath -> {
|
||||
Record record = recordSender.createRecord();
|
||||
record.addColumn(new StringColumn(filePath));
|
||||
recordSender.sendToWriter(record);
|
||||
});
|
||||
this.jobConfig.set("columnNumber", 1);
|
||||
} catch (IOException e) {
|
||||
LOG.error("Error reading files from local path: {}", this.path, e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private String getFileSuffix(Path path) {
|
||||
String fileName = path.getFileName().toString();
|
||||
int lastDotIndex = fileName.lastIndexOf('.');
|
||||
if (lastDotIndex == -1 || lastDotIndex == fileName.length() - 1) {
|
||||
return "";
|
||||
}
|
||||
return fileName.substring(lastDotIndex + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
}
|
||||
}
|
||||
6
runtime/datax/localreader/src/main/resources/plugin.json
Normal file
6
runtime/datax/localreader/src/main/resources/plugin.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "localreader",
|
||||
"class": "com.datamate.plugin.reader.localreader.LocalReader",
|
||||
"description": "read from local file system",
|
||||
"developer": "datamate"
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "localreader",
|
||||
"parameter": {
|
||||
"path": "/data/source"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user