s3-compatible-fs support

This commit is contained in:
2026-01-09 14:25:59 +08:00
parent ba210d3d4f
commit a98eeb530f
13 changed files with 684 additions and 1 deletions

View File

@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven build for the s3writer DataX plugin module.
It inherits from the datax-all parent, compiles the plugin jar and, during the
package phase, runs the assembly descriptor at src/main/assembly/package.xml.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-all</artifactId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<artifactId>s3writer</artifactId>
<name>s3writer</name>
<packaging>jar</packaging>
<dependencies>
<!-- DataX framework modules; their version comes from the datax-project-version property. -->
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-core</artifactId>
<version>${datax-project-version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
</dependency>
<!--
The three dependencies below declare no version in this file.
NOTE(review): presumably the versions are supplied by dependencyManagement in the
parent POM; confirm that the parent manages software.amazon.awssdk:s3.
-->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
</dependency>
</dependencies>
<build>
<resources>
<!-- Treat .properties files that live next to the Java sources as resources. -->
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.properties</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- Builds the plugin distribution layout described by package.xml when the module is packaged. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<!--
Assembly descriptor for the s3writer plugin.
It produces an exploded directory (format "dir", no base directory) in which
everything belonging to the plugin sits under plugin/writer/s3writer.
-->
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<!-- Plugin descriptor and job template are copied next to the plugin jar. -->
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/writer/s3writer</outputDirectory>
</fileSet>
<!--
The plugin jar itself. The file name is hard-coded, so it has to be kept in sync
with the artifactId and version declared in the module's pom.xml.
-->
<fileSet>
<directory>target/</directory>
<includes>
<include>s3writer-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/writer/s3writer</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<!-- Runtime-scope dependencies go into libs; the project artifact is excluded because the fileSet above already copies it. -->
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/writer/s3writer/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@@ -0,0 +1,181 @@
package com.datamate.plugin.writer.s3writer;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.core.sync.ResponseTransformer;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3Configuration;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
/**
 * Writer plugin for S3-compatible object storage.
 *
 * <p>Despite the "writer" name, data flows from the object store to the local file
 * system: each record received from the reader carries an object key in its first
 * column, and the object with that key is downloaded into the local {@code destPath}
 * directory.
 *
 * <p>Only the last path segment of the key is used as the local file name, so objects
 * whose keys differ only in their prefix are written to the same local file and the
 * later download replaces the earlier one.
 */
public class S3Writer extends Writer {
    private static final Logger LOG = LoggerFactory.getLogger(S3Writer.class);

    /**
     * Job-level part of the plugin: checks {@code destPath}, creates the destination
     * directory and produces the per-task configurations.
     */
    public static class Job extends Writer.Job {
        private Configuration jobConfig = null;

        /** Loads the plugin configuration of this job. */
        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
        }

        /**
         * Ensures the destination directory exists before any task runs.
         *
         * @throws RuntimeException if {@code destPath} is blank or the directory cannot be created
         */
        @Override
        public void prepare() {
            String destPath = this.jobConfig.getString("destPath");
            if (StringUtils.isBlank(destPath)) {
                throw new RuntimeException("destPath is required for s3writer");
            }
            try {
                Files.createDirectories(Paths.get(destPath));
            } catch (IOException e) {
                throw new RuntimeException("Failed to create destination directory: " + destPath, e);
            }
        }

        /**
         * Produces one writer configuration per requested slice.
         *
         * <p>The framework pairs reader tasks and writer tasks one to one, so the number
         * of configurations returned here has to follow {@code adviceNumber}; always
         * returning a single configuration breaks jobs whose reader splits into several
         * tasks. Each slice receives its own copy of the job configuration.
         *
         * @param adviceNumber number of slices requested by the framework
         * @return {@code adviceNumber} configurations, and never fewer than one
         */
        @Override
        public List<Configuration> split(int adviceNumber) {
            int sliceCount = Math.max(1, adviceNumber);
            List<Configuration> slices = new ArrayList<>(sliceCount);
            for (int i = 0; i < sliceCount; i++) {
                slices.add(this.jobConfig.clone());
            }
            return slices;
        }

        /** Nothing to do after the tasks have finished. */
        @Override
        public void post() {
        }

        /** The job holds no resources that need releasing. */
        @Override
        public void destroy() {
        }
    }

    /**
     * Task-level part of the plugin: owns the S3 client and downloads one object per
     * received record.
     */
    public static class Task extends Writer.Task {
        private Configuration jobConfig;
        // NOTE(review): populated from the "fileType" setting but not consulted anywhere
        // in this class; confirm whether downloads were meant to be filtered by it.
        private Set<String> fileType;
        private String endpoint;
        private String accessKey;
        private String secretKey;
        private String bucket;
        private String destPath;
        private String region;
        private S3Client s3;

        /**
         * Reads the task settings, validates the mandatory ones and builds the S3 client.
         *
         * @throws DataXException if {@code endpoint} or {@code bucket} is blank, or the
         *     client cannot be created
         */
        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            this.fileType = new HashSet<>(this.jobConfig.getList("fileType", Collections.emptyList(), String.class));
            this.endpoint = this.jobConfig.getString("endpoint");
            this.accessKey = this.jobConfig.getString("accessKey");
            this.secretKey = this.jobConfig.getString("secretKey");
            this.bucket = this.jobConfig.getString("bucket");
            this.destPath = this.jobConfig.getString("destPath");
            this.region = this.jobConfig.getString("region", "us-east-1");
            // Fail fast with a readable message: without this check a missing endpoint
            // surfaces as a NullPointerException from URI parsing, and a missing bucket
            // is only noticed once the first record arrives.
            if (StringUtils.isBlank(this.endpoint) || StringUtils.isBlank(this.bucket)) {
                throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR,
                        "endpoint and bucket must be provided");
            }
            this.s3 = getS3Client();
        }

        /**
         * Builds a client for the configured endpoint using static credentials.
         * Path-style access is enabled, i.e. the bucket is addressed in the URL path
         * instead of as a host-name prefix.
         *
         * @return the configured client
         * @throws DataXException if the endpoint is not a valid URI or the client cannot be built
         */
        private S3Client getS3Client() {
            try {
                AwsBasicCredentials creds = AwsBasicCredentials.create(accessKey, secretKey);
                S3Configuration serviceConfig = S3Configuration.builder()
                        .pathStyleAccessEnabled(true)
                        .build();
                return S3Client.builder()
                        .endpointOverride(new URI(endpoint))
                        .region(Region.of(region))
                        .serviceConfiguration(serviceConfig)
                        .credentialsProvider(StaticCredentialsProvider.create(creds))
                        .build();
            } catch (Exception e) {
                LOG.error("Error init S3 client: {}", this.endpoint, e);
                throw DataXException.asDataXException(CommonErrorCode.RUNTIME_ERROR, e);
            }
        }

        /**
         * Consumes records until the reader is exhausted and downloads the object named
         * by the first column of each record. Records without a first column or with a
         * blank key are skipped.
         *
         * @param lineReceiver source of the records produced by the reader
         * @throws DataXException if receiving or processing records fails unexpectedly
         */
        @Override
        public void startWrite(RecordReceiver lineReceiver) {
            try {
                Record record;
                while ((record = lineReceiver.getFromReader()) != null) {
                    // An empty record has no first column; treat it like a blank key
                    // instead of letting a NullPointerException abort the whole task.
                    Column keyColumn = record.getColumn(0);
                    String key = keyColumn == null ? null : keyColumn.asString();
                    if (StringUtils.isBlank(key)) {
                        continue;
                    }
                    copyFileFromS3(key);
                }
            } catch (Exception e) {
                LOG.error("Error writing files from S3 compatible storage: {}", this.endpoint, e);
                throw DataXException.asDataXException(CommonErrorCode.RUNTIME_ERROR, e);
            }
        }

        /**
         * Downloads a single object into the destination directory, replacing a local
         * file of the same name if one exists.
         *
         * <p>This is best effort: a failure is logged as a warning and the task carries
         * on with the next record.
         *
         * @param key object key inside the configured bucket; must not be blank
         */
        private void copyFileFromS3(String key) {
            try {
                Path targetDir = Paths.get(destPath);
                try {
                    Files.createDirectories(targetDir);
                } catch (IOException e) {
                    // Keep going: the download below reports the real problem if the
                    // directory is truly unusable.
                    LOG.warn("Create dest dir {} failed: {}", targetDir, e.getMessage(), e);
                }
                // getFileName() returns null for keys without any name element (e.g. "/").
                Path fileNamePath = Paths.get(key).getFileName();
                String fileName = fileNamePath == null ? null : fileNamePath.toString();
                if (StringUtils.isBlank(fileName)) {
                    LOG.warn("Skip object with empty file name for key {}", key);
                    return;
                }
                Path target = targetDir.resolve(fileName);
                // The file transformer refuses to write over an existing file.
                Files.deleteIfExists(target);
                GetObjectRequest getReq = GetObjectRequest.builder()
                        .bucket(bucket)
                        .key(key)
                        .build();
                s3.getObject(getReq, ResponseTransformer.toFile(target));
                LOG.info("Downloaded S3 object {} to {}", key, target);
            } catch (Exception e) {
                LOG.warn("Failed to download object {}: {}", key, e.getMessage(), e);
            }
        }

        /** Closes the S3 client; a failure to close is logged and otherwise ignored. */
        @Override
        public void destroy() {
            if (s3 != null) {
                try {
                    s3.close();
                } catch (Exception e) {
                    // Cleanup must not fail the task, but the problem should stay visible.
                    LOG.warn("Failed to close S3 client for endpoint {}", this.endpoint, e);
                }
            }
        }
    }
}

View File

@@ -0,0 +1,6 @@
{
"name": "s3writer",
"class": "com.datamate.plugin.writer.s3writer.S3Writer",
"description": "download files from S3-compatible object storage to a local directory",
"developer": "datamate"
}

View File

@@ -0,0 +1,12 @@
{
"name": "s3writer",
"parameter": {
"endpoint": "http://127.0.0.1:9000",
"bucket": "test-bucket",
"accessKey": "ak-xxx",
"secretKey": "sk-xxx",
"prefix": "/test",
"region": "us-east-1",
"destPath": "/data/dest"
}
}