Skip to content

Commit 310f300

Browse files
committed
export import poc
1 parent 60e58d4 commit 310f300

11 files changed

Lines changed: 33877 additions & 1 deletion

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Dockerfile located inside cdap-export-import/
# Build it with the cdap-export-import directory as the build context, after
# `mvn package` has produced the shaded jar under target/.

FROM eclipse-temurin:8-jre-focal
WORKDIR /opt/app

# The path is relative to the cdap-export-import directory.
# It is a build argument so that a single, exact jar can be selected with
#   docker build --build-arg JAR_FILE=target/<exact-jar-name>.jar .
# COPY to a file destination fails if the wildcard matches more than one jar.
ARG JAR_FILE=target/cdap-export-import-*.jar
COPY ${JAR_FILE} app.jar

# Name the main class explicitly instead of using `java -jar`, so the container
# starts even when the jar manifest carries no Main-Class entry.
# Arguments given to `docker run` (e.g. the GCS bucket name) are appended.
ENTRYPOINT ["java", "-cp", "app.jar", "io.cdap.cdap.ExportJobMainTemp"]

cdap-export-import/pom.xml

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Copyright © 2025 Cask Data, Inc.
4+
5+
Licensed under the Apache License, Version 2.0 (the "License"); you may not
6+
use this file except in compliance with the License. You may obtain a copy of
7+
the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13+
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14+
License for the specific language governing permissions and limitations under
15+
the License.
16+
-->
17+
18+
<project xmlns="http://maven.apache.org/POM/4.0.0"
19+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
20+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
21+
<modelVersion>4.0.0</modelVersion>
22+
<parent>
23+
<groupId>io.cdap.cdap</groupId>
24+
<artifactId>cdap</artifactId>
25+
<version>6.12.0-SNAPSHOT</version>
26+
</parent>
27+
28+
<artifactId>cdap-export-import</artifactId>
29+
<name>CDAP Export-Import</name>
30+
<packaging>jar</packaging>
31+
32+
<dependencyManagement>
33+
<dependencies>
34+
<dependency>
35+
<groupId>com.google.cloud</groupId>
36+
<artifactId>libraries-bom</artifactId>
37+
<version>26.32.0</version>
38+
<type>pom</type>
39+
<scope>import</scope>
40+
</dependency>
41+
</dependencies>
42+
</dependencyManagement>
43+
44+
<dependencies>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <scope>test</scope>
  </dependency>
  <!-- Sibling CDAP modules: follow the version inherited from the parent
       instead of repeating it, so a version bump cannot leave them stale. -->
  <dependency>
    <groupId>io.cdap.cdap</groupId>
    <artifactId>cdap-storage-spi</artifactId>
    <version>${project.version}</version>
    <scope>compile</scope>
  </dependency>
  <dependency>
    <groupId>io.cdap.cdap</groupId>
    <artifactId>cdap-proto</artifactId>
    <version>${project.version}</version>
    <scope>compile</scope>
  </dependency>
  <!-- Version is supplied by the libraries-bom imported in dependencyManagement. -->
  <dependency>
    <groupId>com.google.cloud</groupId>
    <artifactId>google-cloud-storage</artifactId>
  </dependency>
  <dependency>
    <groupId>io.cdap.cdap</groupId>
    <artifactId>cdap-data-fabric</artifactId>
    <version>${project.version}</version>
    <scope>compile</scope>
  </dependency>
  <dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
  </dependency>
</dependencies>
77+
78+
<build>
  <plugins>
    <!-- THIS PLUGIN CREATES THE SELF-CONTAINED, SHADED JAR -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-shade-plugin</artifactId>
      <version>3.2.4</version>
      <executions>
        <execution>
          <phase>package</phase>
          <goals>
            <goal>shade</goal>
          </goals>
          <configuration>
            <createDependencyReducedPom>false</createDependencyReducedPom>
            <transformers>
              <!-- Writes Main-Class into the manifest so `java -jar` can start the job.
                   NOTE(review): ExportJobMainTemp is the only export entry point in this
                   module; update this if the class is renamed. -->
              <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                <mainClass>io.cdap.cdap.ExportJobMainTemp</mainClass>
              </transformer>
              <!-- Merges META-INF/services files from all dependencies (and applies the
                   relocations to them) instead of letting one jar's file win. -->
              <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
            </transformers>
            <relocations>
              <relocation>
                <pattern>com.google.common</pattern>
                <shadedPattern>cdap.shaded.com.google.common</shadedPattern>
              </relocation>
              <relocation>
                <pattern>com.google.api</pattern>
                <shadedPattern>cdap.shaded.com.google.api</shadedPattern>
              </relocation>
              <!-- Add other relocations as needed -->
            </relocations>
            <filters>
              <!-- Signature files of the original jars are invalid once classes are
                   repackaged, and would make the JVM reject the shaded jar. -->
              <filter>
                <artifact>*:*</artifact>
                <excludes>
                  <exclude>META-INF/*.SF</exclude>
                  <exclude>META-INF/*.DSA</exclude>
                  <exclude>META-INF/*.RSA</exclude>
                </excludes>
              </filter>
            </filters>
          </configuration>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
120+
121+
<!-- <build>-->
122+
<!-- <plugins>-->
123+
<!-- <plugin>-->
124+
<!-- <groupId>org.apache.maven.plugins</groupId>-->
125+
<!-- <artifactId>maven-shade-plugin</artifactId>-->
126+
<!-- <version>3.2.4</version> <executions>-->
127+
<!-- <execution>-->
128+
<!-- <phase>package</phase>-->
129+
<!-- <goals>-->
130+
<!-- <goal>shade</goal>-->
131+
<!-- </goals>-->
132+
<!-- <configuration>-->
133+
<!-- <transformers>-->
134+
<!-- <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">-->
135+
<!-- <mainClass>io.cdap.cdap.ExportJobMain</mainClass>-->
136+
<!-- </transformer>-->
137+
<!-- </transformers>-->
138+
<!-- <filters>-->
139+
<!-- <filter>-->
140+
<!-- <artifact>*:*</artifact>-->
141+
<!-- <excludes>-->
142+
<!-- <exclude>META-INF/*.SF</exclude>-->
143+
<!-- <exclude>META-INF/*.DSA</exclude>-->
144+
<!-- <exclude>META-INF/*.RSA</exclude>-->
145+
<!-- </excludes>-->
146+
<!-- </filter>-->
147+
<!-- </filters>-->
148+
<!-- </configuration>-->
149+
<!-- </execution>-->
150+
<!-- </executions>-->
151+
<!-- </plugin>-->
152+
<!-- </plugins>-->
153+
<!-- </build>-->
154+
</project>
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/*
2+
* Copyright © 2025 Cask Data, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License. You may obtain a copy of
6+
* the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
* License for the specific language governing permissions and limitations under
14+
* the License.
15+
*/
16+
package io.cdap.cdap;
17+
18+
import com.google.gson.Gson;
19+
import com.google.inject.AbstractModule;
20+
import com.google.inject.Guice;
21+
import com.google.inject.Injector;
22+
import com.google.inject.Module;
23+
import com.google.inject.Scopes;
24+
import io.cdap.cdap.api.metrics.MetricsCollectionService;
25+
import io.cdap.cdap.common.conf.CConfiguration;
26+
import io.cdap.cdap.common.guice.ConfigModule;
27+
import io.cdap.cdap.common.metrics.NoOpMetricsCollectionService;
28+
import io.cdap.cdap.data.runtime.StorageModule;
29+
import io.cdap.cdap.store.NamespaceTable;
30+
import java.nio.charset.StandardCharsets;
31+
import java.util.ArrayList;
32+
import java.util.Arrays;
33+
import java.util.List;
34+
import io.cdap.cdap.spi.data.transaction.TransactionRunner;
35+
import io.cdap.cdap.spi.data.transaction.TransactionRunners;
36+
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
38+
import io.cdap.cdap.proto.NamespaceMeta;
39+
import com.google.cloud.storage.BlobId;
40+
import com.google.cloud.storage.BlobInfo;
41+
import com.google.cloud.storage.Storage;
42+
import com.google.cloud.storage.StorageOptions;
43+
44+
public class ExportJobMainTemp
45+
{
46+
private final static Logger LOG = LoggerFactory.getLogger(ExportJobMainTemp.class);
47+
private static final Gson GSON = new Gson();
48+
public void exportNamespaces(TransactionRunner transactionRunner, String bucketName, Storage gcsClient) {
49+
// CConfiguration cConf = CConfiguration.create();
50+
// List<Module> modules = new ArrayList<>(Arrays.asList(
51+
// new ConfigModule(cConf),
52+
// new StorageModule(),
53+
// new AbstractModule() {
54+
// @Override
55+
// protected void configure() {
56+
// bind(MetricsCollectionService.class).to(NoOpMetricsCollectionService.class)
57+
// .in(Scopes.SINGLETON);
58+
// }
59+
// }
60+
// ));
61+
// Injector injector = Guice.createInjector(modules);
62+
// TransactionRunner transactionRunner = injector.getInstance(TransactionRunner.class);
63+
LOG.debug("Starting export of namespaces");
64+
65+
try {
66+
TransactionRunners.run(transactionRunner, context -> {
67+
NamespaceTable namespaceTable = new NamespaceTable(context);
68+
List<NamespaceMeta> namespaces = namespaceTable.list();
69+
LOG.info("Found {} namespaces to export.", namespaces.size());
70+
71+
// Loop through each namespace and upload its metadata.
72+
for (NamespaceMeta namespace : namespaces) {
73+
String namespaceId = namespace.getName();
74+
LOG.debug("Processing namespace '{}'...", namespaceId);
75+
76+
// Convert the NamespaceMeta object to a JSON string.
77+
String namespaceJson = GSON.toJson(namespace);
78+
79+
// Define the full path for the object in GCS.
80+
String gcsObjectPath = String.format("cdap/namespaces/%s/namespaceMeta", namespaceId);
81+
82+
// Prepare the object for upload.
83+
BlobId blobId = BlobId.of(bucketName, gcsObjectPath);
84+
BlobInfo blobInfo = BlobInfo.newBuilder(blobId).setContentType("application/json").build();
85+
86+
// Upload the JSON content as bytes.
87+
gcsClient.create(blobInfo, namespaceJson.getBytes(StandardCharsets.UTF_8));
88+
89+
LOG.info("Successfully exported namespace '{}' to gs://{}/{}",
90+
namespaceId, bucketName, gcsObjectPath);
91+
}
92+
}, Exception.class);
93+
} catch (Exception e) {
94+
LOG.error("Failed to export namespaces due to an unexpected transaction error.", e);
95+
// In a real K8s job, you might want the pod to fail. Throwing a RuntimeException
96+
// will cause the Java process to exit with a non-zero status code.
97+
throw new RuntimeException("Namespace export failed", e);
98+
}
99+
// TransactionRunners.run(transactionRunner, context -> {
100+
// NamespaceTable namespaceTable = new NamespaceTable(context);
101+
// List<NamespaceMeta> namespaces = namespaceTable.list();
102+
// LOG.debug("Found {} namespaces: {}", namespaces.size(), namespaces);
103+
// });
104+
LOG.debug("Finished exporting namespaces.");
105+
}
106+
public static void main( String[] args )
107+
{
108+
LOG.debug("Args: {}", args);
109+
CConfiguration cConf = CConfiguration.create();
110+
List<Module> modules = new ArrayList<>(Arrays.asList(
111+
new ConfigModule(cConf),
112+
new StorageModule(),
113+
new AbstractModule() {
114+
@Override
115+
protected void configure() {
116+
bind(MetricsCollectionService.class).to(NoOpMetricsCollectionService.class)
117+
.in(Scopes.SINGLETON);
118+
}
119+
}
120+
));
121+
Injector injector = Guice.createInjector(modules);
122+
TransactionRunner transactionRunner = injector.getInstance(TransactionRunner.class);
123+
String gcsBucket = args[0];
124+
Storage gcsClient;
125+
try {
126+
gcsClient = StorageOptions.getDefaultInstance().getService();
127+
LOG.info("Successfully initialized Google Cloud Storage client.");
128+
} catch (Exception e) {
129+
LOG.error("Failed to initialize GCS client. Please check authentication.", e);
130+
return;
131+
}
132+
ExportJobMainTemp exportJob = new ExportJobMainTemp();
133+
exportJob.exportNamespaces(transactionRunner, gcsBucket, gcsClient);
134+
System.out.println("Job finished.");
135+
}
136+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright © 2025 Cask Data, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License. You may obtain a copy of
6+
* the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
* License for the specific language governing permissions and limitations under
14+
* the License.
15+
*/
16+
package io.cdap.cdap;
17+
18+
/**
 * Placeholder entry point for the import job; it only prints a greeting to standard output.
 */
public class ImportJobMain {

  /**
   * Prints the greeting followed by a line separator.
   *
   * @param args command line arguments; not read
   */
  public static void main(String[] args) {
    String greeting = "Hello World!";
    System.out.println(greeting);
  }
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright © 2025 Cask Data, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License. You may obtain a copy of
6+
* the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
* License for the specific language governing permissions and limitations under
14+
* the License.
15+
*/
16+
package io.cdap.cdap;
17+
18+
import junit.framework.Test;
19+
import junit.framework.TestCase;
20+
import junit.framework.TestSuite;
21+
22+
/**
23+
* Unit test for simple App.
24+
*/
25+
public class ExportJobMainTest
26+
extends TestCase
27+
{
28+
/**
29+
* Create the test case
30+
*
31+
* @param testName name of the test case
32+
*/
33+
public ExportJobMainTest( String testName )
34+
{
35+
super( testName );
36+
}
37+
38+
/**
39+
* @return the suite of tests being tested
40+
*/
41+
public static Test suite()
42+
{
43+
return new TestSuite( ExportJobMainTest.class );
44+
}
45+
46+
/**
47+
* Rigourous Test :-)
48+
*/
49+
public void testApp()
50+
{
51+
assertTrue( true );
52+
}
53+
}

0 commit comments

Comments
 (0)