-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
docs(samples): Add Dataflow "Getting started" flex template (#8897)
* Add a basic flex template sample, for the Dataflow flex templates tutorial. * Use unique image tag in test
- Loading branch information
1 parent
27ee41e
commit dc18ac3
Showing
5 changed files
with
512 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Dataflow flex template: Getting started sample | ||
|
||
## Before you begin | ||
|
||
Make sure you have followed the | ||
[Dataflow setup instructions](../../README.md). | ||
|
||
## Create a Cloud Storage bucket | ||
|
||
```sh | ||
export BUCKET="your-bucket" | ||
gcloud storage buckets create gs://$BUCKET | ||
``` | ||
|
||
## Create an Artifact Registry repository | ||
|
||
```sh | ||
export REGION="us-central1" | ||
export REPOSITORY="your-repository" | ||
|
||
gcloud artifacts repositories create $REPOSITORY \ | ||
--repository-format=docker \ | ||
--location=$REGION | ||
``` | ||
|
||
## Build the JAR file | ||
|
||
```sh | ||
mvn clean package | ||
``` | ||
|
||
## Build the template | ||
|
||
```sh | ||
export PROJECT="project-id" | ||
|
||
gcloud dataflow flex-template build gs://$BUCKET/getting_started_java.json \ | ||
--image-gcr-path "$REGION-docker.pkg.dev/$PROJECT/$REPOSITORY/getting-started-java:latest" \ | ||
--sdk-language "JAVA" \ | ||
--flex-template-base-image JAVA11 \ | ||
--metadata-file "metadata.json" \ | ||
--jar "target/flex-template-getting-started-1.0.jar" \ | ||
--env FLEX_TEMPLATE_JAVA_MAIN_CLASS="com.example.dataflow.FlexTemplateGettingStarted" | ||
``` | ||
|
||
## Run the template | ||
|
||
```sh | ||
|
||
gcloud dataflow flex-template run "flex-`date +%Y%m%d-%H%M%S`" \ | ||
--template-file-gcs-location "gs://$BUCKET/getting_started_java.json" \ | ||
--region $REGION \ | ||
--parameters output="gs://$BUCKET/output-" | ||
``` | ||
|
||
## Clean up | ||
|
||
To delete the resources that you created: | ||
|
||
```sh | ||
gcloud artifacts repositories delete $REPOSITORY --location $REGION --quiet | ||
gcloud storage rm gs://$BUCKET --recursive | ||
``` | ||
|
||
|
||
## What's next? | ||
|
||
For more information about building and running flex templates, see | ||
📝 [Use Flex Templates](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates). | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"name": "Getting started Java flex template", | ||
"description": "An example flex template for Java.", | ||
"parameters": [ | ||
{ | ||
"name": "output", | ||
"label": "Output destination", | ||
"helpText": "The path and filename prefix for writing output files.", | ||
"regexes": [ | ||
"^gs:\\/\\/[^\\n\\r]+$" | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
Licensed to the Apache Software Foundation (ASF) under one or more | ||
contributor license agreements. See the NOTICE file distributed with | ||
this work for additional information regarding copyright ownership. | ||
The ASF licenses this file to You under the Apache License, Version 2.0 | ||
(the "License"); you may not use this file except in compliance with | ||
the License. You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
--> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<parent> | ||
<groupId>com.google.cloud.samples</groupId> | ||
<artifactId>shared-configuration</artifactId> | ||
<version>1.2.0</version> | ||
</parent> | ||
|
||
<groupId>com.example.dataflow</groupId> | ||
<artifactId>flex-template-getting-started</artifactId> | ||
<version>1.0</version> | ||
|
||
<properties> | ||
<maven.compiler.source>11</maven.compiler.source> | ||
<maven.compiler.target>11</maven.compiler.target> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
<beam.version>2.49.0</beam.version> | ||
|
||
<maven-enforcer-plugin.version>3.4.1</maven-enforcer-plugin.version> | ||
<maven-compiler-plugin.version>3.11.0</maven-compiler-plugin.version> | ||
<maven-shade-plugin.version>3.5.0</maven-shade-plugin.version> | ||
<maven-exec-plugin.version>3.1.0</maven-exec-plugin.version> | ||
<slf4j.version>2.0.8</slf4j.version> | ||
</properties> | ||
|
||
<repositories> | ||
<repository> | ||
<id>apache.snapshots</id> | ||
<name>Apache Development Snapshot Repository</name> | ||
<url>https://repository.apache.org/content/repositories/snapshots/</url> | ||
<releases> | ||
<enabled>false</enabled> | ||
</releases> | ||
</repository> | ||
</repositories> | ||
|
||
<build> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-enforcer-plugin</artifactId> | ||
<version>${maven-enforcer-plugin.version}</version> | ||
<executions> | ||
<execution> | ||
<id>enforce-maven</id> | ||
<goals> | ||
<goal>enforce</goal> | ||
</goals> | ||
<configuration> | ||
<rules> | ||
<requireMavenVersion> | ||
<version>3.0.5</version> | ||
</requireMavenVersion> | ||
</rules> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
|
||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
<version>${maven-compiler-plugin.version}</version> | ||
</plugin> | ||
|
||
<!-- The maven shade plugin is used to create an uber-jar with all the | ||
dependencies needed to run as a standalone jar. | ||
Do not minimize the jar since that removes some of the required | ||
classes for the runners. --> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-shade-plugin</artifactId> | ||
<version>${maven-shade-plugin.version}</version> | ||
<executions> | ||
<execution> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>shade</goal> | ||
</goals> | ||
<configuration> | ||
<transformers> | ||
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> | ||
</transformers> | ||
<filters> | ||
<filter> | ||
<artifact>*:*</artifact> | ||
<excludes> | ||
<exclude>META-INF/*.SF</exclude> | ||
<exclude>META-INF/*.DSA</exclude> | ||
<exclude>META-INF/*.RSA</exclude> | ||
</excludes> | ||
</filter> | ||
</filters> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
|
||
<pluginManagement> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.codehaus.mojo</groupId> | ||
<artifactId>exec-maven-plugin</artifactId> | ||
<version>${maven-exec-plugin.version}</version> | ||
<configuration> | ||
<cleanupDaemonThreads>false</cleanupDaemonThreads> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</pluginManagement> | ||
</build> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-api</artifactId> | ||
<version>${slf4j.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-jdk14</artifactId> | ||
<version>${slf4j.version}</version> | ||
<scope>runtime</scope> | ||
</dependency> | ||
|
||
<!-- Apache Beam | ||
To run on another of the Beam runners, add its module to this pom.xml | ||
according to the runner-specific setup instructions on the Beam website: | ||
http://beam.apache.org/documentation/#runners | ||
--> | ||
<dependency> | ||
<groupId>org.apache.beam</groupId> | ||
<artifactId>beam-sdks-java-core</artifactId> | ||
<version>${beam.version}</version> | ||
</dependency> | ||
|
||
<!-- Direct Runner --> | ||
<dependency> | ||
<groupId>org.apache.beam</groupId> | ||
<artifactId>beam-runners-direct-java</artifactId> | ||
<version>${beam.version}</version> | ||
<scope>runtime</scope> | ||
</dependency> | ||
|
||
<!-- Dataflow Runner --> | ||
<dependency> | ||
<groupId>org.apache.beam</groupId> | ||
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId> | ||
<version>${beam.version}</version> | ||
<scope>runtime</scope> | ||
</dependency> | ||
|
||
<!-- Google Cloud I/O --> | ||
<dependency> | ||
<groupId>org.apache.beam</groupId> | ||
<artifactId>beam-sdks-java-io-google-cloud-platform</artifactId> | ||
<version>${beam.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.google.cloud</groupId> | ||
<artifactId>google-cloud-dataflow</artifactId> | ||
<version>0.34.0</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.google.cloud</groupId> | ||
<artifactId>google-cloud-artifact-registry</artifactId> | ||
<version>1.29.0</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.google.cloud</groupId> | ||
<artifactId>google-cloud-storage</artifactId> | ||
<version>2.29.1</version> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<version>4.13.2</version> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
</project> |
57 changes: 57 additions & 0 deletions
57
...plates/getting_started/src/main/java/com/example/dataflow/FlexTemplateGettingStarted.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Copyright 2023 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package com.example.dataflow; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import org.apache.beam.sdk.Pipeline; | ||
import org.apache.beam.sdk.io.TextIO; | ||
import org.apache.beam.sdk.options.Description; | ||
import org.apache.beam.sdk.options.PipelineOptions; | ||
import org.apache.beam.sdk.options.PipelineOptionsFactory; | ||
import org.apache.beam.sdk.options.Validation; | ||
import org.apache.beam.sdk.transforms.Create; | ||
|
||
/** | ||
* An Apache Beam batch pipeline that writes data to Cloud Storage. | ||
*/ | ||
public class FlexTemplateGettingStarted { | ||
|
||
public interface Options extends PipelineOptions { | ||
@Description("The Cloud Storage bucket to write to") | ||
@Validation.Required | ||
String getOutput(); | ||
|
||
void setOutput(String value); | ||
} | ||
|
||
// Write text data to Cloud Storage. | ||
public static void main(String[] args) { | ||
final List<String> wordsList = Arrays.asList("1", "2", "3", "4"); | ||
|
||
var options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); | ||
var pipeline = Pipeline.create(options); | ||
pipeline | ||
.apply(Create.of(wordsList)) | ||
.apply(TextIO | ||
.write() | ||
.to(options.getOutput()) | ||
.withSuffix(".txt") | ||
); | ||
|
||
// For a Dataflow Flex Template, do NOT call waitUntilFinish(). | ||
pipeline.run(); | ||
} | ||
} |
Oops, something went wrong.