Skip to content

Commit 7c3e5ae

Browse files
authored
Merge pull request #9321 from ibzib/dataflow-region
[Release 2.15.0] [BEAM-7833] warn user when --region flag is not explicitly set
2 parents 6c9564a + 7b24fb1 commit 7c3e5ae

5 files changed

Lines changed: 31 additions & 8 deletions

File tree

runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,14 @@ public static DataflowRunner fromOptions(PipelineOptions options) {
238238
"Missing required values: " + Joiner.on(',').join(missing));
239239
}
240240

241+
if (dataflowOptions.getRegion() == null) {
242+
dataflowOptions.setRegion("us-central1");
243+
LOG.warn(
244+
"--region not set; will default to us-central1. Future releases of Beam will "
245+
+ "require the user to set the region explicitly. "
246+
+ "https://cloud.google.com/compute/docs/regions-zones/regions-zones");
247+
}
248+
241249
PathValidator validator = dataflowOptions.getPathValidator();
242250
String gcpTempLocation;
243251
try {

runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ public interface DataflowPipelineOptions
126126
@Description(
127127
"The Google Compute Engine region for creating Dataflow jobs. See "
128128
+ "https://cloud.google.com/compute/docs/regions-zones/regions-zones for a list of valid "
129-
+ "options. Default is up to the Dataflow service.")
130-
@Default.String("us-central1")
129+
+ "options. Currently defaults to us-central1, but future releases of Beam will "
130+
+ "require the user to set the region explicitly.")
131131
String getRegion();
132132

133133
void setRegion(String region);

runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/DataflowWorkUnitClientTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ public void setUp() throws Exception {
8686
pipelineOptions.setWorkerId(WORKER_ID);
8787
pipelineOptions.setGcpCredential(new TestCredential());
8888
pipelineOptions.setDataflowClient(service);
89+
pipelineOptions.setRegion("us-central1");
8990
}
9091

9192
@Test

sdks/go/pkg/beam/runners/dataflow/dataflow.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ var (
5454
maxNumWorkers = flag.Int64("max_num_workers", 0, "Maximum number of workers during scaling (optional).")
5555
autoscalingAlgorithm = flag.String("autoscaling_algorithm", "", "Autoscaling mode to use (optional).")
5656
zone = flag.String("zone", "", "GCP zone (optional)")
57-
region = flag.String("region", "us-central1", "GCP Region (optional)")
57+
region = flag.String("region", "", "GCP Region (optional but encouraged)")
5858
network = flag.String("network", "", "GCP network (optional)")
5959
tempLocation = flag.String("temp_location", "", "Temp location (optional)")
6060
machineType = flag.String("worker_machine_type", "", "GCE machine type (optional)")
@@ -90,6 +90,12 @@ func Execute(ctx context.Context, p *beam.Pipeline) error {
9090
if *stagingLocation == "" {
9191
return errors.New("no GCS staging location specified. Use --staging_location=gs://<bucket>/<path>")
9292
}
93+
if *region == "" {
94+
*region = "us-central1"
95+
log.Warn(ctx, "--region not set; will default to us-central1. Future releases of Beam will "+
96+
"require the user to set the region explicitly. "+
97+
"https://cloud.google.com/compute/docs/regions-zones/regions-zones")
98+
}
9399
if *image == "" {
94100
*image = getContainerImage(ctx)
95101
}

sdks/python/apache_beam/options/pipeline_options.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -448,13 +448,12 @@ def _add_argparse_args(cls, parser):
448448
parser.add_argument('--temp_location',
449449
default=None,
450450
help='GCS path for saving temporary workflow jobs.')
451-
# The Cloud Dataflow service does not yet honor this setting. However, once
452-
# service support is added then users of this SDK will be able to control
453-
# the region. Default is up to the Dataflow service. See
451+
# The Google Compute Engine region for creating Dataflow jobs. See
454452
# https://cloud.google.com/compute/docs/regions-zones/regions-zones for a
455-
# list of valid options/
453+
# list of valid options. Currently defaults to us-central1, but future
454+
# releases of Beam will require the user to set the region explicitly.
456455
parser.add_argument('--region',
457-
default='us-central1',
456+
default=None,
458457
help='The Google Compute Engine region for creating '
459458
'Dataflow job.')
460459
parser.add_argument('--service_account_email',
@@ -515,6 +514,15 @@ def validate(self, validator):
515514
errors.append('--dataflow_job_file and --template_location '
516515
'are mutually exclusive.')
517516

517+
if self.view_as(GoogleCloudOptions).region is None:
518+
self.view_as(GoogleCloudOptions).region = 'us-central1'
519+
runner = self.view_as(StandardOptions).runner
520+
if runner == 'DataflowRunner' or runner == 'TestDataflowRunner':
521+
logging.warning(
522+
'--region not set; will default to us-central1. Future releases of '
523+
'Beam will require the user to set the region explicitly. '
524+
'https://cloud.google.com/compute/docs/regions-zones/regions-zones')
525+
518526
return errors
519527

520528

0 commit comments

Comments
 (0)