From 665f095a7d921a30acd0b0b94c71a0e32a1e1179 Mon Sep 17 00:00:00 2001 From: Midhat-dot Date: Fri, 6 Feb 2026 15:10:39 +0500 Subject: [PATCH 1/3] Add Python example for schema transforms field selection --- .../content/en/documentation/programming-guide.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index 13900f3a7ceb..d27f9a0665c5 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -4215,14 +4215,14 @@ Individual nested fields can be specified using the dot operator. For example, t shipping address one would write {{< /paragraph >}} -{{< highlight java >}} -purchases.apply(Select.fieldNames("shippingAddress.postCode")); +{{< highlight py >}} +import apache_beam as beam + +purchases | beam.Select( + postCode=lambda row: row.shippingAddress.postCode +) {{< /highlight >}} - ##### **Wildcards** {{< paragraph class="language-py" >}} From f93bfc1d274d290ae4321ec42c78c26db1d5c4f0 Mon Sep 17 00:00:00 2001 From: Midhat-dot Date: Fri, 6 Feb 2026 23:20:45 +0500 Subject: [PATCH 2/3] Restore Java example and add Python alongside --- .../www/site/content/en/documentation/programming-guide.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index d27f9a0665c5..6bae3200dc34 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -4065,6 +4065,12 @@ and restricting it to a particular type. Beam will automatically infer the schema for PCollections with `NamedTuple` output types. For example: {{< /paragraph >}} + +{{< highlight java >}} +purchases.apply(Select.fieldNames("shippingAddress.postCode")); +{{< /highlight >}} + + {{< highlight py >}} class Transaction(typing.NamedTuple): bank: str From 2783abecc9a9d4fdb1f994b849bd9edc48aaa094 Mon Sep 17 00:00:00 2001 From: Midhat-dot Date: Fri, 6 Feb 2026 23:28:09 +0500 Subject: [PATCH 3/3] Fix Python beam.Row example formatting --- .../en/documentation/programming-guide.md | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index 6bae3200dc34..9a23ff594225 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -4066,20 +4066,19 @@ schema for PCollections with `NamedTuple` output types. For example: {{< /paragraph >}} + {{< highlight java >}} purchases.apply(Select.fieldNames("shippingAddress.postCode")); {{< /highlight >}} - {{< highlight py >}} -class Transaction(typing.NamedTuple): - bank: str - purchase_amount: float +import apache_beam as beam -pc = input | beam.Map(lambda ...).with_output_types(Transaction) +purchases | beam.Select( + postCode=lambda row: row.shippingAddress.postCode +) {{< /highlight >}} - {{< paragraph class="language-py" >}} **beam.Row and Select** {{< /paragraph >}} @@ -4091,10 +4090,15 @@ use a lambda that returns instances of `beam.Row`: {{< highlight py >}} input_pc = ... # {"bank": ..., "purchase_amount": ...} -output_pc = input_pc | beam.Map(lambda item: beam.Row(bank=item["bank"], - purchase_amount=item["purchase_amount"]) +output_pc = input_pc | beam.Map( + lambda item: beam.Row( + bank=item["bank"], + purchase_amount=item["purchase_amount"] + ) +) {{< /highlight >}} + {{< paragraph class="language-py" >}} Sometimes it can be more concise to express the same logic with the [`Select`](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.Select) transform: