From 86dfeaee0be593197f1936adaeb0f795d0ad20a1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 18 Jan 2025 15:33:02 -0800 Subject: [PATCH 1/2] [SPARK-50886][BUILD][3.5] Upgrade Avro to 1.11.4 --- .../main/scala/org/apache/spark/sql/avro/AvroOptions.scala | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- docs/sql-data-sources-avro.md | 4 ++-- pom.xml | 2 +- project/SparkBuild.scala | 2 +- .../org/apache/spark/sql/hive/client/HiveClientSuite.scala | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index edaaa8835cc0..5fd39393335d 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -81,14 +81,14 @@ private[sql] class AvroOptions( /** * Top level record name in write result, which is required in Avro spec. - * See https://avro.apache.org/docs/1.11.2/specification/#schema-record . + * See https://avro.apache.org/docs/1.11.4/specification/#schema-record . * Default value is "topLevelRecord" */ val recordName: String = parameters.getOrElse(RECORD_NAME, "topLevelRecord") /** * Record namespace in write result. Default value is "". - * See Avro spec for details: https://avro.apache.org/docs/1.11.2/specification/#schema-record . + * See Avro spec for details: https://avro.apache.org/docs/1.11.4/specification/#schema-record . 
*/ val recordNamespace: String = parameters.getOrElse(RECORD_NAMESPACE, "") diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 06e6aa199021..4feea62dfe64 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -21,9 +21,9 @@ arrow-memory-core/12.0.1//arrow-memory-core-12.0.1.jar arrow-memory-netty/12.0.1//arrow-memory-netty-12.0.1.jar arrow-vector/12.0.1//arrow-vector-12.0.1.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar -avro-ipc/1.11.2//avro-ipc-1.11.2.jar -avro-mapred/1.11.2//avro-mapred-1.11.2.jar -avro/1.11.2//avro-1.11.2.jar +avro-ipc/1.11.4//avro-ipc-1.11.4.jar +avro-mapred/1.11.4//avro-mapred-1.11.4.jar +avro/1.11.4//avro-1.11.4.jar aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index 1da75e5d7b17..a23c438af6d4 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -417,7 +417,7 @@ applications. Read the [Advanced Dependency Management](https://spark.apache Submission Guide for more details. ## Supported types for Avro -> Spark SQL conversion -Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.11.2/specification/#primitive-types) and [complex types](https://avro.apache.org/docs/1.11.2/specification/#complex-types) under records of Avro. +Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.11.4/specification/#primitive-types) and [complex types](https://avro.apache.org/docs/1.11.4/specification/#complex-types) under records of Avro. @@ -481,7 +481,7 @@ In addition to the types listed above, it supports reading `union` types. The fo 3. `union(something, null)`, where something is any supported Avro type. 
This will be mapped to the same Spark SQL type as that of something, with nullable set to true. All other union types are considered complex. They will be mapped to StructType where field names are member0, member1, etc., in accordance with members of the union. This is consistent with the behavior when converting between Avro and Parquet. -It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.11.2/specification/#logical-types): +It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.11.4/specification/#logical-types):
<tr><th>Avro type</th><th>Spark SQL type</th></tr>
diff --git a/pom.xml b/pom.xml index 0ccb6ac76a9b..7ee25d8a3140 100644 --- a/pom.xml +++ b/pom.xml @@ -158,7 +158,7 @@ --> 4.2.19 - 1.11.2 + 1.11.41.12.01.11.655 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f8659a4f4a25..9be5cc976f2d 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1103,7 +1103,7 @@ object DependencyOverrides { dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, dependencyOverrides += "xerces" % "xercesImpl" % "2.12.2", dependencyOverrides += "jline" % "jline" % "2.14.6", - dependencyOverrides += "org.apache.avro" % "avro" % "1.11.2") + dependencyOverrides += "org.apache.avro" % "avro" % "1.11.4") } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index 8476c87dc283..d35cc79c6a32 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -908,7 +908,7 @@ class HiveClientSuite(version: String, allVersions: Seq[String]) test("Decimal support of Avro Hive serde") { val tableName = "tab1" // TODO: add the other logical types. 
For details, see the link: - // https://avro.apache.org/docs/1.11.2/specification/#logical-types + // https://avro.apache.org/docs/1.11.4/specification/#logical-types val avroSchema = """{ | "name": "test_record", From 0db688ef4d44e9cc64792aad67c3aa4c705e6618 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 18 Jan 2025 19:39:23 -0800 Subject: [PATCH 2/2] Pin commons-compress to 1.23.0 in SparkBuild --- pom.xml | 4 ++++ project/SparkBuild.scala | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7ee25d8a3140..e427f4d4ce68 100644 --- a/pom.xml +++ b/pom.xml @@ -1485,6 +1485,10 @@ avro ${avro.version} + + org.apache.commons + commons-compress + commons-io commons-io diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 9be5cc976f2d..162f34415c53 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1103,7 +1103,8 @@ object DependencyOverrides { dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, dependencyOverrides += "xerces" % "xercesImpl" % "2.12.2", dependencyOverrides += "jline" % "jline" % "2.14.6", - dependencyOverrides += "org.apache.avro" % "avro" % "1.11.4") + dependencyOverrides += "org.apache.avro" % "avro" % "1.11.4", + dependencyOverrides += "org.apache.commons" % "commons-compress" % "1.23.0") } /**
<tr><th>Avro logical type</th><th>Avro type</th><th>Spark SQL type</th></tr>