-
Notifications
You must be signed in to change notification settings - Fork 347
Description
This error is thrown only for some relations when running an AWS Glue job. The job still reports success, even though no data is written to Redshift.
Driver Versions: spark-redshift_2.10-0.5.0.jar, spark-redshift_2.11-2.0.1.jar
2021-10-25 16:09:42,829 ERROR [dag-scheduler-event-loop] scheduler.DAGScheduler (Logging.scala:logError(94)): Failed to update accumulator 2713 (com.databricks.spark.redshift.HashSetAccumulator) for task 38
java.lang.UnsupportedOperationException: Cannot merge com.databricks.spark.redshift.HashSetAccumulator with com.databricks.spark.redshift.HashSetAccumulator
at com.databricks.spark.redshift.HashSetAccumulator.merge(Utils.scala:287)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$updateAccumulators$1(DAGScheduler.scala:1576)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$updateAccumulators$1$adapted(DAGScheduler.scala:1567)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:1567)
at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1679)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2676)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2621)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2610)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)