org.apache.spark.SparkException: Task failed while writing rows.

Apache's JIRA Issue Tracker | Naden Franciscus | 1 year ago
  1. 0

    We are executing 20 Spark SQL jobs in parallel using Spark Job Server and are hitting the following issue pretty routinely. The cluster is 6 nodes with a 40GB heap each. We have tried adjusting spark.shuffle.memoryFraction from 0.2 to 0.1, with no difference.

    {code}
    .16): org.apache.spark.SparkException: Task failed while writing rows.
        at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:250)
        at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:150)
        at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:150)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:88)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
    Caused by: java.io.IOException: Unable to acquire 16777216 bytes of memory
        at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.acquireNewPage(UnsafeExternalSorter.java:351)
        at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.<init>(UnsafeExternalSorter.java:138)
        at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.create(UnsafeExternalSorter.java:106)
        at org.apache.spark.sql.execution.UnsafeExternalRowSorter.<init>(UnsafeExternalRowSorter.java:68)
        at org.apache.spark.sql.execution.TungstenSort.org$apache$spark$sql$execution$TungstenSort$$preparePartition$1(sort.scala:146)
        at org.apache.spark.sql.execution.TungstenSort$$anonfun$doExecute$3.apply(sort.scala:169)
        at org.apache.spark.sql.execution.TungstenSort$$anonfun$doExecute$3.apply(sort.scala:169)
        at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.prepare(MapPartitionsWithPreparationRDD.scala:50)
        at org.apache.spark.rdd.ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse(ZippedPartitionsRDD.scala:83)
        at org.apache.spark.rdd.ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse(ZippedPartitionsRDD.scala:82)
        at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33)
        at scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278)
        at scala.collection.immutable.List.foreach(List.scala:318)
        at scala.collection.TraversableLike$class.collect(TraversableLike.scala:278)
        at scala.collection.AbstractTraversable.collect(Traversable.scala:105)
        at org.apache.spark.rdd.ZippedPartitionsBaseRDD.tryPrepareParents(ZippedPartitionsRDD.scala:82)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:97)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    {code}

    I have tried setting spark.buffer.pageSize to both 1MB and 64MB (in spark-defaults.conf) and it makes no difference. It also tries to acquire 33554432 bytes of memory in both cases.

    Apache's JIRA Issue Tracker | 1 year ago | Naden Franciscus
    org.apache.spark.SparkException: Task failed while writing rows.
  2. 0

    We are executing 20 Spark SQL jobs in parallel using Spark Job Server and are hitting the following issue pretty routinely. The cluster is 6 nodes with a 40GB heap each. We have tried adjusting spark.shuffle.memoryFraction from 0.2 to 0.1, with no difference.

    {code}
    .16): org.apache.spark.SparkException: Task failed while writing rows.
        at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:250)
        at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:150)
        at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:150)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:88)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
    Caused by: java.io.IOException: Unable to acquire 16777216 bytes of memory
        at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.acquireNewPage(UnsafeExternalSorter.java:351)
        at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.<init>(UnsafeExternalSorter.java:138)
        at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.create(UnsafeExternalSorter.java:106)
        at org.apache.spark.sql.execution.UnsafeExternalRowSorter.<init>(UnsafeExternalRowSorter.java:68)
        at org.apache.spark.sql.execution.TungstenSort.org$apache$spark$sql$execution$TungstenSort$$preparePartition$1(sort.scala:146)
        at org.apache.spark.sql.execution.TungstenSort$$anonfun$doExecute$3.apply(sort.scala:169)
        at org.apache.spark.sql.execution.TungstenSort$$anonfun$doExecute$3.apply(sort.scala:169)
        at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.prepare(MapPartitionsWithPreparationRDD.scala:50)
        at org.apache.spark.rdd.ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse(ZippedPartitionsRDD.scala:83)
        at org.apache.spark.rdd.ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse(ZippedPartitionsRDD.scala:82)
        at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33)
        at scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278)
        at scala.collection.immutable.List.foreach(List.scala:318)
        at scala.collection.TraversableLike$class.collect(TraversableLike.scala:278)
        at scala.collection.AbstractTraversable.collect(Traversable.scala:105)
        at org.apache.spark.rdd.ZippedPartitionsBaseRDD.tryPrepareParents(ZippedPartitionsRDD.scala:82)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:97)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    {code}

    I have tried setting spark.buffer.pageSize to both 1MB and 64MB (in spark-defaults.conf) and it makes no difference. It also tries to acquire 33554432 bytes of memory in both cases.

    Apache's JIRA Issue Tracker | 1 year ago | Naden Franciscus
    org.apache.spark.SparkException: Task failed while writing rows.
  3. 0

    Apache Spark Developers List - If you use Spark 1.5 and disabled Tungsten mode ...

    nabble.com | 8 months ago
    org.apache.spark.SparkException: Task failed while writing rows.
  4. Speed up your debug routine!

    Automated exception search integrated into your IDE

    1 unregistered visitor
    Not finding the right solution?
    Take a tour to get the most out of Samebug.

    Tired of useless tips?

    Automated exception search integrated into your IDE

    Root Cause Analysis

    1. java.io.IOException

      Unable to acquire 16777216 bytes of memory

      at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.acquireNewPage()
    2. org.apache.spark
      UnsafeExternalSorter.create
      1. org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.acquireNewPage(UnsafeExternalSorter.java:351)
      2. org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.<init>(UnsafeExternalSorter.java:138)
      3. org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.create(UnsafeExternalSorter.java:106)
      3 frames
    3. Spark Project SQL
      TungstenSort$$anonfun$doExecute$3.apply
      1. org.apache.spark.sql.execution.UnsafeExternalRowSorter.<init>(UnsafeExternalRowSorter.java:68)
      2. org.apache.spark.sql.execution.TungstenSort.org$apache$spark$sql$execution$TungstenSort$$preparePartition$1(sort.scala:146)
      3. org.apache.spark.sql.execution.TungstenSort$$anonfun$doExecute$3.apply(sort.scala:169)
      4. org.apache.spark.sql.execution.TungstenSort$$anonfun$doExecute$3.apply(sort.scala:169)
      4 frames
    4. Spark
      ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse
      1. org.apache.spark.rdd.MapPartitionsWithPreparationRDD.prepare(MapPartitionsWithPreparationRDD.scala:50)
      2. org.apache.spark.rdd.ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse(ZippedPartitionsRDD.scala:83)
      3. org.apache.spark.rdd.ZippedPartitionsBaseRDD$$anonfun$tryPrepareParents$1.applyOrElse(ZippedPartitionsRDD.scala:82)
      3 frames
    5. Scala
      AbstractTraversable.collect
      1. scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33)
      2. scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278)
      3. scala.collection.immutable.List.foreach(List.scala:318)
      4. scala.collection.TraversableLike$class.collect(TraversableLike.scala:278)
      5. scala.collection.AbstractTraversable.collect(Traversable.scala:105)
      5 frames
    6. Spark
      RDD.iterator
      1. org.apache.spark.rdd.ZippedPartitionsBaseRDD.tryPrepareParents(ZippedPartitionsRDD.scala:82)
      2. org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:97)
      3. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
      4. org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
      4 frames