org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 68.0 failed 4 times, most recent failure: Lost task 2.3 in stage 68.0 (TID 352, solrmain): java.lang.NullPointerException

Stack Overflow | browskie | 5 months ago
  1. Something Wrong with Spark RDD

    Stack Overflow | 5 months ago | browskie
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 68.0 failed 4 times, most recent failure: Lost task 2.3 in stage 68.0 (TID 352, solrmain): java.lang.NullPointerException
  2. {code:java}
    stream.map(_._2)
      .map(what ⇒ (key, GuavaConverters.guavaOptionalToScala(...)))
      .filter(_._2.isDefined)
      .map { case (key, o) ⇒ o }
      .saveToCassandra("keyspace", "table", SomeColumns(...))
    {code}
    (a self-contained sketch of this pipeline follows the list below)
    {noformat}
    Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 8.0 failed 4 times, most recent failure: Lost task 2.3 in stage 8.0 (TID 80, ....): java.lang.NullPointerException
        at org.apache.spark.metrics.MetricsSystem$$anonfun$getSourcesByName$1.apply(MetricsSystem.scala:146)
        at org.apache.spark.metrics.MetricsSystem$$anonfun$getSourcesByName$1.apply(MetricsSystem.scala:146)
        at scala.collection.TraversableLike$$anonfun$filter$1.apply(TraversableLike.scala:264)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at scala.collection.TraversableLike$class.filter(TraversableLike.scala:263)
        at scala.collection.AbstractTraversable.filter(Traversable.scala:105)
        at org.apache.spark.metrics.MetricsSystem.getSourcesByName(MetricsSystem.scala:146)
        at org.apache.spark.TaskContextImpl.getMetricsSources(TaskContextImpl.scala:103)
        at org.apache.spark.metrics.MetricsUpdater$.getSource(MetricsUpdater.scala:20)
        at org.apache.spark.metrics.OutputMetricsUpdater$.apply(OutputMetricsUpdater.scala:65)
        at com.datastax.spark.connector.writer.TableWriter.write(TableWriter.scala:138)
        at com.datastax.spark.connector.streaming.DStreamFunctions$$anonfun$saveToCassandra$1$$anonfun$apply$1.apply(DStreamFunctions.scala:54)
        at com.datastax.spark.connector.streaming.DStreamFunctions$$anonfun$saveToCassandra$1$$anonfun$apply$1.apply(DStreamFunctions.scala:54)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:88)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1824)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1837)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1914)
        at com.datastax.spark.connector.streaming.DStreamFunctions$$anonfun$saveToCassandra$1.apply(DStreamFunctions.scala:54)
        at com.datastax.spark.connector.streaming.DStreamFunctions$$anonfun$saveToCassandra$1.apply(DStreamFunctions.scala:54)
        at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:631)
        at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:631)
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:42)
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:40)
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:40)
        at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:399)
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:40)
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:40)
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:40)
        at scala.util.Try$.apply(Try.scala:161)
        at org.apache.spark.streaming.scheduler.Job.run(Job.scala:34)
        at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:218)
        at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:218)
        at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:218)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
        at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:217)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
    Caused by: java.lang.NullPointerException
        at org.apache.spark.metrics.MetricsSystem$$anonfun$getSourcesByName$1.apply(MetricsSystem.scala:146)
        at org.apache.spark.metrics.MetricsSystem$$anonfun$getSourcesByName$1.apply(MetricsSystem.scala:146)
        at scala.collection.TraversableLike$$anonfun$filter$1.apply(TraversableLike.scala:264)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at scala.collection.TraversableLike$class.filter(TraversableLike.scala:263)
        at scala.collection.AbstractTraversable.filter(Traversable.scala:105)
        at org.apache.spark.metrics.MetricsSystem.getSourcesByName(MetricsSystem.scala:146)
        at org.apache.spark.TaskContextImpl.getMetricsSources(TaskContextImpl.scala:103)
        at org.apache.spark.metrics.MetricsUpdater$.getSource(MetricsUpdater.scala:20)
        at org.apache.spark.metrics.OutputMetricsUpdater$.apply(OutputMetricsUpdater.scala:65)
        at com.datastax.spark.connector.writer.TableWriter.write(TableWriter.scala:138)
        at com.datastax.spark.connector.streaming.DStreamFunctions$$anonfun$saveToCassandra$1$$anonfun$apply$1.apply(DStreamFunctions.scala:54)
        at com.datastax.spark.connector.streaming.DStreamFunctions$$anonfun$saveToCassandra$1$$anonfun$apply$1.apply(DStreamFunctions.scala:54)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:88)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
        ... 3 more
    I0606 19:49:01.121261 25017 sched.cpp:1903] Asked to stop the driver
    {noformat}

    DataStax JIRA | 6 months ago | Jeandeaux Stephane
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 8.0 failed 4 times, most recent failure: Lost task 2.3 in stage 8.0 (TID 80, ....): java.lang.NullPointerException
  3. Training data with the UniversalRecommender causing "UnknownReason"

    Google Groups | 9 months ago | Carlos Henrique
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 68.0 failed 4 times, most recent failure: Lost task 0.3 in stage 68.0 (TID 182, 10.0.0.139): UnknownReason
    Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283)
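
    The saveToCassandra pipeline quoted in the DataStax JIRA entry above elides its types and column list, so it cannot be compiled as quoted. Below is a minimal self-contained sketch of its apparent shape; Event, saveEvents, and the column names are hypothetical stand-ins for the elided pieces, and the Guava-Optional conversion is replaced by plain scala.Option. It assumes spark-streaming and the spark-cassandra-connector on the classpath. Note that the NullPointerException in that report is thrown inside Spark's MetricsSystem while the connector's TableWriter.write runs, i.e. below the user code.

    {code:java}
    import com.datastax.spark.connector._            // SomeColumns and case-class column mapping
    import com.datastax.spark.connector.streaming._  // adds saveToCassandra to DStream
    import org.apache.spark.streaming.dstream.DStream

    object SaveEvents {
      // Hypothetical record type standing in for the fields elided as "..." in the report.
      case class Event(key: String, payload: String)

      // `stream` mirrors the reporter's (key, value) pairs; the Guava-Optional
      // conversion in the original is replaced here by plain scala.Option.
      def saveEvents(stream: DStream[(String, Option[Event])]): Unit =
        stream
          .map { case (_, maybeEvent) => maybeEvent } // keep only the value side of each pair
          .filter(_.isDefined)                        // drop records with no value
          .map(_.get)                                 // safe: emptiness was just filtered out
          .saveToCassandra("keyspace", "table", SomeColumns("key", "payload"))
    }
    {code}

    A flatMap over the Option would fold the filter and get steps into one, but the three-step form above mirrors the shape of the original report.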

    Root Cause Analysis

    1. org.apache.spark.SparkException

      Job aborted due to stage failure: Task 2 in stage 68.0 failed 4 times, most recent failure: Lost task 2.3 in stage 68.0 (TID 352, solrmain): java.lang.NullPointerException

      at org.apache.spark.api.python.SerDeUtil$$anonfun$toJavaArray$1.apply()
    2. Spark
      SerDeUtil$$anonfun$toJavaArray$1.apply
      1. org.apache.spark.api.python.SerDeUtil$$anonfun$toJavaArray$1.apply(SerDeUtil.scala:102)
      1 frame
    3. Scala
      Iterator$$anon$11.next
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      2. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      3. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      4. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      4 frames
    4. org.apache.spark
      SortBasedAggregationIterator.next
      1. org.apache.spark.sql.execution.aggregate.SortBasedAggregationIterator.processCurrentSortedGroup(SortBasedAggregationIterator.scala:122)
      2. org.apache.spark.sql.execution.aggregate.SortBasedAggregationIterator.next(SortBasedAggregationIterator.scala:152)
      3. org.apache.spark.sql.execution.aggregate.SortBasedAggregationIterator.next(SortBasedAggregationIterator.scala:29)
      3 frames
    5. Scala
      Iterator$$anon$11.next
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      2. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      2 frames
    6. Spark
      Executor$TaskRunner.run
      1. org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:149)
      2. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
      3. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
      4. org.apache.spark.scheduler.Task.run(Task.scala:89)
      5. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
      5 frames
    7. Java RT
      Thread.run
      1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      3. java.lang.Thread.run(Thread.java:745)
      3 frames
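
    Reading the chain above from the top down: the NullPointerException is raised while org.apache.spark.api.python.SerDeUtil.toJavaArray converts incoming records (suggesting the job entered through the Python API), but it only surfaces when SortBasedAggregationIterator pulls the next row during the shuffle write, because the conversion is pipelined lazily through the task's iterators. The task that dies is therefore the aggregation, not the step that produced the bad record, and a common first check is whether the input contains null records. A minimal sketch of that defensive step (hypothetical data and names, plain RDD API, not the UniversalRecommender's actual code):

    {code:java}
    import org.apache.spark.{SparkConf, SparkContext}

    object NullSafeAggregation {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setAppName("null-safe-agg").setMaster("local[2]"))

        // The null stands in for a bad input record; because RDDs are evaluated
        // lazily, it would only explode later, in whatever iterator first pulls it.
        val raw = sc.parallelize(Seq("a", null, "b", "a"))

        val counts = raw
          .filter(_ != null)   // drop bad records before they reach the aggregation
          .map(s => (s, 1))
          .reduceByKey(_ + _)

        counts.collect().foreach(println) // expected: (a,2) and (b,1), in either order
        sc.stop()
      }
    }
    {code}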