org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1

JIRA | Neeraja Madabhushi | 2 years ago
Your exception is missing from the Samebug knowledge base.
Here are the best solutions we found on the Internet.
  1.

    Steps to reproduce: Followed the instructions on https://github.com/h2oai/sparkling-water/blob/master/DEVEL.md, section "Integrate with H2O Algorithms using RDD as algorithm input". Started Sparkling Water on YARN with 2 executors and ran the following in the REPL:

        val sc = new SparkContext(conf)
        import org.apache.spark.h2o._
        import org.apache.spark.examples.h2o._
        val h2oContext = new H2OContext(sc).start()
        val path = "hdfs://mr-0xd6.0xdata.loc/datasets/prostate_long_1G.csv"
        val prostateText = sc.textFile(path)
        val prostateRDD = prostateText.map(_.split(",")).map(row => ProstateParse(row))
        import hex.tree.gbm.GBM
        import hex.tree.gbm.GBMModel.GBMParameters
        import h2oContext._
        val train: DataFrame = prostateRDD

    The last assignment triggers the RDD-to-DataFrame conversion and fails:

        scala> val train:DataFrame = prostateRDD
        [Stage 2:> (0 + 0) / 9]15/02/12 13:33:47 WARN TaskSetManager: Lost task 7.0 in stage 2.0 (TID 22, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1
            at org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21)
            at $line52.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
            at $line52.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
            at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
            at scala.collection.Iterator$class.foreach(Iterator.scala:727)
            at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
            at org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391)
            at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
            at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
            at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
            at org.apache.spark.scheduler.Task.run(Task.scala:56)
            at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
            at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
            at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
            at java.lang.Thread.run(Thread.java:745)
        [Stage 2:> (0 + 2) / 9]15/02/12 13:33:47 ERROR TaskSetManager: Task 7 in stage 2.0 failed 4 times; aborting job
        15/02/12 13:33:47 WARN TaskSetManager: Lost task 0.1 in stage 2.0 (TID 28, mr-0xd5.0xdata.loc): TaskKilled (killed intentionally)
        org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1
            at org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21)
            at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
            at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
            at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
            at scala.collection.Iterator$class.foreach(Iterator.scala:727)
            at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
            at org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391)
            at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
            at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
            at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
            at org.apache.spark.scheduler.Task.run(Task.scala:56)
            at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
            at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
            at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
            at java.lang.Thread.run(Thread.java:745)
        Driver stacktrace:
            at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214)
            at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203)
            at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202)
            at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
            at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
            at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202)
            at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
            at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
            at scala.Option.foreach(Option.scala:236)
            at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696)
            at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420)
            at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
            at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375)
            at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
            at akka.actor.ActorCell.invoke(ActorCell.scala:487)
            at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
            at akka.dispatch.Mailbox.run(Mailbox.scala:220)
            at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
            at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
            at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
            at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
            at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

    JIRA | 2 years ago | Neeraja Madabhushi
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1
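    The ArrayIndexOutOfBoundsException: 1 is thrown from ProstateParse$.apply (Schemas.scala:21), which suggests at least one input line splits into fewer comma-separated fields than the parser indexes by position: a header row, a blank line, or a truncated record. A minimal defensive-parsing sketch, assuming positional field access; expectedFields is a hypothetical placeholder for the actual column count of the prostate schema:

        // Sketch only: guard against short rows before positional parsing.
        // expectedFields is hypothetical; set it to the real number of
        // columns in prostate_long_1G.csv.
        val expectedFields = 9
        val prostateRDD = prostateText
          .map(_.split(","))
          .filter(_.length == expectedFields) // drop header/blank/truncated lines
          .map(row => ProstateParse(row))

    With the filter in place, malformed lines are dropped instead of failing the task; counting the rejected lines first (e.g. with a filter on _.length != expectedFields) shows how much data would be discarded.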
  2.

    RE: Not Serializable exception when integrating SQL and Spark Streaming

    apache.org | 1 year ago
    org.apache.spark.SparkException: Task not serializable
        at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:166)
        at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:158)
        at org.apache.spark.SparkContext.clean(SparkContext.scala:1435)
        at org.apache.spark.rdd.RDD.map(RDD.scala:271)
        at org.apache.spark.api.java.JavaRDDLike$class.map(JavaRDDLike.scala:78)
        at org.apache.spark.sql.api.java.JavaSchemaRDD.map(JavaSchemaRDD.scala:42)
        at com.basic.spark.NumberCount$2.call(NumberCount.java:79)
        at com.basic.spark.NumberCount$2.call(NumberCount.java:67)
        at org.apache.spark.streaming.api.java.JavaDStreamLike$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:274)
        at org.apache.spark.streaming.api.java.JavaDStreamLike$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:274)
        at org.apache.spark.streaming.dstream.DStream$anonfun$foreachRDD$1.apply(DStream.scala:529)
        at org.apache.spark.streaming.dstream.DStream$anonfun$foreachRDD$1.apply(DStream.scala:529)
        at org.apache.spark.streaming.dstream.ForEachDStream$anonfun$1.apply$mcV$sp(ForEachDStream.scala:42)
        at org.apache.spark.streaming.dstream.ForEachDStream$anonfun$1.apply(ForEachDStream.scala:40)
        at org.apache.spark.streaming.dstream.ForEachDStream$anonfun$1.apply(ForEachDStream.scala:40)
        at scala.util.Try$.apply(Try.scala:161)
        at org.apache.spark.streaming.scheduler.Job.run(Job.scala:32)
        at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:171)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
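    A "Task not serializable" failure like the one above usually means the function passed to map captures its enclosing, non-serializable class (here, the anonymous class inside NumberCount). A minimal sketch with hypothetical names showing the common fix, moving the function into a standalone serializable object so only that object is captured:

        import org.apache.spark.{SparkConf, SparkContext}
        import org.apache.spark.SparkContext._

        // Hypothetical helper: a top-level object holds no reference to an
        // enclosing class, so the closure cleaner has nothing to reject.
        object TokenMapper extends Serializable {
          def toPair(line: String): (String, Int) = (line, 1)
        }

        object ClosureDemo {
          def main(args: Array[String]): Unit = {
            val sc = new SparkContext(
              new SparkConf().setAppName("closure-demo").setMaster("local[2]"))
            val counts = sc.parallelize(Seq("a", "b", "a"))
              .map(TokenMapper.toPair) // captures TokenMapper, not an outer `this`
              .reduceByKey(_ + _)
            counts.collect().foreach(println)
            sc.stop()
          }
        }

    The same pattern applies inside foreachRDD in a streaming job: keep the mapped function in a serializable object (or a local def) rather than a method of the driver class.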

    Root Cause Analysis

    1. org.apache.spark.SparkException

      Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1

      at org.apache.spark.examples.h2o.ProstateParse$.apply()
    2. org.apache.spark
      ProstateParse$.apply
      1. org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21)
      1 frame
    3. Unknown
      $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply
      1. $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
      2. $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
      2 frames
    4. Scala
      AbstractIterator.foreach
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      2. scala.collection.Iterator$class.foreach(Iterator.scala:727)
      3. scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
      3 frames
    5. org.apache.spark
      H2OContext$$anonfun$7.apply
      1. org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391)
      2. org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
      3. org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
      3 frames
    6. Spark
      Executor$TaskRunner.run
      1. org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
      2. org.apache.spark.scheduler.Task.run(Task.scala:56)
      3. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
      3 frames
    7. Java RT
      Thread.run
      1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
      2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
      3. java.lang.Thread.run(Thread.java:745)
      3 frames
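    The trace bottoms out in ProstateParse$.apply, called on the result of _.split(","). A plausible mechanism (an assumption, not confirmed by the report): any line without a comma, including an empty line, splits into a single-element array, so reading field index 1 throws exactly ArrayIndexOutOfBoundsException: 1. A small illustration:

        // Not the project's code; values are made up to show the lengths.
        val ok      = "1,0,65".split(",")    // Array("1", "0", "65"), length 3
        val empty   = "".split(",")          // Array(""), length 1
        val noComma = "somevalue".split(",") // Array("somevalue"), length 1
        // empty(1) or noComma(1) => java.lang.ArrayIndexOutOfBoundsException: 1
        // split also drops trailing empty fields:
        // "1,0,".split(",").length == 2, another way a row comes up short.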