org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1

JIRA | Neeraja Madabhushi | 2 years ago
  1. 0

    Steps to reproduce: Followed instructions on page : https://github.com/h2oai/sparkling-water/blob/master/DEVEL.md For section below : Integrate with H2O Algorithms using RDD as algorithm input: Started sparkling water on yarn with 2 executors : Run steps below : val sc = new SparkContext(conf) import org.apache.spark.h2o._ import org.apache.spark.examples.h2o._ val h2oContext = new H2OContext(sc).start() val path = "hdfs://mr-0xd6.0xdata.loc/datasets/prostate_long_1G.csv" val prostateText = sc.textFile(path) val prostateRDD = prostateText.map(_.split(",")).map(row => ProstateParse(row)) import hex.tree.gbm.GBM import hex.tree.gbm.GBMModel.GBMParameters import h2oContext._ val train:DataFrame = prostateRDD scala> val train:DataFrame = prostateRDD [Stage 2:> (0 + 0) / 9]15/02/12 13:33:47 WARN TaskSetManager: Lost task 7.0 in stage 2.0 (TID 22, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21) at $line52.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at $line52.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) [Stage 2:> (0 + 2) / 9]15/02/12 13:33:47 ERROR TaskSetManager: Task 7 in stage 2.0 failed 4 times; aborting job 15/02/12 13:33:47 WARN TaskSetManager: Lost task 0.1 in stage 2.0 (TID 28, mr-0xd5.0xdata.loc): TaskKilled (killed intentionally) org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203) at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420) at akka.actor.Actor$class.aroundReceive(Actor.scala:465) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

    JIRA | 2 years ago | Neeraja Madabhushi
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1
  2. 0

    Steps to reproduce: Followed instructions on page : https://github.com/h2oai/sparkling-water/blob/master/DEVEL.md For section below : Integrate with H2O Algorithms using RDD as algorithm input: Started sparkling water on yarn with 2 executors : Run steps below : val sc = new SparkContext(conf) import org.apache.spark.h2o._ import org.apache.spark.examples.h2o._ val h2oContext = new H2OContext(sc).start() val path = "hdfs://mr-0xd6.0xdata.loc/datasets/prostate_long_1G.csv" val prostateText = sc.textFile(path) val prostateRDD = prostateText.map(_.split(",")).map(row => ProstateParse(row)) import hex.tree.gbm.GBM import hex.tree.gbm.GBMModel.GBMParameters import h2oContext._ val train:DataFrame = prostateRDD scala> val train:DataFrame = prostateRDD [Stage 2:> (0 + 0) / 9]15/02/12 13:33:47 WARN TaskSetManager: Lost task 7.0 in stage 2.0 (TID 22, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21) at $line52.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at $line52.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) [Stage 2:> (0 + 2) / 9]15/02/12 13:33:47 ERROR TaskSetManager: Task 7 in stage 2.0 failed 4 times; aborting job 15/02/12 13:33:47 WARN TaskSetManager: Lost task 0.1 in stage 2.0 (TID 28, mr-0xd5.0xdata.loc): TaskKilled (killed intentionally) org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203) at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420) at akka.actor.Actor$class.aroundReceive(Actor.scala:465) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

    JIRA | 2 years ago | Neeraja Madabhushi
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1
  3. 0

    ADAM how to improve performance ?

    Google Groups | 2 years ago | Unknown author
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2.0 (TID 4, localhost): java.util.NoSuchElementException: None.get
  4. Speed up your debug routine!

    Automated exception search integrated into your IDE

  5. 0

    Is the Scala Breeze package thread safe for use in Spark Dataframe processing?

    Stack Overflow | 2 months ago | Tim Ryan
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 10.0 failed 1 times, most recent failure: Lost task 2.0 in stage 10.0 (TID 36, localhost): java.lang.ArrayIndexOutOfBoundsException: 12
  6. 0

    NullPointerException when Training Imagenet

    GitHub | 6 months ago | GalaxyStyle
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 2.0 failed 4 times, most recent failure: Lost task 1.3 in stage 2.0 (TID 10, pc-2): java.lang.NullPointerException

    Not finding the right solution?
    Take a tour to get the most out of Samebug.

    Tired of useless tips?

    Automated exception search integrated into your IDE

    Root Cause Analysis

    1. org.apache.spark.SparkException

      Job aborted due to stage failure: Task 7 in stage 2.0 failed 4 times, most recent failure: Lost task 7.3 in stage 2.0 (TID 27, mr-0xd8.0xdata.loc): java.lang.ArrayIndexOutOfBoundsException: 1

      at org.apache.spark.examples.h2o.ProstateParse$.apply()
    2. org.apache.spark
      ProstateParse$.apply
      1. org.apache.spark.examples.h2o.ProstateParse$.apply(Schemas.scala:21)
      1 frame
    3. Unknown
      $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply
      1. $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
      2. $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$2.apply(<console>:35)
      2 frames
    4. Scala
      AbstractIterator.foreach
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      2. scala.collection.Iterator$class.foreach(Iterator.scala:727)
      3. scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
      3 frames
    5. org.apache.spark
      H2OContext$$anonfun$7.apply
      1. org.apache.spark.h2o.H2OContext$.org$apache$spark$h2o$H2OContext$$perRDDPartition(H2OContext.scala:391)
      2. org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
      3. org.apache.spark.h2o.H2OContext$$anonfun$7.apply(H2OContext.scala:323)
      3 frames
    6. Spark
      Executor$TaskRunner.run
      1. org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
      2. org.apache.spark.scheduler.Task.run(Task.scala:56)
      3. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
      3 frames
    7. Java RT
      Thread.run
      1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
      2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
      3. java.lang.Thread.run(Thread.java:745)
      3 frames