org.apache.spark.SparkException: Job aborted due to stage failure: Task 25 in stage 40.0 failed 4 times, most recent failure: Lost task 25.3 in stage 40.0 (TID 3250, blub.bla): java.lang.ArrayIndexOutOfBoundsException: -1

Apache's JIRA Issue Tracker | Stephane Maarek | 2 years ago
  1. 0

    Running the following code: val subgraph = graph.subgraph ( vpred = (id,article) => //working predicate) ).cache() println( s"Subgraph contains ${subgraph.vertices.count} nodes and ${subgraph.edges.count} edges") val prGraph = subgraph.staticPageRank(5).cache val titleAndPrGraph = subgraph.outerJoinVertices(prGraph.vertices) { (v, title, rank) => (rank.getOrElse(0.0), title) } titleAndPrGraph.vertices.top(13) { Ordering.by((entry: (VertexId, (Double, _))) => entry._2._1) }.foreach(t => println(t._2._2._1 + ": " + t._2._1 + ", id:" + t._1)) Returns a graph with 5000 nodes and 4000 edges. Then it crashes during the PageRank with the following: 15/01/29 05:51:07 INFO scheduler.TaskSetManager: Starting task 125.0 in stage 39.0 (TID 1808, *HIDDEN, PROCESS_LOCAL, 2059 bytes) 15/01/29 05:51:07 WARN scheduler.TaskSetManager: Lost task 107.0 in stage 39.0 (TID 1794, *HIDDEN): java.lang.ArrayIndexOutOfBoundsException: -1 at org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp(GraphXPrimitiveKeyOpenHashMap.scala:64) at org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:91) at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:75) at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:73) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:110) at org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:108) at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601) at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61) at org.apache.spark.rdd.RDD.iterator(RDD.scala:228) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61) at org.apache.spark.rdd.RDD.iterator(RDD.scala:228) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744)

    Apache's JIRA Issue Tracker | 2 years ago | Stephane Maarek
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 25 in stage 40.0 failed 4 times, most recent failure: Lost task 25.3 in stage 40.0 (TID 3250, blub.bla): java.lang.ArrayIndexOutOfBoundsException: -1
  2. 0

    Running the following code: val subgraph = graph.subgraph ( vpred = (id,article) => //working predicate) ).cache() println( s"Subgraph contains ${subgraph.vertices.count} nodes and ${subgraph.edges.count} edges") val prGraph = subgraph.staticPageRank(5).cache val titleAndPrGraph = subgraph.outerJoinVertices(prGraph.vertices) { (v, title, rank) => (rank.getOrElse(0.0), title) } titleAndPrGraph.vertices.top(13) { Ordering.by((entry: (VertexId, (Double, _))) => entry._2._1) }.foreach(t => println(t._2._2._1 + ": " + t._2._1 + ", id:" + t._1)) Returns a graph with 5000 nodes and 4000 edges. Then it crashes during the PageRank with the following: 15/01/29 05:51:07 INFO scheduler.TaskSetManager: Starting task 125.0 in stage 39.0 (TID 1808, *HIDDEN, PROCESS_LOCAL, 2059 bytes) 15/01/29 05:51:07 WARN scheduler.TaskSetManager: Lost task 107.0 in stage 39.0 (TID 1794, *HIDDEN): java.lang.ArrayIndexOutOfBoundsException: -1 at org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp(GraphXPrimitiveKeyOpenHashMap.scala:64) at org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:91) at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:75) at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:73) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:110) at org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:108) at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601) at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61) at org.apache.spark.rdd.RDD.iterator(RDD.scala:228) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61) at org.apache.spark.rdd.RDD.iterator(RDD.scala:228) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744)

    Apache's JIRA Issue Tracker | 2 years ago | Stephane Maarek
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 25 in stage 40.0 failed 4 times, most recent failure: Lost task 25.3 in stage 40.0 (TID 3250, blub.bla): java.lang.ArrayIndexOutOfBoundsException: -1
  3. 0

    pyspark - Spark gives a StackOverflowError when training using ALS - Stack Overflow

    xluat.com | 1 year ago
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 40.0 failed 1 times, most recent failure: Lost task 0.0 in stage 40.0 (TID 35, localhost): java.lang.StackOverflowError
  4. Speed up your debug routine!

    Automated exception search integrated into your IDE

  5. 0

    Spark gives a StackOverflowError when training using ALS

    Stack Overflow | 1 year ago | cheese1756
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 40.0 failed 1 times, most recent failure: Lost task 0.0 in stage 40.0 (TID 35, localhost): java.lang.StackOverflowError
  6. 0

    Compute Cost of Kmeans

    Stack Overflow | 3 months ago | gsamaras
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 821 in stage 40.0 failed 4 times, most recent failure: Lost task 821.3 in stage 40.0: java.lang.IllegalArgumentException: requirement failed

    Not finding the right solution?
    Take a tour to get the most out of Samebug.

    Tired of useless tips?

    Automated exception search integrated into your IDE

    Root Cause Analysis

    1. org.apache.spark.SparkException

      Job aborted due to stage failure: Task 25 in stage 40.0 failed 4 times, most recent failure: Lost task 25.3 in stage 40.0 (TID 3250, blub.bla): java.lang.ArrayIndexOutOfBoundsException: -1

      at org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp()
    2. Spark Project GraphX
      ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply
      1. org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp(GraphXPrimitiveKeyOpenHashMap.scala:64)
      2. org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:91)
      3. org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:75)
      4. org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:73)
      4 frames
    3. Scala
      Iterator$$anon$13.hasNext
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      2. scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
      2 frames
    4. Spark
      Executor$TaskRunner.run
      1. org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:210)
      2. org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
      3. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
      4. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
      5. org.apache.spark.scheduler.Task.run(Task.scala:64)
      6. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
      6 frames
    5. Java RT
      Thread.run
      1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      3. java.lang.Thread.run(Thread.java:745)
      3 frames