scheduler.TaskSetManager: Lost task 107.0 in stage 39.0 (TID 1794, *HIDDEN): java.lang.ArrayIndexOutOfBoundsException: -1

Apache's JIRA Issue Tracker | Stephane Maarek | 2 years ago
  1.

    Running the following code (a runnable sketch of the same pattern appears after the related reports below):

        val subgraph = graph.subgraph(
          vpred = (id, article) => /* working predicate */
        ).cache()
        println(s"Subgraph contains ${subgraph.vertices.count} nodes and ${subgraph.edges.count} edges")
        val prGraph = subgraph.staticPageRank(5).cache
        val titleAndPrGraph = subgraph.outerJoinVertices(prGraph.vertices) {
          (v, title, rank) => (rank.getOrElse(0.0), title)
        }
        titleAndPrGraph.vertices.top(13) {
          Ordering.by((entry: (VertexId, (Double, _))) => entry._2._1)
        }.foreach(t => println(t._2._2._1 + ": " + t._2._1 + ", id:" + t._1))

    returns a graph with 5000 nodes and 4000 edges, and then crashes during the PageRank with the following:

        15/01/29 05:51:07 INFO scheduler.TaskSetManager: Starting task 125.0 in stage 39.0 (TID 1808, *HIDDEN, PROCESS_LOCAL, 2059 bytes)
        15/01/29 05:51:07 WARN scheduler.TaskSetManager: Lost task 107.0 in stage 39.0 (TID 1794, *HIDDEN): java.lang.ArrayIndexOutOfBoundsException: -1
            at org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp(GraphXPrimitiveKeyOpenHashMap.scala:64)
            at org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:91)
            at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:75)
            at org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:73)
            at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
            at org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:110)
            at org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:108)
            at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601)
            at org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601)
            at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:228)
            at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
            at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
            at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
            at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
            at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
            at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:228)
            at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
            at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
            at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
            at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
            at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
            at org.apache.spark.scheduler.Task.run(Task.scala:56)
            at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
            at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
            at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
            at java.lang.Thread.run(Thread.java:744)

    Apache's JIRA Issue Tracker | 2 years ago | Stephane Maarek
    scheduler.TaskSetManager: Lost task 107.0 in stage 39.0 (TID 1794, *HIDDEN): java.lang.ArrayIndexOutOfBoundsException: -1
  2.

    Error on executing 'Select * from tablename'

    GitHub | 1 year ago | rkiyer999
    scheduler.TaskSetManager: Lost task 0.0 in stage 6.0 (TID 6, localhost): java.lang.ArrayIndexOutOfBoundsException: 1
  3.

    HiveContext causes a NullPointer on spark

    Stack Overflow | 3 months ago | gloopy-rocket
    scheduler.TaskSetManager: Lost task 0.0 in stage 1.0 (TID 1, localhost): java.lang.NullPointerException
  4.

    Can't Run SparkPi on Hadoop with YARN API

    Stack Overflow | 3 months ago | jencoston
    scheduler.TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, gpi-col24.gpi.collect): java.lang.ClassNotFoundException: org.apache.spark.examples.SparkPi$$anonfun$1
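
    As referenced in the first report, here is a minimal, self-contained sketch of the failing pattern against a toy graph. It assumes a Spark 1.2-era GraphX build; the toy vertices and edges, the title strings, and the keepVertex placeholder predicate are stand-ins for the reporter's hidden data and "working predicate", and the vertex attribute is a plain String rather than the reporter's tuple:

        import org.apache.spark.{SparkConf, SparkContext}
        import org.apache.spark.graphx._
        import org.apache.spark.rdd.RDD

        object SubgraphPageRankSketch {
          def main(args: Array[String]): Unit = {
            val sc = new SparkContext(
              new SparkConf().setAppName("subgraph-pagerank").setMaster("local[*]"))

            // Toy data standing in for the reporter's hidden article graph.
            val vertices: RDD[(VertexId, String)] =
              sc.parallelize(Seq((1L, "a"), (2L, "b"), (3L, "c")))
            val edges: RDD[Edge[Int]] =
              sc.parallelize(Seq(Edge(1L, 2L, 1), Edge(2L, 3L, 1), Edge(3L, 1L, 1)))
            val graph = Graph(vertices, edges)

            // Placeholder for the reporter's "working predicate".
            val keepVertex = (id: VertexId, title: String) => title != "c"

            val subgraph = graph.subgraph(vpred = keepVertex).cache()
            println(s"Subgraph contains ${subgraph.vertices.count} nodes " +
              s"and ${subgraph.edges.count} edges")

            // The reported crash surfaces inside this static PageRank on the subgraph.
            val prGraph = subgraph.staticPageRank(5).cache()
            val titleAndPrGraph = subgraph.outerJoinVertices(prGraph.vertices) {
              (v, title, rank) => (rank.getOrElse(0.0), title)
            }
            titleAndPrGraph.vertices
              .top(13)(Ordering.by((entry: (VertexId, (Double, String))) => entry._2._1))
              .foreach(t => println(t._2._2 + ": " + t._2._1 + ", id:" + t._1))

            sc.stop()
          }
        }

    Note this sketch traces the workflow rather than guaranteeing a reproduction: whether the exception actually triggers depends on the data and how the edge partitions end up referencing filtered-out vertices.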

    Root Cause Analysis

    1. scheduler.TaskSetManager

      Lost task 107.0 in stage 39.0 (TID 1794, *HIDDEN): java.lang.ArrayIndexOutOfBoundsException: -1

      at org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp()
    2. Spark Project GraphX
      ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply
      1. org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap$mcJI$sp.apply$mcJI$sp(GraphXPrimitiveKeyOpenHashMap.scala:64)
      2. org.apache.spark.graphx.impl.EdgePartition.updateVertices(EdgePartition.scala:91)
      3. org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:75)
      4. org.apache.spark.graphx.impl.ReplicatedVertexView$$anonfun$2$$anonfun$apply$1.apply(ReplicatedVertexView.scala:73)
      4 frames
    3. Scala
      Iterator$$anon$11.next
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      1 frame
    4. Spark Project GraphX
      EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply
      1. org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:110)
      2. org.apache.spark.graphx.impl.EdgeRDDImpl$$anonfun$mapEdgePartitions$1.apply(EdgeRDDImpl.scala:108)
      2 frames
    5. Spark
      Executor$TaskRunner.run
      1. org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601)
      2. org.apache.spark.rdd.RDD$$anonfun$13.apply(RDD.scala:601)
      3. org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      4. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      5. org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61)
      6. org.apache.spark.rdd.RDD.iterator(RDD.scala:228)
      7. org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      8. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      9. org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
      10. org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
      11. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      12. org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
      13. org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      14. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      15. org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
      16. org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      17. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      18. org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
      19. org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
      20. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      21. org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
      22. org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
      23. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      24. org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61)
      25. org.apache.spark.rdd.RDD.iterator(RDD.scala:228)
      26. org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
      27. org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
      28. org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
      29. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
      30. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
      31. org.apache.spark.scheduler.Task.run(Task.scala:56)
      32. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
      32 frames
    6. Java RT
      Thread.run
      1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
      2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
      3. java.lang.Thread.run(Thread.java:744)
      3 frames
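
    About the -1 index: the top frame is GraphX's specialized open-addressing hash map. In Spark sources of that era, GraphXPrimitiveKeyOpenHashMap.apply looks up the key's slot and indexes the values array with the result, and the underlying lookup reports a missing key as position -1, so an edge partition that references a vertex id absent from its local id map fails with ArrayIndexOutOfBoundsException: -1 rather than a key-not-found error. A minimal illustrative stand-in (not Spark's code) for that failure mode:

        // Toy open-addressing map with linear probing. Like GraphX's primitive
        // map, the lookup reports a missing key as position -1, and apply()
        // indexes the values array with that position unguarded.
        class TinyOpenHashMap(capacity: Int) {
          private val keys = new Array[Long](capacity)
          private val values = new Array[Int](capacity)
          private val occupied = new Array[Boolean](capacity)

          // Returns the slot holding k, or -1 ("invalid position") if k is absent.
          private def pos(k: Long): Int = {
            var i = ((k % capacity).toInt + capacity) % capacity
            var probes = 0
            while (probes < capacity) {
              if (occupied(i) && keys(i) == k) return i
              if (!occupied(i)) return -1
              i = (i + 1) % capacity
              probes += 1
            }
            -1
          }

          // Insert/overwrite; assumes the map never fills (fine for a sketch).
          def update(k: Long, v: Int): Unit = {
            var i = ((k % capacity).toInt + capacity) % capacity
            while (occupied(i) && keys(i) != k) i = (i + 1) % capacity
            keys(i) = k; values(i) = v; occupied(i) = true
          }

          // Unguarded lookup: an absent key becomes values(-1).
          def apply(k: Long): Int = values(pos(k))
        }

        object Demo extends App {
          val m = new TinyOpenHashMap(8)
          m.update(42L, 7)
          println(m(42L)) // prints 7
          println(m(99L)) // throws java.lang.ArrayIndexOutOfBoundsException (index -1)
        }

    That is the shape suggested by the trace above, where EdgePartition.updateVertices queries the partition's global-to-local vertex id map for an id it does not contain.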