org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 0.0 failed 4 times, most recent failure: Lost task 12.3 in stage 0.0 (TID 15, ga16iiphdpdn04.ricohonline.org): java.lang.ArrayIndexOutOfBoundsException Driver stacktrace:

Apache's JIRA Issue Tracker | Aditya Parmar | 2 years ago
Your exception is missing from the Samebug knowledge base.
Here are the best solutions we found on the Internet.
  1.

    Hi all , I am getting an Arrayoutboundsindex error when i try to run a simple filtering colums query on a file with 2.5 lac records.runs fine when running on a file with 2k records . 15/04/09 12:19:01 WARN TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.sql.catalyst.expressions.GenericRow.apply(Row.scala:142) at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:37) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:68) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:52) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1060) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1047) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) 15/04/09 12:19:01 INFO TaskSetManager: Starting task 1.1 in stage 0.0 (TID 2, blrwfl11189.igatecorp.com, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:19:01 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0): java.lang.ArrayIndexOutOfBoundsException 15/04/09 12:19:01 INFO TaskSetManager: Starting task 0.1 in stage 0.0 (TID 3, blrwfl11189.igatecorp.com, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:19:01 INFO TaskSetManager: Lost task 1.1 in stage 0.0 (TID 2) on executor : java.lang.ArrayIndexOutOfBoundsException (null) [duplicate 1] 15/04/09 12:19:01 INFO TaskSetManager: Starting task 1.2 in stage 0.0 (TID 4, blrwfl11189.igatecorp.com, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:19:01 INFO TaskSetManager: Lost task 1.2 in stage 0.0 (TID 4) on executor : java.lang.ArrayIndexOutOfBoundsException (null) [duplicate 2] 15/04/09 12:19:01 INFO TaskSetManager: Starting task 1.3 in stage 0.0 (TID 5, blrwfl11189.igatecorp.com, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:19:01 INFO TaskSetManager: Lost task 0.1 in stage 0.0 (TID 3) on executor : java.lang.ArrayIndexOutOfBoundsException (null) [duplicate 3] 15/04/09 12:19:01 INFO TaskSetManager: Starting task 0.2 in stage 0.0 (TID 6, blrwfl11189.igatecorp.com, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:19:02 INFO TaskSetManager: Lost task 1.3 in stage 0.0 (TID 5) on executor : java.lang.ArrayIndexOutOfBoundsException (null) [duplicate 4] 15/04/09 12:19:02 ERROR TaskSetManager: Task 1 in stage 0.0 failed 4 times; aborting job 15/04/09 12:19:02 INFO TaskSchedulerImpl: Cancelling stage 0 15/04/09 12:19:02 INFO TaskSchedulerImpl: Stage 0 was cancelled 15/04/09 12:19:02 INFO DAGScheduler: Job 0 failed: saveAsTextFile at JavaSchemaRDD.scala:42, took 1.958621 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 0.0 failed 4 times, most recent failure: Lost task 1.3 in stage 0.0 (TID 5, ): java.lang.ArrayIndexOutOfBoundsException Driver stacktrace: at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420) at akka.actor.Actor$class.aroundReceive(Actor.scala:465) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) 15/04/09 12:19:02 WARN TaskSetManager: Lost task 0.2 in stage 0.0 (TID 6, blrwfl11189.igatecorp.com): TaskKilled (killed intentionally) 15/04/09 12:19:02 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool [aditya@blrwfl11189 ~]$ spark-submit --class engineshow engineshow.jar Spark assembly has been built with Hive, including Datanucleus jars on classpath Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 15/04/09 12:22:30 INFO SecurityManager: Changing view acls to: aditya 15/04/09 12:22:30 INFO SecurityManager: Changing modify acls to: aditya 15/04/09 12:22:30 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(aditya); users with modify permissions: Set(aditya) 15/04/09 12:22:30 INFO Slf4jLogger: Slf4jLogger started 15/04/09 12:22:30 INFO Remoting: Starting remoting 15/04/09 12:22:31 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@blrwfl11189.igatecorp.com:47121] 15/04/09 12:22:31 INFO Utils: Successfully started service 'sparkDriver' on port 47121. 15/04/09 12:22:31 INFO SparkEnv: Registering MapOutputTracker 15/04/09 12:22:31 INFO SparkEnv: Registering BlockManagerMaster 15/04/09 12:22:31 INFO DiskBlockManager: Created local directory at /tmp/spark-local-20150409122231-c0ed 15/04/09 12:22:31 INFO MemoryStore: MemoryStore started with capacity 265.4 MB 15/04/09 12:22:31 INFO HttpFileServer: HTTP File server directory is /tmp/spark-6813688b-502e-41e4-ab21-27eb4544986f 15/04/09 12:22:31 INFO HttpServer: Starting HTTP Server 15/04/09 12:22:31 INFO Utils: Successfully started service 'HTTP file server' on port 37771. 
15/04/09 12:22:31 INFO Utils: Successfully started service 'SparkUI' on port 4040. 15/04/09 12:22:31 INFO SparkUI: Started SparkUI at http://blrwfl11189.igatecorp.com:4040 15/04/09 12:22:31 INFO SparkContext: Added JAR file:/home/aditya/engineshow.jar at http://10.212.51.39:37771/jars/engineshow.jar with timestamp 1428562351299 15/04/09 12:22:31 INFO AkkaUtils: Connecting to HeartbeatReceiver: akka.tcp://sparkDriver@blrwfl11189.igatecorp.com:47121/user/HeartbeatReceiver 15/04/09 12:22:31 INFO NettyBlockTransferService: Server created on 50256 15/04/09 12:22:31 INFO BlockManagerMaster: Trying to register BlockManager 15/04/09 12:22:31 INFO BlockManagerMasterActor: Registering block manager localhost:50256 with 265.4 MB RAM, BlockManagerId(<driver>, localhost, 50256) 15/04/09 12:22:31 INFO BlockManagerMaster: Registered BlockManager 15/04/09 12:22:31 INFO MemoryStore: ensureFreeSpace(32768) called with curMem=0, maxMem=278302556 15/04/09 12:22:31 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 32.0 KB, free 265.4 MB) 15/04/09 12:22:31 INFO MemoryStore: ensureFreeSpace(4959) called with curMem=32768, maxMem=278302556 15/04/09 12:22:31 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 4.8 KB, free 265.4 MB) 15/04/09 12:22:31 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:50256 (size: 4.8 KB, free: 265.4 MB) 15/04/09 12:22:31 INFO BlockManagerMaster: Updated info of block broadcast_0_piece0 15/04/09 12:22:31 INFO SparkContext: Created broadcast 0 from textFile at engineshow.java:32 15/04/09 12:22:32 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore 15/04/09 12:22:32 INFO ObjectStore: ObjectStore, initialize called 15/04/09 12:22:32 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored 15/04/09 12:22:32 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored 15/04/09 12:22:32 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) 15/04/09 12:22:32 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) 15/04/09 12:22:33 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order" 15/04/09 12:22:33 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "". 15/04/09 12:22:34 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. 15/04/09 12:22:34 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. 15/04/09 12:22:34 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. 15/04/09 12:22:34 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. 
15/04/09 12:22:34 INFO Query: Reading in results for query "org.datanucleus.store.rdbms.query.SQLQuery@0" since the connection used is closing 15/04/09 12:22:34 INFO ObjectStore: Initialized ObjectStore 15/04/09 12:22:34 INFO HiveMetaStore: Added admin role in metastore 15/04/09 12:22:34 INFO HiveMetaStore: Added public role in metastore 15/04/09 12:22:34 INFO HiveMetaStore: No user is added in admin role, since config is empty 15/04/09 12:22:34 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. 15/04/09 12:22:35 INFO ParseDriver: Parsing command: select a,b from comp1 15/04/09 12:22:35 INFO ParseDriver: Parse Completed 15/04/09 12:22:35 WARN LoadSnappy: Snappy native library is available 15/04/09 12:22:35 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 15/04/09 12:22:35 WARN LoadSnappy: Snappy native library not loaded 15/04/09 12:22:35 INFO FileInputFormat: Total input paths to process : 1 15/04/09 12:22:35 INFO SparkContext: Starting job: saveAsTextFile at JavaSchemaRDD.scala:42 15/04/09 12:22:35 INFO DAGScheduler: Got job 0 (saveAsTextFile at JavaSchemaRDD.scala:42) with 2 output partitions (allowLocal=false) 15/04/09 12:22:35 INFO DAGScheduler: Final stage: Stage 0(saveAsTextFile at JavaSchemaRDD.scala:42) 15/04/09 12:22:35 INFO DAGScheduler: Parents of final stage: List() 15/04/09 12:22:35 INFO DAGScheduler: Missing parents: List() 15/04/09 12:22:35 INFO DAGScheduler: Submitting Stage 0 (MappedRDD[8] at saveAsTextFile at JavaSchemaRDD.scala:42), which has no missing parents 15/04/09 12:22:35 INFO MemoryStore: ensureFreeSpace(20896) called with curMem=37727, maxMem=278302556 15/04/09 12:22:35 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 20.4 KB, free 265.4 MB) 15/04/09 12:22:35 INFO MemoryStore: ensureFreeSpace(15057) called with curMem=58623, maxMem=278302556 15/04/09 12:22:35 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 14.7 KB, free 265.3 MB) 15/04/09 12:22:35 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:50256 (size: 14.7 KB, free: 265.4 MB) 15/04/09 12:22:35 INFO BlockManagerMaster: Updated info of block broadcast_1_piece0 15/04/09 12:22:35 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:838 15/04/09 12:22:35 INFO DAGScheduler: Submitting 2 missing tasks from Stage 0 (MappedRDD[8] at saveAsTextFile at JavaSchemaRDD.scala:42) 15/04/09 12:22:35 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks 15/04/09 12:22:35 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:22:35 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 1349 bytes) 15/04/09 12:22:35 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) 15/04/09 12:22:35 INFO Executor: Running task 1.0 in stage 0.0 (TID 1) 15/04/09 12:22:35 INFO Executor: Fetching http://10.212.51.39:37771/jars/engineshow.jar with timestamp 1428562351299 15/04/09 12:22:35 INFO Utils: Fetching http://10.212.51.39:37771/jars/engineshow.jar to /tmp/fetchFileTemp3748447218639833283.tmp 15/04/09 12:22:35 INFO Executor: Adding file:/tmp/spark-0ed71155-55db-48dd-ac02-7a56fe33381b/engineshow.jar to class loader 15/04/09 12:22:35 INFO HadoopRDD: Input split: file:/home/aditya/stocks1.csv:0+6708709 15/04/09 12:22:35 INFO HadoopRDD: Input split: file:/home/aditya/stocks1.csv:6708709+6708709 15/04/09 12:22:36 ERROR Executor: 
Exception in task 1.0 in stage 0.0 (TID 1) java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.sql.catalyst.expressions.GenericRow.apply(Row.scala:142) at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:37) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:68) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:52) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1060) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1047) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) 15/04/09 12:22:36 WARN TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, localhost): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.sql.catalyst.expressions.GenericRow.apply(Row.scala:142) at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:37) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:68) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:52) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1060) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1047) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) 15/04/09 12:22:36 ERROR TaskSetManager: Task 1 in stage 0.0 failed 1 times; aborting job 15/04/09 12:22:36 INFO TaskSchedulerImpl: Cancelling stage 0 15/04/09 12:22:36 INFO Executor: Executor is trying to kill task 0.0 in stage 0.0 (TID 0) 15/04/09 12:22:36 INFO TaskSchedulerImpl: Stage 0 was cancelled 15/04/09 12:22:36 INFO Executor: Executor killed task 0.0 in stage 0.0 (TID 0) 15/04/09 12:22:36 INFO DAGScheduler: Job 0 failed: saveAsTextFile at JavaSchemaRDD.scala:42, took 0.616858 s Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 0.0 failed 1 times, most recent failure: Lost task 1.0 in stage 0.0 (TID 1, localhost): java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.sql.catalyst.expressions.GenericRow.apply(Row.scala:142) at org.apache.spark.sql.catalyst.expressions.BoundReference.eval(BoundAttribute.scala:37) at 
org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:68) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:52) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1060) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:1047) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420) at akka.actor.Actor$class.aroundReceive(Actor.scala:465) at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516) at akka.actor.ActorCell.invoke(ActorCell.scala:487) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238) at akka.dispatch.Mailbox.run(Mailbox.scala:220) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) 15/04/09 12:22:36 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost): TaskKilled (killed intentionally) 15/04/09 12:22:36 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool

    Apache's JIRA Issue Tracker | 2 years ago | Aditya Parmar
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 0.0 failed 4 times, most recent failure: Lost task 12.3 in stage 0.0 (TID 15, ga16iiphdpdn04.ricohonline.org): java.lang.ArrayIndexOutOfBoundsException Driver stacktrace:
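    The executor-side frames above (GenericRow.apply at Row.scala:142, reached through BoundReference.eval and InterpretedMutableProjection.apply) mean the projection asked for a column ordinal that a particular row does not contain. With CSV input this is usually caused by a few malformed or short lines in the larger file; the 2,000-record file presumably has none. Below is a minimal defensive-parsing sketch, not the original engineshow job: it assumes the Spark 1.x RDD API, and the column count, delimiter, and output path are illustrative placeholders.

        import org.apache.spark.{SparkConf, SparkContext}

        // Hypothetical rewrite of the failing step: split each CSV line and drop
        // any line that is too short to satisfy the projection, instead of letting
        // GenericRow.apply throw ArrayIndexOutOfBoundsException on the executor.
        object SafeCsvFilter {
          def main(args: Array[String]): Unit = {
            val sc = new SparkContext(new SparkConf().setAppName("SafeCsvFilter"))

            val expectedCols = 2                       // assumed: the query selects two columns (a, b)
            val lines = sc.textFile("/home/aditya/stocks1.csv")

            val fields = lines
              .map(_.split(",", -1))                   // negative limit keeps trailing empty fields
              .filter(_.length >= expectedCols)        // guard against short or malformed lines

            fields.map(f => s"${f(0)},${f(1)}").saveAsTextFile("/home/aditya/filtered_output")

            sc.stop()
          }
        }

    Counting the rejected lines (for example with an accumulator) instead of silently dropping them makes it easier to see how dirty the 2.5-lakh-record file actually is.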
  2.

    Error when trying to run algorithm in Spark

    Stack Overflow | 2 years ago | Hafidz Zulkifli
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 0.0 failed 4 times, most recent failure: Lost task 1.3 in stage 0.0 (TID 5, 192.168.126.129): ExecutorLostFailure (executor 1 lost) Driver stacktrace:
  3.

    Wordcount example on yarn in Hortonworks failing

    Stack Overflow | 2 years ago | M Dev
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, sandbox.hortonworks.com): ExecutorLostFailure (executor 2 lost) Driver stacktrace:
  4.

    Streaming from HBase using Spark not serializable

    Stack Overflow | 2 years ago | petfreshman
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0 in stage 14.0 (TID 14) had a not serializable result: org.apache.hadoop.hbase.io.ImmutableBytesWritable
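    The HBase failure above is a different problem: org.apache.hadoop.hbase.io.ImmutableBytesWritable, the key type produced by TableInputFormat, does not implement java.io.Serializable, so any action whose results still contain those objects fails when Spark tries to ship them to the driver. A common workaround is to map the raw (key, Result) pairs into plain serializable values immediately after reading; the sketch below assumes the standard TableInputFormat setup, with a placeholder table name not taken from the original question.

        import org.apache.hadoop.hbase.HBaseConfiguration
        import org.apache.hadoop.hbase.client.Result
        import org.apache.hadoop.hbase.io.ImmutableBytesWritable
        import org.apache.hadoop.hbase.mapreduce.TableInputFormat
        import org.apache.hadoop.hbase.util.Bytes
        import org.apache.spark.{SparkConf, SparkContext}

        object HBaseReadSketch {
          def main(args: Array[String]): Unit = {
            val sc = new SparkContext(new SparkConf().setAppName("HBaseReadSketch"))

            val hbaseConf = HBaseConfiguration.create()
            hbaseConf.set(TableInputFormat.INPUT_TABLE, "my_table")   // placeholder table name

            val hbaseRdd = sc.newAPIHadoopRDD(
              hbaseConf,
              classOf[TableInputFormat],
              classOf[ImmutableBytesWritable],
              classOf[Result])

            // Convert to plain Strings on the executors, before any collect/count/take,
            // so nothing unserializable ever has to travel back to the driver.
            val rowKeys = hbaseRdd.map { case (key, _) => Bytes.toString(key.get()) }

            rowKeys.take(10).foreach(println)

            sc.stop()
          }
        }

    Registering Kryo serializers for the HBase classes is another route that is often suggested, but converting to plain types early is usually the simpler fix.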

Root Cause Analysis

  1. org.apache.spark.SparkException

    Job aborted due to stage failure: Task 12 in stage 0.0 failed 4 times, most recent failure: Lost task 12.3 in stage 0.0 (TID 15, ga16iiphdpdn04.ricohonline.org): java.lang.ArrayIndexOutOfBoundsException Driver stacktrace:

    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages()
  2. Spark
    DAGScheduler$$anonfun$abortStage$1.apply
    1. org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214)
    2. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203)
    3. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202)
    3 frames
  3. Scala
    ArrayBuffer.foreach
    1. scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    2. scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    2 frames
  4. Spark
    DAGScheduler$$anonfun$handleTaskSetFailed$1.apply
    1. org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202)
    2. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
    3. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
    3 frames
  5. Scala
    Option.foreach
    1. scala.Option.foreach(Option.scala:236)
    1 frame
  6. Spark
    DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse
    1. org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696)
    2. org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420)
    2 frames
  7. Akka Actor
    Actor$class.aroundReceive
    1. akka.actor.Actor$class.aroundReceive(Actor.scala:465)
    1 frame
  8. Spark
    DAGSchedulerEventProcessActor.aroundReceive
    1. org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375)
    1 frame
  9. Akka Actor
    ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec
    1. akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
    2. akka.actor.ActorCell.invoke(ActorCell.scala:487)
    3. akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
    4. akka.dispatch.Mailbox.run(Mailbox.scala:220)
    5. akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
    5 frames
  10. Scala
    ForkJoinWorkerThread.run
    1. scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    2. scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    3. scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    4. scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    4 frames