java.lang.reflect.InvocationTargetException

JIRA | Sun Rui | 2 years ago
  1.

    This issue can be exposed by the following code:
    {code}
    rdd <- textFile(sc, "hdfs://localhost:9000/TODO.md")
    rdd1 <- map(rdd, function(x) { x })
    collect(unionRDD(rdd, rdd1))
    {code}
    An exception will be thrown:
    {panel:title=My title}
    15/03/12 18:17:14 WARN LoadSnappy: Snappy native library is available
    15/03/12 18:17:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    15/03/12 18:17:14 WARN LoadSnappy: Snappy native library not loaded
    15/03/12 18:17:14 INFO FileInputFormat: Total input paths to process : 1
    15/03/12 18:17:16 WARN TaskSetManager: Lost task 3.0 in stage 0.0 (TID 3, ray-desktop.local): java.lang.ArrayStoreException: [B
        scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:88)
        scala.Array$.slowcopy(Array.scala:81)
        scala.Array$.copy(Array.scala:107)
        scala.collection.mutable.ResizableArray$class.copyToArray(ResizableArray.scala:77)
        scala.collection.mutable.ArrayBuffer.copyToArray(ArrayBuffer.scala:47)
        scala.collection.TraversableOnce$class.copyToArray(TraversableOnce.scala:241)
        scala.collection.AbstractTraversable.copyToArray(Traversable.scala:105)
        scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:249)
        scala.collection.AbstractTraversable.toArray(Traversable.scala:105)
        scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
        edu.berkeley.cs.amplab.sparkr.BaseRRDD$$anon$1.toArray(RRDD.scala:62)
        org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774)
        org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774)
        org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1146)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:701)
    15/03/12 18:17:16 ERROR TaskSetManager: Task 3 in stage 0.0 failed 4 times; aborting job
    collect on 6 failed with java.lang.reflect.InvocationTargetException
    java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
        at java.lang.reflect.Method.invoke(Method.java:597)
        at edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.handleMethodCall(SparkRBackendHandler.scala:107)
        at edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.channelRead0(SparkRBackendHandler.scala:60)
        at edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.channelRead0(SparkRBackendHandler.scala:22)
        at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319)
        at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319)
        at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:163)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319)
        at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:787)
        at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:130)
        at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382)
        at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354)
        at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:116)
        at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
        at java.lang.Thread.run(Thread.java:662)
    Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 0.0 failed 4 times, most recent failure: Lost task 3.3 in stage 0.0 (TID 8, ray-desktop.local): java.lang.ArrayStoreException: [B
        scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:88)
        scala.Array$.slowcopy(Array.scala:81)
        scala.Array$.copy(Array.scala:107)
        scala.collection.mutable.ResizableArray$class.copyToArray(ResizableArray.scala:77)
        scala.collection.mutable.ArrayBuffer.copyToArray(ArrayBuffer.scala:47)
        scala.collection.TraversableOnce$class.copyToArray(TraversableOnce.scala:241)
        scala.collection.AbstractTraversable.copyToArray(Traversable.scala:105)
        scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:249)
        scala.collection.AbstractTraversable.toArray(Traversable.scala:105)
        scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
        edu.berkeley.cs.amplab.sparkr.BaseRRDD$$anon$1.toArray(RRDD.scala:62)
        org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774)
        org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774)
        org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1146)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:701)
    Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
        at akka.actor.ActorCell.invoke(ActorCell.scala:456)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
        at akka.dispatch.Mailbox.run(Mailbox.scala:219)
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    Error: returnStatus == 0 is not TRUE
    {panel}
    Also, if you run the following code:
    {code}
    rdd <- textFile(sc, "hdfs://localhost:9000/TODO.md")
    rdd1 <- map(rdd, function(x) { x })
    saveAsObjectFile(rdd1, "hdfs://localhost:9000/test.txt")
    {code}
    an additional, unnecessary reserialize() is called. The root cause is that the serialized flag in a PipelinedRDD is not set correctly. Whether the JRDD actually contains serialized R objects is only determined upon the first call to getJRDD(), and by default it does, because the dataSerialization parameter of getJRDD() defaults to TRUE. The fix is to set serialized to TRUE so the flag is consistent with this behavior.
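    The fix described above amounts to initializing the serialized flag to match getJRDD()'s default. Below is a minimal, illustrative sketch of that idea in plain R; the constructor shape and field names are simplified stand-ins for the AMPLab SparkR internals, not the actual source.
    {code}
    # Illustrative sketch only (hypothetical names, not the real SparkR source).
    # A pipelined RDD should be created with serialized = TRUE, because
    # getJRDD() builds its JRDD with dataSerialization = TRUE by default,
    # i.e. the JRDD holds serialized R objects.
    PipelinedRDD <- function(prev, func) {
      obj <- list(
        prev = prev,         # parent RDD that this stage pipelines over
        func = func,         # R function applied to each partition
        serialized = TRUE    # consistent with getJRDD()'s default behavior
      )
      class(obj) <- "PipelinedRDD"
      obj
    }
    {code}
    With the flag consistent, unionRDD() no longer mixes serialized byte arrays with deserialized elements (which is what surfaces as java.lang.ArrayStoreException: [B on the executors), and saveAsObjectFile() no longer needs the extra reserialize() pass.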

    JIRA | 2 years ago | Sun Rui
    java.lang.reflect.InvocationTargetException
  2.

    Running ADAM transform results in memory errors

    GitHub | 2 years ago | ansalaza
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 25, localhost): java.lang.OutOfMemoryError: Java heap space
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.resize(IdentityObjectIntMap.java:410)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.putStash(IdentityObjectIntMap.java:227)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.push(IdentityObjectIntMap.java:221)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.put(IdentityObjectIntMap.java:117)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.putStash(IdentityObjectIntMap.java:228)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.push(IdentityObjectIntMap.java:221)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.put(IdentityObjectIntMap.java:117)
        com.esotericsoftware.kryo.util.MapReferenceResolver.addWrittenObject(MapReferenceResolver.java:23)
        com.esotericsoftware.kryo.Kryo.writeReferenceOrNull(Kryo.java:598)
        com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:566)
        com.esotericsoftware.kryo.serializers.DefaultArraySerializers$ObjectArraySerializer.write(DefaultArraySerializers.java:318)
        com.esotericsoftware.kryo.serializers.DefaultArraySerializers$ObjectArraySerializer.write(DefaultArraySerializers.java:293)
        com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:568)
        org.apache.spark.serializer.KryoSerializerInstance.serialize(KryoSerializer.scala:156)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
  3.

    Missing Thunder imports on EC2 slaves

    GitHub | 2 years ago | broxtronix
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 400 in stage 9.0 failed 4 times, most recent failure: Lost task 400.3 in stage 9.0 (TID 1743, ip-10-65-128-31.ec2.internal): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
        File "/root/spark/python/pyspark/worker.py", line 75, in main
          command = pickleSer._read_with_length(infile)
        File "/root/spark/python/pyspark/serializers.py", line 150, in _read_with_length
          return self.loads(obj)
    ImportError: No module named thunder.rdds.imageblocks
        org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:124)
        org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:154)
        org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:87)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.api.python.PairwiseRDD.compute(PythonRDD.scala:265)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
  4.

    Error when running subset: Py4JJavaError: An error occurred while calling o102.collect.

    GitHub | 2 years ago | getBioinfo
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.0 failed 1 times, most recent failure: Lost task 0.0 in stage 10.0 (TID 10, localhost): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/worker.py", line 79, in main
          serializer.dump_stream(func(split_index, iterator), outfile)
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/serializers.py", line 196, in dump_stream
          self.serializer.dump_stream(self._batched(iterator), stream)
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/serializers.py", line 127, in dump_stream
          for obj in iterator:
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/serializers.py", line 185, in _batched
          for item in iterator:
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/rddsampler.py", line 115, in func
          if self.getUniformSample(split) <= self._fraction:
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/rddsampler.py", line 57, in getUniformSample
          self.initRandomGenerator(split)
        File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/rddsampler.py", line 43, in initRandomGenerator
          self._random = numpy.random.RandomState(self._seed)
        File "mtrand.pyx", line 610, in mtrand.RandomState.__init__ (numpy/random/mtrand/mtrand.c:7397)
        File "mtrand.pyx", line 646, in mtrand.RandomState.seed (numpy/random/mtrand/mtrand.c:7697)
    ValueError: Seed must be between 0 and 4294967295
        org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:124)
        org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:154)
        org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:87)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
        java.lang.Thread.run(Thread.java:695)
    Driver stacktrace:

Root Cause Analysis

  1. org.apache.spark.SparkException

    Job aborted due to stage failure: Task 3 in stage 0.0 failed 4 times, most recent failure: Lost task 3.3 in stage 0.0 (TID 8, ray-desktop.local): java.lang.ArrayStoreException: [B
        scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:88)
        scala.Array$.slowcopy(Array.scala:81)
        scala.Array$.copy(Array.scala:107)
        scala.collection.mutable.ResizableArray$class.copyToArray(ResizableArray.scala:77)
        scala.collection.mutable.ArrayBuffer.copyToArray(ArrayBuffer.scala:47)
        scala.collection.TraversableOnce$class.copyToArray(TraversableOnce.scala:241)
        scala.collection.AbstractTraversable.copyToArray(Traversable.scala:105)
        scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:249)
        scala.collection.AbstractTraversable.toArray(Traversable.scala:105)
        scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
        edu.berkeley.cs.amplab.sparkr.BaseRRDD$$anon$1.toArray(RRDD.scala:62)
        org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774)
        org.apache.spark.rdd.RDD$$anonfun$16.apply(RDD.scala:774)
        org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1121)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1146)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:701)
    Driver stacktrace:

    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages()
  2. Spark
    DAGScheduler$$anonfun$abortStage$1.apply
    1. org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
    2. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
    3. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
    3 frames
  3. Scala
    ArrayBuffer.foreach
    1. scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    2. scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    2 frames
  4. Spark
    DAGScheduler$$anonfun$handleTaskSetFailed$1.apply
    1. org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
    2. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
    3. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
    3 frames
  5. Scala
    Option.foreach
    1. scala.Option.foreach(Option.scala:236)
    1 frame
  6. Spark
    DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse
    1. org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
    2. org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
    2 frames
  7. Akka Actor
    ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec
    1. akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
    2. akka.actor.ActorCell.invoke(ActorCell.scala:456)
    3. akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
    4. akka.dispatch.Mailbox.run(Mailbox.scala:219)
    5. akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
    5 frames
  8. Scala
    ForkJoinWorkerThread.run
    1. scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    2. scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    3. scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    4. scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    4 frames