java.lang.reflect.InvocationTargetException

JIRA | Amos | 2 years ago
  1.

    I get various "returnStatus == 0 is not TRUE" and "unimplemented type" errors when trying to get data out of any RDD with top() or collect(). The errors are not consistent. I think Spark is installed properly because some operations do work. I apologize if I'm missing something easy or not providing the right diagnostic info -- I'm new to SparkR, and this seems to be the only resource for SparkR issues. Some logs:

    {code:java}
    Browse[1]> top(estep.rdd, 1L)
    Error in order(unlist(part, recursive = FALSE), decreasing = !ascending) :
      unimplemented type 'list' in 'orderVector1'
    Calls: do.call ... Reduce -> <Anonymous> -> func -> FUN -> FUN -> order
    Execution halted
    15/02/13 19:11:57 ERROR Executor: Exception in task 0.0 in stage 14.0 (TID 14)
    org.apache.spark.SparkException: R computation failed with
    Error in order(unlist(part, recursive = FALSE), decreasing = !ascending) :
      unimplemented type 'list' in 'orderVector1'
    Calls: do.call ... Reduce -> <Anonymous> -> func -> FUN -> FUN -> order
    Execution halted
        at edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:69)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        at org.apache.spark.scheduler.Task.run(Task.scala:54)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
    15/02/13 19:11:57 WARN TaskSetManager: Lost task 0.0 in stage 14.0 (TID 14, localhost): org.apache.spark.SparkException: R computation failed with
    Error in order(unlist(part, recursive = FALSE), decreasing = !ascending) :
      unimplemented type 'list' in 'orderVector1'
    Calls: do.call ... Reduce -> <Anonymous> -> func -> FUN -> FUN -> order
    Execution halted
        edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:69)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    15/02/13 19:11:57 ERROR TaskSetManager: Task 0 in stage 14.0 failed 1 times; aborting job
    collect on 79 failed with java.lang.reflect.InvocationTargetException
    java.lang.reflect.InvocationTargetException
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.handleMethodCall(SparkRBackendHandler.scala:107)
        at edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.channelRead0(SparkRBackendHandler.scala:60)
        at edu.berkeley.cs.amplab.sparkr.SparkRBackendHandler.channelRead0(SparkRBackendHandler.scala:22)
        at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319)
        at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319)
        at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:163)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:333)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:319)
        at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:787)
        at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:130)
        at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382)
        at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354)
        at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:116)
        at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
        at java.lang.Thread.run(Thread.java:745)
    Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 14.0 failed 1 times, most recent failure: Lost task 0.0 in stage 14.0 (TID 14, localhost): org.apache.spark.SparkException: R computation failed with
    Error in order(unlist(part, recursive = FALSE), decreasing = !ascending) :
      unimplemented type 'list' in 'orderVector1'
    Calls: do.call ... Reduce -> <Anonymous> -> func -> FUN -> FUN -> order
    Execution halted
        edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:69)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
        at akka.actor.ActorCell.invoke(ActorCell.scala:456)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
        at akka.dispatch.Mailbox.run(Mailbox.scala:219)
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    Error: returnStatus == 0 is not TRUE
    {code}

    {code:java}
    Welcome to SparkR!
    Spark context is available as sc
    > rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
    > rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
    > tom <- leftOuterJoin(rdd1, rdd2, 2L)
    > top(tom, 1L)
    Error: returnStatus == 0 is not TRUE
    > q("no")
    {code}

    JIRA | 2 years ago | Amos
    java.lang.reflect.InvocationTargetException
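
    For contrast, the same join-then-top sequence succeeds in PySpark, because Python orders tuples element-wise while base R's order() rejects bare lists (the "unimplemented type 'list' in 'orderVector1'" above). A minimal sketch under those assumptions; the app name is arbitrary and this illustrates the root cause rather than fixing the SparkR code path:

    {code:python}
    from pyspark import SparkContext

    sc = SparkContext(appName="left-outer-join-top")  # hypothetical app name

    # Same data as the SparkR session above, expressed as Python tuples.
    rdd1 = sc.parallelize([(1, 1), (2, 4)])
    rdd2 = sc.parallelize([(1, 2), (1, 3)])
    joined = rdd1.leftOuterJoin(rdd2, numPartitions=2)

    # Tuples compare element-wise in Python, so top() has a usable ordering.
    print(joined.top(1))
    {code}
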
  2.

    Running ADAM transform results in memory errors

    GitHub | 2 years ago | ansalaza
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 25, localhost): java.lang.OutOfMemoryError: Java heap space
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.resize(IdentityObjectIntMap.java:410)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.putStash(IdentityObjectIntMap.java:227)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.push(IdentityObjectIntMap.java:221)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.put(IdentityObjectIntMap.java:117)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.putStash(IdentityObjectIntMap.java:228)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.push(IdentityObjectIntMap.java:221)
        com.esotericsoftware.kryo.util.IdentityObjectIntMap.put(IdentityObjectIntMap.java:117)
        com.esotericsoftware.kryo.util.MapReferenceResolver.addWrittenObject(MapReferenceResolver.java:23)
        com.esotericsoftware.kryo.Kryo.writeReferenceOrNull(Kryo.java:598)
        com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:566)
        com.esotericsoftware.kryo.serializers.DefaultArraySerializers$ObjectArraySerializer.write(DefaultArraySerializers.java:318)
        com.esotericsoftware.kryo.serializers.DefaultArraySerializers$ObjectArraySerializer.write(DefaultArraySerializers.java:293)
        com.esotericsoftware.kryo.Kryo.writeClassAndObject(Kryo.java:568)
        org.apache.spark.serializer.KryoSerializerInstance.serialize(KryoSerializer.scala:156)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
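
    The OOM above fires while Kryo is growing its identity map during task result serialization, so the knobs people commonly try are more executor heap and disabling Kryo reference tracking. A hedged PySpark sketch of both; the app name is hypothetical, the config keys are real Spark settings, and turning reference tracking off is only safe when the serialized objects contain no shared or circular references:

    {code:python}
    from pyspark import SparkConf, SparkContext

    conf = (SparkConf()
            .setAppName("adam-transform-repro")             # hypothetical app name
            .set("spark.executor.memory", "4g")             # more heap for serialization
            .set("spark.kryo.referenceTracking", "false"))  # skip the identity map that
                                                            # overflowed above; unsafe with
                                                            # circular object graphs
    sc = SparkContext(conf=conf)
    {code}
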

  3.

    Missing Thunder imports on EC2 slaves

    GitHub | 2 years ago | broxtronix
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 400 in stage 9.0 failed 4 times, most recent failure: Lost task 400.3 in stage 9.0 (TID 1743, ip-10-65-128-31.ec2.internal): org.apache.spark.api.python.PythonException:
    Traceback (most recent call last):
      File "/root/spark/python/pyspark/worker.py", line 75, in main
        command = pickleSer._read_with_length(infile)
      File "/root/spark/python/pyspark/serializers.py", line 150, in _read_with_length
        return self.loads(obj)
    ImportError: No module named thunder.rdds.imageblocks
        org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:124)
        org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:154)
        org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:87)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.api.python.PairwiseRDD.compute(PythonRDD.scala:265)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
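
    A worker-side ImportError like this means the package is importable on the driver but not on the executors. A minimal sketch of the standard remedy, shipping the code with the job via sc.addPyFile; the zip path is hypothetical and the archive must contain the top-level "thunder" directory so workers can resolve the import:

    {code:python}
    from pyspark import SparkContext

    sc = SparkContext(appName="thunder-job")  # hypothetical app name

    # Ship a zipped copy of the package; Spark adds it to each worker's
    # PYTHONPATH, after which tasks can `import thunder.rdds.imageblocks`.
    sc.addPyFile("/root/thunder.zip")         # hypothetical path
    {code}
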
  4.

    error when run subset: Py4JJavaError: An error occurred while calling o102.collect.

    GitHub | 2 years ago | getBioinfo
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.0 failed 1 times, most recent failure: Lost task 0.0 in stage 10.0 (TID 10, localhost): org.apache.spark.api.python.PythonException:
    Traceback (most recent call last):
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/worker.py", line 79, in main
        serializer.dump_stream(func(split_index, iterator), outfile)
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/serializers.py", line 196, in dump_stream
        self.serializer.dump_stream(self._batched(iterator), stream)
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/serializers.py", line 127, in dump_stream
        for obj in iterator:
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/serializers.py", line 185, in _batched
        for item in iterator:
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/rddsampler.py", line 115, in func
        if self.getUniformSample(split) <= self._fraction:
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/rddsampler.py", line 57, in getUniformSample
        self.initRandomGenerator(split)
      File "/Users/xuh2/Downloads/software/spark-1.1.0-bin-hadoop1/python/pyspark/rddsampler.py", line 43, in initRandomGenerator
        self._random = numpy.random.RandomState(self._seed)
      File "mtrand.pyx", line 610, in mtrand.RandomState.__init__ (numpy/random/mtrand/mtrand.c:7397)
      File "mtrand.pyx", line 646, in mtrand.RandomState.seed (numpy/random/mtrand/mtrand.c:7697)
    ValueError: Seed must be between 0 and 4294967295
        org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:124)
        org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:154)
        org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:87)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918)
        java.lang.Thread.run(Thread.java:695)
    Driver stacktrace:
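
    The ValueError at the bottom of the traceback is numpy's, not Spark's: RandomState accepts seeds only in [0, 4294967295], and rddsampler.py passes the user's seed straight through to it. A standalone check of that bound, plus one workaround of folding a large seed into range before calling sample(); the rdd call site is hypothetical:

    {code:python}
    import numpy

    try:
        numpy.random.RandomState(2 ** 32)  # one past the allowed maximum
    except ValueError as err:
        print(err)                         # "Seed must be between 0 and 4294967295"

    big_seed = 123456789012345             # e.g. a timestamp-derived seed
    safe_seed = big_seed % (2 ** 32)       # always within numpy's accepted range
    # rdd.sample(False, 0.1, safe_seed)    # hypothetical call site
    {code}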

Root Cause Analysis

  1. org.apache.spark.SparkException

    Job aborted due to stage failure: Task 0 in stage 14.0 failed 1 times, most recent failure: Lost task 0.0 in stage 14.0 (TID 14, localhost): org.apache.spark.SparkException: R computation failed with
    Error in order(unlist(part, recursive = FALSE), decreasing = !ascending) :
      unimplemented type 'list' in 'orderVector1'
    Calls: do.call ... Reduce -> <Anonymous> -> func -> FUN -> FUN -> order
    Execution halted
        edu.berkeley.cs.amplab.sparkr.BaseRRDD.compute(RRDD.scala:69)
        org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
        org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
        org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
        org.apache.spark.scheduler.Task.run(Task.scala:54)
        org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
        java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:

    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages()
  2. Spark
    DAGScheduler$$anonfun$abortStage$1.apply
    1. org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1185)
    2. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1174)
    3. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1173)
    3 frames
  3. Scala
    ArrayBuffer.foreach
    1. scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    2. scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    2 frames
  4. Spark
    DAGScheduler$$anonfun$handleTaskSetFailed$1.apply
    1. org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1173)
    2. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
    3. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:688)
    3 frames
  5. Scala
    Option.foreach
    1. scala.Option.foreach(Option.scala:236)
    1 frame
  6. Spark
    DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse
    1. org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:688)
    2. org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1391)
    2 frames
  7. Akka Actor
    ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec
    1. akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
    2. akka.actor.ActorCell.invoke(ActorCell.scala:456)
    3. akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
    4. akka.dispatch.Mailbox.run(Mailbox.scala:219)
    5. akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
    5 frames
  8. Scala
    ForkJoinWorkerThread.run
    1. scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    2. scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    3. scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    4. scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    4 frames