org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 37.0 failed 1 times, most recent failure: Lost task 0.0 in stage 37.0 (TID 223, localhost): java.lang.IllegalArgumentException: ReplicaPartitioner can only determine the partition of a tuple whose key is a non-empty Set[InetAddress]. Invalid key: Set()

DataStax JIRA | Jacek Lewandowski | 2 years ago
  1. RDDSpec "should be repartitionable" fails in 1.2.0

    In 1.2.0 the failing tests are in {{RDDSpec}}:
    {noformat}
    java.lang.IllegalArgumentException: ReplicaPartitioner can only determine the partition of a tuple whose key is a non-empty Set[InetAddress]. Invalid key: Set()
        at com.datastax.spark.connector.rdd.partitioner.ReplicaPartitioner.getPartition(ReplicaPartitioner.scala:44)
        at org.apache.spark.util.collection.ExternalSorter.org$apache$spark$util$collection$ExternalSorter$$getPartition(ExternalSorter.scala:113)
        at org.apache.spark.util.collection.ExternalSorter$$anonfun$insertAll$1.apply(ExternalSorter.scala:212)
        at org.apache.spark.util.collection.ExternalSorter$$anonfun$insertAll$1.apply(ExternalSorter.scala:211)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at org.apache.spark.util.collection.ExternalSorter.spillToPartitionFiles(ExternalSorter.scala:366)
        at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:211)
        at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:56)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
    WARN 16:12:59,591 org.apache.spark.Logging$class (Logging.scala:71) - Lost task 0.0 in stage 37.0 (TID 223, localhost): java.lang.IllegalArgumentException: ReplicaPartitioner can only determine the partition of a tuple whose key is a non-empty Set[InetAddress]. Invalid key: Set() (same stack trace as above)
    ERROR 16:12:59,591 org.apache.spark.Logging$class (Logging.scala:75) - Task 0 in stage 37.0 failed 1 times; aborting job
    [info] - should be repartitionable *** FAILED *** (42 milliseconds)
    [info]   org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 37.0 failed 1 times, most recent failure: Lost task 0.0 in stage 37.0 (TID 223, localhost): java.lang.IllegalArgumentException: ReplicaPartitioner can only determine the partition of a tuple whose key is a non-empty Set[InetAddress]. Invalid key: Set() (same executor stack trace as above)
    [info]
    [info]   Driver stacktrace:
    [info]   at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214)
    [info]   at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203)
    [info]   at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202)
    [info]   at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    [info]   at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    [info]   at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202)
    [info]   at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
    [info]   at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
    [info]   at scala.Option.foreach(Option.scala:236)
    [info]   at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696)
    [info]   at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420)
    [info]   at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
    [info]   at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375)
    [info]   at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
    [info]   at akka.actor.ActorCell.invoke(ActorCell.scala:487)
    [info]   at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
    [info]   at akka.dispatch.Mailbox.run(Mailbox.scala:220)
    [info]   at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
    [info]   at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    [info]   at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    [info]   at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    [info]   at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    {noformat}

    DataStax JIRA | 2 years ago | Jacek Lewandowski
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 37.0 failed 1 times, most recent failure: Lost task 0.0 in stage 37.0 (TID 223, localhost): java.lang.IllegalArgumentException: ReplicaPartitioner can only determine the partition of a tuple whose key is a non-empty Set[InetAddress]. Invalid key: Set()
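
    The message spells out the partitioner's contract: every shuffle key must be a non-empty Set[InetAddress] of replica endpoints. A minimal sketch of that guard (paraphrased from the error message; the real ReplicaPartitioner in spark-cassandra-connector carries extra host-indexing logic):

      import java.net.InetAddress
      import org.apache.spark.Partitioner

      // Sketch of the contract implied by the error message, not the
      // connector's actual implementation.
      class ReplicaLikePartitioner(hosts: IndexedSeq[InetAddress]) extends Partitioner {
        override def numPartitions: Int = hosts.size
        override def getPartition(key: Any): Int = key match {
          case replicas: Set[_] if replicas.nonEmpty =>
            // Route the tuple to a partition owned by one of its replicas.
            math.abs(replicas.head.hashCode) % numPartitions
          case other =>
            throw new IllegalArgumentException(
              "ReplicaPartitioner can only determine the partition of a tuple whose " +
              s"key is a non-empty Set[InetAddress]. Invalid key: $other")
        }
      }

    A key of Set() therefore means the replica lookup produced no hosts for that row, and the first such tuple entering the shuffle aborts the stage.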
  2. Apache Spark User List - Flatten list

    nabble.com | 12 months ago
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 37.0 failed 1 times, most recent failure: Lost task 0.0 in stage 37.0 (TID 73, localhost): java.lang.ArrayStoreException: org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
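
    The ArrayStoreException here is the JVM refusing to store a GenericRowWithSchema into an array whose runtime element type is more specific. That commonly happens when flattening a column of nested structs: Spark hands the elements back as Row, not as your case class. A hedged sketch (class and column names are made up for illustration):

      import org.apache.spark.sql.{Row, SparkSession}

      case class Inner(x: Int)

      object FlattenSketch {
        def main(args: Array[String]): Unit = {
          val spark = SparkSession.builder.master("local[*]").appName("flatten-sketch").getOrCreate()
          import spark.implicits._

          val df = Seq((1, Seq(Inner(1), Inner(2)))).toDF("id", "items")

          // Compiles, but the runtime elements are GenericRowWithSchema;
          // copying them into an Array[Inner] throws ArrayStoreException:
          // df.rdd.map(r => r.getAs[Seq[Inner]]("items").toArray).collect()

          // Safer: read the structs as Row and convert explicitly.
          val flattened = df.rdd
            .flatMap(r => r.getSeq[Row](r.fieldIndex("items")).map(row => Inner(row.getInt(0))))
          flattened.collect().foreach(println)

          spark.stop()
        }
      }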
  3. Spark streaming rawSocketStream

    Stack Overflow | 8 months ago | Lokesh Kumar P
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.IllegalArgumentException
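
    rawSocketStream expects the remote end to write length-prefixed blocks serialized with Spark's serializer; pointed at a plain-text socket, the receiver interprets arbitrary bytes as a block length, which can surface as an IllegalArgumentException. For plain text the usual choice is socketTextStream. A sketch, assuming a plain-text server on localhost:9999 (e.g. nc -lk 9999):

      import org.apache.spark.SparkConf
      import org.apache.spark.streaming.{Seconds, StreamingContext}

      object SocketSketch {
        def main(args: Array[String]): Unit = {
          val conf = new SparkConf().setMaster("local[2]").setAppName("socket-sketch")
          val ssc = new StreamingContext(conf, Seconds(5))

          // Wrong for plain text: expects Spark-serialized, length-prefixed blocks.
          // val lines = ssc.rawSocketStream[String]("localhost", 9999)

          // Right for plain text:
          val lines = ssc.socketTextStream("localhost", 9999)
          lines.print()

          ssc.start()
          ssc.awaitTermination()
        }
      }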
  4. Custom Transformer in PySpark Pipeline with Cross Validation

    Stack Overflow | 1 year ago | ValD
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3.0 failed 1 times, most recent failure: Lost task 0.0 in stage 3.0 (TID 3, localhost): org.apache.spark.SparkException: Can only zip RDDs with same number of elements in each partition
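
    zip requires both RDDs to have the same number of partitions and the same number of elements in each partition, and Spark only verifies the element counts while the zipped iterators are being consumed. The original report is PySpark, but the mechanics are identical in Scala; a minimal sketch of the failure and a common index-and-join workaround:

      import org.apache.spark.{SparkConf, SparkContext}

      object ZipSketch {
        def main(args: Array[String]): Unit = {
          val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("zip-sketch"))

          val a = sc.parallelize(1 to 10, 2)
          val b = sc.parallelize(1 to 9, 2) // one element short

          // Fails at action time with "Can only zip RDDs with same number
          // of elements in each partition":
          // a.zip(b).collect()

          // Workaround: key both sides by a stable index and join.
          val joined = a.zipWithIndex.map(_.swap)
            .join(b.zipWithIndex.map(_.swap))
            .values
          joined.collect().foreach(println)

          sc.stop()
        }
      }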
  5. Defining a DataFrame on an existing HBase table

    GitHub | 8 months ago | kamelia78
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.IllegalArgumentException: offset (0) + length (8) exceed the capacity of the array: 3
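
    This message comes from HBase's Bytes utility: decoding a long consumes 8 bytes, but the stored cell here is only 3 bytes, which usually means the declared schema maps a numeric type onto a column that was actually written as a short string. A minimal sketch of the decode mismatch (requires hbase-common; the sample value is hypothetical):

      import org.apache.hadoop.hbase.util.Bytes

      object HBaseDecodeSketch {
        def main(args: Array[String]): Unit = {
          // A cell written as a 3-character string...
          val stored: Array[Byte] = Bytes.toBytes("abc") // 3 bytes

          // ...decoded as if it held a long (8 bytes) throws
          // IllegalArgumentException: offset (0) + length (8) exceed the
          // capacity of the array: 3
          val asLong = Bytes.toLong(stored)
          println(asLong)
        }
      }

    The fix is to make the declared column type match how the value was written: map the column as a string, or write it with Bytes.toBytes(longValue) in the first place.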


    Root Cause Analysis

    1. org.apache.spark.SparkException

      Job aborted due to stage failure: Task 0 in stage 37.0 failed 1 times, most recent failure: Lost task 0.0 in stage 37.0 (TID 223, localhost): java.lang.IllegalArgumentException: ReplicaPartitioner can only determine the partition of a tuple whose key is a non-empty Set[InetAddress]. Invalid key: Set()

      at com.datastax.spark.connector.rdd.partitioner.ReplicaPartitioner.getPartition()
    2. spark-cassandra-connector
      ReplicaPartitioner.getPartition
      1. com.datastax.spark.connector.rdd.partitioner.ReplicaPartitioner.getPartition(ReplicaPartitioner.scala:44)
      1 frame
    3. Spark
      ExternalSorter$$anonfun$insertAll$1.apply
      1. org.apache.spark.util.collection.ExternalSorter.org$apache$spark$util$collection$ExternalSorter$$getPartition(ExternalSorter.scala:113)
      2. org.apache.spark.util.collection.ExternalSorter$$anonfun$insertAll$1.apply(ExternalSorter.scala:212)
      3. org.apache.spark.util.collection.ExternalSorter$$anonfun$insertAll$1.apply(ExternalSorter.scala:211)
      3 frames
    4. Scala
      Iterator$$anon$11.next
      1. scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
      1 frame
    5. Spark
      Executor$TaskRunner.run
      1. org.apache.spark.util.collection.ExternalSorter.spillToPartitionFiles(ExternalSorter.scala:366)
      2. org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:211)
      3. org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:63)
      4. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
      5. org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
      6. org.apache.spark.scheduler.Task.run(Task.scala:56)
      7. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:200)
      7 frames
    6. Java RT
      Thread.run
      1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      3. java.lang.Thread.run(Thread.java:745)
      3 frames
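
    Given the guard at ReplicaPartitioner.getPartition, the direct trigger is a tuple keyed by an empty replica set entering the shuffle. A defensive sketch (assuming keys shaped like the connector's RDD[(Set[InetAddress], T)]; filtering is a stopgap, since an empty set means the replica lookup itself found no hosts and that is worth investigating first):

      import java.net.InetAddress
      import org.apache.spark.rdd.RDD

      // Keep only tuples that satisfy the partitioner's contract.
      def dropEmptyReplicaSets[T](keyed: RDD[(Set[InetAddress], T)]): RDD[(Set[InetAddress], T)] =
        keyed.filter { case (replicas, _) => replicas.nonEmpty }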