java.lang.NullPointerException

There are no available Samebug tips for this exception.

  • I get an NPE in a very weird place on the cluster at my work:
    {code}
    Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 230, spark01.example.com): java.lang.NullPointerException
        at com.mongodb.hadoop.output.MongoRecordWriter.write(MongoRecordWriter.java:115)
        at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply$mcV$sp(PairRDDFunctions.scala:1036)
        at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1034)
        at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1034)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1285)
        at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1042)
        at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1014)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
    Driver stacktrace:
    {code}
    This only occurs when I run Spark with master {{yarn-cluster}}; with master {{local}} it runs fine. As noted above, the cluster uses GPFS. Here's the snippet from my Scala object which does the DB writing:
    {code:scala}
    private def applyResultRDD(resultRDD: RDD[(BSONObject, BSONObject)]) = {
      val outputConfig = new Configuration()
      if (Option(authUri.getUserInfo).isDefined)
        outputConfig.set("mongo.auth.uri", authUri.toString)
      outputConfig.set("mongo.output.uri", outputCollUri.toString)
      resultRDD.saveAsNewAPIHadoopFile("file:///UNUSED",
        classOf[BSONObject], classOf[BSONObject],
        classOf[MongoOutputFormat[BSONObject, BSONObject]], outputConfig)
    }
    {code}
    via Andy MacKinlay; a hedged configuration sketch follows below.
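Not a confirmed fix, but one difference worth ruling out when behaviour diverges between {{local}} and {{yarn-cluster}}: the snippet above builds its output config with {{new Configuration()}}, which starts from whatever Hadoop defaults happen to be on the executor classpath rather than from the configuration Spark was launched with. Below is a minimal sketch of the same method that clones {{sc.hadoopConfiguration}} instead; the explicit {{sc}}, {{authUri}}, and {{outputCollUri}} parameters are assumptions added here to keep the example self-contained.
{code:scala}
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bson.BSONObject
import com.mongodb.hadoop.MongoOutputFormat

private def applyResultRDD(sc: SparkContext,
                           authUri: URI,
                           outputCollUri: URI,
                           resultRDD: RDD[(BSONObject, BSONObject)]): Unit = {
  // Clone the cluster configuration Spark already carries, instead of an
  // empty Configuration, so cluster-side Hadoop settings are preserved.
  val outputConfig = new Configuration(sc.hadoopConfiguration)
  if (Option(authUri.getUserInfo).isDefined)
    outputConfig.set("mongo.auth.uri", authUri.toString)
  outputConfig.set("mongo.output.uri", outputCollUri.toString)
  // Placeholder path kept from the original snippet, whose "UNUSED" name
  // suggests MongoOutputFormat ignores it.
  resultRDD.saveAsNewAPIHadoopFile(
    "file:///UNUSED",
    classOf[BSONObject],
    classOf[BSONObject],
    classOf[MongoOutputFormat[BSONObject, BSONObject]],
    outputConfig)
}
{code}
If that changes nothing, the next thing to inspect would be the mongo-hadoop version deployed on the executors, since the NPE originates inside {{MongoRecordWriter.write}} itself.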