org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file:/user/sukla/wordcount/input/file01

JIRA | Sukla Nag | 2 years ago
  1. I tried to run wordcount.R through Rscript:

        sudo Rscript wordcount.R local /user/sukla/wordcount/input/file01

    The complete error output follows (a Scala sketch after this list shows one way to validate the input path up front):

    15/01/19 11:22:50 WARN Utils: Your hostname, mooc-OptiPlex-990 resolves to a loopback address: 127.0.1.1; using 10.105.22.201 instead (on interface eth0)
    15/01/19 11:22:50 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
    15/01/19 11:22:50 INFO Slf4jLogger: Slf4jLogger started
    Warning message:
    In normalizePath(path) : path[1]="/user/sukla/wordcount/input/file01": No such file or directory
    15/01/19 11:22:51 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    15/01/19 11:22:51 WARN LoadSnappy: Snappy native library not loaded
    15/01/19 11:22:51 WARN DAGScheduler: Creating new stage failed due to exception - job: 0
    org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file:/user/sukla/wordcount/input/file01
        at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:197)
        at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:208)
        at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
        at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
        at edu.berkeley.cs.amplab.sparkr.BaseRRDD.getPartitions(RRDD.scala:26)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
        at edu.berkeley.cs.amplab.sparkr.BaseRRDD.getPartitions(RRDD.scala:26)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
        at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
        at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:79)
        at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:80)
        at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:191)
        at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:189)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.rdd.RDD.dependencies(RDD.scala:189)
        at org.apache.spark.scheduler.DAGScheduler.visit$1(DAGScheduler.scala:298)
        at org.apache.spark.scheduler.DAGScheduler.getParentStages(DAGScheduler.scala:310)
        at org.apache.spark.scheduler.DAGScheduler.newStage(DAGScheduler.scala:246)
        at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:726)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1360)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
        at akka.actor.ActorCell.invoke(ActorCell.scala:456)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
        at akka.dispatch.Mailbox.run(Mailbox.scala:219)
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    Error in .jcall(getJRDD(rdd), "Ljava/util/List;", "collect") :
      org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file:/user/sukla/wordcount/input/file01
    Calls: collect -> collect -> .local -> .jcall -> .jcheck -> .Call
    Execution halted

    JIRA | 2 years ago | Sukla Nag
    org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file:/user/sukla/wordcount/input/file01
  2. Accessing Google Storage with SparkR on bdutil deployed cluster

    Stack Overflow | 2 years ago
    java.lang.reflect.InvocationTargetException
  3. Apache Spark User List - SparkContext.textfile() cannot load file using UNC path on windows

    nabble.com | 1 year ago
    org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file://10.209.128.150/TempShare/SvmPocData/reuters-two-categories.load
  4. Scala code pattern for loading RDD or catching error and creating the RDD? (see the load-or-create sketch below)

    Stack Overflow | 2 years ago | Ziggy Eunicien
    org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://localhost/Users/data/hdfs/namenode/myRDD.txt

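The path in report 1 carries no URI scheme, so Spark resolved it against the default (local) filesystem as file:/user/sukla/wordcount/input/file01; a file that exists only on HDFS, or nowhere at all (as the normalizePath warning suggests), then fails inside FileInputFormat. Below is a minimal Scala sketch of a defensive version, assuming a hypothetical local input file; the original job was SparkR, but the path resolution happens in the same Hadoop layer:

    import org.apache.hadoop.fs.Path
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.SparkContext._  // pair-RDD implicits on pre-1.3 Spark

    object WordCountInputCheck {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setMaster("local").setAppName("wordcount-input-check"))

        // Hypothetical path for illustration; spell out the scheme (file:// or
        // hdfs://) so resolution does not silently depend on the default filesystem.
        val input = "file:///tmp/wordcount/input/file01"

        // Ask Hadoop's FileSystem (the same layer FileInputFormat.listStatus
        // queries) whether the path exists before building the RDD.
        val path = new Path(input)
        val fs = path.getFileSystem(sc.hadoopConfiguration)
        require(fs.exists(path), s"Input path does not exist: $input")

        val counts = sc.textFile(input)
          .flatMap(_.split("\\s+"))
          .map(word => (word, 1))
          .reduceByKey(_ + _)

        counts.collect().foreach(println)
        sc.stop()
      }
    }

Report 3 is the same failure in another guise: file://10.209.128.150/TempShare/... is a file URI whose authority is a remote host, and Hadoop's local filesystem generally cannot serve that, so the share appears not to exist; mounting the UNC path locally or using a scheme the cluster actually supports avoids the misleading message.
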
Users who encountered this exception:

  1. tyson925, 3 times, last seen 1 month ago
  2. tyson925, 1 time, last seen 1 month ago
  3. 7 unregistered visitors
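
Report 4 asks for a pattern that reuses a saved RDD when its file exists and builds it otherwise. A minimal sketch under the same assumptions as above (text data, a live SparkContext named sc at the call site); the helper name and signature are illustrative, not taken from the original question:

    import org.apache.hadoop.fs.Path
    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD

    // Load the RDD from pathStr if a previous run saved it there; otherwise
    // build it with `create`, persist it for next time, and return it.
    def loadOrCreate(sc: SparkContext, pathStr: String)(create: => RDD[String]): RDD[String] = {
      val path = new Path(pathStr)
      val fs = path.getFileSystem(sc.hadoopConfiguration)
      if (fs.exists(path)) {
        sc.textFile(pathStr)
      } else {
        val rdd = create
        rdd.saveAsTextFile(pathStr)
        rdd
      }
    }

Checking existence up front is more robust than catching InvalidInputException, because the exception is not raised when textFile() is called but only later, when an action forces partition computation.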

Root Cause Analysis

  1. org.apache.hadoop.mapred.InvalidInputException

    Input path does not exist: file:/user/sukla/wordcount/input/file01

    at org.apache.hadoop.mapred.FileInputFormat.listStatus()
  2. Hadoop
    FileInputFormat.getSplits
    1. org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:197)
    2. org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:208)
    2 frames
  3. Spark
    RDD$$anonfun$partitions$2.apply
    1. org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
    2. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
    3. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
    3 frames
  4. Scala
    Option.getOrElse
    1. scala.Option.getOrElse(Option.scala:120)
    1 frame
  5. Spark
    RDD$$anonfun$partitions$2.apply
    1. org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
    2. org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
    3. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
    4. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
    4 frames
  6. Scala
    Option.getOrElse
    1. scala.Option.getOrElse(Option.scala:120)
    1 frame
  7. Spark
    RDD.partitions
    1. org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
    1 frame
  8. edu.berkeley.cs
    BaseRRDD.getPartitions
    1. edu.berkeley.cs.amplab.sparkr.BaseRRDD.getPartitions(RRDD.scala:26)
    1 frame
  9. Spark
    RDD$$anonfun$partitions$2.apply
    1. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
    2. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
    2 frames
  10. Scala
    Option.getOrElse
    1. scala.Option.getOrElse(Option.scala:120)
    1 frame
  11. Spark
    RDD.partitions
    1. org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
    1 frame
  12. edu.berkeley.cs
    BaseRRDD.getPartitions
    1. edu.berkeley.cs.amplab.sparkr.BaseRRDD.getPartitions(RRDD.scala:26)
    1 frame
  13. Spark
    RDD$$anonfun$partitions$2.apply
    1. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
    2. org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
    2 frames
  14. Scala
    Option.getOrElse
    1. scala.Option.getOrElse(Option.scala:120)
    1 frame
  15. Spark
    RDD$$anonfun$dependencies$2.apply
    1. org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
    2. org.apache.spark.ShuffleDependency.<init>(Dependency.scala:79)
    3. org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:80)
    4. org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:191)
    5. org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:189)
    5 frames
  16. Scala
    Option.getOrElse
    1. scala.Option.getOrElse(Option.scala:120)
    1 frame
  17. Spark
    DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse
    1. org.apache.spark.rdd.RDD.dependencies(RDD.scala:189)
    2. org.apache.spark.scheduler.DAGScheduler.visit$1(DAGScheduler.scala:298)
    3. org.apache.spark.scheduler.DAGScheduler.getParentStages(DAGScheduler.scala:310)
    4. org.apache.spark.scheduler.DAGScheduler.newStage(DAGScheduler.scala:246)
    5. org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:726)
    6. org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1360)
    6 frames
  18. Akka Actor
    ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec
    1. akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
    2. akka.actor.ActorCell.invoke(ActorCell.scala:456)
    3. akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
    4. akka.dispatch.Mailbox.run(Mailbox.scala:219)
    5. akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
    5 frames
  19. Scala
    ForkJoinWorkerThread.run
    1. scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    2. scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    3. scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    4. scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
    4 frames
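
The grouping makes the timing of the failure visible: FileInputFormat.listStatus (frame group 2) runs inside DAGScheduler.handleJobSubmitted (group 17), that is, at the first action rather than when the RDD is defined. A minimal sketch of that laziness, with a hypothetical missing path:

    import org.apache.spark.{SparkConf, SparkContext}

    object EagerInputValidation {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setMaster("local").setAppName("eager-input-validation"))

        // No error yet: textFile() only records the path, because RDDs are lazy.
        val rdd = sc.textFile("file:///no/such/path")

        // Forcing the partitions triggers HadoopRDD.getPartitions ->
        // FileInputFormat.getSplits -> listStatus (groups 2 and 3 above) and
        // throws InvalidInputException here, instead of deep inside the
        // scheduler when an action such as collect() runs.
        rdd.partitions

        sc.stop()
      }
    }

In the SparkR report the triggering action is the collect visible in the final R error (Calls: collect -> ... -> .Call), which is why the trace ends in DAGScheduler and Akka frames rather than in user code.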