java.lang.NoClassDefFoundError: org/apache/kafka/common/message/KafkaLZ4BlockOutputStream

  1. 0

    How to store spark pair rdd as file into HDFS ?

    Stack Overflow | 2 weeks ago | Chayma Sakouhi
    java.lang.NoClassDefFoundError: org/apache/kafka/common/message/KafkaLZ4BlockOutputStream
  2. 0

    Re: Spark Streaming: java.lang.NoClassDefFoundError: org/apache/kafka/common/message/KafkaLZ4BlockOutputStream

    spark-user | 10 months ago | Ted Yu
    scheduler.TaskSetManager: Lost task 0.0 in stage 1.0 (TID 3, ip-172-31-32-183.us-west-2.compute.internal): java.lang.NoClassDefFoundError: org/apache/kafka/common/message/KafkaLZ4BlockOutputStream
  3. 0

    We have seen non-deterministic {{FAILED_TO_UNCOMPRESS(5)}} errors during shuffle read. Here's a sample stacktrace from an executor: {code} 14/10/23 18:34:11 ERROR Executor: Exception in task 1747.3 in stage 11.0 (TID 33053) java.io.IOException: FAILED_TO_UNCOMPRESS(5) at org.xerial.snappy.SnappyNative.throw_error(SnappyNative.java:78) at org.xerial.snappy.SnappyNative.rawUncompress(Native Method) at org.xerial.snappy.Snappy.rawUncompress(Snappy.java:391) at org.xerial.snappy.Snappy.uncompress(Snappy.java:427) at org.xerial.snappy.SnappyInputStream.readFully(SnappyInputStream.java:127) at org.xerial.snappy.SnappyInputStream.readHeader(SnappyInputStream.java:88) at org.xerial.snappy.SnappyInputStream.<init>(SnappyInputStream.java:58) at org.apache.spark.io.SnappyCompressionCodec.compressedInputStream(CompressionCodec.scala:128) at org.apache.spark.storage.BlockManager.wrapForCompression(BlockManager.scala:1090) at org.apache.spark.storage.ShuffleBlockFetcherIterator$$anon$1$$anonfun$onBlockFetchSuccess$1.apply(ShuffleBlockFetcherIterator.scala:116) at org.apache.spark.storage.ShuffleBlockFetcherIterator$$anon$1$$anonfun$onBlockFetchSuccess$1.apply(ShuffleBlockFetcherIterator.scala:115) at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:243) at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:52) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30) at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:129) at org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5.apply(CoGroupedRDD.scala:159) at org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5.apply(CoGroupedRDD.scala:158) at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771) at org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:158) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) at org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) at org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:56) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:181) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) {code} Here's another occurrence of a similar error: {code} java.io.IOException: failed to read chunk org.xerial.snappy.SnappyInputStream.hasNextChunk(SnappyInputStream.java:348) org.xerial.snappy.SnappyInputStream.rawRead(SnappyInputStream.java:159) org.xerial.snappy.SnappyInputStream.read(SnappyInputStream.java:142) java.io.ObjectInputStream$PeekInputStream.read(ObjectInputStream.java:2310) java.io.ObjectInputStream$BlockDataInputStream.read(ObjectInputStream.java:2712) java.io.ObjectInputStream$BlockDataInputStream.readFully(ObjectInputStream.java:2742) java.io.ObjectInputStream.readArray(ObjectInputStream.java:1687) java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1344) java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990) java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915) java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798) java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350) java.io.ObjectInputStream.readObject(ObjectInputStream.java:370) org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:62) org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:133) org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71) scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30) org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39) org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:129) org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:58) org.apache.spark.shuffle.hash.HashShuffleReader.read(HashShuffleReader.scala:46) org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:92) org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) org.apache.spark.rdd.RDD.iterator(RDD.scala:229) org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) org.apache.spark.scheduler.Task.run(Task.scala:56) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:182) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) java.lang.Thread.run(Thread.java:745) {code} The first stacktrace was reported by a Spark user. The second stacktrace occurred when running {code} import java.util.Random val numKeyValPairs=1000 val numberOfMappers=200 val keySize=10000 for (i <- 0 to 19) { val pairs1 = sc.parallelize(0 to numberOfMappers, numberOfMappers).flatMap(p=>{ val randGen = new Random val arr1 = new Array[(Int, Array[Byte])](numKeyValPairs) for (i <- 0 until numKeyValPairs){ val byteArr = new Array[Byte](keySize) randGen.nextBytes(byteArr) arr1(i) = (randGen.nextInt(Int.MaxValue),byteArr) } arr1 }) pairs1.groupByKey(numberOfMappers).count } {code} This job frequently runs without any problems, but when it fails it seem that every post-shuffle task fails with either PARSING_ERROR(2), FAILED_TO_UNCOMPRESS(5), or some other decompression error. I've seen reports of similar problems when using LZF compression, so I think that this is caused by some sort of general stream corruption issue. This issue has been observed even when no spilling occurs, so I don't believe that this is due to a bug in spilling code. I was unable to reproduce this when running this code in a fresh Spark EC2 cluster and we've been having a hard time finding a deterministic reproduction.

    Apache's JIRA Issue Tracker | 2 years ago | Josh Rosen
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 19 in stage 0.0 failed 4 times, most recent failure: Lost task 19.3 in stage 0.0 (TID 66, xvac-d25.xv.dc.openx.org): java.io.IOException: failed to uncompress the chunk: FAILED_TO_UNCOMPRESS(5)
  4. Speed up your debug routine!

    Automated exception search integrated into your IDE

  5. 0

    UnknownCodecException kafka

    Stack Overflow | 11 months ago | Thabby07
    kafka.common.UnknownCodecException: 3 is an unknown compression codec
Not finding the right solution?
Take a tour to get the most out of Samebug.

Tired of useless tips?

Automated exception search integrated into your IDE

Root Cause Analysis

  1. java.lang.ClassNotFoundException

    org.apache.kafka.common.message.KafkaLZ4BlockOutputStream

    at java.net.URLClassLoader.findClass()
  2. Java RT
    ClassLoader.loadClass
    1. java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    2. java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    3. sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    4. java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    4 frames
  3. Apache Kafka
    IteratorTemplate.hasNext
    1. kafka.message.ByteBufferMessageSet$.decompress(ByteBufferMessageSet.scala:65)
    2. kafka.message.ByteBufferMessageSet$$anon$1.makeNextOuter(ByteBufferMessageSet.scala:179)
    3. kafka.message.ByteBufferMessageSet$$anon$1.makeNext(ByteBufferMessageSet.scala:192)
    4. kafka.message.ByteBufferMessageSet$$anon$1.makeNext(ByteBufferMessageSet.scala:146)
    5. kafka.utils.IteratorTemplate.maybeComputeNext(IteratorTemplate.scala:66)
    6. kafka.utils.IteratorTemplate.hasNext(IteratorTemplate.scala:58)
    6 frames
  4. Scala
    Iterator$$anon$18.hasNext
    1. scala.collection.Iterator$$anon$18.hasNext(Iterator.scala:764)
    1 frame
  5. Spark Project External Kafka
    KafkaRDD$KafkaRDDIterator.getNext
    1. org.apache.spark.streaming.kafka.KafkaRDD$KafkaRDDIterator.getNext(KafkaRDD.scala:211)
    1 frame
  6. Spark
    NextIterator.hasNext
    1. org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
    1 frame
  7. Scala
    Iterator$$anon$11.hasNext
    1. scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    1 frame
  8. Spark
    Executor$TaskRunner.run
    1. org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply$mcV$sp(PairRDDFunctions.scala:1203)
    2. org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1203)
    3. org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13$$anonfun$apply$7.apply(PairRDDFunctions.scala:1203)
    4. org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1325)
    5. org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1211)
    6. org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1190)
    7. org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
    8. org.apache.spark.scheduler.Task.run(Task.scala:85)
    9. org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    9 frames
  9. Java RT
    Thread.run
    1. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    2. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    3. java.lang.Thread.run(Thread.java:745)
    3 frames