java.lang.Exception: org.apache.pig.backend.executionengine.ExecException: ERROR 2108: Could not determine data type of field: [B@3982a033

JIRA | Geoff Minerbo | 3 years ago
  1.

    We're trying to use the Mongo Hadoop Connector to copy data from Mongo to Hadoop. The BSONLoader has problems parsing any field that contains binary data. Any help would be greatly appreciated.

    REPRO STEPS:

    * Create a document containing a field with binary data:
    {code:JavaScript}
    db.Books.save({"title" : "The Three Musketeers", "author" : "Alexandre Dumas" })
    db.Books.save({"title" : "Siddhartha", "author" : "Hermann Hesse", "myBinaryData" : BinData(0,"12345678")})
    {code}

    * Dump the collection to generate a BSON file:
    {noformat}
    mongodump --db myBooks
    {noformat}

    * Use Pig to read the Books.bson file:
    {noformat}
    REGISTER /my/work/dir/lib/mongo-java-driver-2.11.2.jar
    REGISTER /my/work/dir/lib/mongo-hadoop-core_cdh4.3.0-1.1.0.jar
    REGISTER /my/work/dir/lib/mongo-hadoop-pig_cdh4.3.0-1.1.0.jar
    rawBooks = LOAD '/my/work/dir/data/Books.bson' USING com.mongodb.hadoop.pig.BSONLoader();
    STORE rawBooks INTO '/my/work/dir/out/p13n_data';
    {noformat}

    Results:

    {noformat}
    java.lang.Exception: org.apache.pig.backend.executionengine.ExecException: ERROR 2108: Could not determine data type of field: [B@3982a033
        at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:404)
    Caused by: org.apache.pig.backend.executionengine.ExecException: ERROR 2108: Could not determine data type of field: [B@3982a033
        at org.apache.pig.impl.util.StorageUtil.putField(StorageUtil.java:208)
        at org.apache.pig.impl.util.StorageUtil.putField(StorageUtil.java:166)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat$PigLineRecordWriter.write(PigTextOutputFormat.java:68)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat$PigLineRecordWriter.write(PigTextOutputFormat.java:44)
        at org.apache.pig.builtin.PigStorage.putNext(PigStorage.java:296)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:139)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:98)
        at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:559)
        at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:85)
        at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:106)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.collect(PigMapOnly.java:48)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:264)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64)
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:140)
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:673)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:331)
        at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:266)
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
        at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
        at java.util.concurrent.FutureTask.run(FutureTask.java:138)
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:662)
    {noformat}

    JIRA | 3 years ago | Geoff Minerbo
    java.lang.Exception: org.apache.pig.backend.executionengine.ExecException: ERROR 2108: Could not determine data type of field: [B@3982a033
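
    The printout "[B@3982a033" is just the default toString() of a raw Java byte[], which suggests the BSON binary value reaches Pig's storage layer without being wrapped in a type Pig recognizes. Below is a minimal sketch of the kind of conversion a loader (or a small helper applied before the STORE) would need; the class and method names are hypothetical, and mapping BSON Binary to DataByteArray is an assumption, not the connector's actual code.

    {code:java}
    import org.apache.pig.data.DataByteArray;
    import org.bson.BSONObject;
    import org.bson.types.Binary;

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Hypothetical helper: convert BSON values into objects Pig's storage layer
    // understands. Raw byte[] and org.bson.types.Binary are wrapped in
    // DataByteArray so StorageUtil.putField can serialize them.
    public final class BsonToPig {

        public static Map<String, Object> toPigFields(BSONObject doc) {
            Map<String, Object> fields = new LinkedHashMap<>();
            for (String key : doc.keySet()) {
                fields.put(key, toPigValue(doc.get(key)));
            }
            return fields;
        }

        static Object toPigValue(Object value) {
            if (value instanceof Binary) {
                // BSON BinData -> Pig bytearray
                return new DataByteArray(((Binary) value).getData());
            }
            if (value instanceof byte[]) {
                // A raw byte[] is what prints as "[B@..." in ERROR 2108
                return new DataByteArray((byte[]) value);
            }
            // Strings, numbers, booleans, etc. already have Pig type mappings
            return value;
        }
    }
    {code}

    With values wrapped like this, Pig sees a bytearray instead of an unknown type, so ERROR 2108 should not be raised at store time.
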
  2.

    Pig MongoLoader exception loading data with UUIDs

    Stack Overflow | 3 years ago | Marquez
    org.apache.pig.backend.executionengine.ExecException: ERROR 2108: Could not determine data type of field: 1423ed53-5064-0000-784b-7bf2e2dd837b
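
    The UUID case above looks like the same failure mode: the field value arrives as something Pig's type system cannot classify. Below is a hedged sketch of converting java.util.UUID values into Pig-friendly representations (chararray or bytearray) before they are stored; both helpers are illustrative and are not part of MongoLoader.

    {code:java}
    import org.apache.pig.data.DataByteArray;

    import java.nio.ByteBuffer;
    import java.util.UUID;

    // Hypothetical conversions for UUID fields before they hit Pig's storage layer.
    public final class UuidToPig {

        // Simplest option: store the UUID as a chararray.
        public static String asChararray(UUID uuid) {
            return uuid.toString();  // e.g. "1423ed53-5064-0000-784b-7bf2e2dd837b"
        }

        // Alternative: keep the 16 raw bytes, but wrapped so Pig can type them.
        public static DataByteArray asBytearray(UUID uuid) {
            ByteBuffer buf = ByteBuffer.allocate(16);
            buf.putLong(uuid.getMostSignificantBits());
            buf.putLong(uuid.getLeastSignificantBits());
            return new DataByteArray(buf.array());
        }
    }
    {code}
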

  3.

    Loading large parquet files with huge schema using Pig

    Stack Overflow | 5 months ago | Prateek
    org.apache.pig.backend.executionengine.ExecException: ERROR 2118: Could not read footer: java.lang.OutOfMemoryError: Java heap space
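
    The Parquet footer carries the file's schema, so with a very large schema the JVM reading it can simply run out of heap. Below is a hedged sketch of raising the map-side heap through standard Hadoop 2.x properties; the concrete sizes are examples, not recommendations.

    {code:java}
    import org.apache.hadoop.conf.Configuration;

    // Hedged sketch: give the JVMs that read the Parquet footer more memory.
    // The property names are standard Hadoop 2.x keys; the values are examples.
    public final class HeapSettings {

        public static Configuration withLargerMapHeap(Configuration base) {
            Configuration conf = new Configuration(base);
            conf.set("mapreduce.map.memory.mb", "4096");       // container size
            conf.set("mapreduce.map.java.opts", "-Xmx3584m");  // JVM heap inside it
            return conf;
        }
    }
    {code}

    If the OutOfMemoryError happens in the Pig client while the load plan is being built rather than in a map task, raising the client heap (for example via the PIG_HEAPSIZE environment variable, in MB) is the analogous adjustment.
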
  4.

    hadoop - Cannot store a Pig relation into HBase - Database - HBase - koofun.com

    koofun.com | 12 months ago
    org.apache.pig.backend.executionengine.ExecException: ERROR 2244: Job failed, hadoop does not return any error message
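
    ERROR 2244 with no message from Hadoop often hides a client-side problem (wrong ZooKeeper quorum, missing HBase jars) rather than a real map-reduce failure. Below is a hedged sketch of checking connectivity with the same configuration the Pig job would use; the quorum hostnames are placeholders, and HBaseAdmin.checkHBaseAvailable is assumed to exist in the HBase version on the classpath.

    {code:java}
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HBaseAdmin;

    // Hedged sketch: verify HBase is reachable from the client that launches
    // the Pig job, using the same configuration the job would pick up.
    public final class HBaseConnectivityCheck {

        public static void main(String[] args) {
            Configuration conf = HBaseConfiguration.create();
            // Placeholder quorum; normally this comes from hbase-site.xml on the classpath.
            conf.set("hbase.zookeeper.quorum", "zk1.example.com,zk2.example.com");
            try {
                HBaseAdmin.checkHBaseAvailable(conf);  // throws if the master is unreachable
                System.out.println("HBase is reachable; the STORE failure lies elsewhere.");
            } catch (Exception e) {
                System.err.println("HBase not reachable from this client: " + e);
            }
        }
    }
    {code}
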


    Root Cause Analysis

    1. org.apache.pig.backend.executionengine.ExecException

      ERROR 2108: Could not determine data type of field: [B@3982a033

      at org.apache.pig.impl.util.StorageUtil.putField()
    2. org.apache.pig
      PigOutputFormat$PigRecordWriter.write
      1. org.apache.pig.impl.util.StorageUtil.putField(StorageUtil.java:208)
      2. org.apache.pig.impl.util.StorageUtil.putField(StorageUtil.java:166)
      3. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat$PigLineRecordWriter.write(PigTextOutputFormat.java:68)
      4. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat$PigLineRecordWriter.write(PigTextOutputFormat.java:44)
      5. org.apache.pig.builtin.PigStorage.putNext(PigStorage.java:296)
      6. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:139)
      7. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:98)
      7 frames
    3. Hadoop
      WrappedMapper$Context.write
      1. org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:559)
      2. org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:85)
      3. org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:106)
      3 frames
    4. org.apache.pig
      PigGenericMapBase.map
      1. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.collect(PigMapOnly.java:48)
      2. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:264)
      3. org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64)
      3 frames
    5. Hadoop
      LocalJobRunner$Job$MapTaskRunnable.run
      1. org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:140)
      2. org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:673)
      3. org.apache.hadoop.mapred.MapTask.run(MapTask.java:331)
      4. org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:266)
      4 frames
    6. Java RT
      Thread.run
      1. java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
      2. java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
      3. java.util.concurrent.FutureTask.run(FutureTask.java:138)
      4. java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
      5. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
      6. java.lang.Thread.run(Thread.java:662)
      6 frames
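
    For completeness, the failing frame can be exercised in isolation. The sketch below assumes the two-argument StorageUtil.putField(OutputStream, Object) overload visible in the trace: a raw byte[] has no Pig data type and triggers ERROR 2108, while the same bytes wrapped in DataByteArray serialize without complaint.

    {code:java}
    import org.apache.pig.data.DataByteArray;
    import org.apache.pig.impl.util.StorageUtil;

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    // Hedged reproduction of the failing frame in isolation.
    public final class PutFieldRepro {

        public static void main(String[] args) throws IOException {
            byte[] raw = "12345678".getBytes("UTF-8");
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            // Works: DataByteArray is a known Pig type (bytearray).
            StorageUtil.putField(out, new DataByteArray(raw));

            try {
                // Expected to fail with ERROR 2108, as in the report above.
                StorageUtil.putField(out, raw);
            } catch (IOException e) {
                System.err.println("As in the report: " + e.getMessage());
            }
        }
    }
    {code}
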