I have connected an HDFS source that contains Parquet-formatted files. The files use Parquet v2.0 with SNAPPY compression and dictionary encoding. When loading the files there is a metadata error, and the cause appears to be an org.apache.parquet.hadoop.DirectCodecFactory$DirectCodecPool$ParquetCompressionCodecException (reported with a null message) thrown while decompressing a page.
I couldn’t find any documentation stating whether Dremio version 3 supports Parquet v2.0. I am using Dremio build 3.0.1-201811132128360291-804fe82.
Stack trace below:
2018-11-27 14:47:19,929 [qtp1450348360-138] ERROR c.d.d.server.GenericExceptionMapper - Unexpected exception when processing POST http://localhost:9047/apiv2/source/Wealth%20Data%20Cube/file_preview/test/python/InstrumentPerformance : java.lang.RuntimeException: Error in parquet reader (complex).
Message: Failure in setting up reader
Parquet Metadata: ParquetMetaData{FileMetaData{schema: message schema {
optional int32 AnalysisPeriodId;
optional binary InstrumentId (UTF8);
optional binary CurrencyCode (UTF8);
optional binary PriceCode (UTF8);
optional binary DatasourceCode (UTF8);
optional binary FrequencyCode (UTF8);
optional int64 PriceDate (TIMESTAMP_MILLIS);
optional double Price;
optional int64 PriorPriceDate (TIMESTAMP_MILLIS);
optional double PriorPrice;
optional double PercentageReturn;
optional double ReturnValue;
optional boolean PeriodActive;
optional binary ReturnSource (UTF8);
}
, metadata: {}}, blocks: [BlockMetaData{1500028, 20704392 [ColumnMetaData{SNAPPY [AnalysisPeriodId] INT32 [RLE_DICTIONARY, PLAIN], 4}, ColumnMetaData{SNAPPY [InstrumentId] BINARY [RLE_DICTIONARY, PLAIN], 432727}, ColumnMetaData{SNAPPY [CurrencyCode] BINARY [RLE_DICTIONARY, PLAIN], 1196536}, ColumnMetaData{SNAPPY [PriceCode] BINARY [RLE_DICTIONARY, PLAIN], 1416636}, ColumnMetaData{SNAPPY [DatasourceCode] BINARY [RLE_DICTIONARY, PLAIN], 2178741}, ColumnMetaData{SNAPPY [FrequencyCode] BINARY [RLE_DICTIONARY, PLAIN], 2181952}, ColumnMetaData{SNAPPY [PriceDate] INT64 [RLE_DICTIONARY, PLAIN], 2510174}, ColumnMetaData{SNAPPY [Price] DOUBLE [RLE_DICTIONARY, PLAIN], 3786665}, ColumnMetaData{SNAPPY [PriorPriceDate] INT64 [RLE_DICTIONARY, PLAIN], 6900285}, ColumnMetaData{SNAPPY [PriorPrice] DOUBLE [RLE_DICTIONARY, PLAIN], 8198831}, ColumnMetaData{SNAPPY [PercentageReturn] DOUBLE [RLE_DICTIONARY, PLAIN], 11309961}, ColumnMetaData{SNAPPY [ReturnValue] DOUBLE [RLE_DICTIONARY, PLAIN], 14931140}, ColumnMetaData{SNAPPY [PeriodActive] BOOLEAN [RLE], 17045526}, ColumnMetaData{SNAPPY [ReturnSource] BINARY [RLE_DICTIONARY, PLAIN], 17171947}]}]}
java.lang.RuntimeException: Error in parquet reader (complex).
Message: Failure in setting up reader
Parquet Metadata: ParquetMetaData{FileMetaData{schema: message schema {
optional int32 AnalysisPeriodId;
optional binary InstrumentId (UTF8);
optional binary CurrencyCode (UTF8);
optional binary PriceCode (UTF8);
optional binary DatasourceCode (UTF8);
optional binary FrequencyCode (UTF8);
optional int64 PriceDate (TIMESTAMP_MILLIS);
optional double Price;
optional int64 PriorPriceDate (TIMESTAMP_MILLIS);
optional double PriorPrice;
optional double PercentageReturn;
optional double ReturnValue;
optional boolean PeriodActive;
optional binary ReturnSource (UTF8);
}
, metadata: {}}, blocks: [BlockMetaData{1500028, 20704392 [ColumnMetaData{SNAPPY [AnalysisPeriodId] INT32 [RLE_DICTIONARY, PLAIN], 4}, ColumnMetaData{SNAPPY [InstrumentId] BINARY [RLE_DICTIONARY, PLAIN], 432727}, ColumnMetaData{SNAPPY [CurrencyCode] BINARY [RLE_DICTIONARY, PLAIN], 1196536}, ColumnMetaData{SNAPPY [PriceCode] BINARY [RLE_DICTIONARY, PLAIN], 1416636}, ColumnMetaData{SNAPPY [DatasourceCode] BINARY [RLE_DICTIONARY, PLAIN], 2178741}, ColumnMetaData{SNAPPY [FrequencyCode] BINARY [RLE_DICTIONARY, PLAIN], 2181952}, ColumnMetaData{SNAPPY [PriceDate] INT64 [RLE_DICTIONARY, PLAIN], 2510174}, ColumnMetaData{SNAPPY [Price] DOUBLE [RLE_DICTIONARY, PLAIN], 3786665}, ColumnMetaData{SNAPPY [PriorPriceDate] INT64 [RLE_DICTIONARY, PLAIN], 6900285}, ColumnMetaData{SNAPPY [PriorPrice] DOUBLE [RLE_DICTIONARY, PLAIN], 8198831}, ColumnMetaData{SNAPPY [PercentageReturn] DOUBLE [RLE_DICTIONARY, PLAIN], 11309961}, ColumnMetaData{SNAPPY [ReturnValue] DOUBLE [RLE_DICTIONARY, PLAIN], 14931140}, ColumnMetaData{SNAPPY [PeriodActive] BOOLEAN [RLE], 17045526}, ColumnMetaData{SNAPPY [ReturnSource] BINARY [RLE_DICTIONARY, PLAIN], 17171947}]}]}
at com.dremio.exec.store.parquet2.ParquetRowiseReader.handleAndRaise(ParquetRowiseReader.java:316) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.exec.store.parquet2.ParquetRowiseReader.setup(ParquetRowiseReader.java:308) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.exec.store.parquet.ParquetFormatPlugin$PreviewReader.nextReader(ParquetFormatPlugin.java:222) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.exec.store.parquet.ParquetFormatPlugin$PreviewReader.setup(ParquetFormatPlugin.java:198) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.dac.model.sources.FormatTools.getData(FormatTools.java:313) ~[dremio-dac-backend-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.dac.model.sources.FormatTools.previewData(FormatTools.java:247) ~[dremio-dac-backend-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.dac.resource.SourceResource.previewFileFormat(SourceResource.java:297) ~[dremio-dac-backend-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_192]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_192]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_192]
at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_192]
at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory$1.invoke(ResourceMethodInvocationHandlerFactory.java:81) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:144) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:161) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$TypeOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:205) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:99) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:389) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:347) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:102) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.ServerRuntime$2.run(ServerRuntime.java:326) ~[jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.internal.Errors$1.call(Errors.java:271) [jersey-common-2.25.1.jar:na]
at org.glassfish.jersey.internal.Errors$1.call(Errors.java:267) [jersey-common-2.25.1.jar:na]
at org.glassfish.jersey.internal.Errors.process(Errors.java:315) [jersey-common-2.25.1.jar:na]
at org.glassfish.jersey.internal.Errors.process(Errors.java:297) [jersey-common-2.25.1.jar:na]
at org.glassfish.jersey.internal.Errors.process(Errors.java:267) [jersey-common-2.25.1.jar:na]
at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:317) [jersey-common-2.25.1.jar:na]
at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:305) [jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:1154) [jersey-server-2.25.1.jar:na]
at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:473) [jersey-container-servlet-core-2.25.1.jar:na]
at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:427) [jersey-container-servlet-core-2.25.1.jar:na]
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:388) [jersey-container-servlet-core-2.25.1.jar:na]
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:341) [jersey-container-servlet-core-2.25.1.jar:na]
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:228) [jersey-container-servlet-core-2.25.1.jar:na]
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:812) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1669) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.servlets.UserAgentFilter.doFilter(UserAgentFilter.java:83) [jetty-servlets-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.servlets.GzipFilter.doFilter(GzipFilter.java:301) [jetty-servlets-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1127) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:515) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1061) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.handler.RequestLogHandler.handle(RequestLogHandler.java:95) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.Server.handle(Server.java:499) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:311) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:258) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.io.AbstractConnection$2.run(AbstractConnection.java:544) [jetty-io-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:635) [jetty-util-9.2.22.v20170606.jar:9.2.22.v20170606]
at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:555) [jetty-util-9.2.22.v20170606.jar:9.2.22.v20170606]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_192]
Suppressed: java.lang.IllegalStateException: Memory was leaked by query. Memory leaked: (131072)
Allocator(job-serialize) 0/131072/779328/9223372036854775807 (res/actual/peak/limit)
at org.apache.arrow.memory.BaseAllocator.close(BaseAllocator.java:421) ~[arrow-memory-0.10.0-201810190306530881-d55177b-dremio.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.common.AutoCloseables.close(AutoCloseables.java:92) ~[dremio-common-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.common.AutoCloseables.close(AutoCloseables.java:71) ~[dremio-common-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.sabot.exec.context.OperatorContextImpl.close(OperatorContextImpl.java:196) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.common.AutoCloseables.close(AutoCloseables.java:92) ~[dremio-common-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.common.AutoCloseables$RollbackCloseable.close(AutoCloseables.java:174) ~[dremio-common-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
at com.dremio.dac.model.sources.FormatTools.getData(FormatTools.java:356) ~[dremio-dac-backend-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
... 48 common frames omitted
Caused by: java.lang.RuntimeException: Error reading page.
File path: /test/python/InstrumentPerformance
Row count: 1500028
Column Chunk Metadata: ColumnMetaData{SNAPPY [AnalysisPeriodId] INT32 [RLE_DICTIONARY, PLAIN], 4}
Page Header: PageHeader(type:DATA_PAGE_V2, uncompressed_page_size:118472, compressed_page_size:117817, data_page_header_v2:DataPageHeaderV2(num_values:236000, num_nulls:0, num_rows:236000, encoding:RLE_DICTIONARY, definition_levels_byte_length:4, repetition_levels_byte_length:0, statistics:Statistics(max:27 00 00 00, min:02 00 00 00, null_count:0)))
File offset: 4
Size: 432723
Value read so far: 236000
at org.apache.parquet.hadoop.ColumnChunkIncReadStore$ColumnChunkIncPageReader.readPage(ColumnChunkIncReadStore.java:238) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.column.impl.ColumnReaderImpl.readPage(ColumnReaderImpl.java:537) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.column.impl.ColumnReaderImpl.checkRead(ColumnReaderImpl.java:530) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.column.impl.ColumnReaderImpl.consume(ColumnReaderImpl.java:642) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.column.impl.ColumnReaderImpl.<init>(ColumnReaderImpl.java:358) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.column.impl.ColumnReadStoreImpl.newMemColumnReader(ColumnReadStoreImpl.java:82) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.column.impl.ColumnReadStoreImpl.getColumnReader(ColumnReadStoreImpl.java:77) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.io.RecordReaderImplementation.<init>(RecordReaderImplementation.java:270) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.io.MessageColumnIO$1.visit(MessageColumnIO.java:140) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.io.MessageColumnIO$1.visit(MessageColumnIO.java:106) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.filter2.compat.FilterCompat$NoOpFilter.accept(FilterCompat.java:154) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.io.MessageColumnIO.getRecordReader(MessageColumnIO.java:106) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.io.MessageColumnIO.getRecordReader(MessageColumnIO.java:82) ~[parquet-column-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at com.dremio.exec.store.parquet2.ParquetRowiseReader.setup(ParquetRowiseReader.java:303) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:3.0.1-201811132128360291-804fe82]
... 51 common frames omitted
Caused by: org.apache.parquet.hadoop.DirectCodecFactory$DirectCodecPool$ParquetCompressionCodecException: null
at org.apache.parquet.hadoop.DirectCodecFactory$FullDirectDecompressor.decompress(DirectCodecFactory.java:232) ~[parquet-hadoop-1.8.1-fast-201808062344590567-7d04803.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.hadoop.ColumnChunkIncReadStore$ColumnChunkIncPageReader.uncompressPage(ColumnChunkIncReadStore.java:283) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:1.8.1-fast-201808062344590567-7d04803]
at org.apache.parquet.hadoop.ColumnChunkIncReadStore$ColumnChunkIncPageReader.readPage(ColumnChunkIncReadStore.java:208) ~[dremio-sabot-kernel-3.0.1-201811132128360291-804fe82.jar:1.8.1-fast-201808062344590567-7d04803]
... 64 common frames omitted