Dremio OS hive metadata error (Windows)

Hello,

When trying to access an external Hive table that refers to a time-based partitioned Parquet dataset, I get the following error:

Failure while attempting to read metadata for PROD."default".installation

When I connect to HDFS directly with the same Dremio installation, I can see the folder that the error reports as "Not a file". When I navigate further down to the actual parquet files, Dremio has no trouble opening the partition.

How can I enable this type of Hive table (i.e. partitioned parquet tables) in Dremio?

2018-11-08 14:57:54,394 [qtp1169903663-118] INFO  c.dremio.exec.catalog.DatasetManager - User Error Occurred [ErrorId: 9d9166c7-b558-42be-b4d6-55f347f3f9b5]
com.dremio.common.exceptions.UserException: Failure while attempting to read metadata for PROD."default".installation.
	at com.dremio.common.exceptions.UserException$Builder.build(UserException.java:746) ~[dremio-common-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.catalog.DatasetManager.getTableFromPlugin(DatasetManager.java:316) [dremio-sabot-kernel-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.catalog.DatasetManager.getTable(DatasetManager.java:191) [dremio-sabot-kernel-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.catalog.CatalogImpl.getTable(CatalogImpl.java:128) [dremio-sabot-kernel-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.catalog.DelegatingCatalog.getTable(DelegatingCatalog.java:59) [dremio-sabot-kernel-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.catalog.CachingCatalog.getTable(CachingCatalog.java:66) [dremio-sabot-kernel-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_181]
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_181]
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_181]
	at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_181]
	at org.glassfish.hk2.utilities.reflection.ReflectionHelper.invoke(ReflectionHelper.java:1287) [hk2-utils-2.5.0-b32.jar:na]
	at org.jvnet.hk2.internal.MethodInterceptorImpl.internalInvoke(MethodInterceptorImpl.java:109) [hk2-locator-2.5.0-b32.jar:na]
	at org.jvnet.hk2.internal.MethodInterceptorImpl.invoke(MethodInterceptorImpl.java:125) [hk2-locator-2.5.0-b32.jar:na]
	at org.jvnet.hk2.internal.MethodInterceptorInvocationHandler.invoke(MethodInterceptorInvocationHandler.java:62) [hk2-locator-2.5.0-b32.jar:na]
	at com.sun.proxy.$Proxy127.getTable(Unknown Source) [na:na]
	at com.dremio.dac.explore.DatasetsResource.getDatasetSummary(DatasetsResource.java:268) [dremio-dac-backend-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.dac.explore.DatasetsResource.newUntitled(DatasetsResource.java:141) [dremio-dac-backend-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.dac.explore.DatasetsResource.newUntitledFromParent(DatasetsResource.java:208) [dremio-dac-backend-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_181]
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_181]
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_181]
	at java.lang.reflect.Method.invoke(Method.java:498) ~[na:1.8.0_181]
	at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory$1.invoke(ResourceMethodInvocationHandlerFactory.java:81) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:144) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:161) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$TypeOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:205) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:99) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:389) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:347) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:102) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.ServerRuntime$2.run(ServerRuntime.java:326) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.internal.Errors$1.call(Errors.java:271) [jersey-common-2.25.1.jar:na]
	at org.glassfish.jersey.internal.Errors$1.call(Errors.java:267) [jersey-common-2.25.1.jar:na]
	at org.glassfish.jersey.internal.Errors.process(Errors.java:315) [jersey-common-2.25.1.jar:na]
	at org.glassfish.jersey.internal.Errors.process(Errors.java:297) [jersey-common-2.25.1.jar:na]
	at org.glassfish.jersey.internal.Errors.process(Errors.java:267) [jersey-common-2.25.1.jar:na]
	at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:317) [jersey-common-2.25.1.jar:na]
	at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:305) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:1154) [jersey-server-2.25.1.jar:na]
	at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:473) [jersey-container-servlet-core-2.25.1.jar:na]
	at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:427) [jersey-container-servlet-core-2.25.1.jar:na]
	at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:388) [jersey-container-servlet-core-2.25.1.jar:na]
	at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:341) [jersey-container-servlet-core-2.25.1.jar:na]
	at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:228) [jersey-container-servlet-core-2.25.1.jar:na]
	at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:812) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1669) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.servlets.UserAgentFilter.doFilter(UserAgentFilter.java:83) [jetty-servlets-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.servlets.GzipFilter.doFilter(GzipFilter.java:301) [jetty-servlets-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1127) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:515) [jetty-servlet-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1061) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.handler.RequestLogHandler.handle(RequestLogHandler.java:95) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.Server.handle(Server.java:499) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:311) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:258) [jetty-server-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.io.AbstractConnection$2.run(AbstractConnection.java:544) [jetty-io-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:635) [jetty-util-9.2.22.v20170606.jar:9.2.22.v20170606]
	at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:555) [jetty-util-9.2.22.v20170606.jar:9.2.22.v20170606]
	at java.lang.Thread.run(Thread.java:748) [na:1.8.0_181]
Caused by: java.io.IOException: Not a file: hdfs://prod/folder/data/installation/yearMonth=2018-7
	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:329) ~[hadoop-mapreduce-client-core-2.8.3.jar:na]
	at com.dremio.exec.store.hive.DatasetBuilder$HiveSplitsGenerator.runInner(DatasetBuilder.java:420) ~[dremio-hive-plugin-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.store.hive.DatasetBuilder.buildSplits(DatasetBuilder.java:552) ~[dremio-hive-plugin-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.store.hive.DatasetBuilder.buildIfNecessary(DatasetBuilder.java:324) ~[dremio-hive-plugin-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.store.hive.DatasetBuilder.getDataset(DatasetBuilder.java:246) ~[dremio-hive-plugin-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	at com.dremio.exec.catalog.DatasetManager.getTableFromPlugin(DatasetManager.java:298) [dremio-sabot-kernel-3.0.0-201810262305460004-5c90d75.jar:3.0.0-201810262305460004-5c90d75]
	... 62 common frames omitted

Hello,

I have a feeling it might be related to this:
https://community.hortonworks.com/content/supportkb/188319/errorexception-in-thread-main-javaioioexception-no.html

If this is the case, the question could be rephrased as: how can I set the SQL Context for Hive in Dremio?

Hi @orlandob

Can you please do a hadoop fs -ls on hdfs://prod/folder/data/installation/yearMonth=2018-7 and see if you have more folders under it?

Thanks
@balaji.ramaswamy

Hello @balaji.ramaswamy,

  • Under it are parquet file partitions:

part-00000-d76126d8-e4b6-4307-b627-38b4402f4c10.c000.snappy.parquet
part-00001-d76126d8-e4b6-4307-b627-38b4402f4c10.c000.snappy.parquet
part-00002-d76126d8-e4b6-4307-b627-38b4402f4c10.c000.snappy.parquet
part-00003-d76126d8-e4b6-4307-b627-38b4402f4c10.c000.snappy.parquet
part-00004-d76126d8-e4b6-4307-b627-38b4402f4c10.c000.snappy.parquet
part-00005-d76126d8-e4b6-4307-b627-38b4402f4c10.c000.snappy.parquet

  • But I also have cases with tables that do not work either:

linked to e.g. /prod/folder/data/installation/year=2018
with under it
/prod/folder/data/installation/year=2018/month=5
/prod/folder/data/installation/year=2018/month=6

and another level under it
/prod/folder/data/installation/year=2018/month=6/day=1
/prod/folder/data/installation/year=2018/month=6/day=2

and then again parquet file partitions.

@orlandob,

Are day=1,2 and month=5,6 under year=2018? And I assume month and day are also partitions in the Hive table DDL? Would you be able to send us the output of “SHOW CREATE TABLE <table_name>” from the Hive CLI?

Thanks
@balaji.ramaswamy

Hi again @balaji.ramaswamy,

1. Are day=1,2 and month=5,6 under year=2018? And I assume month and day are also partitions in the Hive table DDL? Would you be able to send us the Show create table from Hive CLI?

Yes, the day and month folders are nested under year=2018 in the second case;
in the first case the folder is yearMonth, with the parquet partition files directly under it.

But neither works. The error log I reported in the opening message was for the first case with ‘yearMonth’

2. Results from show create table

1st (reported case)

CREATE EXTERNAL TABLE `installation`(
`time` timestamp,
`s_01_installation` float,
`s_02_installation` float,
# ... (more floats)
`yearmonth` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://prod/folder/data/installation'
TBLPROPERTIES (
'numFiles'='300',
'spark.sql.create.version'='2.3.0.2.6.5.0-292',
'spark.sql.sources.schema.numParts'='1',
'spark.sql.sources.schema.part.0'='{\"type\":\"struct\",\"fields\":[{\"name\":\"time\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1420A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1420A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1420A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1420A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1420A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1420A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1310A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1310A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1310A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1310A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1310A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1310A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1320A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1320A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1320A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1320A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1320A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1320A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1400A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1400A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_
1400A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1400A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1400A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1400A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1410A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1410A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1410A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1410A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1410A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1410A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1300A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1300A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1300A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1300A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1300A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1300A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1220A332_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1220A240_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1220A333_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1220A334_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1220A339_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"TS_1220A340_WWA-EW-HLBA\",\"type\":\"float\",\"nullable\":true,\"metada
ta\":{}},{\"name\":\"yearMonth\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}',
'totalSize'='48290325733',
'transient_lastDdlTime'='1537457475')

2nd alternative case

CREATE EXTERNAL TABLE `installation`(
`time` bigint,
`partition_key` double,
`s_01_installation` float,
`s_02_installation` float,
#... (more floats)
)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://prod/folder/staging/installation'
TBLPROPERTIES (
'numFiles'='123523',
'totalSize'='168657646543',
'transient_lastDdlTime'='1537264170')