Bug Report: IndexOutOfBoundsException When Filtering Parquet Data in Dremio
Queries (1) and (3) work fine, but query (2) fails with an IndexOutOfBoundsException:
-- 1
SELECT * FROM "local_data_parquet"."file_2022-03-31.parquet_0";
-- 2
SELECT * FROM "local_data_parquet"."file_2022-03-31.parquet_0"
where key_metric='2a753ac3-c241-41fb-8675-6ea80a72ef4c';
-- 3
SELECT * FROM "local_data_parquet"."file_2022-03-31.parquet_0"
WHERE key_metric = CAST('2a753ac3-c241-41fb-8675-6ea80a72ef4c' AS VARBINARY);
Environment
- Dremio Version: Build 25.1.1-202409260159070462-716c0676 Community Edition
- Parquet File Format Version: 1.0
- Compression Used: None
- Storage: local file
- Execution Mode: Single Node
- Java Version: OpenJDK Runtime Environment (Red_Hat-11.0.17.0.8-2.el7_9) (build 11.0.17+8-LTS)
Stack trace
2025-02-07 22:11:13,517 [Fabric-RPC-Offload31] INFO c.d.exec.work.foreman.AttemptManager - 18597aff-3b47-2a89-a5e5-5353d85a0300: State change requested RUNNING --> FAILED, Exception com.dremio.common.exceptions.UserRemoteException: SYSTEM ERROR: IndexOutOfBoundsException
SqlOperatorImpl PARQUET_ROW_GROUP_SCAN
Location 1:5:4
SqlOperatorImpl PARQUET_ROW_GROUP_SCAN
Location 1:5:4
ErrorOrigin: EXECUTOR
[Error Id: b9f9d4fa-b3e8-4a75-aa87-511759f8a194 ]
(java.lang.IndexOutOfBoundsException) null
java.nio.Buffer.checkIndex():693
java.nio.HeapByteBuffer.getLong():458
org.apache.parquet.internal.column.columnindex.LongColumnIndexBuilder.convert():87
org.apache.parquet.internal.column.columnindex.LongColumnIndexBuilder.addMinMaxFromBytes():96
org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.fill():590
org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.build():530
org.apache.parquet.format.converter.ParquetMetadataConverter.fromParquetColumnIndex():2245
com.dremio.exec.store.parquet.ColumnIndexProvider.<init>():93
com.dremio.exec.store.parquet.StreamPerColumnProvider.getColumnIndexProvider():129
com.dremio.parquet.reader.metadata.RowGroups.getRowGroupMetadataFor():239
com.dremio.extra.exec.store.dfs.parquet.ParquetVectorizedReader.setup():434
com.dremio.exec.store.parquet.UnifiedParquetReader.setup():261
com.dremio.exec.store.parquet.UpPromotingParquetReader.setup():172
com.dremio.exec.store.parquet.ParquetCoercionReader.setup():88
com.dremio.sabot.op.scan.ScanOperator.setupReaderAsCorrectUser():399
com.dremio.sabot.op.scan.ScanOperator.setupReader():389
com.dremio.sabot.op.scan.ScanOperator.setup():354
com.dremio.sabot.driver.SmartOp$SmartProducer.setup():638
com.dremio.sabot.driver.Pipe$SetupVisitor.visitProducer():90
com.dremio.sabot.driver.Pipe$SetupVisitor.visitProducer():70
com.dremio.sabot.driver.SmartOp$SmartProducer.accept():609
com.dremio.sabot.driver.StraightPipe.setup():100
com.dremio.sabot.driver.StraightPipe.setup():100
com.dremio.sabot.driver.StraightPipe.setup():100
com.dremio.sabot.driver.StraightPipe.setup():100
com.dremio.sabot.driver.Pipeline.setup():79
com.dremio.sabot.exec.fragment.FragmentExecutor.setupExecution():735
com.dremio.sabot.exec.fragment.FragmentExecutor.run():514
com.dremio.sabot.exec.fragment.FragmentExecutor$AsyncTaskImpl.run():1234
com.dremio.sabot.task.AsyncTaskWrapper.run():130
com.dremio.sabot.task.slicing.SlicingThread.mainExecutionLoop():279
com.dremio.sabot.task.slicing.SlicingThread.run():186
2025-02-07 22:11:13,517 [e0 - 18597aff-3b47-2a89-a5e5-5353d85a0300:frag:1:0] ERROR com.dremio.sabot.driver.SmartOp - IndexOutOfBoundsException
com.dremio.common.exceptions.UserException: IndexOutOfBoundsException
at com.dremio.common.exceptions.UserException$Builder.build(UserException.java:984)
at com.dremio.sabot.driver.SmartOp.contextualize(SmartOp.java:203)
at com.dremio.sabot.driver.SmartOp$SmartProducer.setup(SmartOp.java:643)
at com.dremio.sabot.driver.Pipe$SetupVisitor.visitProducer(Pipe.java:90)
at com.dremio.sabot.driver.Pipe$SetupVisitor.visitProducer(Pipe.java:70)
at com.dremio.sabot.driver.SmartOp$SmartProducer.accept(SmartOp.java:609)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.Pipeline.setup(Pipeline.java:79)
at com.dremio.sabot.exec.fragment.FragmentExecutor.setupExecution(FragmentExecutor.java:735)
at com.dremio.sabot.exec.fragment.FragmentExecutor.run(FragmentExecutor.java:514)
at com.dremio.sabot.exec.fragment.FragmentExecutor$AsyncTaskImpl.run(FragmentExecutor.java:1234)
at com.dremio.sabot.task.AsyncTaskWrapper.run(AsyncTaskWrapper.java:130)
at com.dremio.sabot.task.slicing.SlicingThread.mainExecutionLoop(SlicingThread.java:279)
at com.dremio.sabot.task.slicing.SlicingThread.run(SlicingThread.java:186)
Caused by: java.lang.IndexOutOfBoundsException: null
at java.base/java.nio.Buffer.checkIndex(Buffer.java:693)
at java.base/java.nio.HeapByteBuffer.getLong(HeapByteBuffer.java:458)
at org.apache.parquet.internal.column.columnindex.LongColumnIndexBuilder.convert(LongColumnIndexBuilder.java:87)
at org.apache.parquet.internal.column.columnindex.LongColumnIndexBuilder.addMinMaxFromBytes(LongColumnIndexBuilder.java:96)
at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.fill(ColumnIndexBuilder.java:590)
at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.build(ColumnIndexBuilder.java:530)
at org.apache.parquet.format.converter.ParquetMetadataConverter.fromParquetColumnIndex(ParquetMetadataConverter.java:2245)
at com.dremio.exec.store.parquet.ColumnIndexProvider.<init>(ColumnIndexProvider.java:93)
at com.dremio.exec.store.parquet.StreamPerColumnProvider.getColumnIndexProvider(StreamPerColumnProvider.java:129)
at com.dremio.parquet.reader.metadata.RowGroups.getRowGroupMetadataFor(RowGroups.java:239)
at com.dremio.extra.exec.store.dfs.parquet.ParquetVectorizedReader.setup(ParquetVectorizedReader.java:434)
at com.dremio.exec.store.parquet.UnifiedParquetReader.setup(UnifiedParquetReader.java:261)
at com.dremio.exec.store.parquet.UpPromotingParquetReader.setup(UpPromotingParquetReader.java:172)
at com.dremio.exec.store.parquet.ParquetCoercionReader.setup(ParquetCoercionReader.java:88)
at com.dremio.sabot.op.scan.ScanOperator.setupReaderAsCorrectUser(ScanOperator.java:399)
at com.dremio.sabot.op.scan.ScanOperator.setupReader(ScanOperator.java:389)
at com.dremio.sabot.op.scan.ScanOperator.setup(ScanOperator.java:354)
at com.dremio.sabot.driver.SmartOp$SmartProducer.setup(SmartOp.java:638)
... 14 common frames omitted
2025-02-07 22:11:13,518 [FABRIC-rpc-event-queue] INFO c.dremio.sabot.exec.FragmentHandler - set cancel for fragment 18597aff-3b47-2a89-a5e5-5353d85a0300:0:0
2025-02-07 22:11:13,518 [FABRIC-rpc-event-queue] INFO c.d.s.exec.fragment.FragmentExecutor - Cancellation requested for fragment 18597aff-3b47-2a89-a5e5-5353d85a0300:0:0.
2025-02-07 22:11:13,518 [FABRIC-rpc-event-queue] INFO c.dremio.sabot.exec.FragmentHandler - set cancel for fragment 18597aff-3b47-2a89-a5e5-5353d85a0300:1:2
2025-02-07 22:11:13,518 [FABRIC-rpc-event-queue] INFO c.d.s.exec.fragment.FragmentExecutor - Cancellation requested for fragment 18597aff-3b47-2a89-a5e5-5353d85a0300:1:2.
2025-02-07 22:11:13,520 [e5 - 18597aff-3b47-2a89-a5e5-5353d85a0300:frag:0:0] INFO c.d.s.exec.fragment.FragmentExecutor - Query fragment is being transitioned to cancelled state
2025-02-07 22:11:13,520 [e5 - 18597aff-3b47-2a89-a5e5-5353d85a0300:frag:0:0] INFO c.d.s.e.f.FragmentStatusReporter - 18597aff-3b47-2a89-a5e5-5353d85a0300:0:0 scheduling weight 1: State to report: CANCELLED
2025-02-07 22:11:13,524 [e1 - 18597aff-3b47-2a89-a5e5-5353d85a0300:frag:1:2] ERROR com.dremio.sabot.driver.SmartOp - IndexOutOfBoundsException
com.dremio.common.exceptions.UserException: IndexOutOfBoundsException
at com.dremio.common.exceptions.UserException$Builder.build(UserException.java:984)
at com.dremio.sabot.driver.SmartOp.contextualize(SmartOp.java:203)
at com.dremio.sabot.driver.SmartOp$SmartProducer.setup(SmartOp.java:643)
at com.dremio.sabot.driver.Pipe$SetupVisitor.visitProducer(Pipe.java:90)
at com.dremio.sabot.driver.Pipe$SetupVisitor.visitProducer(Pipe.java:70)
at com.dremio.sabot.driver.SmartOp$SmartProducer.accept(SmartOp.java:609)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.StraightPipe.setup(StraightPipe.java:100)
at com.dremio.sabot.driver.Pipeline.setup(Pipeline.java:79)
at com.dremio.sabot.exec.fragment.FragmentExecutor.setupExecution(FragmentExecutor.java:735)
at com.dremio.sabot.exec.fragment.FragmentExecutor.run(FragmentExecutor.java:514)
at com.dremio.sabot.exec.fragment.FragmentExecutor$AsyncTaskImpl.run(FragmentExecutor.java:1234)
at com.dremio.sabot.task.AsyncTaskWrapper.run(AsyncTaskWrapper.java:130)
at com.dremio.sabot.task.slicing.SlicingThread.mainExecutionLoop(SlicingThread.java:279)
at com.dremio.sabot.task.slicing.SlicingThread.run(SlicingThread.java:186)
Caused by: java.lang.IndexOutOfBoundsException: null
at java.base/java.nio.Buffer.checkIndex(Buffer.java:693)
at java.base/java.nio.HeapByteBuffer.getLong(HeapByteBuffer.java:458)
at org.apache.parquet.internal.column.columnindex.LongColumnIndexBuilder.convert(LongColumnIndexBuilder.java:87)
at org.apache.parquet.internal.column.columnindex.LongColumnIndexBuilder.addMinMaxFromBytes(LongColumnIndexBuilder.java:96)
at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.fill(ColumnIndexBuilder.java:590)
at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.build(ColumnIndexBuilder.java:530)
at org.apache.parquet.format.converter.ParquetMetadataConverter.fromParquetColumnIndex(ParquetMetadataConverter.java:2245)
at com.dremio.exec.store.parquet.ColumnIndexProvider.<init>(ColumnIndexProvider.java:93)
at com.dremio.exec.store.parquet.StreamPerColumnProvider.getColumnIndexProvider(StreamPerColumnProvider.java:129)
at com.dremio.parquet.reader.metadata.RowGroups.getRowGroupMetadataFor(RowGroups.java:239)
at com.dremio.extra.exec.store.dfs.parquet.ParquetVectorizedReader.setup(ParquetVectorizedReader.java:434)
at com.dremio.exec.store.parquet.UnifiedParquetReader.setup(UnifiedParquetReader.java:261)
at com.dremio.exec.store.parquet.UpPromotingParquetReader.setup(UpPromotingParquetReader.java:172)
at com.dremio.exec.store.parquet.ParquetCoercionReader.setup(ParquetCoercionReader.java:88)
at com.dremio.sabot.op.scan.ScanOperator.setupReaderAsCorrectUser(ScanOperator.java:399)
at com.dremio.sabot.op.scan.ScanOperator.setupReader(ScanOperator.java:389)
at com.dremio.sabot.op.scan.ScanOperator.setup(ScanOperator.java:354)
at com.dremio.sabot.driver.SmartOp$SmartProducer.setup(SmartOp.java:638)
... 14 common frames omitted
2025-02-07 22:11:13,567 [async-query-logger19] INFO query.logger - Query: 18597aff-3b47-2a89-a5e5-5353d85a0300; outcome: FAILED
2025-02-07 22:11:13,596 [grpc-default-executor-159] INFO c.d.service.jobs.JobResultsStore - User Error Occurred [ErrorId: 80089d24-5762-4386-892e-3175848e7927]
com.dremio.common.exceptions.UserException: IndexOutOfBoundsException
at com.dremio.common.exceptions.UserException$Builder.build(UserException.java:984)
at com.dremio.service.jobs.JobResultsStore.loadJobData(JobResultsStore.java:150)
at com.dremio.service.jobs.JobResultsStore$LateJobLoader.load(JobResultsStore.java:347)
at com.dremio.service.jobs.JobDataImpl.range(JobDataImpl.java:50)
at com.dremio.service.jobs.LocalJobsService.getJobData(LocalJobsService.java:1530)
at com.dremio.service.jobs.JobsFlightProducer.getStream(JobsFlightProducer.java:78)
at com.dremio.dac.service.flight.CoordinatorFlightProducer.getStream(CoordinatorFlightProducer.java:61)
at org.apache.arrow.flight.FlightService.doGetCustom(FlightService.java:112)
at org.apache.arrow.flight.FlightBindingService$DoGetMethod.invoke(FlightBindingService.java:144)
at org.apache.arrow.flight.FlightBindingService$DoGetMethod.invoke(FlightBindingService.java:134)
at io.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:182)
at io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)
at io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)
at io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)
at io.grpc.Contexts$ContextualizedServerCallListener.onHalfClose(Contexts.java:86)
at io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)
at io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)
at io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)
at io.opentracing.contrib.grpc.TracingServerInterceptor$2.onHalfClose(TracingServerInterceptor.java:231)
at io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)
at io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)
at io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)
at com.dremio.service.grpc.ContextualizedServerInterceptor$1.lambda$onHalfClose$0(ContextualizedServerInterceptor.java:74)
at com.dremio.context.RequestContext.run(RequestContext.java:103)
at com.dremio.service.grpc.ContextualizedServerInterceptor$1.onHalfClose(ContextualizedServerInterceptor.java:74)
at io.grpc.PartialForwardingServerCallListener.onHalfClose(PartialForwardingServerCallListener.java:35)
at io.grpc.ForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:23)
at io.grpc.ForwardingServerCallListener$SimpleForwardingServerCallListener.onHalfClose(ForwardingServerCallListener.java:40)
at io.grpc.util.TransmitStatusRuntimeExceptionInterceptor$1.onHalfClose(TransmitStatusRuntimeExceptionInterceptor.java:74)
at io.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:356)
at io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:861)
at io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)