Hello everyone,
I was thinking about replacing some pyODBC connections with Arrow Flight, as used in dremio_client, but I am not sure I have set it up correctly.
The pyodbc connection returns 10k rows in ~1 s, while the one going through dremio_client takes ~20 s…
I was expecting the opposite result.
Do you have any idea why? The code is basically copied and pasted from the sample:
Many thanks!
import argparse
import sys
from pyarrow import flight
import dremio_client
import pandas as pd
import streamlit as st
import pyodbc
import time
class DremioClientAuthMiddlewareFactory(flight.ClientMiddlewareFactory):
    """Middleware factory that keeps the credential reported by its middlewares.

    ``start_call`` hands out ``DremioClientAuthMiddleware`` instances bound
    to this factory; they report back through ``set_call_credential``.
    """

    def __init__(self):
        # Stays empty until a credential is stored via set_call_credential().
        self.call_credential = []

    def set_call_credential(self, call_credential):
        """Replace the stored credential with *call_credential*."""
        self.call_credential = call_credential

    def start_call(self, info):
        """Return a new middleware bound to this factory (*info* is not used)."""
        return DremioClientAuthMiddleware(self)
class DremioClientAuthMiddleware(flight.ClientMiddleware):
    """Client middleware that forwards the ``authorization`` response header.

    Args:
        factory: Object exposing ``set_call_credential``; it receives the
            credential extracted from the response headers.
    """

    def __init__(self, factory):
        self.factory = factory

    def received_headers(self, headers):
        """Store the first ``authorization`` header value on the factory.

        The header name is matched case-insensitively and the value is looked
        up under the key exactly as it appears in *headers*. When no non-empty
        ``authorization`` header is present, the factory's credential is left
        untouched instead of raising ``IndexError``.

        Args:
            headers: Mapping of header name to a sequence of string values.
        """
        for key in headers:
            if key.lower() != 'authorization':
                continue
            # Look up with the actual key, not the lowercase literal, so a
            # differently-cased header name is still found.
            values = headers[key]
            if values:
                self.factory.set_call_credential([
                    b'authorization', values[0].encode("utf-8")])
                return
def connectDremioODBC():
    """Run ``sqlquery`` against Dremio over ODBC and print the resulting DataFrame.

    Reads the module-level names ``hostname``, ``username``, ``password`` and
    ``sqlquery``; they must be defined before this function is called.
    """
    port = 31010
    driver = "Dremio Connector"
    connection_string = (
        "Driver={};ConnectionType=Direct;HOST={};PORT={};"
        "AuthenticationType=Plain;UID={};PWD={}"
    ).format(driver, hostname, port, username, password)
    cnxn = pyodbc.connect(connection_string, autocommit=True)
    try:
        df = pd.read_sql(sqlquery, cnxn)
    finally:
        # Always release the connection, even when the query fails.
        cnxn.close()
    print(df)
def connectDremio():
    """Run ``sqlquery`` against Dremio over Arrow Flight and print the resulting DataFrame.

    Reads the module-level names ``hostname``, ``username``, ``password`` and
    ``sqlquery``; they must be defined before this function is called.
    """
    flightport = "32010"
    scheme = "grpc+tcp"
    connection_args = {}
    initial_options = flight.FlightCallOptions(headers=[
        (b'routing-tag', b'test-routing-tag'),
        (b'routing-queue', b'Low Cost User Queries')
    ])
    client_auth_middleware = DremioClientAuthMiddlewareFactory()
    client = flight.FlightClient(
        "{}://{}:{}".format(scheme, hostname, flightport),
        middleware=[client_auth_middleware],
        **connection_args
    )
    try:
        bearer_token = client.authenticate_basic_token(
            username, password, initial_options)
        options = flight.FlightCallOptions(headers=[bearer_token])
        flight_desc = flight.FlightDescriptor.for_command(sqlquery)
        # No separate get_schema() call: its result was never used and it
        # only added a server round trip before the real request.
        flight_info = client.get_flight_info(flight_desc, options)
        # NOTE(review): only the first endpoint is read — confirm the server
        # never returns more than one for this query.
        reader = client.do_get(flight_info.endpoints[0].ticket, options)
        df = reader.read_pandas()
    finally:
        # Disconnect even if authentication or the query fails.
        client.close()
    print(df)
# Script entry point: time one query end to end and print the elapsed seconds.
if __name__ == "__main__":
    print('Begin')
    # perf_counter() is a monotonic, high-resolution clock meant for measuring durations.
    start = time.perf_counter()
    # The connect* functions read these as module-level names.
    username = "yen"
    password = "secret"
    sqlquery = "SELECT * FROM sql1.SQL1.dbo.my_great_table"
    hostname = "localhost"
    # Call connectDremioODBC() here instead to time the ODBC path.
    connectDremio()
    end = time.perf_counter()
    print(end - start)