import os
import requests
import json
import time
from enum import Enum, auto
from http import HTTPStatus
class DremioProxy(object):
''' A dremio proxy class to encapsulate all supported interactions with Dremio REST server
'''
JOB_COMPLETED_STATE = 'COMPLETED'
JOB_CANCELED_STATE = 'CANCELED'
JOB_FAILED_STATE = 'FAILED'
JOB_FINAL_STATES = [JOB_COMPLETED_STATE, JOB_CANCELED_STATE, JOB_FAILED_STATE]
def __init__(self, dremio_server, username, password, poll_frequency=1):
self.__dremio_server = dremio_server
self.__username = username
self.__password = password
self.__headers = {}
self.__poll_frequency = poll_frequency # in seconds
def login(self):
print(f'Log in on Dremio server: {self.__dremio_server}')
data = {'userName': self.__username, 'password': self.__password}
headers = {'content-type': 'application/json'}
response = self.__send_post_request(f'{self.__dremio_server}/apiv2/login', data, headers)
self.__headers = {'Content-Type': 'application/json',
'cache-control': 'no-cache',
f'Authorization': '_dremio{token}'.format(token=json.loads(response.text)['token'])}
print(self.__headers)
def create_physical_dataset(self, file_path):
print(f'Create physical dataset: {file_path}')
ID, path = self.__get_info(file_path)
print(path)
data = {
'entityType': 'dataset',
'id': ID,
'path': path,
'type': "PHYSICAL_DATASET",
'format': {
'type': 'Parquet'
}
}
self.__send_post_request(f'{self.__dremio_server}/api/v3/catalog/{ID}',data)
def __get_info(self, path):
url = f'{self.__dremio_server}/api/v3/catalog/by-path/{path}'
response = self.__send_get_request(url)
response_data = json.loads(response.text)
quoted_text = requests.utils.quote(response_data['id'], safe='')
print(quoted_text)
return quoted_text, response_data['path']
def __send_get_request(self, url, throw_evaluation_exception=True):
try:
print(url)
response = requests.get(url, headers=self.__headers)
print(response.text)
print(response.status_code)
except Exception as e:
LOGGER.exception(f'Exception thrown while sending GET request. URL: {url}, details: {e}')
raise self.__evaluate_requests_response(url, response, throw_exception=throw_evaluation_exception)
return response
def __send_post_request(self, url, data, headers=None, throw_evaluation_exception=True):
try:
print(data)
print(url)
response = requests.post(url,
headers=headers if headers else self.__headers,
data=json.dumps(data))
print(response.text)
print(response.status_code)
except Exception as e:
LOGGER.exception(f'Exception thrown while sending POST request. URL: {url}, '
f'headers: {headers}, data: {data}, details: {e}')
raise self.__evaluate_requests_response(url, response, data, throw_exception=throw_evaluation_exception)
return response
path = ‘BRDF Storage Account/curated/TSA/ClaimsData2007-2009/TsaClaimsData2007-2009Dataset.parquet’
dremio_client = DremioProxy(server_name, username, password)
dremio_client.login()
dremio_client.create_physical_dataset(path)
gettting logged in successfully
Get request, response - {“entityType”:“file”,“id”:“dremio:/BRDF Storage Account/curated/TSA/ClaimsData2007-2009/TsaClaimsData2007-2009Dataset.parquet”,“path”:[“BRDF Storage Account”,“curated”,“TSA”,“ClaimsData2007-2009”,“TsaClaimsData2007-2009Dataset.parquet”]}
ID that I am getting in return - “dremio:/BRDF Storage Account/curated/TSA/ClaimsData2007-2009/TsaClaimsData2007-2009Dataset.parquet”
post url - {server_name}/api/v3/catalog/dremio%3A%2FBRDF%20Storage%20Account%2Fcurated%2FTSA%2FClaimsData2007-2009%2FTsaClaimsData2007-2009Dataset.parquet
with this data : {‘entityType’: ‘dataset’, ‘id’: ‘dremio%3A%2FBRDF%20Storage%20Account%2Fcurated%2FTSA%2FClaimsData2007-2009%2FTsaClaimsData2007-2009Dataset.parquet’, ‘path’: [‘BRDF Storage Account’, ‘curated’, ‘TSA’, ‘ClaimsData2007-2009’, ‘TsaClaimsData2007-2009Dataset.parquet’], ‘type’: ‘PHYSICAL_DATASET’, ‘format’: {‘type’: ‘Parquet’}}
getting this response :
{“errorMessage”:“Something went wrong. Please check the log file for details, see https://docs.dremio.com/advanced-administration/log-files.html",“moreInfo”:"HTTP 404 Not Found”}