Apache Airflow dags w/ backend configuration bundle.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

227 lines
8.3 KiB

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from contextlib import closing
from typing import Any, Dict, List, Optional, Tuple, Union
import pyexasol
from airflow.hooks.dbapi import DbApiHook
from pyexasol import ExaConnection
class ExasolHook(DbApiHook):
"""
Interact with Exasol.
You can specify the pyexasol ``compression``, ``encryption``, ``json_lib``
and ``client_name`` parameters in the extra field of your connection
as ``{"compression": True, "json_lib": "rapidjson", etc}``.
See `pyexasol reference
<https://github.com/badoo/pyexasol/blob/master/docs/REFERENCE.md#connect>`_
for more details.
"""
conn_name_attr = "exasol_conn_id"
default_conn_name = "exasol_default"
conn_type = "exasol"
hook_name = "Exasol"
supports_autocommit = True
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.schema = kwargs.pop("schema", None)
def get_conn(self) -> ExaConnection:
conn_id = getattr(self, self.conn_name_attr)
conn = self.get_connection(conn_id)
conn_args = dict(
dsn=f"{conn.host}:{conn.port}",
user=conn.login,
password=conn.password,
schema=self.schema or conn.schema,
)
# check for parameters in conn.extra
for arg_name, arg_val in conn.extra_dejson.items():
if arg_name in ["compression", "encryption", "json_lib", "client_name"]:
conn_args[arg_name] = arg_val
conn = pyexasol.connect(**conn_args)
return conn
def get_pandas_df(
self, sql: Union[str, list], parameters: Optional[dict] = None, **kwargs
) -> None:
"""
Executes the sql and returns a pandas dataframe
:param sql: the sql statement to be executed (str) or a list of
sql statements to execute
:type sql: str or list
:param parameters: The parameters to render the SQL query with.
:type parameters: dict or iterable
:param kwargs: (optional) passed into pyexasol.ExaConnection.export_to_pandas method
:type kwargs: dict
"""
with closing(self.get_conn()) as conn:
conn.export_to_pandas(sql, query_params=parameters, **kwargs)
def get_records(
self, sql: Union[str, list], parameters: Optional[dict] = None
) -> List[Union[dict, Tuple[Any, ...]]]:
"""
Executes the sql and returns a set of records.
:param sql: the sql statement to be executed (str) or a list of
sql statements to execute
:type sql: str or list
:param parameters: The parameters to render the SQL query with.
:type parameters: dict or iterable
"""
with closing(self.get_conn()) as conn:
with closing(conn.execute(sql, parameters)) as cur:
return cur.fetchall()
def get_first(
self, sql: Union[str, list], parameters: Optional[dict] = None
) -> Optional[Any]:
"""
Executes the sql and returns the first resulting row.
:param sql: the sql statement to be executed (str) or a list of
sql statements to execute
:type sql: str or list
:param parameters: The parameters to render the SQL query with.
:type parameters: dict or iterable
"""
with closing(self.get_conn()) as conn:
with closing(conn.execute(sql, parameters)) as cur:
return cur.fetchone()
def export_to_file(
self,
filename: str,
query_or_table: str,
query_params: Optional[Dict] = None,
export_params: Optional[Dict] = None,
) -> None:
"""
Exports data to a file.
:param filename: Path to the file to which the data has to be exported
:type filename: str
:param query_or_table: the sql statement to be executed or table name to export
:type query_or_table: str
:param query_params: Query parameters passed to underlying ``export_to_file``
method of :class:`~pyexasol.connection.ExaConnection`.
:type query_params: dict
:param export_params: Extra parameters passed to underlying ``export_to_file``
method of :class:`~pyexasol.connection.ExaConnection`.
:type export_params: dict
"""
self.log.info("Getting data from exasol")
with closing(self.get_conn()) as conn:
conn.export_to_file(
dst=filename,
query_or_table=query_or_table,
query_params=query_params,
export_params=export_params,
)
self.log.info("Data saved to %s", filename)
def run(
self,
sql: Union[str, list],
autocommit: bool = False,
parameters: Optional[dict] = None,
) -> None:
"""
Runs a command or a list of commands. Pass a list of sql
statements to the sql parameter to get them to execute
sequentially
:param sql: the sql statement to be executed (str) or a list of
sql statements to execute
:type sql: str or list
:param autocommit: What to set the connection's autocommit setting to
before executing the query.
:type autocommit: bool
:param parameters: The parameters to render the SQL query with.
:type parameters: dict or iterable
"""
if isinstance(sql, str):
sql = [sql]
with closing(self.get_conn()) as conn:
if self.supports_autocommit:
self.set_autocommit(conn, autocommit)
for query in sql:
self.log.info(query)
with closing(conn.execute(query, parameters)) as cur:
self.log.info(cur.row_count)
# If autocommit was set to False for db that supports autocommit,
# or if db does not supports autocommit, we do a manual commit.
if not self.get_autocommit(conn):
conn.commit()
def set_autocommit(self, conn, autocommit: bool) -> None:
"""
Sets the autocommit flag on the connection
:param conn: Connection to set autocommit setting to.
:type conn: connection object
:param autocommit: The autocommit setting to set.
:type autocommit: bool
"""
if not self.supports_autocommit and autocommit:
self.log.warning(
"%s connection doesn't support autocommit but autocommit activated.",
getattr(self, self.conn_name_attr),
)
conn.set_autocommit(autocommit)
def get_autocommit(self, conn) -> bool:
"""
Get autocommit setting for the provided connection.
Return True if autocommit is set.
Return False if autocommit is not set or set to False or conn
does not support autocommit.
:param conn: Connection to get autocommit setting from.
:type conn: connection object
:return: connection autocommit setting.
:rtype: bool
"""
autocommit = conn.attr.get("autocommit")
if autocommit is None:
autocommit = super().get_autocommit(conn)
return autocommit
@staticmethod
def _serialize_cell(cell, conn=None) -> object:
"""
Exasol will adapt all arguments to the execute() method internally,
hence we return cell without any conversion.
:param cell: The cell to insert into the table
:type cell: object
:param conn: The database connection
:type conn: connection object
:return: The cell
:rtype: object
"""
return cell