Source code for ads.secrets.big_data_service

#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import ads
from ads.secrets import SecretKeeper, Secret
from ads.bds.auth import DEFAULT_KRB5_CONFIG_PATH
import json
import os
import base64
from tqdm.auto import tqdm
from dataclasses import dataclass, field

logger = ads.getLogger("ads.secrets")


@dataclass
class BDSSecret(Secret):
    """
    Record of the values that `BDSSecretKeeper` manages and serializes.

    The five connection fields are required; every other field is optional.

    Attributes
    ----------
    principal: str
        The unique identity to which Kerberos can assign tickets.
    hdfs_host: str
        hdfs host name from the bds cluster.
    hive_host: str
        hive host name from the bds cluster.
    hdfs_port: str
        hdfs port from the bds cluster.
    hive_port: str
        hive port from the bds cluster.
    kerb5_path: str
        krb5.conf file path. Defaults to None.
    kerb5_content: dict
        Content of the krb5.conf. Defaults to None, is left out of the
        ``repr`` and carries the metadata flag ``serializable=False``.
    keytab_path: str
        Path to the keytab file. Defaults to None.
    keytab_content: dict
        Content of the keytab file. Defaults to None, is left out of the
        ``repr`` and carries the metadata flag ``serializable=False``.
    secret_id: str
        secret id where the BDSSecret is stored. Defaults to an empty string
        and is left out of the ``repr``.
    """

    # Required connection details.
    principal: str
    hdfs_host: str
    hive_host: str
    hdfs_port: str
    hive_port: str

    # krb5 configuration: file location plus its content.
    kerb5_path: str = None
    kerb5_content: dict = field(
        repr=False,
        metadata={"serializable": False},
        default=None,
    )

    # Keytab: file location plus its content.
    keytab_path: str = None
    keytab_content: dict = field(
        repr=False,
        metadata={"serializable": False},
        default=None,
    )

    # Identifier of a companion secret; `str()` yields "" when not supplied.
    secret_id: str = field(default_factory=str, repr=False)
class BDSSecretKeeper(SecretKeeper):
    """
    `BDSSecretKeeper` provides an interface to save BDS hdfs and hive credentials.

    By default `BDSSecretKeeper.save` also stores the content of the keytab and
    krb5.config files, as a separate secret. Pass `save_files=False` to
    `BDSSecretKeeper.save` to store only the connection details.

    Attributes
    ----------
    principal: str
        The unique identity to which Kerberos can assign tickets.
    hdfs_host: str
        hdfs host name from the bds cluster.
    hive_host: str
        hive host name from the bds cluster.
    hdfs_port: str
        hdfs port from the bds cluster.
    hive_port: str
        hive port from the bds cluster.
    kerb5_path: str
        krb5.conf file path.
    kerb5_content: str
        Content of the krb5.conf.
    keytab_path: str
        Path to the keytab file.
    keytab_content: str
        Content of the keytab file.
    secret_id: str
        secret id where the BDSSecret is stored.

    kwargs
    ------
    vault_id: str. OCID of the vault where the secret is stored. Required for saving secret.
    key_id: str. OCID of the key used for encrypting the secret. Required for saving secret.
    compartment_id: str. OCID of the compartment where the vault is located. Required for saving secret.
    auth: dict. Dictionary returned from ads.common.auth.api_keys() or ads.common.auth.resource_principal(). By default, will follow what is set in `ads.set_auth`. Use this attribute to override the default.
    """

    def __init__(
        self,
        principal: str = None,
        hdfs_host: str = None,
        hive_host: str = None,
        hdfs_port: str = None,
        hive_port: str = None,
        kerb5_path: str = None,
        kerb5_content: str = None,
        keytab_path: str = None,
        keytab_content: str = None,
        keytab_dir: str = None,
        secret_id: str = None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        principal: str
            The unique identity to which Kerberos can assign tickets.
        hdfs_host: str
            hdfs host name from the bds cluster.
        hive_host: str
            hive host name from the bds cluster.
        hdfs_port: str
            hdfs port from the bds cluster.
        hive_port: str
            hive port from the bds cluster.
        kerb5_path: str
            krb5.conf file path.
        kerb5_content: str
            Content of the krb5.conf.
        keytab_path: str
            Path to the keytab file.
        keytab_content: str
            Content of the keytab file.
        keytab_dir: (str, optional). Default None.
            Local directory where the extracted keytab content is saved.
        secret_id: str
            secret id where the BDSSecret is stored.

        kwargs
        ------
        vault_id: str. OCID of the vault where the secret is stored. Required for saving secret.
        key_id: str. OCID of the key used for encrypting the secret. Required for saving secret.
        compartment_id: str. OCID of the compartment where the vault is located. Required for saving secret.
        auth: dict. Dictionary returned from ads.common.auth.api_keys() or ads.common.auth.resource_principal(). By default, will follow what is set in `ads.set_auth`. Use this attribute to override the default.
        """
        self.data = BDSSecret(
            principal=principal,
            hdfs_host=hdfs_host,
            hive_host=hive_host,
            hdfs_port=hdfs_port,
            hive_port=hive_port,
            kerb5_path=kerb5_path,
            kerb5_content=kerb5_content,
            keytab_path=keytab_path,
            keytab_content=keytab_content,
            secret_id=secret_id,
        )
        self.keytab_dir = keytab_dir
        super().__init__(**kwargs)

    def encode(self, serialize: bool = True) -> "BDSSecretKeeper":
        """
        Prepares content to save in vault.

        With `serialize=True` the keytab and krb5.config files are read from
        `self.data.keytab_path` / `self.data.kerb5_path`, passed through
        `self._encode`, and stored together with the serialized `self.data`
        in `self.secret` and `self.encoded`. Only one of the two paths has to
        be set; the content of a file whose path is not set is stored as None
        (unless the corresponding attribute was already assigned on the
        instance). With `serialize=False` the parent class `encode` is used.

        Parameters
        ----------
        serialize: bool, optional
            When set to True, loads the keytab and krb5.config file and encodes the content of both files.

        Returns
        -------
        BDSSecretKeeper:
            Returns self object

        Raises
        ------
        ValueError
            If `serialize` is True and neither `keytab_path` nor `kerb5_path` is set.
        """
        if not serialize:
            super().encode()
            return self

        if not self.data.keytab_path and not self.data.kerb5_path:
            raise ValueError(
                "Missing path to keytab or krb5.config file. Required keytab file path to be set in `keytab_path` and krb5.config file path to be set in `kerb5_path`."
            )

        # Give both attributes a value up front: the guard above lets a single
        # path through, and the dictionary below reads both attributes.
        self.keytab_content = getattr(self, "keytab_content", None)
        self.kerb5_content = getattr(self, "kerb5_content", None)

        if self.data.keytab_path:
            with open(os.path.expanduser(self.data.keytab_path), "rb") as f:
                self.keytab_content = self._encode(f.read())
        if self.data.kerb5_path:
            with open(os.path.expanduser(self.data.kerb5_path), "rb") as f:
                self.kerb5_content = self._encode(f.read())

        self.secret = {
            **self.data.serialize(),
            "keytab_content": self.keytab_content,
            "kerb5_content": self.kerb5_content,
        }
        self.encoded = self.secret
        return self

    def decode(self, save_files: bool = True) -> "BDSSecretKeeper":
        """
        Converts the content in `self.secret` to `BDSSecret` and stores in `self.data`

        Three cases are handled:

        * `keytab_path` and `kerb5_path` were both passed through the
          constructor: they are retained (we do not want to override what the
          user has passed in) and, when `save_files` is True, the local krb5
          config is copied to the location given by `DEFAULT_KRB5_CONFIG_PATH`.
        * Otherwise, if the decoded secret has a `secret_id`: that secret is
          fetched and, when `save_files` is True, its keytab content is written
          to the `keytab_path` recorded in the secret (moved into `keytab_dir`
          when one was given to the constructor) and its krb5 content is
          written to the location given by `DEFAULT_KRB5_CONFIG_PATH`.
        * Otherwise a warning is logged and no file is written.

        Parameters
        ----------
        save_files: bool, optional
            When set to True, writes the keytab and krb5 config files to the local file system.

        Returns
        -------
        BDSSecretKeeper:
            Returns self object

        Raises
        ------
        FileNotFoundError
            If the user supplied `kerb5_path` does not exist and `save_files` is True.
        ValueError
            If keytab content has to be written but the secret has no `keytab_path`.
        """
        content = json.loads(self._decode())
        data = BDSSecret(**content)
        krb5_file_path = os.path.expanduser(DEFAULT_KRB5_CONFIG_PATH)

        if hasattr(self, "data") and self.data.keytab_path and self.data.kerb5_path:
            # If the keytab and kerb5 locations are passed in as a keyword argument inside
            # `load_secret` method, then we need to retain that information after
            # the secret was deserialized.
            logger.debug(f"Setting keytab file to {self.data.keytab_path}")
            data.keytab_path = self.data.keytab_path
            data.kerb5_path = self.data.kerb5_path
            if save_files:
                # Expand `~` the same way `encode` does before touching the file.
                local_krb5_path = os.path.expanduser(self.data.kerb5_path)
                if not os.path.exists(local_krb5_path):
                    raise FileNotFoundError(
                        f"`kerb5_path` does not exists. Cannot load the kerb5 config from this location {self.data.kerb5_path}."
                    )
                # Read the whole source as bytes *before* opening the
                # destination: the destination is opened with "wb" (which
                # truncates), and both paths may point at the same file.
                with open(local_krb5_path, "rb") as rf:
                    krb5_bytes = rf.read()
                os.makedirs(os.path.dirname(krb5_file_path), exist_ok=True)
                with open(krb5_file_path, "wb") as wf:
                    wf.write(krb5_bytes)
        elif data.secret_id:
            logger.debug("Secret ids corresponding to the keytab file found.")
            content = json.loads(
                self._decode(self.get_secret(data.secret_id, decoded=False))
            )

            if save_files:
                if content.get("keytab_content"):
                    if not data.keytab_path:
                        raise ValueError(
                            "`keytab_path` field not found. `keytab_path` cannot be None when `save_files` = True."
                        )
                    keytab_dir = getattr(self, "keytab_dir", None)
                    if keytab_dir:
                        # Keep the file name from the secret but place it
                        # inside the directory chosen by the user.
                        data.keytab_path = os.path.abspath(
                            os.path.expanduser(
                                os.path.join(
                                    keytab_dir,
                                    os.path.basename(data.keytab_path),
                                )
                            )
                        )
                    # Resolve the path first so the directory that gets
                    # created is the one the file is actually written into
                    # (handles `~/...` and bare file names).
                    keytab_file = os.path.abspath(
                        os.path.expanduser(data.keytab_path)
                    )
                    os.makedirs(os.path.dirname(keytab_file), exist_ok=True)
                    with open(keytab_file, "wb") as wf:
                        wf.write(base64.b64decode(content["keytab_content"]))
                if content.get("kerb5_content"):
                    os.makedirs(os.path.dirname(krb5_file_path), exist_ok=True)
                    with open(
                        os.path.abspath(os.path.expanduser(krb5_file_path)), "wb"
                    ) as wf:
                        wf.write(base64.b64decode(content["kerb5_content"]))
        else:
            logger.warning(
                "Keytab and krb5 config information not found. If you have local keytab and krb5.config files, you can pass that as a keyword argument `keytab_path=<path to the keytab file>` and `kerb5_path=<path to the kerb5 file>` inside the `BDSSecretKeeper.load_secret` method"
            )

        self.data = data
        return self

    def save(
        self,
        name: str,
        description: str,
        freeform_tags: dict = None,
        defined_tags: dict = None,
        save_files: bool = True,
    ) -> "BDSSecretKeeper":
        """Saves credentials to Vault and returns self.

        When `save_files` is True two secrets are created: first one named
        `<name>_keytab_krb5` holding the keytab and krb5 config content, then
        the credentials secret named `name`, whose `secret_id` field refers to
        the first one.

        Parameters
        ----------
        name : str
            Name of the secret when saved in the Vault.
        description : str
            Description of the secret when saved in the Vault.
        freeform_tags : (dict, optional). Default is None
            freeform_tags to be used for saving the secret in OCI console.
        defined_tags: (dict, optional). Default is None
            Save the tags under predefined tags in OCI console.
        save_files: (bool, optional). Default is True
            If set to True, saves the contents of the keytab and krb5 file as separate secret.

        Returns
        -------
        BDSSecretKeeper:
            Returns self object
        """
        if not save_files:
            logger.info(
                "`save_files` set to False. Not saving keytab or krb5.config file content to vault."
            )
        logger.debug(f"Encoding secrets. Save files is set to {save_files}")
        self.encode(serialize=save_files)

        secret_id = None
        # One progress step per secret that gets created.
        n = 2 if save_files else 1
        with tqdm(total=n, leave=False) as pbar:
            if save_files:
                pbar.set_description(
                    "Saving the keytab file and the krb5.config file."
                )
                secret_id = self.create_secret(
                    self._encode(
                        json.dumps(
                            {
                                "keytab_content": self.encoded["keytab_content"],
                                "kerb5_content": self.encoded["kerb5_content"],
                            }
                        ).encode("utf-8")
                    ),
                    encode=False,
                    secret_name=f"{name}_keytab_krb5",
                    description="Keytab file and krb5 config file.",
                    freeform_tags=freeform_tags,
                    defined_tags=defined_tags,
                )
                pbar.update()
            # Record the id of the files secret (None when no files were
            # saved) so that it is serialized with the credentials below.
            self.data.secret_id = secret_id
            pbar.set_description("Saving the credentials.")
            self.secret_id = self.create_secret(
                self._encode(json.dumps(self.data.serialize()).encode("utf-8")),
                encode=False,
                secret_name=name,
                description=description,
                freeform_tags=freeform_tags,
                defined_tags=defined_tags,
            )
            pbar.update()
        return self