Source code for clkhash.key_derivation

from typing import Tuple, Union, Optional, Sequence

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.hkdf import HKDF

"""
We use the block-size of SHA1 and MD5 as the default key size for HMAC
"""
# Default size, in bytes, of each derived key; used as the default `key_size`
# of both `hkdf` and `generate_key_lists`.
DEFAULT_KEY_SIZE = 64

# Default number of keys derived per identifier, i.e. the default
# `num_hashing_methods` of `generate_key_lists`.
DEFAULT_NUM_HASHING_METHODS = 2

# Maps the `hash_algo` names accepted by `hkdf` to the corresponding
# `cryptography` hash classes. `hkdf` instantiates the selected class on every
# call; any name missing from this table is rejected there with a ValueError.
_HASH_FUNCTIONS = {
    'SHA256': hashes.SHA256,
    'SHA512': hashes.SHA512
}


def hkdf(secret: bytes,
         num_keys: int,
         hash_algo: str = 'SHA256',
         salt: Optional[bytes] = None,
         info: Optional[bytes] = None,
         key_size: int = DEFAULT_KEY_SIZE
         ) -> Tuple[bytes, ...]:
    """
    Executes the HKDF key derivation function as described in rfc5869 to
    derive `num_keys` keys of size `key_size` from the secret.

    :param secret: input keying material
    :param num_keys: the number of keys the kdf should produce
    :param hash_algo: The hash function used by HKDF for the internal
        HMAC calls. The choice of hash function defines the maximum
        length of the output key material.
        Output bytes <= 255 * hash digest size (in bytes).
    :param salt: HKDF is defined to operate with and without random
        salt. This is done to accommodate applications where a salt
        value is not available. We stress, however, that the use of
        salt adds significantly to the strength of HKDF, ensuring
        independence between different uses of the hash function,
        supporting "source-independent" extraction, and strengthening
        the analytical results that back the HKDF design.
        Random salt differs fundamentally from the initial keying
        material in two ways: it is non-secret and can be re-used.
        Ideally, the salt value is a random (or pseudorandom) string
        of the length HashLen. Yet, even a salt value of less quality
        (shorter in size or with limited entropy) may still make a
        significant contribution to the security of the output keying
        material.
    :param info: While the 'info' value is optional in the definition
        of HKDF, it is often of great importance in applications. Its
        main objective is to bind the derived key material to
        application- and context-specific information. For example,
        'info' may contain a protocol number, algorithm identifiers,
        user identities, etc. In particular, it may prevent the
        derivation of the same keying material for different contexts
        (when the same input key material (IKM) is used in such
        different contexts).
        It may also accommodate additional inputs to the key expansion
        part, if so desired (e.g., an application may want to bind the
        key material to its length L, thus making L part of the 'info'
        field). There is one technical requirement from 'info': it
        should be independent of the input key material value IKM.
    :param key_size: the size of the produced keys, in bytes
    :return: Derived keys, as a tuple of `num_keys` byte strings
    :raises ValueError: if `hash_algo` is not one of the names in
        `_HASH_FUNCTIONS`
    """
    try:
        hash_function = _HASH_FUNCTIONS[hash_algo]
    except KeyError as e:
        msg = f"unsupported hash function '{hash_algo}'"
        raise ValueError(msg) from e

    # Deliberately not named `hkdf`: that would shadow this function inside
    # its own body.
    deriver = HKDF(algorithm=hash_function(),
                   length=num_keys * key_size,
                   salt=salt,
                   info=info,
                   backend=default_backend())

    # derive() returns one block of num_keys * key_size bytes which we
    # divide up into num_keys chunks, each of size key_size.
    keybytes = deriver.derive(secret)
    return tuple(keybytes[i * key_size:(i + 1) * key_size]
                 for i in range(num_keys))


def generate_key_lists(secret: Union[bytes, str],
                       num_identifier: int,
                       num_hashing_methods: int = DEFAULT_NUM_HASHING_METHODS,
                       key_size: int = DEFAULT_KEY_SIZE,
                       salt: Optional[bytes] = None,
                       info: Optional[bytes] = None,
                       kdf: str = 'HKDF',
                       hash_algo: str = 'SHA256'
                       ) -> Tuple[Tuple[bytes, ...], ...]:
    """
    Generates `num_hashing_methods` derived keys for each identifier for
    the secret using a key derivation function (KDF).

    The only supported key derivation function for now is 'HKDF'.

    The previous secret usage can be reproduced by setting kdf to
    'legacy', but it will reuse the raw secret as every key (once per
    hashing method, for every identifier). This is highly discouraged,
    as this strategy will map the same n-grams in different identifier
    to the same bits in the Bloom filter and thus does not lead to good
    results.

    :param secret: a secret (either as bytes or string); a string is
        encoded as UTF-8 before use
    :param num_identifier: the number of identifiers
    :param num_hashing_methods: number of hashing methods used per
        identifier, each of them requiring a different key
    :param key_size: the size of the derived keys
    :param salt: salt for the KDF as bytes
    :param info: optional context and application specific information
        as bytes
    :param kdf: the key derivation function algorithm to use
    :param hash_algo: the hashing algorithm to use (ignored if `kdf` is
        not 'HKDF')
    :return: The derived keys. First dimension is of size
        num_identifier, second dimension is of size
        num_hashing_methods. A key is represented as bytes.
    :raises ValueError: if `num_hashing_methods` is smaller than 1, or
        if `kdf` is neither 'HKDF' nor 'legacy'
    :raises TypeError: if `secret` is not bytes and has no `encode`
        method
    """
    if num_hashing_methods < 1:
        raise ValueError(f'num_hashing_methods: "{num_hashing_methods}" is not supported, it'
                         ' should be greater than 0.')

    if isinstance(secret, bytes):
        secret_bytes = secret
    else:
        # Only the encode call can fail with AttributeError, so the try
        # block is limited to it.
        try:
            secret_bytes = secret.encode('UTF-8')
        except AttributeError as e:
            raise TypeError("provided 'secret' has to be either of type bytes or strings.") from e

    if kdf == 'HKDF':
        # Derive every key in a single HKDF call, then pack them in the
        # expected way.
        flat_keys = hkdf(secret_bytes,
                         num_hashing_methods * num_identifier,
                         hash_algo=hash_algo,
                         salt=salt,
                         info=info,
                         key_size=key_size)
        # Regroup such that we get a tuple of keys for each identifier.
        return tuple(
            flat_keys[i * num_hashing_methods:(i + 1) * num_hashing_methods]
            for i in range(num_identifier)
        )

    if kdf == 'legacy':
        # No derivation at all: every key is the secret itself.
        return tuple(tuple([secret_bytes] * num_hashing_methods)
                     for _ in range(num_identifier))

    raise ValueError(f'kdf: "{kdf}" is not supported.')