|
- """passlib.pwd -- password generation helpers"""
- #=============================================================================
- # imports
- #=============================================================================
- from __future__ import absolute_import, division, print_function, unicode_literals
- # core
- import codecs
- from collections import defaultdict
- try:
- from collections.abc import MutableMapping
- except ImportError:
- # py2 compat
- from collections import MutableMapping
- from math import ceil, log as logf
- import logging; log = logging.getLogger(__name__)
- import pkg_resources
- import os
- # site
- # pkg
- from passlib import exc
- from passlib.utils.compat import PY2, irange, itervalues, int_types
- from passlib.utils import rng, getrandstr, to_unicode
- from passlib.utils.decor import memoized_property
- # local
- __all__ = [
- "genword", "default_charsets",
- "genphrase", "default_wordsets",
- ]
-
- #=============================================================================
- # constants
- #=============================================================================
-
# XXX: rename / publicly document this map?
#: maps preset names accepted by the ``entropy`` option onto a number of bits.
entropy_aliases = dict(
    unsafe=12,  # barest protection from throttled online attack
    weak=24,    # some protection from unthrottled online attack
    fair=36,    # some protection from offline attacks
    strong=48,  # reasonable protection from offline attacks
    secure=60,  # very good protection from offline attacks
)
-
- #=============================================================================
- # internal helpers
- #=============================================================================
-
- def _superclasses(obj, cls):
- """return remaining classes in object's MRO after cls"""
- mro = type(obj).__mro__
- return mro[mro.index(cls)+1:]
-
-
- def _self_info_rate(source):
- """
- returns 'rate of self-information' --
- i.e. average (per-symbol) entropy of the sequence **source**,
- where probability of a given symbol occurring is calculated based on
- the number of occurrences within the sequence itself.
-
- if all elements of the source are unique, this should equal ``log(len(source), 2)``.
-
- :arg source:
- iterable containing 0+ symbols
- (e.g. list of strings or ints, string of characters, etc).
-
- :returns:
- float bits of entropy
- """
- try:
- size = len(source)
- except TypeError:
- # if len() doesn't work, calculate size by summing counts later
- size = None
- counts = defaultdict(int)
- for char in source:
- counts[char] += 1
- if size is None:
- values = counts.values()
- size = sum(values)
- else:
- values = itervalues(counts)
- if not size:
- return 0
- # NOTE: the following performs ``- sum(value / size * logf(value / size, 2) for value in values)``,
- # it just does so with as much pulled out of the sum() loop as possible...
- return logf(size, 2) - sum(value * logf(value, 2) for value in values) / size
-
-
- # def _total_self_info(source):
- # """
- # return total self-entropy of a sequence
- # (the average entropy per symbol * size of sequence)
- # """
- # return _self_info_rate(source) * len(source)
-
-
- def _open_asset_path(path, encoding=None):
- """
- :param asset_path:
- string containing absolute path to file,
- or package-relative path using format
- ``"python.module:relative/file/path"``.
-
- :returns:
- filehandle opened in 'rb' mode
- (unless encoding explicitly specified)
- """
- if encoding:
- return codecs.getreader(encoding)(_open_asset_path(path))
- if os.path.isabs(path):
- return open(path, "rb")
- package, sep, subpath = path.partition(":")
- if not sep:
- raise ValueError("asset path must be absolute file path "
- "or use 'pkg.name:sub/path' format: %r" % (path,))
- return pkg_resources.resource_stream(package, subpath)
-
-
- #: type aliases
- _sequence_types = (list, tuple)
- _set_types = (set, frozenset)
-
- #: set of elements that ensure_unique() has validated already.
- _ensure_unique_cache = set()
-
-
- def _ensure_unique(source, param="source"):
- """
- helper for generators --
- Throws ValueError if source elements aren't unique.
- Error message will display (abbreviated) repr of the duplicates in a string/list
- """
- # check cache to speed things up for frozensets / tuples / strings
- cache = _ensure_unique_cache
- hashable = True
- try:
- if source in cache:
- return True
- except TypeError:
- hashable = False
-
- # check if it has dup elements
- if isinstance(source, _set_types) or len(set(source)) == len(source):
- if hashable:
- try:
- cache.add(source)
- except TypeError:
- # XXX: under pypy, "list() in set()" above doesn't throw TypeError,
- # but trying to add unhashable it to a set *does*.
- pass
- return True
-
- # build list of duplicate values
- seen = set()
- dups = set()
- for elem in source:
- (dups if elem in seen else seen).add(elem)
- dups = sorted(dups)
- trunc = 8
- if len(dups) > trunc:
- trunc = 5
- dup_repr = ", ".join(repr(str(word)) for word in dups[:trunc])
- if len(dups) > trunc:
- dup_repr += ", ... plus %d others" % (len(dups) - trunc)
-
- # throw error
- raise ValueError("`%s` cannot contain duplicate elements: %s" %
- (param, dup_repr))
-
- #=============================================================================
- # base generator class
- #=============================================================================
class SequenceGenerator(object):
    """
    Base class used by word & phrase generators.

    These objects take a series of options, corresponding
    to those of the :func:`genword` / :func:`genphrase` frontends.
    They act as callables which can be used to generate a password
    or a list of 1+ passwords. They also expose some read-only
    informational attributes.

    Parameters
    ----------
    :param entropy:
        Optionally specify the amount of entropy the resulting passwords
        should contain (as measured with respect to the generator itself).
        This will be used to auto-calculate the required password size.
        May be a positive number, or one of the keys of ``entropy_aliases``.

    :param length:
        Optionally specify the length of password to generate,
        measured as count of whatever symbols the subclass uses (characters or words).
        Note if ``entropy`` requires a larger minimum length,
        that will be used instead.

    :param rng:
        Optionally provide a custom RNG source to use.
        Should be an instance of :class:`random.Random`;
        defaults to the ``rng`` object imported from :mod:`passlib.utils`.

    :raises ValueError:
        if ``entropy`` is not positive, if an entropy target is requested
        but fewer than 2 symbols are available, or if ``length`` is less than 1.

    :raises TypeError:
        if unexpected keywords are left over and no other parent class
        remains in the MRO to consume them.

    Attributes
    ----------
    .. autoattribute:: length
    .. autoattribute:: symbol_count
    .. autoattribute:: entropy_per_symbol
    .. autoattribute:: entropy

    Subclassing
    -----------
    Subclasses must implement the ``.__next__()`` method,
    and set ``.symbol_count`` before calling base ``__init__`` method.
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: requested size of final password
    length = None

    #: requested entropy of final password
    requested_entropy = "strong"

    #: random number source to use
    rng = rng

    #: number of potential symbols (must be filled in by subclass)
    symbol_count = None

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, entropy=None, length=None, rng=None, **kwds):

        # make sure subclass set things up correctly
        assert self.symbol_count is not None, "subclass must set .symbol_count"

        # init length & requested entropy
        # (entropy is only consulted if given explicitly, or if no length was given)
        if entropy is not None or length is None:
            if entropy is None:
                entropy = self.requested_entropy
            # translate preset names ("weak", "strong", ...) into bit counts;
            # anything that isn't a known alias is passed through unchanged
            entropy = entropy_aliases.get(entropy, entropy)
            if entropy <= 0:
                raise ValueError("`entropy` must be positive number")
            # with fewer than 2 symbols, each symbol carries no entropy, so no
            # length could satisfy the request -- report that explicitly rather
            # than failing with ZeroDivisionError / "math domain error" below.
            if self.symbol_count < 2:
                raise ValueError("at least 2 distinct symbols are required "
                                 "to satisfy an `entropy` requirement")
            min_length = int(ceil(entropy / self.entropy_per_symbol))
            if length is None or length < min_length:
                length = min_length

        # NOTE: this stores None when only ``length`` was specified
        self.requested_entropy = entropy

        if length < 1:
            raise ValueError("`length` must be positive integer")
        self.length = length

        # init other common options
        if rng is not None:
            self.rng = rng

        # hand off to parent
        # (if ``object`` is all that's left in the MRO, nothing could accept the
        #  leftover keywords, so report them with a readable message)
        if kwds and _superclasses(self, SequenceGenerator) == (object,):
            raise TypeError("Unexpected keyword(s): %s" % ", ".join(kwds.keys()))
        super(SequenceGenerator, self).__init__(**kwds)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def entropy_per_symbol(self):
        """
        Average entropy per symbol (assuming all symbols have equal probability)
        """
        return logf(self.symbol_count, 2)

    @memoized_property
    def entropy(self):
        """
        Effective entropy of generated passwords.

        This value will always be a multiple of :attr:`entropy_per_symbol`.
        If entropy is specified in constructor, :attr:`length` will be chosen
        so that this value is the smallest multiple >= :attr:`requested_entropy`.
        """
        return self.length * self.entropy_per_symbol

    #=============================================================================
    # generation
    #=============================================================================
    def __next__(self):
        """main generation function, should create one password/phrase"""
        raise NotImplementedError("implement in subclass")

    def __call__(self, returns=None):
        """
        frontend used by genword() / genphrase() to create passwords

        :param returns:
            ``None`` to get a single password, an integer to get a list
            of that many passwords, or the ``iter`` builtin to get
            this generator back as an iterator.

        :raises passlib.exc.ExpectedTypeError:
            if ``returns`` is anything else.
        """
        if returns is None:
            return next(self)
        elif isinstance(returns, int_types):
            return [next(self) for _ in irange(returns)]
        elif returns is iter:
            return self
        else:
            raise exc.ExpectedTypeError(returns, "<None>, int, or <iter>", "returns")

    def __iter__(self):
        return self

    if PY2:
        # python 2 iterator protocol looks for .next() instead of .__next__()
        def next(self):
            return self.__next__()

    #=============================================================================
    # eoc
    #=============================================================================
-
- #=============================================================================
- # default charsets
- #=============================================================================
-
#: global dict of predefined character sets, keyed by preset name.
default_charsets = dict(
    # ascii letters, digits, and some punctuation
    ascii_72='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*?/',

    # ascii letters and digits
    ascii_62='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',

    # ascii_62 minus the visually similar characters '1IiLl', '0Oo', '5S', '8B'
    ascii_50='234679abcdefghjkmnpqrstuvwxyzACDEFGHJKMNPQRTUVWXYZ',

    # lower case hexadecimal
    hex='0123456789abcdef',
)
-
- #=============================================================================
- # password generator
- #=============================================================================
-
class WordGenerator(SequenceGenerator):
    """
    Generator which builds passwords by randomly drawing characters
    from a string of unique characters.

    Parameters
    ----------
    :param chars:
        custom character string to draw from
        (mutually exclusive with ``charset``).

    :param charset:
        name of predefined charset to draw from
        (a key of ``default_charsets``).

    :param \\*\\*kwds:
        all other keywords passed to the :class:`SequenceGenerator` parent class.

    Attributes
    ----------
    .. autoattribute:: chars
    .. autoattribute:: charset
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: Predefined character set in use (set to None for instances using custom 'chars')
    charset = "ascii_62"

    #: string of chars to draw from -- usually filled in from charset
    chars = None

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, chars=None, charset=None, **kwds):

        # resolve which characters to draw from
        if chars and charset:
            raise TypeError("`chars` and `charset` are mutually exclusive")
        if not chars:
            # no custom string given: fall back to named (or default) preset
            if not charset:
                charset = self.charset
                assert charset
            chars = default_charsets[charset]
        self.charset = charset

        # normalize & validate the character string
        chars = to_unicode(chars, param="chars")
        _ensure_unique(chars, param="chars")
        self.chars = chars

        # hand off to parent
        super(WordGenerator, self).__init__(**kwds)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def symbol_count(self):
        """number of distinct characters available to the generator"""
        return len(self.chars)

    #=============================================================================
    # generation
    #=============================================================================

    def __next__(self):
        """create one password of :attr:`length` characters"""
        # XXX: could do things like optionally ensure certain character groups
        #      (e.g. letters & punctuation) are included
        return getrandstr(self.rng, self.chars, self.length)

    #=============================================================================
    # eoc
    #=============================================================================
-
-
def genword(entropy=None, length=None, returns=None, **kwds):
    """Generate one or more random passwords.

    This function uses :class:`random.SystemRandom` to generate
    one or more passwords using various character sets.
    The complexity of the password can be specified
    by size, or by the desired amount of entropy.

    Usage Example::

        >>> # generate a random alphanumeric string with 48 bits of entropy (the default)
        >>> from passlib import pwd
        >>> pwd.genword()
        'DnBHvDjMK6'

        >>> # generate a random hexadecimal string with 52 bits of entropy
        >>> pwd.genword(entropy=52, charset="hex")
        '310f1a7ac793f'

    :param entropy:
        Strength of resulting password, measured in 'guessing entropy' bits.
        An appropriate **length** value will be calculated
        based on the requested entropy amount, and the size of the character set.

        This can be a positive integer, or one of the following preset
        strings: ``"weak"`` (24), ``"fair"`` (36),
        ``"strong"`` (48), and ``"secure"`` (60).

        If neither this nor **length** is specified, **entropy** will default
        to ``"strong"`` (48).

    :param length:
        Size of resulting password, measured in characters.
        If omitted, the size is auto-calculated based on the **entropy** parameter.

        If both **entropy** and **length** are specified,
        the stronger value will be used.

    :param returns:
        Controls what this function returns:

        * If ``None`` (the default), this function will generate a single password.
        * If an integer, this function will return a list containing that many passwords.
        * If the ``iter`` builtin, will return an iterator that yields passwords.

    :param chars:

        Optionally specify custom string of characters to use when randomly
        generating a password. This option cannot be combined with **charset**.

    :param charset:

        The predefined character set to draw from (if not specified by **chars**).
        There are currently four presets available:

        * ``"ascii_62"`` (the default) -- all digits and ascii upper & lowercase letters.
          Provides ~5.95 entropy per character.

        * ``"ascii_50"`` -- subset which excludes visually similar characters
          (``1IiLl0Oo5S8B``). Provides ~5.64 entropy per character.

        * ``"ascii_72"`` -- all digits and ascii upper & lowercase letters,
          as well as some punctuation. Provides ~6.17 entropy per character.

        * ``"hex"`` -- Lower case hexadecimal. Provides 4 bits of entropy per character.

    :returns:
        :class:`!unicode` string containing randomly generated password;
        or list of 1+ passwords if :samp:`returns={int}` is specified.
    """
    generator = WordGenerator(entropy=entropy, length=length, **kwds)
    return generator(returns)
-
- #=============================================================================
- # default wordsets
- #=============================================================================
-
def _load_wordset(asset_path):
    """
    load wordset from a datafile (e.g. one stored within package data).
    file should be utf-8 encoded, with one word per line.

    :param asset_path:
        string containing absolute path to wordset file,
        or "python.module:relative/file/path".

    :returns:
        tuple of words, as loaded from specified words file
        (surrounding whitespace stripped, blank lines omitted).
    """
    # read resource file line by line, keeping only the non-empty stripped words
    with _open_asset_path(asset_path, "utf-8") as fh:
        stripped = (line.strip() for line in fh)
        words = tuple(filter(None, stripped))

    # NOTE: works but not used
    # # detect if file uses "<int> <word>" format, and strip numeric prefix
    # def extract(row):
    #     idx, word = row.replace("\t", " ").split(" ", 1)
    #     if not idx.isdigit():
    #         raise ValueError("row is not dice index + word")
    #     return word
    # try:
    #     extract(words[-1])
    # except ValueError:
    #     pass
    # else:
    #     words = tuple(extract(word) for word in words)

    log.debug("loaded %d-element wordset from %r", len(words), asset_path)
    return words
-
-
class WordsetDict(MutableMapping):
    """
    Special mapping used to store dictionary of wordsets.
    Different from a regular dict in that some wordsets
    may be lazy-loaded from an asset path.

    A key is considered present if it has either an explicitly assigned
    value, or an asset path registered via :meth:`set_path`.

    :param \\*args:
    :param \\*\\*kwds:
        optional initial contents, interpreted like the arguments
        of ``dict.update()``.
    """

    #: dict of key -> asset path
    paths = None

    #: dict of key -> value
    _loaded = None

    def __init__(self, *args, **kwds):
        self.paths = {}
        self._loaded = {}
        super(WordsetDict, self).__init__()
        # MutableMapping doesn't define an initializer that accepts contents
        # (forwarding them would reach object.__init__ and raise TypeError),
        # so seed any initial items through the update() mixin instead.
        if args or kwds:
            self.update(*args, **kwds)

    def __getitem__(self, key):
        """
        return wordset for **key**, loading it from its asset path on first use.

        :raises KeyError: if key has neither a value nor a registered path.
        """
        try:
            return self._loaded[key]
        except KeyError:
            pass
        path = self.paths[key]
        # cache the loaded words so the file is only read once
        value = self._loaded[key] = _load_wordset(path)
        return value

    def set_path(self, key, path):
        """
        set asset path to lazy-load wordset from.
        """
        self.paths[key] = path

    def __setitem__(self, key, value):
        self._loaded[key] = value

    def __delitem__(self, key):
        """
        remove **key**, discarding both its value and its asset path.

        :raises KeyError: if key has neither a value nor a registered path.
        """
        # NOTE: must test ``_loaded`` directly -- ``key in self`` is also true
        #       for keys which only have a (not yet loaded) asset path,
        #       and deleting those from ``_loaded`` would raise KeyError.
        if key in self._loaded:
            del self._loaded[key]
            self.paths.pop(key, None)
        else:
            del self.paths[key]

    @property
    def _keyset(self):
        """set of all keys having a value and/or an asset path"""
        keys = set(self._loaded)
        keys.update(self.paths)
        return keys

    def __iter__(self):
        return iter(self._keyset)

    def __len__(self):
        return len(self._keyset)

    # NOTE: speeds things up, and prevents contains from lazy-loading
    def __contains__(self, key):
        return key in self._loaded or key in self.paths
-
-
#: dict of predefined word sets.
#: key is name of wordset, value should be sequence of words.
default_wordsets = WordsetDict()

# register the wordsets built into passlib;
# each one is lazy-loaded from package data the first time it's looked up
for name in ("eff_long", "eff_short", "eff_prefixed", "bip39"):
    default_wordsets.set_path(name, "passlib:_data/wordsets/%s.txt" % name)
-
- #=============================================================================
- # passphrase generator
- #=============================================================================
class PhraseGenerator(SequenceGenerator):
    """class which generates passphrases by randomly choosing
    from a list of unique words.

    :param wordset:
        name of predefined wordset to draw from
        (a key of ``default_wordsets``).
    :param words:
        custom collection of words to draw from
        (mutually exclusive with ``wordset``).
    :param sep:
        separator inserted between words in output (defaults to ``" "``).
    :param \\*\\*kwds:
        all other keywords passed to the :class:`SequenceGenerator` parent class.

    .. autoattribute:: wordset
    .. autoattribute:: words
    .. autoattribute:: sep
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: predefined wordset to use
    wordset = "eff_long"

    #: list of words to draw from
    words = None

    #: separator to use when joining words
    sep = " "

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, wordset=None, words=None, sep=None, **kwds):

        # resolve which words to draw from
        if words is None:
            # no custom words given: fall back to named (or default) wordset
            if wordset is None:
                wordset = self.wordset
                assert wordset
            words = default_wordsets[wordset]
        elif wordset is not None:
            raise TypeError("`words` and `wordset` are mutually exclusive")
        self.wordset = wordset

        # normalize & validate the words
        if not isinstance(words, _sequence_types):
            words = tuple(words)
        _ensure_unique(words, param="words")
        self.words = words

        # init separator
        chosen_sep = self.sep if sep is None else sep
        self.sep = to_unicode(chosen_sep, param="sep")

        # hand off to parent
        super(PhraseGenerator, self).__init__(**kwds)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def symbol_count(self):
        """number of distinct words available to the generator"""
        return len(self.words)

    #=============================================================================
    # generation
    #=============================================================================

    def __next__(self):
        """create one passphrase of :attr:`length` words joined by :attr:`sep`"""
        pick = self.rng.choice
        pool = self.words
        return self.sep.join(pick(pool) for _ in irange(self.length))

    #=============================================================================
    # eoc
    #=============================================================================
-
-
def genphrase(entropy=None, length=None, returns=None, **kwds):
    """Generate one or more random passphrases.

    This function uses :class:`random.SystemRandom` to generate
    one or more passphrases, by randomly choosing words from a wordset.
    The complexity of the passphrase can be specified
    by size, or by the desired amount of entropy.

    Usage Example::

        >>> # generate random phrase with 48 bits of entropy
        >>> from passlib import pwd
        >>> pwd.genphrase()
        'gangly robbing salt shove'

        >>> # generate a random phrase with 52 bits of entropy
        >>> # using a particular wordset
        >>> pwd.genphrase(entropy=52, wordset="bip39")
        'wheat dilemma reward rescue diary'

    :param entropy:
        Strength of resulting password, measured in 'guessing entropy' bits.
        An appropriate **length** value will be calculated
        based on the requested entropy amount, and the size of the word set.

        This can be a positive integer, or one of the following preset
        strings: ``"weak"`` (24), ``"fair"`` (36),
        ``"strong"`` (48), and ``"secure"`` (60).

        If neither this nor **length** is specified, **entropy** will default
        to ``"strong"`` (48).

    :param length:
        Length of resulting password, measured in words.
        If omitted, the size is auto-calculated based on the **entropy** parameter.

        If both **entropy** and **length** are specified,
        the stronger value will be used.

    :param returns:
        Controls what this function returns:

        * If ``None`` (the default), this function will generate a single password.
        * If an integer, this function will return a list containing that many passwords.
        * If the ``iter`` builtin, will return an iterator that yields passwords.

    :param words:

        Optionally specifies a list/set of words to use when randomly generating a passphrase.
        This option cannot be combined with **wordset**.

    :param wordset:

        The predefined word set to draw from (if not specified by **words**).
        There are currently four presets available:

        ``"eff_long"`` (the default)

            Wordset containing 7776 english words of ~7 letters.
            Constructed by the EFF, it offers ~12.9 bits of entropy per word.

            This wordset (and the other ``"eff_"`` wordsets)
            were `created by the EFF <https://www.eff.org/deeplinks/2016/07/new-wordlists-random-passphrases>`_
            to aid in generating passwords. See their announcement page
            for more details about the design & properties of these wordsets.

        ``"eff_short"``

            Wordset containing 1296 english words of ~4.5 letters.
            Constructed by the EFF, it offers ~10.3 bits of entropy per word.

        ``"eff_prefixed"``

            Wordset containing 1296 english words of ~8 letters,
            selected so that they each have a unique 3-character prefix.
            Constructed by the EFF, it offers ~10.3 bits of entropy per word.

        ``"bip39"``

            Wordset of 2048 english words of ~5 letters,
            selected so that they each have a unique 4-character prefix.
            Published as part of Bitcoin's `BIP 39 <https://github.com/bitcoin/bips/blob/master/bip-0039/english.txt>`_,
            this wordset has exactly 11 bits of entropy per word.

            This list offers words that are typically shorter than ``"eff_long"``
            (at the cost of slightly less entropy); and much shorter than
            ``"eff_prefixed"`` (at the cost of a longer unique prefix).

    :param sep:
        Optional separator to use when joining words.
        Defaults to ``" "`` (a space), but can be an empty string, a hyphen, etc.

    :returns:
        :class:`!unicode` string containing randomly generated passphrase;
        or list of 1+ passphrases if :samp:`returns={int}` is specified.
    """
    generator = PhraseGenerator(length=length, entropy=entropy, **kwds)
    return generator(returns)
-
- #=============================================================================
- # strength measurement
- #
- # NOTE:
- # for a little while, had rough draft of password strength measurement alg here.
- # but not sure if there's value in yet another measurement algorithm,
- # that's not just duplicating the effort of libraries like zxcvbn.
- # may revive it later, but for now, leaving some refs to others out there:
- # * NIST 800-63 has simple alg
- # * zxcvbn (https://tech.dropbox.com/2012/04/zxcvbn-realistic-password-strength-estimation/)
- # might also be good, and has approach similar to composite approach i was already thinking about,
- # but much more well thought out.
- # * passfault (https://github.com/c-a-m/passfault) looks thorough,
- # but may have licensing issues, plus porting to python looks like very big job :(
- # * give a look at running things through zlib - might be able to cheaply
- # catch extra redundancies.
- #=============================================================================
-
- #=============================================================================
- # eof
- #=============================================================================
|