|
- """
- Register Hypothesis strategies for Pydantic custom types.
-
- This enables fully-automatic generation of test data for most Pydantic classes.
-
- Note that this module has *no* runtime impact on Pydantic itself; instead it
- is registered as a setuptools entry point and Hypothesis will import it if
- Pydantic is installed. See also:
-
- https://hypothesis.readthedocs.io/en/latest/strategies.html#registering-strategies-via-setuptools-entry-points
- https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.register_type_strategy
- https://hypothesis.readthedocs.io/en/latest/strategies.html#interaction-with-pytest-cov
- https://pydantic-docs.helpmanual.io/usage/types/#pydantic-types
-
- Note that because our motivation is to *improve user experience*, the strategies
- are always sound (never generate invalid data) but sacrifice completeness for
- maintainability (i.e. may be unable to generate some tricky but valid data).
-
- Finally, this module makes liberal use of `# type: ignore[<code>]` pragmas.
- This is because Hypothesis annotates `register_type_strategy()` with
- `(T, SearchStrategy[T])`, but in most cases we register e.g. `ConstrainedInt`
- to generate instances of the builtin `int` type which match the constraints.
- """
-
- import contextlib
- import ipaddress
- import json
- import math
- from fractions import Fraction
- from typing import Callable, Dict, Type, Union, cast, overload
-
- import hypothesis.strategies as st
-
- import pydantic
- import pydantic.color
- import pydantic.types
-
- # FilePath and DirectoryPath are explicitly unsupported, as we'd have to create
- # them on-disk, and that's unsafe in general without being told *where* to do so.
- #
- # URLs are unsupported because it's easy for users to define their own strategy for
- # "normal" URLs, and hard for us to define a general strategy which includes "weird"
- # URLs but doesn't also have unpredictable performance problems.
- #
- # conlist() and conset() are unsupported for now, because the workarounds for
- # Cython and Hypothesis to handle parametrized generic types are incompatible.
- # Once Cython can support 'normal' generics we'll revisit this.
-
- # Emails
try:
    import email_validator
except ImportError:  # pragma: no cover
    # Without email-validator installed, no email strategies are registered.
    pass
else:

    def is_valid_email(s: str) -> bool:
        """Return True when email-validator accepts ``s`` (deliverability is not checked)."""
        # Hypothesis' st.emails() occasionally generates emails like 0@A0--0.ac
        # that are invalid according to email-validator, so we filter those out.
        try:
            email_validator.validate_email(s, check_deliverability=False)
        except email_validator.EmailNotValidError:  # pragma: no cover
            return False
        else:
            return True

    # Note that these strategies deliberately stay away from any tricky Unicode
    # or other encoding issues; we're just trying to generate *something* valid.
    st.register_type_strategy(
        pydantic.EmailStr,
        st.emails().filter(is_valid_email),  # type: ignore[arg-type]
    )
    # NameEmail values are rendered as "<display name> <<address>>", where the
    # display name is one to six space-separated ASCII words.
    st.register_type_strategy(
        pydantic.NameEmail,
        st.builds(
            '{} <{}>'.format,  # type: ignore[arg-type]
            st.from_regex('[A-Za-z0-9_]+( [A-Za-z0-9_]+){0,5}', fullmatch=True),
            st.emails().filter(is_valid_email),
        ),
    )
-
# PyObject - dotted names, in this case taken from the math module.
# Underscore-prefixed attributes of the module are left out.
st.register_type_strategy(
    pydantic.PyObject,  # type: ignore[arg-type]
    st.sampled_from(
        [
            cast(pydantic.PyObject, f'math.{name}')
            for name in sorted(attr for attr in vars(math) if not attr.startswith('_'))
        ]
    ),
)

# CSS3 Colors; as name, hex, rgb(a) tuples or strings, or hsl strings.
# Start from the alternation of all of pydantic's own color patterns...
_color_regexes = '|'.join(
    (
        pydantic.color.r_hex_short,
        pydantic.color.r_hex_long,
        pydantic.color.r_rgb,
        pydantic.color.r_rgba,
        pydantic.color.r_hsl,
        pydantic.color.r_hsla,
    )
)
# ...then swap in more precise sub-patterns to avoid value-out-of-range errors.
# The replacements are applied in this fixed order: saturation/lightness
# percentages, alpha values, and finally 0-255 channel values.
_color_regexes = _color_regexes.replace(pydantic.color._r_sl, r'(?:(\d\d?(?:\.\d+)?|100(?:\.0+)?)%)')
_color_regexes = _color_regexes.replace(pydantic.color._r_alpha, r'(?:(0(?:\.\d+)?|1(?:\.0+)?|\.\d+|\d{1,2}%))')
_color_regexes = _color_regexes.replace(
    pydantic.color._r_255, r'(?:((?:\d|\d\d|[01]\d\d|2[0-4]\d|25[0-4])(?:\.\d+)?|255(?:\.0+)?))'
)
st.register_type_strategy(
    pydantic.color.Color,
    st.one_of(
        # A known color name
        st.sampled_from(sorted(pydantic.color.COLORS_BY_NAME)),
        # An (r, g, b, alpha) tuple; alpha is None, a float in [0, 1], or a percentage string
        st.tuples(
            st.integers(0, 255),
            st.integers(0, 255),
            st.integers(0, 255),
            st.none() | st.floats(0, 1) | st.floats(0, 100).map('{}%'.format),
        ),
        # Any string fully matching the tightened hex / rgb(a) / hsl(a) patterns
        st.from_regex(_color_regexes, fullmatch=True),
    ),
)
-
-
- # Card numbers, valid according to the Luhn algorithm
-
-
def add_luhn_digit(card_number: str) -> str:
    """Append the check digit that makes ``card_number`` pass pydantic's Luhn validation.

    Each of the ten possible digits is tried in ascending order and the first
    candidate accepted by ``PaymentCardNumber.validate_luhn_check_digit`` is
    returned. Any exception raised while building or validating a candidate
    is treated as "this digit does not fit".

    See https://en.wikipedia.org/wiki/Luhn_algorithm
    """
    for check_digit in '0123456789':
        try:
            candidate = card_number + check_digit
            pydantic.PaymentCardNumber.validate_luhn_check_digit(candidate)
        except Exception:
            continue
        return candidate
    raise AssertionError('Unreachable')  # pragma: no cover
-
-
# Regex alternatives for card numbers *without* the trailing Luhn check digit;
# the check digit is appended afterwards by add_luhn_digit().
card_patterns = (
    '4[0-9]{14}',  # Visa
    '5[12345][0-9]{13}',  # Mastercard
    '3[47][0-9]{12}',  # American Express
    '[0-26-9][0-9]{10,17}',  # other (incomplete to avoid overlap)
)
st.register_type_strategy(
    pydantic.PaymentCardNumber,
    st.from_regex('|'.join(card_patterns), fullmatch=True).map(add_luhn_digit),  # type: ignore[arg-type]
)
-
# UUIDs: each pydantic UUID type maps onto the Hypothesis strategy for that version.
st.register_type_strategy(pydantic.UUID1, st.uuids(version=1))
st.register_type_strategy(pydantic.UUID3, st.uuids(version=3))
st.register_type_strategy(pydantic.UUID4, st.uuids(version=4))
st.register_type_strategy(pydantic.UUID5, st.uuids(version=5))

# Secrets: arbitrary bytes / text wrapped in the corresponding secret type.
st.register_type_strategy(pydantic.SecretBytes, st.binary().map(pydantic.SecretBytes))
st.register_type_strategy(pydantic.SecretStr, st.text().map(pydantic.SecretStr))

# IP addresses, networks, and interfaces (either IPv4 or IPv6 in each case).
st.register_type_strategy(pydantic.IPvAnyAddress, st.ip_addresses())  # type: ignore[arg-type]
st.register_type_strategy(
    pydantic.IPvAnyInterface,
    st.from_type(ipaddress.IPv4Interface) | st.from_type(ipaddress.IPv6Interface),  # type: ignore[arg-type]
)
st.register_type_strategy(
    pydantic.IPvAnyNetwork,
    st.from_type(ipaddress.IPv4Network) | st.from_type(ipaddress.IPv6Network),  # type: ignore[arg-type]
)

# We hook into the con***() functions and the ConstrainedNumberMeta metaclass,
# so here we only have to register subclasses for other constrained types which
# don't go via those mechanisms. Then there are the registration hooks below.
st.register_type_strategy(pydantic.StrictBool, st.booleans())
st.register_type_strategy(pydantic.StrictStr, st.text())
-
-
# Constrained-type resolver functions
#
# For these ones, we actually want to inspect the type in order to work out a
# satisfying strategy. First up, the machinery for tracking resolver functions.

# Maps a base type to the function that builds a strategy for its subclasses.
# Lookup in _registered() walks this mapping in insertion order.
RESOLVERS: Dict[type, Callable[[type], st.SearchStrategy]] = {}  # type: ignore[type-arg]


@overload
def _registered(typ: Type[pydantic.types.T]) -> Type[pydantic.types.T]:
    ...


@overload
def _registered(typ: pydantic.types.ConstrainedNumberMeta) -> pydantic.types.ConstrainedNumberMeta:
    ...


def _registered(
    typ: Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]
) -> Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]:
    """Record ``typ`` as a defined type and register a Hypothesis strategy for it.

    This function replaces the version in `pydantic.types`, in order to
    effect the registration of new constrained types so that Hypothesis
    can generate valid examples.

    The first entry of RESOLVERS whose key is a superclass of ``typ`` supplies
    the strategy. ``typ`` is returned unchanged.

    Raises:
        NotImplementedError: if no entry in RESOLVERS covers ``typ``.
    """
    pydantic.types._DEFINED_TYPES.add(typ)
    for supertype, resolver in RESOLVERS.items():
        if issubclass(typ, supertype):
            break
    else:
        raise NotImplementedError(f'Unknown type {typ!r} has no resolver to register')  # pragma: no cover
    st.register_type_strategy(typ, resolver(typ))  # type: ignore
    return typ
-
-
def resolves(
    typ: Union[type, pydantic.types.ConstrainedNumberMeta]
) -> Callable[[Callable[..., st.SearchStrategy]], Callable[..., st.SearchStrategy]]:  # type: ignore[type-arg]
    """Return a decorator that stores the decorated function in RESOLVERS under ``typ``.

    The decorated function is returned unchanged, so it stays usable under
    its own name.
    """

    def inner(f):  # type: ignore
        # RESOLVERS is keyed by type, so the duplicate-registration guard has
        # to test the key (`typ`); testing the resolver function itself could
        # never fail and would let a second resolver silently replace the first.
        assert typ not in RESOLVERS, f'A resolver is already registered for {typ!r}'
        RESOLVERS[typ] = f
        return f

    return inner
-
-
- # Type-to-strategy resolver functions
-
-
@resolves(pydantic.JsonWrapper)
def resolve_json(cls):  # type: ignore[no-untyped-def]
    """Build a strategy producing JSON-encoded strings for ``cls``.

    The payload comes from ``cls.inner_type`` when that can be resolved
    (``None`` meaning a JSON null). If reading the attribute or resolving it
    raises, the payload falls back to arbitrary JSON-compatible data.
    The payload is then serialised by ``json.dumps`` with varying formatting
    options.
    """
    try:
        # The attribute access stays inside the `try`, so a `cls` without an
        # `inner_type` attribute also lands in the generic fallback.
        wrapped = cls.inner_type
        if wrapped is None:
            inner = st.none()
        else:
            inner = st.from_type(wrapped)
    except Exception:  # pragma: no cover
        # Generic JSON: scalars (finite floats only), nested in lists and string-keyed dicts
        finite = st.floats(allow_infinity=False, allow_nan=False)
        scalars = st.one_of(st.none(), st.booleans(), st.integers(), finite, st.text())
        inner = st.recursive(
            base=scalars,
            extend=lambda x: st.lists(x) | st.dictionaries(st.text(), x),  # type: ignore
        )
    return st.builds(
        json.dumps,
        inner,
        ensure_ascii=st.booleans(),
        indent=st.none() | st.integers(0, 16),
        sort_keys=st.booleans(),
    )
-
-
@resolves(pydantic.ConstrainedBytes)
def resolve_conbytes(cls):  # type: ignore[no-untyped-def]  # pragma: no cover
    """Build a strategy for a constrained-bytes type.

    Reads ``min_length``, ``max_length`` and ``strip_whitespace`` from ``cls``.
    Without ``strip_whitespace`` this is simply ``st.binary`` with the size
    bounds. With it, values are generated from a regex so that they neither
    start nor end with whitespace; stripping therefore leaves their length
    (and the length constraints) intact.
    """
    min_size = cls.min_length or 0
    max_size = cls.max_length
    if not cls.strip_whitespace:
        return st.binary(min_size=min_size, max_size=max_size)

    # Fun with regex to ensure we neither start nor end with whitespace.
    # The first and last byte must match `\S` (non-whitespace). `\W` would be
    # wrong here: it means "non-word character" and happily matches whitespace.
    # The two edge bytes are accounted for by shrinking the inner repeat by 2.
    inner_min = max(min_size - 2, 0)
    inner_max = '' if max_size is None else max(max_size - 2, 0)
    repeats = f'{{{inner_min},{inner_max}}}'
    if min_size >= 2:
        pattern = rf'\S.{repeats}\S'
    elif min_size == 1:
        pattern = rf'\S(.{repeats}\S)?'
    else:
        assert min_size == 0
        pattern = rf'(\S(.{repeats}\S)?)?'
    strategy = st.from_regex(pattern.encode(), fullmatch=True)

    if max_size is None:
        return strategy
    # For max_length < 2 the pattern can still produce a two-byte value, so the
    # upper bound is enforced explicitly (a no-op for larger max_length).
    return strategy.filter(lambda b: len(b) <= max_size)
-
-
@resolves(pydantic.ConstrainedDecimal)
def resolve_condecimal(cls):  # type: ignore[no-untyped-def]
    """Build a strategy for a constrained-decimal type.

    Reads ``ge``/``gt``/``le``/``lt`` and ``decimal_places`` from ``cls``.
    Exclusive bounds are passed to ``st.decimals`` as if they were inclusive
    and the boundary value itself is then removed with a filter.
    """
    min_value = cls.ge
    max_value = cls.le
    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt
    # Hypothesis includes +/-Infinity by default whenever the matching bound is
    # left open, so infinities are switched off explicitly alongside NaN.
    # NOTE(review): assumes ConstrainedDecimal validation rejects non-finite
    # values - confirm against pydantic.types.
    s = st.decimals(
        min_value,
        max_value,
        allow_nan=False,
        allow_infinity=False,
        places=cls.decimal_places,
    )
    if cls.lt is not None:
        s = s.filter(lambda d: d < cls.lt)
    if cls.gt is not None:
        s = s.filter(lambda d: cls.gt < d)
    return s
-
-
@resolves(pydantic.ConstrainedFloat)
def resolve_confloat(cls):  # type: ignore[no-untyped-def]
    """Build a strategy for a constrained-float type.

    Reads ``ge``/``gt``/``le``/``lt`` and ``multiple_of`` from ``cls``.
    Without ``multiple_of`` this is ``st.floats`` with the (possibly
    exclusive) bounds. With it, an integer multiplier ``k`` is drawn and the
    generated value is ``k * multiple_of``.
    """
    min_value = cls.ge
    max_value = cls.le
    exclude_min = False
    exclude_max = False

    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt
        exclude_min = True
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt
        exclude_max = True

    if cls.multiple_of is None:
        return st.floats(min_value, max_value, exclude_min=exclude_min, exclude_max=exclude_max, allow_nan=False)

    # Translate the float bounds into bounds on the integer multiplier k.
    if min_value is not None:
        scaled_min = min_value / cls.multiple_of
        # Exclusive bound: smallest k with k * multiple_of > bound, i.e. floor + 1.
        # (`ceil + 1` would skip a valid multiple whenever the bound is not
        # itself a multiple, e.g. gt=0.5 with multiple_of=1 must allow 1.)
        min_value = math.floor(scaled_min) + 1 if exclude_min else math.ceil(scaled_min)
    if max_value is not None:
        assert max_value >= cls.multiple_of, 'Cannot build model with max value smaller than multiple of'
        scaled_max = max_value / cls.multiple_of
        # Exclusive bound: largest k with k * multiple_of < bound, i.e. ceil - 1.
        max_value = math.ceil(scaled_max) - 1 if exclude_max else math.floor(scaled_max)

    return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)
-
-
@resolves(pydantic.ConstrainedInt)
def resolve_conint(cls):  # type: ignore[no-untyped-def]
    """Build a strategy for a constrained-int type.

    Reads ``ge``/``gt``/``le``/``lt`` and ``multiple_of`` from ``cls``.
    Exclusive bounds are turned into inclusive ones by stepping one integer
    inwards. With a ``multiple_of`` other than 1, an integer multiplier is
    drawn and scaled by ``multiple_of``.
    """
    lower = cls.ge
    upper = cls.le
    if cls.gt is not None:
        assert lower is None, 'Set `gt` or `ge`, but not both'
        lower = cls.gt + 1
    if cls.lt is not None:
        assert upper is None, 'Set `lt` or `le`, but not both'
        upper = cls.lt - 1

    if cls.multiple_of is None or cls.multiple_of == 1:
        return st.integers(lower, upper)

    # Convert the bounds on the value into bounds on the multiplier, rounding
    # inwards; Fraction keeps the division exact.
    step = Fraction(cls.multiple_of)
    if lower is not None:
        lower = math.ceil(Fraction(lower) / step)
    if upper is not None:
        upper = math.floor(Fraction(upper) / step)
    return st.integers(lower, upper).map(lambda x: x * cls.multiple_of)
-
-
@resolves(pydantic.ConstrainedStr)
def resolve_constr(cls):  # type: ignore[no-untyped-def]  # pragma: no cover
    """Build a strategy for a constrained-string type.

    Reads ``min_length``, ``max_length``, ``regex`` and ``strip_whitespace``
    from ``cls``. Plain length constraints map directly onto ``st.text``.
    Otherwise strings come from a regex (the user's, or one built here for
    ``strip_whitespace``) and are filtered by length afterwards.
    """
    min_size = cls.min_length or 0
    max_size = cls.max_length

    if cls.regex is None and not cls.strip_whitespace:
        return st.text(min_size=min_size, max_size=max_size)

    if cls.regex is not None:
        strategy = st.from_regex(cls.regex)
        if cls.strip_whitespace:
            # Only keep strings that stripping would leave untouched
            strategy = strategy.filter(lambda s: s == s.strip())
    elif cls.strip_whitespace:
        repeats = '{{{},{}}}'.format(
            min_size - 2 if min_size > 2 else 0,
            max_size - 2 if (max_size or 0) > 2 else '',
        )
        # The first and last character must be non-whitespace (`\S`); `\W`
        # would also match whitespace. `fullmatch=True` stops Hypothesis from
        # padding the match with arbitrary leading/trailing text. Together
        # these ensure stripping cannot shorten the string below `min_length`.
        if min_size >= 2:
            strategy = st.from_regex(rf'\S.{repeats}\S', fullmatch=True)
        elif min_size == 1:
            strategy = st.from_regex(rf'\S(.{repeats}\S)?', fullmatch=True)
        else:
            assert min_size == 0
            strategy = st.from_regex(rf'(\S(.{repeats}\S)?)?', fullmatch=True)

    # Enforce whatever length constraints are set on the generated string
    if min_size == 0 and max_size is None:
        return strategy
    elif max_size is None:
        return strategy.filter(lambda s: min_size <= len(s))
    return strategy.filter(lambda s: min_size <= len(s) <= max_size)
-
-
# Finally, register every type pydantic has already recorded as defined...
for typ in pydantic.types._DEFINED_TYPES:
    _registered(typ)
# ...and patch in our new function in place of pydantic's own `_registered`.
pydantic.types._registered = _registered
# `pydantic.Json` is registered with the resolver function itself rather than
# with a ready-made strategy.
st.register_type_strategy(pydantic.Json, resolve_json)
|