You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

1178 line
44 KiB

  1. import re
  2. import sys
  3. import copy
  4. import types
  5. import inspect
  6. import keyword
  7. __all__ = ['dataclass',
  8. 'field',
  9. 'Field',
  10. 'FrozenInstanceError',
  11. 'InitVar',
  12. 'MISSING',
  13. # Helper functions.
  14. 'fields',
  15. 'asdict',
  16. 'astuple',
  17. 'make_dataclass',
  18. 'replace',
  19. 'is_dataclass',
  20. ]
  21. # Conditions for adding methods. The boxes indicate what action the
  22. # dataclass decorator takes. For all of these tables, when I talk
  23. # about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
  24. # referring to the arguments to the @dataclass decorator. When
  25. # checking if a dunder method already exists, I mean check for an
  26. # entry in the class's __dict__. I never check to see if an attribute
  27. # is defined in a base class.
# Key:
# +=========+=========================================+
# | Value   | Meaning                                 |
# +=========+=========================================+
# | <blank> | No action: no method is added.          |
# +---------+-----------------------------------------+
# | add     | Generated method is added.              |
# +---------+-----------------------------------------+
# | raise   | TypeError is raised.                    |
# +---------+-----------------------------------------+
# | None    | Attribute is set to None.               |
# +=========+=========================================+
# __init__
#
#   +--- init= parameter
#   |
#   v     |       |       |
#         |  no   |  yes  |  <--- class has __init__ in __dict__?
# +=======+=======+=======+
# | False |       |       |
# +-------+-------+-------+
# | True  | add   |       |  <- the default
# +=======+=======+=======+
# __repr__
#
#    +--- repr= parameter
#    |
#    v    |       |       |
#         |  no   |  yes  |  <--- class has __repr__ in __dict__?
# +=======+=======+=======+
# | False |       |       |
# +-------+-------+-------+
# | True  | add   |       |  <- the default
# +=======+=======+=======+
# __setattr__
# __delattr__
#
#    +--- frozen= parameter
#    |
#    v    |       |       |
#         |  no   |  yes  |  <--- class has __setattr__ or __delattr__ in __dict__?
# +=======+=======+=======+
# | False |       |       |  <- the default
# +-------+-------+-------+
# | True  | add   | raise |
# +=======+=======+=======+
# Raise because not adding these methods would break the "frozen-ness"
# of the class.
# __eq__
#
#    +--- eq= parameter
#    |
#    v    |       |       |
#         |  no   |  yes  |  <--- class has __eq__ in __dict__?
# +=======+=======+=======+
# | False |       |       |
# +-------+-------+-------+
# | True  | add   |       |  <- the default
# +=======+=======+=======+
# __lt__
# __le__
# __gt__
# __ge__
#
#    +--- order= parameter
#    |
#    v    |       |       |
#         |  no   |  yes  |  <--- class has any comparison method in __dict__?
# +=======+=======+=======+
# | False |       |       |  <- the default
# +-------+-------+-------+
# | True  | add   | raise |
# +=======+=======+=======+
# Raise because to allow this case would interfere with using
# functools.total_ordering.
# __hash__
#
#    +------------------- unsafe_hash= parameter
#    |       +----------- eq= parameter
#    |       |       +--- frozen= parameter
#    |       |       |
#    v       v       v    |        |        |
#                         |   no   |  yes   |  <--- class has explicitly defined __hash__
# +=======+=======+=======+========+========+
# | False | False | False |        |        | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | False | True  |        |        | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | True  | False | None   |        | <-- the default, not hashable
# +-------+-------+-------+--------+--------+
# | False | True  | True  | add    |        | Frozen, so hashable, allows override
# +-------+-------+-------+--------+--------+
# | True  | False | False | add    | raise  | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True  | False | True  | add    | raise  | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True  | True  | False | add    | raise  | Not frozen, but hashable
# +-------+-------+-------+--------+--------+
# | True  | True  | True  | add    | raise  | Frozen, so hashable
# +=======+=======+=======+========+========+
  127. # For boxes that are blank, __hash__ is untouched and therefore
  128. # inherited from the base class. If the base is object, then
  129. # id-based hashing is used.
  130. #
  131. # Note that a class may already have __hash__=None if it specified an
  132. # __eq__ method in the class body (not one that was created by
  133. # @dataclass).
  134. #
  135. # See _hash_action (below) for a coded version of this table.
  136. # Raised when an attempt is made to modify a frozen class.
  137. class FrozenInstanceError(AttributeError): pass
  138. # A sentinel object for default values to signal that a default
  139. # factory will be used. This is given a nice repr() which will appear
  140. # in the function signature of dataclasses' constructors.
  141. class _HAS_DEFAULT_FACTORY_CLASS:
  142. def __repr__(self):
  143. return '<factory>'
  144. _HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()
  145. # A sentinel object to detect if a parameter is supplied or not. Use
  146. # a class to give it a better repr.
  147. class _MISSING_TYPE:
  148. pass
  149. MISSING = _MISSING_TYPE()
  150. # Since most per-field metadata will be unused, create an empty
  151. # read-only proxy that can be shared among all fields.
  152. _EMPTY_METADATA = types.MappingProxyType({})
  153. # Markers for the various kinds of fields and pseudo-fields.
  154. class _FIELD_BASE:
  155. def __init__(self, name):
  156. self.name = name
  157. def __repr__(self):
  158. return self.name
  159. _FIELD = _FIELD_BASE('_FIELD')
  160. _FIELD_CLASSVAR = _FIELD_BASE('_FIELD_CLASSVAR')
  161. _FIELD_INITVAR = _FIELD_BASE('_FIELD_INITVAR')
  162. # The name of an attribute on the class where we store the Field
  163. # objects. Also used to check if a class is a Data Class.
  164. _FIELDS = '__dataclass_fields__'
  165. # The name of an attribute on the class that stores the parameters to
  166. # @dataclass.
  167. _PARAMS = '__dataclass_params__'
  168. # The name of the function, that if it exists, is called at the end of
  169. # __init__.
  170. _POST_INIT_NAME = '__post_init__'
  171. # String regex that string annotations for ClassVar or InitVar must match.
  172. # Allows "identifier.identifier[" or "identifier[".
  173. # https://bugs.python.org/issue33453 for details.
  174. _MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
  175. class _InitVarMeta(type):
  176. def __getitem__(self, params):
  177. return self
  178. class InitVar(metaclass=_InitVarMeta):
  179. pass
  180. # Instances of Field are only ever created from within this module,
  181. # and only from the field() function, although Field instances are
  182. # exposed externally as (conceptually) read-only objects.
  183. #
  184. # name and type are filled in after the fact, not in __init__.
  185. # They're not known at the time this class is instantiated, but it's
  186. # convenient if they're available later.
  187. #
  188. # When cls._FIELDS is filled in with a list of Field objects, the name
  189. # and type fields will have been populated.
  190. class Field:
  191. __slots__ = ('name',
  192. 'type',
  193. 'default',
  194. 'default_factory',
  195. 'repr',
  196. 'hash',
  197. 'init',
  198. 'compare',
  199. 'metadata',
  200. '_field_type', # Private: not to be used by user code.
  201. )
  202. def __init__(self, default, default_factory, init, repr, hash, compare,
  203. metadata):
  204. self.name = None
  205. self.type = None
  206. self.default = default
  207. self.default_factory = default_factory
  208. self.init = init
  209. self.repr = repr
  210. self.hash = hash
  211. self.compare = compare
  212. self.metadata = (_EMPTY_METADATA
  213. if metadata is None or len(metadata) == 0 else
  214. types.MappingProxyType(metadata))
  215. self._field_type = None
  216. def __repr__(self):
  217. return ('Field('
  218. f'name={self.name!r},'
  219. f'type={self.type!r},'
  220. f'default={self.default!r},'
  221. f'default_factory={self.default_factory!r},'
  222. f'init={self.init!r},'
  223. f'repr={self.repr!r},'
  224. f'hash={self.hash!r},'
  225. f'compare={self.compare!r},'
  226. f'metadata={self.metadata!r},'
  227. f'_field_type={self._field_type}'
  228. ')')
  229. # This is used to support the PEP 487 __set_name__ protocol in the
  230. # case where we're using a field that contains a descriptor as a
  231. # defaul value. For details on __set_name__, see
  232. # https://www.python.org/dev/peps/pep-0487/#implementation-details.
  233. #
  234. # Note that in _process_class, this Field object is overwritten
  235. # with the default value, so the end result is a descriptor that
  236. # had __set_name__ called on it at the right time.
  237. def __set_name__(self, owner, name):
  238. func = getattr(type(self.default), '__set_name__', None)
  239. if func:
  240. # There is a __set_name__ method on the descriptor, call
  241. # it.
  242. func(self.default, owner, name)
  243. class _DataclassParams:
  244. __slots__ = ('init',
  245. 'repr',
  246. 'eq',
  247. 'order',
  248. 'unsafe_hash',
  249. 'frozen',
  250. )
  251. def __init__(self, init, repr, eq, order, unsafe_hash, frozen):
  252. self.init = init
  253. self.repr = repr
  254. self.eq = eq
  255. self.order = order
  256. self.unsafe_hash = unsafe_hash
  257. self.frozen = frozen
  258. def __repr__(self):
  259. return ('_DataclassParams('
  260. f'init={self.init!r},'
  261. f'repr={self.repr!r},'
  262. f'eq={self.eq!r},'
  263. f'order={self.order!r},'
  264. f'unsafe_hash={self.unsafe_hash!r},'
  265. f'frozen={self.frozen!r}'
  266. ')')
  267. # This function is used instead of exposing Field creation directly,
  268. # so that a type checker can be told (via overloads) that this is a
  269. # function whose type depends on its parameters.
  270. def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
  271. hash=None, compare=True, metadata=None):
  272. """Return an object to identify dataclass fields.
  273. default is the default value of the field. default_factory is a
  274. 0-argument function called to initialize a field's value. If init
  275. is True, the field will be a parameter to the class's __init__()
  276. function. If repr is True, the field will be included in the
  277. object's repr(). If hash is True, the field will be included in
  278. the object's hash(). If compare is True, the field will be used
  279. in comparison functions. metadata, if specified, must be a
  280. mapping which is stored but not otherwise examined by dataclass.
  281. It is an error to specify both default and default_factory.
  282. """
  283. if default is not MISSING and default_factory is not MISSING:
  284. raise ValueError('cannot specify both default and default_factory')
  285. return Field(default, default_factory, init, repr, hash, compare,
  286. metadata)
  287. def _tuple_str(obj_name, fields):
  288. # Return a string representing each field of obj_name as a tuple
  289. # member. So, if fields is ['x', 'y'] and obj_name is "self",
  290. # return "(self.x,self.y)".
  291. # Special case for the 0-tuple.
  292. if not fields:
  293. return '()'
  294. # Note the trailing comma, needed if this turns out to be a 1-tuple.
  295. return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)'
  296. def _create_fn(name, args, body, *, globals=None, locals=None,
  297. return_type=MISSING):
  298. # Note that we mutate locals when exec() is called. Caller
  299. # beware! The only callers are internal to this module, so no
  300. # worries about external callers.
  301. if locals is None:
  302. locals = {}
  303. return_annotation = ''
  304. if return_type is not MISSING:
  305. locals['_return_type'] = return_type
  306. return_annotation = '->_return_type'
  307. args = ','.join(args)
  308. body = '\n'.join(f' {b}' for b in body)
  309. # Compute the text of the entire function.
  310. txt = f'def {name}({args}){return_annotation}:\n{body}'
  311. exec(txt, globals, locals)
  312. return locals[name]
  313. def _field_assign(frozen, name, value, self_name):
  314. # If we're a frozen class, then assign to our fields in __init__
  315. # via object.__setattr__. Otherwise, just use a simple
  316. # assignment.
  317. #
  318. # self_name is what "self" is called in this function: don't
  319. # hard-code "self", since that might be a field name.
  320. if frozen:
  321. return f'object.__setattr__({self_name},{name!r},{value})'
  322. return f'{self_name}.{name}={value}'
  323. def _field_init(f, frozen, globals, self_name):
  324. # Return the text of the line in the body of __init__ that will
  325. # initialize this field.
  326. default_name = f'_dflt_{f.name}'
  327. if f.default_factory is not MISSING:
  328. if f.init:
  329. # This field has a default factory. If a parameter is
  330. # given, use it. If not, call the factory.
  331. globals[default_name] = f.default_factory
  332. value = (f'{default_name}() '
  333. f'if {f.name} is _HAS_DEFAULT_FACTORY '
  334. f'else {f.name}')
  335. else:
  336. # This is a field that's not in the __init__ params, but
  337. # has a default factory function. It needs to be
  338. # initialized here by calling the factory function,
  339. # because there's no other way to initialize it.
  340. # For a field initialized with a default=defaultvalue, the
  341. # class dict just has the default value
  342. # (cls.fieldname=defaultvalue). But that won't work for a
  343. # default factory, the factory must be called in __init__
  344. # and we must assign that to self.fieldname. We can't
  345. # fall back to the class dict's value, both because it's
  346. # not set, and because it might be different per-class
  347. # (which, after all, is why we have a factory function!).
  348. globals[default_name] = f.default_factory
  349. value = f'{default_name}()'
  350. else:
  351. # No default factory.
  352. if f.init:
  353. if f.default is MISSING:
  354. # There's no default, just do an assignment.
  355. value = f.name
  356. elif f.default is not MISSING:
  357. globals[default_name] = f.default
  358. value = f.name
  359. else:
  360. # This field does not need initialization. Signify that
  361. # to the caller by returning None.
  362. return None
  363. # Only test this now, so that we can create variables for the
  364. # default. However, return None to signify that we're not going
  365. # to actually do the assignment statement for InitVars.
  366. if f._field_type == _FIELD_INITVAR:
  367. return None
  368. # Now, actually generate the field assignment.
  369. return _field_assign(frozen, f.name, value, self_name)
  370. def _init_param(f):
  371. # Return the __init__ parameter string for this field. For
  372. # example, the equivalent of 'x:int=3' (except instead of 'int',
  373. # reference a variable set to int, and instead of '3', reference a
  374. # variable set to 3).
  375. if f.default is MISSING and f.default_factory is MISSING:
  376. # There's no default, and no default_factory, just output the
  377. # variable name and type.
  378. default = ''
  379. elif f.default is not MISSING:
  380. # There's a default, this will be the name that's used to look
  381. # it up.
  382. default = f'=_dflt_{f.name}'
  383. elif f.default_factory is not MISSING:
  384. # There's a factory function. Set a marker.
  385. default = '=_HAS_DEFAULT_FACTORY'
  386. return f'{f.name}:_type_{f.name}{default}'
  387. def _init_fn(fields, frozen, has_post_init, self_name):
  388. # fields contains both real fields and InitVar pseudo-fields.
  389. # Make sure we don't have fields without defaults following fields
  390. # with defaults. This actually would be caught when exec-ing the
  391. # function source code, but catching it here gives a better error
  392. # message, and future-proofs us in case we build up the function
  393. # using ast.
  394. seen_default = False
  395. for f in fields:
  396. # Only consider fields in the __init__ call.
  397. if f.init:
  398. if not (f.default is MISSING and f.default_factory is MISSING):
  399. seen_default = True
  400. elif seen_default:
  401. raise TypeError(f'non-default argument {f.name!r} '
  402. 'follows default argument')
  403. globals = {'MISSING': MISSING,
  404. '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY}
  405. body_lines = []
  406. for f in fields:
  407. line = _field_init(f, frozen, globals, self_name)
  408. # line is None means that this field doesn't require
  409. # initialization (it's a pseudo-field). Just skip it.
  410. if line:
  411. body_lines.append(line)
  412. # Does this class have a post-init function?
  413. if has_post_init:
  414. params_str = ','.join(f.name for f in fields
  415. if f._field_type is _FIELD_INITVAR)
  416. body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')
  417. # If no body lines, use 'pass'.
  418. if not body_lines:
  419. body_lines = ['pass']
  420. locals = {f'_type_{f.name}': f.type for f in fields}
  421. return _create_fn('__init__',
  422. [self_name] + [_init_param(f) for f in fields if f.init],
  423. body_lines,
  424. locals=locals,
  425. globals=globals,
  426. return_type=None)
  427. def _repr_fn(fields):
  428. return _create_fn('__repr__',
  429. ('self',),
  430. ['return self.__class__.__qualname__ + f"(' +
  431. ', '.join([f"{f.name}={{self.{f.name}!r}}"
  432. for f in fields]) +
  433. ')"'])
  434. def _frozen_get_del_attr(cls, fields):
  435. # XXX: globals is modified on the first call to _create_fn, then
  436. # the modified version is used in the second call. Is this okay?
  437. globals = {'cls': cls,
  438. 'FrozenInstanceError': FrozenInstanceError}
  439. if fields:
  440. fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)'
  441. else:
  442. # Special case for the zero-length tuple.
  443. fields_str = '()'
  444. return (_create_fn('__setattr__',
  445. ('self', 'name', 'value'),
  446. (f'if type(self) is cls or name in {fields_str}:',
  447. ' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
  448. f'super(cls, self).__setattr__(name, value)'),
  449. globals=globals),
  450. _create_fn('__delattr__',
  451. ('self', 'name'),
  452. (f'if type(self) is cls or name in {fields_str}:',
  453. ' raise FrozenInstanceError(f"cannot delete field {name!r}")',
  454. f'super(cls, self).__delattr__(name)'),
  455. globals=globals),
  456. )
  457. def _cmp_fn(name, op, self_tuple, other_tuple):
  458. # Create a comparison function. If the fields in the object are
  459. # named 'x' and 'y', then self_tuple is the string
  460. # '(self.x,self.y)' and other_tuple is the string
  461. # '(other.x,other.y)'.
  462. return _create_fn(name,
  463. ('self', 'other'),
  464. [ 'if other.__class__ is self.__class__:',
  465. f' return {self_tuple}{op}{other_tuple}',
  466. 'return NotImplemented'])
  467. def _hash_fn(fields):
  468. self_tuple = _tuple_str('self', fields)
  469. return _create_fn('__hash__',
  470. ('self',),
  471. [f'return hash({self_tuple})'])
  472. def _is_classvar(a_type, typing):
  473. # This test uses a typing internal class, but it's the best way to
  474. # test if this is a ClassVar.
  475. return type(a_type) is typing._ClassVar
  476. def _is_initvar(a_type, dataclasses):
  477. # The module we're checking against is the module we're
  478. # currently in (dataclasses.py).
  479. return a_type is dataclasses.InitVar
  480. def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
  481. # Given a type annotation string, does it refer to a_type in
  482. # a_module? For example, when checking that annotation denotes a
  483. # ClassVar, then a_module is typing, and a_type is
  484. # typing.ClassVar.
  485. # It's possible to look up a_module given a_type, but it involves
  486. # looking in sys.modules (again!), and seems like a waste since
  487. # the caller already knows a_module.
  488. # - annotation is a string type annotation
  489. # - cls is the class that this annotation was found in
  490. # - a_module is the module we want to match
  491. # - a_type is the type in that module we want to match
  492. # - is_type_predicate is a function called with (obj, a_module)
  493. # that determines if obj is of the desired type.
  494. # Since this test does not do a local namespace lookup (and
  495. # instead only a module (global) lookup), there are some things it
  496. # gets wrong.
  497. # With string annotations, cv0 will be detected as a ClassVar:
  498. # CV = ClassVar
  499. # @dataclass
  500. # class C0:
  501. # cv0: CV
  502. # But in this example cv1 will not be detected as a ClassVar:
  503. # @dataclass
  504. # class C1:
  505. # CV = ClassVar
  506. # cv1: CV
  507. # In C1, the code in this function (_is_type) will look up "CV" in
  508. # the module and not find it, so it will not consider cv1 as a
  509. # ClassVar. This is a fairly obscure corner case, and the best
  510. # way to fix it would be to eval() the string "CV" with the
  511. # correct global and local namespaces. However that would involve
  512. # a eval() penalty for every single field of every dataclass
  513. # that's defined. It was judged not worth it.
  514. match = _MODULE_IDENTIFIER_RE.match(annotation)
  515. if match:
  516. ns = None
  517. module_name = match.group(1)
  518. if not module_name:
  519. # No module name, assume the class's module did
  520. # "from dataclasses import InitVar".
  521. ns = sys.modules.get(cls.__module__).__dict__
  522. else:
  523. # Look up module_name in the class's module.
  524. module = sys.modules.get(cls.__module__)
  525. if module and module.__dict__.get(module_name) is a_module:
  526. ns = sys.modules.get(a_type.__module__).__dict__
  527. if ns and is_type_predicate(ns.get(match.group(2)), a_module):
  528. return True
  529. return False
  530. def _get_field(cls, a_name, a_type):
  531. # Return a Field object for this field name and type. ClassVars
  532. # and InitVars are also returned, but marked as such (see
  533. # f._field_type).
  534. # If the default value isn't derived from Field, then it's only a
  535. # normal default value. Convert it to a Field().
  536. default = getattr(cls, a_name, MISSING)
  537. if isinstance(default, Field):
  538. f = default
  539. else:
  540. if isinstance(default, types.MemberDescriptorType):
  541. # This is a field in __slots__, so it has no default value.
  542. default = MISSING
  543. f = field(default=default)
  544. # Only at this point do we know the name and the type. Set them.
  545. f.name = a_name
  546. f.type = a_type
  547. # Assume it's a normal field until proven otherwise. We're next
  548. # going to decide if it's a ClassVar or InitVar, everything else
  549. # is just a normal field.
  550. f._field_type = _FIELD
  551. # In addition to checking for actual types here, also check for
  552. # string annotations. get_type_hints() won't always work for us
  553. # (see https://github.com/python/typing/issues/508 for example),
  554. # plus it's expensive and would require an eval for every stirng
  555. # annotation. So, make a best effort to see if this is a ClassVar
  556. # or InitVar using regex's and checking that the thing referenced
  557. # is actually of the correct type.
  558. # For the complete discussion, see https://bugs.python.org/issue33453
  559. # If typing has not been imported, then it's impossible for any
  560. # annotation to be a ClassVar. So, only look for ClassVar if
  561. # typing has been imported by any module (not necessarily cls's
  562. # module).
  563. typing = sys.modules.get('typing')
  564. if typing:
  565. if (_is_classvar(a_type, typing)
  566. or (isinstance(f.type, str)
  567. and _is_type(f.type, cls, typing, typing.ClassVar,
  568. _is_classvar))):
  569. f._field_type = _FIELD_CLASSVAR
  570. # If the type is InitVar, or if it's a matching string annotation,
  571. # then it's an InitVar.
  572. if f._field_type is _FIELD:
  573. # The module we're checking against is the module we're
  574. # currently in (dataclasses.py).
  575. dataclasses = sys.modules[__name__]
  576. if (_is_initvar(a_type, dataclasses)
  577. or (isinstance(f.type, str)
  578. and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
  579. _is_initvar))):
  580. f._field_type = _FIELD_INITVAR
  581. # Validations for individual fields. This is delayed until now,
  582. # instead of in the Field() constructor, since only here do we
  583. # know the field name, which allows for better error reporting.
  584. # Special restrictions for ClassVar and InitVar.
  585. if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
  586. if f.default_factory is not MISSING:
  587. raise TypeError(f'field {f.name} cannot have a '
  588. 'default factory')
  589. # Should I check for other field settings? default_factory
  590. # seems the most serious to check for. Maybe add others. For
  591. # example, how about init=False (or really,
  592. # init=<not-the-default-init-value>)? It makes no sense for
  593. # ClassVar and InitVar to specify init=<anything>.
  594. # For real fields, disallow mutable defaults for known types.
  595. if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)):
  596. raise ValueError(f'mutable default {type(f.default)} for field '
  597. f'{f.name} is not allowed: use default_factory')
  598. return f
  599. def _set_new_attribute(cls, name, value):
  600. # Never overwrites an existing attribute. Returns True if the
  601. # attribute already exists.
  602. if name in cls.__dict__:
  603. return True
  604. setattr(cls, name, value)
  605. return False
  606. # Decide if/how we're going to create a hash function. Key is
  607. # (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
  608. # take. The common case is to do nothing, so instead of providing a
  609. # function that is a no-op, use None to signify that.
  610. def _hash_set_none(cls, fields):
  611. return None
  612. def _hash_add(cls, fields):
  613. flds = [f for f in fields if (f.compare if f.hash is None else f.hash)]
  614. return _hash_fn(flds)
  615. def _hash_exception(cls, fields):
  616. # Raise an exception.
  617. raise TypeError(f'Cannot overwrite attribute __hash__ '
  618. f'in class {cls.__name__}')
  619. #
  620. # +-------------------------------------- unsafe_hash?
  621. # | +------------------------------- eq?
  622. # | | +------------------------ frozen?
  623. # | | | +---------------- has-explicit-hash?
  624. # | | | |
  625. # | | | | +------- action
  626. # | | | | |
  627. # v v v v v
  628. _hash_action = {(False, False, False, False): None,
  629. (False, False, False, True ): None,
  630. (False, False, True, False): None,
  631. (False, False, True, True ): None,
  632. (False, True, False, False): _hash_set_none,
  633. (False, True, False, True ): None,
  634. (False, True, True, False): _hash_add,
  635. (False, True, True, True ): None,
  636. (True, False, False, False): _hash_add,
  637. (True, False, False, True ): _hash_exception,
  638. (True, False, True, False): _hash_add,
  639. (True, False, True, True ): _hash_exception,
  640. (True, True, False, False): _hash_add,
  641. (True, True, False, True ): _hash_exception,
  642. (True, True, True, False): _hash_add,
  643. (True, True, True, True ): _hash_exception,
  644. }
  645. # See https://bugs.python.org/issue32929#msg312829 for an if-statement
  646. # version of this table.
  647. def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen):
  648. # Now that dicts retain insertion order, there's no reason to use
  649. # an ordered dict. I am leveraging that ordering here, because
  650. # derived class fields overwrite base class fields, but the order
  651. # is defined by the base class, which is found first.
  652. fields = {}
  653. setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order,
  654. unsafe_hash, frozen))
  655. # Find our base classes in reverse MRO order, and exclude
  656. # ourselves. In reversed order so that more derived classes
  657. # override earlier field definitions in base classes. As long as
  658. # we're iterating over them, see if any are frozen.
  659. any_frozen_base = False
  660. has_dataclass_bases = False
  661. for b in cls.__mro__[-1:0:-1]:
  662. # Only process classes that have been processed by our
  663. # decorator. That is, they have a _FIELDS attribute.
  664. base_fields = getattr(b, _FIELDS, None)
  665. if base_fields:
  666. has_dataclass_bases = True
  667. for f in base_fields.values():
  668. fields[f.name] = f
  669. if getattr(b, _PARAMS).frozen:
  670. any_frozen_base = True
  671. # Annotations that are defined in this class (not in base
  672. # classes). If __annotations__ isn't present, then this class
  673. # adds no new annotations. We use this to compute fields that are
  674. # added by this class.
  675. #
  676. # Fields are found from cls_annotations, which is guaranteed to be
  677. # ordered. Default values are from class attributes, if a field
  678. # has a default. If the default value is a Field(), then it
  679. # contains additional info beyond (and possibly including) the
  680. # actual default value. Pseudo-fields ClassVars and InitVars are
  681. # included, despite the fact that they're not real fields. That's
  682. # dealt with later.
  683. cls_annotations = cls.__dict__.get('__annotations__', {})
  684. # Now find fields in our class. While doing so, validate some
  685. # things, and set the default values (as class attributes) where
  686. # we can.
  687. cls_fields = [_get_field(cls, name, type)
  688. for name, type in cls_annotations.items()]
  689. for f in cls_fields:
  690. fields[f.name] = f
  691. # If the class attribute (which is the default value for this
  692. # field) exists and is of type 'Field', replace it with the
  693. # real default. This is so that normal class introspection
  694. # sees a real default value, not a Field.
  695. if isinstance(getattr(cls, f.name, None), Field):
  696. if f.default is MISSING:
  697. # If there's no default, delete the class attribute.
  698. # This happens if we specify field(repr=False), for
  699. # example (that is, we specified a field object, but
  700. # no default value). Also if we're using a default
  701. # factory. The class attribute should not be set at
  702. # all in the post-processed class.
  703. delattr(cls, f.name)
  704. else:
  705. setattr(cls, f.name, f.default)
  706. # Do we have any Field members that don't also have annotations?
  707. for name, value in cls.__dict__.items():
  708. if isinstance(value, Field) and not name in cls_annotations:
  709. raise TypeError(f'{name!r} is a field but has no type annotation')
  710. # Check rules that apply if we are derived from any dataclasses.
  711. if has_dataclass_bases:
  712. # Raise an exception if any of our bases are frozen, but we're not.
  713. if any_frozen_base and not frozen:
  714. raise TypeError('cannot inherit non-frozen dataclass from a '
  715. 'frozen one')
  716. # Raise an exception if we're frozen, but none of our bases are.
  717. if not any_frozen_base and frozen:
  718. raise TypeError('cannot inherit frozen dataclass from a '
  719. 'non-frozen one')
  720. # Remember all of the fields on our class (including bases). This
  721. # also marks this class as being a dataclass.
  722. setattr(cls, _FIELDS, fields)
  723. # Was this class defined with an explicit __hash__? Note that if
  724. # __eq__ is defined in this class, then python will automatically
  725. # set __hash__ to None. This is a heuristic, as it's possible
  726. # that such a __hash__ == None was not auto-generated, but it
  727. # close enough.
  728. class_hash = cls.__dict__.get('__hash__', MISSING)
  729. has_explicit_hash = not (class_hash is MISSING or
  730. (class_hash is None and '__eq__' in cls.__dict__))
  731. # If we're generating ordering methods, we must be generating the
  732. # eq methods.
  733. if order and not eq:
  734. raise ValueError('eq must be true if order is true')
  735. if init:
  736. # Does this class have a post-init function?
  737. has_post_init = hasattr(cls, _POST_INIT_NAME)
  738. # Include InitVars and regular fields (so, not ClassVars).
  739. flds = [f for f in fields.values()
  740. if f._field_type in (_FIELD, _FIELD_INITVAR)]
  741. _set_new_attribute(cls, '__init__',
  742. _init_fn(flds,
  743. frozen,
  744. has_post_init,
  745. # The name to use for the "self"
  746. # param in __init__. Use "self"
  747. # if possible.
  748. '__dataclass_self__' if 'self' in fields
  749. else 'self',
  750. ))
  751. # Get the fields as a list, and include only real fields. This is
  752. # used in all of the following methods.
  753. field_list = [f for f in fields.values() if f._field_type is _FIELD]
  754. if repr:
  755. flds = [f for f in field_list if f.repr]
  756. _set_new_attribute(cls, '__repr__', _repr_fn(flds))
  757. if eq:
  758. # Create _eq__ method. There's no need for a __ne__ method,
  759. # since python will call __eq__ and negate it.
  760. flds = [f for f in field_list if f.compare]
  761. self_tuple = _tuple_str('self', flds)
  762. other_tuple = _tuple_str('other', flds)
  763. _set_new_attribute(cls, '__eq__',
  764. _cmp_fn('__eq__', '==',
  765. self_tuple, other_tuple))
  766. if order:
  767. # Create and set the ordering methods.
  768. flds = [f for f in field_list if f.compare]
  769. self_tuple = _tuple_str('self', flds)
  770. other_tuple = _tuple_str('other', flds)
  771. for name, op in [('__lt__', '<'),
  772. ('__le__', '<='),
  773. ('__gt__', '>'),
  774. ('__ge__', '>='),
  775. ]:
  776. if _set_new_attribute(cls, name,
  777. _cmp_fn(name, op, self_tuple, other_tuple)):
  778. raise TypeError(f'Cannot overwrite attribute {name} '
  779. f'in class {cls.__name__}. Consider using '
  780. 'functools.total_ordering')
  781. if frozen:
  782. for fn in _frozen_get_del_attr(cls, field_list):
  783. if _set_new_attribute(cls, fn.__name__, fn):
  784. raise TypeError(f'Cannot overwrite attribute {fn.__name__} '
  785. f'in class {cls.__name__}')
  786. # Decide if/how we're going to create a hash function.
  787. hash_action = _hash_action[bool(unsafe_hash),
  788. bool(eq),
  789. bool(frozen),
  790. has_explicit_hash]
  791. if hash_action:
  792. # No need to call _set_new_attribute here, since by the time
  793. # we're here the overwriting is unconditional.
  794. cls.__hash__ = hash_action(cls, field_list)
  795. if not getattr(cls, '__doc__'):
  796. # Create a class doc-string.
  797. cls.__doc__ = (cls.__name__ +
  798. str(inspect.signature(cls)).replace(' -> None', ''))
  799. return cls
  800. # _cls should never be specified by keyword, so start it with an
  801. # underscore. The presence of _cls is used to detect if this
  802. # decorator is being called with parameters or not.
  803. def dataclass(_cls=None, *, init=True, repr=True, eq=True, order=False,
  804. unsafe_hash=False, frozen=False):
  805. """Returns the same class as was passed in, with dunder methods
  806. added based on the fields defined in the class.
  807. Examines PEP 526 __annotations__ to determine fields.
  808. If init is true, an __init__() method is added to the class. If
  809. repr is true, a __repr__() method is added. If order is true, rich
  810. comparison dunder methods are added. If unsafe_hash is true, a
  811. __hash__() method function is added. If frozen is true, fields may
  812. not be assigned to after instance creation.
  813. """
  814. def wrap(cls):
  815. return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen)
  816. # See if we're being called as @dataclass or @dataclass().
  817. if _cls is None:
  818. # We're called with parens.
  819. return wrap
  820. # We're called as @dataclass without parens.
  821. return wrap(_cls)
  822. def fields(class_or_instance):
  823. """Return a tuple describing the fields of this dataclass.
  824. Accepts a dataclass or an instance of one. Tuple elements are of
  825. type Field.
  826. """
  827. # Might it be worth caching this, per class?
  828. try:
  829. fields = getattr(class_or_instance, _FIELDS)
  830. except AttributeError:
  831. raise TypeError('must be called with a dataclass type or instance')
  832. # Exclude pseudo-fields. Note that fields is sorted by insertion
  833. # order, so the order of the tuple is as the fields were defined.
  834. return tuple(f for f in fields.values() if f._field_type is _FIELD)
  835. def _is_dataclass_instance(obj):
  836. """Returns True if obj is an instance of a dataclass."""
  837. return not isinstance(obj, type) and hasattr(obj, _FIELDS)
  838. def is_dataclass(obj):
  839. """Returns True if obj is a dataclass or an instance of a
  840. dataclass."""
  841. return hasattr(obj, _FIELDS)
  842. def asdict(obj, *, dict_factory=dict):
  843. """Return the fields of a dataclass instance as a new dictionary mapping
  844. field names to field values.
  845. Example usage:
  846. @dataclass
  847. class C:
  848. x: int
  849. y: int
  850. c = C(1, 2)
  851. assert asdict(c) == {'x': 1, 'y': 2}
  852. If given, 'dict_factory' will be used instead of built-in dict.
  853. The function applies recursively to field values that are
  854. dataclass instances. This will also look into built-in containers:
  855. tuples, lists, and dicts.
  856. """
  857. if not _is_dataclass_instance(obj):
  858. raise TypeError("asdict() should be called on dataclass instances")
  859. return _asdict_inner(obj, dict_factory)
  860. def _asdict_inner(obj, dict_factory):
  861. if _is_dataclass_instance(obj):
  862. result = []
  863. for f in fields(obj):
  864. value = _asdict_inner(getattr(obj, f.name), dict_factory)
  865. result.append((f.name, value))
  866. return dict_factory(result)
  867. elif isinstance(obj, (list, tuple)):
  868. return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
  869. elif isinstance(obj, dict):
  870. return type(obj)((_asdict_inner(k, dict_factory), _asdict_inner(v, dict_factory))
  871. for k, v in obj.items())
  872. else:
  873. return copy.deepcopy(obj)
  874. def astuple(obj, *, tuple_factory=tuple):
  875. """Return the fields of a dataclass instance as a new tuple of field values.
  876. Example usage::
  877. @dataclass
  878. class C:
  879. x: int
  880. y: int
  881. c = C(1, 2)
  882. assert astuple(c) == (1, 2)
  883. If given, 'tuple_factory' will be used instead of built-in tuple.
  884. The function applies recursively to field values that are
  885. dataclass instances. This will also look into built-in containers:
  886. tuples, lists, and dicts.
  887. """
  888. if not _is_dataclass_instance(obj):
  889. raise TypeError("astuple() should be called on dataclass instances")
  890. return _astuple_inner(obj, tuple_factory)
  891. def _astuple_inner(obj, tuple_factory):
  892. if _is_dataclass_instance(obj):
  893. result = []
  894. for f in fields(obj):
  895. value = _astuple_inner(getattr(obj, f.name), tuple_factory)
  896. result.append(value)
  897. return tuple_factory(result)
  898. elif isinstance(obj, (list, tuple)):
  899. return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
  900. elif isinstance(obj, dict):
  901. return type(obj)((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory))
  902. for k, v in obj.items())
  903. else:
  904. return copy.deepcopy(obj)
  905. def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
  906. repr=True, eq=True, order=False, unsafe_hash=False,
  907. frozen=False):
  908. """Return a new dynamically created dataclass.
  909. The dataclass name will be 'cls_name'. 'fields' is an iterable
  910. of either (name), (name, type) or (name, type, Field) objects. If type is
  911. omitted, use the string 'typing.Any'. Field objects are created by
  912. the equivalent of calling 'field(name, type [, Field-info])'.
  913. C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,))
  914. is equivalent to:
  915. @dataclass
  916. class C(Base):
  917. x: 'typing.Any'
  918. y: int
  919. z: int = field(init=False)
  920. For the bases and namespace parameters, see the builtin type() function.
  921. The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to
  922. dataclass().
  923. """
  924. if namespace is None:
  925. namespace = {}
  926. else:
  927. # Copy namespace since we're going to mutate it.
  928. namespace = namespace.copy()
  929. # While we're looking through the field names, validate that they
  930. # are identifiers, are not keywords, and not duplicates.
  931. seen = set()
  932. anns = {}
  933. for item in fields:
  934. if isinstance(item, str):
  935. name = item
  936. tp = 'typing.Any'
  937. elif len(item) == 2:
  938. name, tp, = item
  939. elif len(item) == 3:
  940. name, tp, spec = item
  941. namespace[name] = spec
  942. else:
  943. raise TypeError(f'Invalid field: {item!r}')
  944. if not isinstance(name, str) or not name.isidentifier():
  945. raise TypeError(f'Field names must be valid identifers: {name!r}')
  946. if keyword.iskeyword(name):
  947. raise TypeError(f'Field names must not be keywords: {name!r}')
  948. if name in seen:
  949. raise TypeError(f'Field name duplicated: {name!r}')
  950. seen.add(name)
  951. anns[name] = tp
  952. namespace['__annotations__'] = anns
  953. # We use `types.new_class()` instead of simply `type()` to allow dynamic creation
  954. # of generic dataclassses.
  955. cls = types.new_class(cls_name, bases, {}, lambda ns: ns.update(namespace))
  956. return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
  957. unsafe_hash=unsafe_hash, frozen=frozen)
  958. def replace(obj, **changes):
  959. """Return a new object replacing specified fields with new values.
  960. This is especially useful for frozen classes. Example usage:
  961. @dataclass(frozen=True)
  962. class C:
  963. x: int
  964. y: int
  965. c = C(1, 2)
  966. c1 = replace(c, x=3)
  967. assert c1.x == 3 and c1.y == 2
  968. """
  969. # We're going to mutate 'changes', but that's okay because it's a
  970. # new dict, even if called with 'replace(obj, **my_changes)'.
  971. if not _is_dataclass_instance(obj):
  972. raise TypeError("replace() should be called on dataclass instances")
  973. # It's an error to have init=False fields in 'changes'.
  974. # If a field is not in 'changes', read its value from the provided obj.
  975. for f in getattr(obj, _FIELDS).values():
  976. if not f.init:
  977. # Error if this field is specified in changes.
  978. if f.name in changes:
  979. raise ValueError(f'field {f.name} is declared with '
  980. 'init=False, it cannot be specified with '
  981. 'replace()')
  982. continue
  983. if f.name not in changes:
  984. changes[f.name] = getattr(obj, f.name)
  985. # Create the new object, which calls __init__() and
  986. # __post_init__() (if defined), using all of the init fields we've
  987. # added and/or left in 'changes'. If there are values supplied in
  988. # changes that aren't fields, this will correctly raise a
  989. # TypeError.
  990. return obj.__class__(**changes)