No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 

2150 líneas
74 KiB

  1. # cython: binding=True
  2. # cython: auto_pickle=False
  3. # cython: language_level=3
  4. """
  5. The ``lxml.objectify`` module implements a Python object API for XML.
  6. It is based on `lxml.etree`.
  7. """
  8. cimport cython
  9. from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
  10. from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
  11. from lxml.includes.tree cimport const_xmlChar, _xcstr
  12. from lxml cimport python
  13. from lxml.includes cimport tree
  14. cimport lxml.includes.etreepublic as cetree
  15. cimport libc.string as cstring_h # not to be confused with stdlib 'string'
  16. from libc.string cimport const_char
  17. from libc cimport limits
  18. __all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
  19. 'FloatElement', 'IntElement', 'NoneElement',
  20. 'NumberElement', 'ObjectPath', 'ObjectifiedDataElement',
  21. 'ObjectifiedElement', 'ObjectifyElementClassLookup',
  22. 'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement',
  23. 'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str',
  24. 'fromstring', 'getRegisteredTypes', 'makeparser', 'parse',
  25. 'pyannotate', 'pytypename', 'set_default_parser',
  26. 'set_pytype_attribute_tag', 'xsiannotate']
  27. cdef object etree
  28. from lxml import etree
  29. # initialize C-API of lxml.etree
  30. import_lxml__etree()
  31. __version__ = etree.__version__
  32. cdef object _float_is_inf, _float_is_nan
  33. from math import isinf as _float_is_inf, isnan as _float_is_nan
  34. cdef object re
  35. import re
# Exception types grouped for reuse by code outside this section.
# NOTE(review): presumably the errors a type check may raise to signal
# "value does not match this type" -- confirm against the users of this tuple.
cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)

# Bound ``match`` of a pattern that accepts names starting with two underscores
# and ending with two underscores (``match`` anchors at the start, ``$`` at the end).
cdef object is_special_method = re.compile('__.*__$').match
  38. cdef object _typename(object t):
  39. cdef const_char* c_name
  40. c_name = python._fqtypename(t)
  41. s = cstring_h.strrchr(c_name, c'.')
  42. if s is not NULL:
  43. c_name = s + 1
  44. return pyunicode(<const_xmlChar*>c_name)
# namespace/name for "pytype" hint attribute
#
# Each of namespace and attribute name is kept in three forms: a text object,
# its UTF-8 bytes object, and a C string pointer that is assigned from the
# bytes object.  All of them are (re)assigned by set_pytype_attribute_tag().
cdef object PYTYPE_NAMESPACE
cdef bytes PYTYPE_NAMESPACE_UTF8
cdef const_xmlChar* _PYTYPE_NAMESPACE

cdef object PYTYPE_ATTRIBUTE_NAME
cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8
cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME

# Public module attribute: the namespaced attribute name built by
# set_pytype_attribute_tag(); None until that function has run.
PYTYPE_ATTRIBUTE = None

# NOTE(review): presumably the pytype name used for non-leaf (tree) elements --
# its use is outside this section, confirm there.
cdef unicode TREE_PYTYPE_NAME = "TREE"
  54. cdef tuple _unicodeAndUtf8(s):
  55. return s, python.PyUnicode_AsUTF8String(s)
  56. def set_pytype_attribute_tag(attribute_tag=None):
  57. """set_pytype_attribute_tag(attribute_tag=None)
  58. Change name and namespace of the XML attribute that holds Python type
  59. information.
  60. Do not use this unless you know what you are doing.
  61. Reset by calling without argument.
  62. Default: "{http://codespeak.net/lxml/objectify/pytype}pytype"
  63. """
  64. global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
  65. global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
  66. global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8
  67. if attribute_tag is None:
  68. PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \
  69. _unicodeAndUtf8("http://codespeak.net/lxml/objectify/pytype")
  70. PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
  71. _unicodeAndUtf8("pytype")
  72. else:
  73. PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \
  74. cetree.getNsTag(attribute_tag)
  75. PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8')
  76. PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8')
  77. _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8
  78. _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
  79. PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
  80. _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
# Initialise the pytype attribute globals with their default values at import time.
set_pytype_attribute_tag()


# namespaces for XML Schema
# Each namespace is kept as text, as UTF-8 bytes, and as a C string obtained
# from the bytes object via _xcstr().
cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \
    _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema")
cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)

cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \
    _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema-instance")
cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)

# Namespaced attribute names ("{namespace}name") in the schema-instance namespace.
cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
  93. ################################################################################
  94. # Element class for the main API
cdef class ObjectifiedElement(ElementBase):
    """Main XML Element class.

    Element children are accessed as object attributes.  Multiple children
    with the same name are available through a list index.  Example::

       >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
       >>> second_c2 = root.c1.c2[1]
       >>> print(second_c2.text)
       1

    Note that you cannot (and must not) instantiate this class or its
    subclasses.
    """
    def __iter__(self):
        """Iterate over self and all siblings with the same tag.
        """
        parent = self.getparent()
        if parent is None:
            # a parentless element has no siblings: iterate over itself only
            return iter([self])
        return etree.ElementChildIterator(parent, tag=self.tag)

    def __str__(self):
        """Return the recursive dump if the module-level ``__RECURSIVE_STR``
        flag is set, otherwise the element text (or '' if there is none).
        """
        if __RECURSIVE_STR:
            return _dump(self, 0)
        else:
            return textOf(self._c_node) or ''

    # pickle support for objectified Element
    def __reduce__(self):
        """Pickle as a call to fromstring() on the serialised element."""
        return fromstring, (etree.tostring(self),)

    @property
    def text(self):
        """The element text as returned by textOf() for the underlying node."""
        return textOf(self._c_node)

    @property
    def __dict__(self):
        """A fake implementation for __dict__ to support dir() etc.

        Note that this only considers the first child with a given name.
        """
        cdef _Element child
        cdef dict children
        c_ns = tree._getNs(self._c_node)
        # restrict iteration to the element's own namespace, if it has one
        tag = "{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None
        children = {}
        for child in etree.ElementChildIterator(self, tag=tag):
            if c_ns is NULL and tree._getNs(child._c_node) is not NULL:
                # self has no namespace: skip children that do have one
                continue
            name = pyunicode(child._c_node.name)
            if name not in children:
                # keep only the first child for each name
                children[name] = child
        return children

    def __len__(self):
        """Count self and siblings with the same tag.
        """
        return _countSiblings(self._c_node)

    def countchildren(self):
        """countchildren(self)

        Return the number of children of this element, regardless of their
        name.
        """
        # copied from etree
        cdef Py_ssize_t c
        cdef tree.xmlNode* c_node
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            # only nodes accepted by tree._isElement() are counted
            if tree._isElement(c_node):
                c += 1
            c_node = c_node.next
        return c

    def getchildren(self):
        """getchildren(self)

        Returns a sequence of all direct children.  The elements are
        returned in document order.
        """
        cdef tree.xmlNode* c_node
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(cetree.elementFactory(self._doc, c_node))
            c_node = c_node.next
        return result

    def __getattr__(self, tag):
        """Return the (first) child with the given tag name.  If no namespace
        is provided, the child will be looked up in the same one as self.

        Raises AttributeError (via _lookupChildOrRaise) if there is no such child.
        """
        return _lookupChildOrRaise(self, tag)

    def __setattr__(self, tag, value):
        """Set the value of the (first) child with the given tag name.  If no
        namespace is provided, the child will be looked up in the same one as
        self.

        The names 'text' and 'pyval' are rejected with TypeError; 'tail',
        'tag' and 'base' are forwarded to the element itself instead of
        creating or replacing a child.
        """
        cdef _Element element
        # properties are looked up /after/ __setattr__, so we must emulate them
        if tag == 'text' or tag == 'pyval':
            # read-only !
            raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable"
        elif tag == 'tail':
            cetree.setTailText(self._c_node, value)
            return
        elif tag == 'tag':
            ElementBase.tag.__set__(self, value)
            return
        elif tag == 'base':
            ElementBase.base.__set__(self, value)
            return
        # anything else addresses a child: append it if missing, else replace it
        tag = _buildChildTag(self, tag)
        element = _lookupChild(self, tag)
        if element is None:
            _appendValue(self, tag, value)
        else:
            _replaceElement(element, value)

    def __delattr__(self, tag):
        """Remove the (first) child with the given tag name.

        Raises AttributeError (via _lookupChildOrRaise) if there is no such child.
        """
        child = _lookupChildOrRaise(self, tag)
        self.remove(child)

    def addattr(self, tag, value):
        """addattr(self, tag, value)

        Add a child value to the element.

        As opposed to append(), it sets a data value, not an element.
        """
        _appendValue(self, _buildChildTag(self, tag), value)

    def __getitem__(self, key):
        """Return a sibling, counting from the first child of the parent.  The
        method behaves like both a dict and a sequence.

        * If argument is an integer, returns the sibling at that position.

        * If argument is a string, does the same as getattr().  This can be
          used to provide namespaces for element lookup, or to look up
          children with special names (``text`` etc.).

        * If argument is a slice object, returns the matching slice.

        Raises IndexError if no sibling exists at the requested position.
        """
        cdef tree.xmlNode* c_self_node
        cdef tree.xmlNode* c_parent
        cdef tree.xmlNode* c_node
        cdef Py_ssize_t c_index
        if python._isString(key):
            return _lookupChildOrRaise(self, key)
        elif isinstance(key, slice):
            return list(self)[key]
        # normal item access
        c_index = key   # raises TypeError if necessary
        c_self_node = self._c_node
        c_parent = c_self_node.parent
        if c_parent is NULL:
            # no parent: the element is its own only "sibling"
            if c_index == 0 or c_index == -1:
                return self
            raise IndexError, unicode(key)
        # negative indexes search backwards from the last child
        if c_index < 0:
            c_node = c_parent.last
        else:
            c_node = c_parent.children
        c_node = _findFollowingSibling(
            c_node, tree._getNs(c_self_node), c_self_node.name, c_index)
        if c_node is NULL:
            raise IndexError, unicode(key)
        return elementFactory(self._doc, c_node)

    def __setitem__(self, key, value):
        """Set the value of a sibling, counting from the first child of the
        parent.  Implements key assignment, item assignment and slice
        assignment.

        * If argument is an integer, sets the sibling at that position.

        * If argument is a string, does the same as setattr().  This is used
          to provide namespaces for element lookup.

        * If argument is a sequence (list, tuple, etc.), assign the contained
          items to the siblings.

        Raises TypeError for index/slice assignment on an element without a
        parent, and IndexError if no sibling exists at an integer position.
        """
        cdef _Element element
        cdef tree.xmlNode* c_node
        if python._isString(key):
            # string key: same append-or-replace logic as attribute assignment
            key = _buildChildTag(self, key)
            element = _lookupChild(self, key)
            if element is None:
                _appendValue(self, key, value)
            else:
                _replaceElement(element, value)
            return

        if self._c_node.parent is NULL:
            # the 'root[i] = ...' case
            raise TypeError, "assignment to root element is invalid"

        if isinstance(key, slice):
            # slice assignment
            _setSlice(key, self, value)
        else:
            # normal index assignment
            if key < 0:
                c_node = self._c_node.parent.last
            else:
                c_node = self._c_node.parent.children
            c_node = _findFollowingSibling(
                c_node, tree._getNs(self._c_node), self._c_node.name, key)
            if c_node is NULL:
                raise IndexError, unicode(key)
            element = elementFactory(self._doc, c_node)
            _replaceElement(element, value)

    def __delitem__(self, key):
        """Remove the sibling(s) selected by an integer index or a slice.

        Raises TypeError if the element has no parent.
        """
        parent = self.getparent()
        if parent is None:
            raise TypeError, "deleting items not supported by root element"
        if isinstance(key, slice):
            # slice deletion
            del_items = list(self)[key]
            remove = parent.remove
            for el in del_items:
                remove(el)
        else:
            # normal index deletion
            sibling = self.__getitem__(key)
            parent.remove(sibling)

    def descendantpaths(self, prefix=None):
        """descendantpaths(self, prefix=None)

        Returns a list of object path expressions for all descendants.

        A non-string ``prefix`` is joined with '.' before being passed on.
        """
        if prefix is not None and not python._isString(prefix):
            prefix = '.'.join(prefix)
        return _build_descendant_paths(self._c_node, prefix)
cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    # Tell whether c_node has the name c_name and a namespace matching c_href.
    #
    # The name check compares the two pointers, not the string contents.
    # NOTE(review): this presumably relies on c_name coming from the same
    # document dictionary as the node names (see the xmlDictExists() lookup in
    # _lookupChild) -- confirm for every caller.
    #
    # c_href semantics:
    #   NULL         -> any namespace matches
    #   empty string -> only nodes without a namespace match
    #   otherwise    -> the node's namespace must compare equal (xmlStrcmp)
    if c_node.name != c_name:
        return 0
    if c_href == NULL:
        return 1
    c_node_href = tree._getNs(c_node)
    if c_node_href == NULL:
        return c_href[0] == c'\0'
    return tree.xmlStrcmp(c_node_href, c_href) == 0
  314. cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
  315. cdef tree.xmlNode* c_node
  316. cdef Py_ssize_t count
  317. c_tag = c_start_node.name
  318. c_href = tree._getNs(c_start_node)
  319. count = 1
  320. c_node = c_start_node.next
  321. while c_node is not NULL:
  322. if c_node.type == tree.XML_ELEMENT_NODE and \
  323. _tagMatches(c_node, c_href, c_tag):
  324. count += 1
  325. c_node = c_node.next
  326. c_node = c_start_node.prev
  327. while c_node is not NULL:
  328. if c_node.type == tree.XML_ELEMENT_NODE and \
  329. _tagMatches(c_node, c_href, c_tag):
  330. count += 1
  331. c_node = c_node.prev
  332. return count
cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    # Starting at c_node (inclusive), return the index-th element that matches
    # href/name according to _tagMatches(), or NULL if the chain ends first.
    #
    # index >= 0: walk forward with cetree.nextElement; 0 is the first match.
    # index <  0: walk backward with cetree.previousElement; -1 is the first
    #             match found in that direction (-1 maps to 0, -2 to 1, ...).
    # Callers in this file pass the parent's first child for non-negative
    # indexes and its last child for negative ones.
    cdef tree.xmlNode* (*next)(tree.xmlNode*)
    if index >= 0:
        next = cetree.nextElement
    else:
        index = -1 - index
        next = cetree.previousElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and \
               _tagMatches(c_node, href, name):
            # count down matches until the requested one is reached
            index = index - 1
            if index < 0:
                return c_node
        c_node = next(c_node)
    return NULL
cdef object _lookupChild(_Element parent, tag):
    # Return the first child of ``parent`` matching ``tag``, or None.
    #
    # ``tag`` is split by cetree.getNsTagWithEmptyNs() into namespace and local
    # name.  If the namespace comes back as None, the parent's namespace is
    # used, falling back to the empty string (i.e. "no namespace") when the
    # parent has none.
    cdef tree.xmlNode* c_result
    cdef tree.xmlNode* c_node
    c_node = parent._c_node
    ns, tag = cetree.getNsTagWithEmptyNs(tag)
    c_tag_len = len(<bytes> tag)
    if c_tag_len > limits.INT_MAX:
        # xmlDictExists() takes the length as a C int (see the cast below)
        return None
    # look the name up in the document's dictionary; the returned pointer is
    # what _tagMatches() later compares against node names
    c_tag = tree.xmlDictExists(
        c_node.doc.dict, _xcstr(tag), <int> c_tag_len)
    if c_tag is NULL:
        return None  # not in the hash map => not in the tree

    if ns is None:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_node) or <const_xmlChar*>''
    else:
        c_href = _xcstr(ns)
    c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0)
    if c_result is NULL:
        return None
    return elementFactory(parent._doc, c_result)
  371. cdef object _lookupChildOrRaise(_Element parent, tag):
  372. element = _lookupChild(parent, tag)
  373. if element is None:
  374. raise AttributeError, "no such child: " + _buildChildTag(parent, tag)
  375. return element
  376. cdef object _buildChildTag(_Element parent, tag):
  377. ns, tag = cetree.getNsTag(tag)
  378. c_tag = _xcstr(tag)
  379. c_href = tree._getNs(parent._c_node) if ns is None else _xcstr(ns)
  380. return cetree.namespacedNameFromNsName(c_href, c_tag)
  381. cdef _replaceElement(_Element element, value):
  382. cdef _Element new_element
  383. if isinstance(value, _Element):
  384. # deep copy the new element
  385. new_element = cetree.deepcopyNodeToDocument(
  386. element._doc, (<_Element>value)._c_node)
  387. new_element.tag = element.tag
  388. elif isinstance(value, (list, tuple)):
  389. element[:] = value
  390. return
  391. else:
  392. new_element = element.makeelement(element.tag)
  393. _setElementValue(new_element, value)
  394. element.getparent().replace(element, new_element)
  395. cdef _appendValue(_Element parent, tag, value):
  396. cdef _Element new_element
  397. if isinstance(value, _Element):
  398. # deep copy the new element
  399. new_element = cetree.deepcopyNodeToDocument(
  400. parent._doc, (<_Element>value)._c_node)
  401. new_element.tag = tag
  402. cetree.appendChildToElement(parent, new_element)
  403. elif isinstance(value, (list, tuple)):
  404. for item in value:
  405. _appendValue(parent, tag, item)
  406. else:
  407. new_element = cetree.makeElement(
  408. tag, parent._doc, None, None, None, None, None)
  409. _setElementValue(new_element, value)
  410. cetree.appendChildToElement(parent, new_element)
cdef _setElementValue(_Element element, value):
    # Store ``value`` as the content of ``element``.
    #
    # * None:     mark the element with the schema-instance "nil" attribute set
    #             to "true"; None is then also passed to setNodeText() below
    # * element:  handled entirely by _replaceElement()
    # * anything else: drop the "nil" attribute, convert the value to text and
    #             set or remove the pytype attribute depending on whether a
    #             registered type was found in _PYTYPE_DICT
    if value is None:
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
    elif isinstance(value, _Element):
        _replaceElement(element, value)
        return
    else:
        cetree.delAttributeFromNsName(
            element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
        if python._isString(value):
            # strings are stored unchanged
            pytype_name = "str"
            py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        else:
            # look the type up by its short type name; use the type's own
            # stringify() when it is registered, plain text conversion otherwise
            pytype_name = _typename(value)
            py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
            if py_type is not None:
                value = py_type.stringify(value)
            else:
                value = unicode(value)
        if py_type is not None:
            cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
        else:
            cetree.delAttributeFromNsName(
                element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    cetree.setNodeText(element._c_node, value)
cdef _setSlice(sliceobject, _Element target, items):
    # Implement slice assignment ``target[sliceobject] = items`` over the
    # siblings that share the tag of ``target``.
    #
    # Steps: collect the elements currently selected by the slice, build one
    # new element per item, replace pairwise, remove surplus old elements,
    # then insert surplus new elements.
    #
    # Raises ValueError for a zero step, and for an extended slice (step != 1)
    # whose length differs from the number of new items.
    cdef _Element parent
    cdef tree.xmlNode* c_node
    # NOTE(review): c_start is declared but never used in this function.
    cdef Py_ssize_t c_step, c_start, pos
    # collect existing slice
    if (<slice>sliceobject).step is None:
        c_step = 1
    else:
        c_step = (<slice>sliceobject).step
    if c_step == 0:
        raise ValueError, "Invalid slice"
    cdef list del_items = target[sliceobject]

    # collect new values
    new_items = []
    tag = target.tag
    for item in items:
        if isinstance(item, _Element):
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        else:
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        new_items.append(new_element)

    # sanity check - raise what a list would raise
    if c_step != 1 and len(del_items) != len(new_items):
        raise ValueError, \
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}"

    # replace existing items
    pos = 0
    parent = target.getparent()
    replace = parent.replace
    while pos < len(new_items) and pos < len(del_items):
        replace(del_items[pos], new_items[pos])
        pos += 1
    # remove leftover items
    if pos < len(del_items):
        remove = parent.remove
        while pos < len(del_items):
            remove(del_items[pos])
            pos += 1
    # append remaining new items
    if pos < len(new_items):
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            # continue right after the last element that was just put in place
            item = new_items[pos-1]
        else:
            # nothing was replaced (the slice selected no elements): locate the
            # sibling preceding the slice start to insert after it
            # NOTE(review): ``start`` is used in a comparison and a subtraction
            # here; it looks like a slice without an explicit start (None)
            # would fail at this point -- confirm and guard if so.
            if (<slice>sliceobject).start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                (<slice>sliceobject).start - 1)
            if c_node is NULL:
                # no such sibling: append everything to the end of the parent
                while pos < len(new_items):
                    cetree.appendChildToElement(parent, new_items[pos])
                    pos += 1
                return
            item = cetree.elementFactory(parent._doc, c_node)
        # chain the remaining new elements one after another
        while pos < len(new_items):
            add = item.addnext
            item = new_items[pos]
            add(item)
            pos += 1
  504. ################################################################################
  505. # Data type support in subclasses
  506. cdef class ObjectifiedDataElement(ObjectifiedElement):
  507. """This is the base class for all data type Elements. Subclasses should
  508. override the 'pyval' property and possibly the __str__ method.
  509. """
  510. @property
  511. def pyval(self):
  512. return textOf(self._c_node)
  513. def __str__(self):
  514. return textOf(self._c_node) or ''
  515. def __repr__(self):
  516. return textOf(self._c_node) or ''
  517. def _setText(self, s):
  518. """For use in subclasses only. Don't use unless you know what you are
  519. doing.
  520. """
  521. cetree.setNodeText(self._c_node, s)
cdef class NumberElement(ObjectifiedDataElement):
    """Base class for numeric data elements.

    The Python value is obtained by calling the stored parser function on the
    element text (see _parseNumber()).  Operators work on the parsed values
    of both operands, so results are plain Python values, not elements.
    """
    # callable that converts the element text into the Python value
    cdef object _parse_value

    def _setValueParser(self, function):
        """Set the function that parses the Python value from a string.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    @property
    def pyval(self):
        """The parsed Python value of the element text."""
        return _parseNumber(self)

    # --- conversions: all parse the text first ---

    def __int__(self):
        return int(_parseNumber(self))

    def __float__(self):
        return float(_parseNumber(self))

    def __complex__(self):
        return complex(_parseNumber(self))

    def __str__(self):
        return unicode(_parseNumber(self))

    def __repr__(self):
        return repr(_parseNumber(self))

    # NOTE(review): __oct__ / __hex__ are Python 2 hook names; presumably kept
    # for backwards compatibility -- confirm whether anything still calls them.
    def __oct__(self):
        return oct(_parseNumber(self))

    def __hex__(self):
        return hex(_parseNumber(self))

    # --- comparison and hashing use the parsed value ---

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        return hash(_parseNumber(self))

    # --- arithmetic: both operands go through _numericValueOf() ---

    def __add__(self, other):
        return _numericValueOf(self) + _numericValueOf(other)

    def __radd__(self, other):
        return _numericValueOf(other) + _numericValueOf(self)

    def __sub__(self, other):
        return _numericValueOf(self) - _numericValueOf(other)

    def __rsub__(self, other):
        return _numericValueOf(other) - _numericValueOf(self)

    def __mul__(self, other):
        return _numericValueOf(self) * _numericValueOf(other)

    def __rmul__(self, other):
        return _numericValueOf(other) * _numericValueOf(self)

    # NOTE(review): __div__ / __rdiv__ are Python 2 hook names; they perform
    # the same operation as __truediv__ / __rtruediv__ below.
    def __div__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rdiv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __truediv__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rtruediv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __floordiv__(self, other):
        return _numericValueOf(self) // _numericValueOf(other)

    def __rfloordiv__(self, other):
        return _numericValueOf(other) // _numericValueOf(self)

    def __mod__(self, other):
        return _numericValueOf(self) % _numericValueOf(other)

    def __rmod__(self, other):
        return _numericValueOf(other) % _numericValueOf(self)

    def __divmod__(self, other):
        return divmod(_numericValueOf(self), _numericValueOf(other))

    def __rdivmod__(self, other):
        return divmod(_numericValueOf(other), _numericValueOf(self))

    def __pow__(self, other, modulo):
        # two-argument form uses the ** operator; with a modulo, three-argument pow()
        if modulo is None:
            return _numericValueOf(self) ** _numericValueOf(other)
        else:
            return pow(_numericValueOf(self), _numericValueOf(other), modulo)

    def __rpow__(self, other, modulo):
        if modulo is None:
            return _numericValueOf(other) ** _numericValueOf(self)
        else:
            return pow(_numericValueOf(other), _numericValueOf(self), modulo)

    # --- unary operators ---

    def __neg__(self):
        return - _numericValueOf(self)

    def __pos__(self):
        return + _numericValueOf(self)

    def __abs__(self):
        return abs( _numericValueOf(self) )

    def __bool__(self):
        return bool(_numericValueOf(self))

    def __invert__(self):
        return ~ _numericValueOf(self)

    # --- shifts and bitwise operators ---

    def __lshift__(self, other):
        return _numericValueOf(self) << _numericValueOf(other)

    def __rlshift__(self, other):
        return _numericValueOf(other) << _numericValueOf(self)

    def __rshift__(self, other):
        return _numericValueOf(self) >> _numericValueOf(other)

    def __rrshift__(self, other):
        return _numericValueOf(other) >> _numericValueOf(self)

    def __and__(self, other):
        return _numericValueOf(self) & _numericValueOf(other)

    def __rand__(self, other):
        return _numericValueOf(other) & _numericValueOf(self)

    def __or__(self, other):
        return _numericValueOf(self) | _numericValueOf(other)

    def __ror__(self, other):
        return _numericValueOf(other) | _numericValueOf(self)

    def __xor__(self, other):
        return _numericValueOf(self) ^ _numericValueOf(other)

    def __rxor__(self, other):
        return _numericValueOf(other) ^ _numericValueOf(self)
cdef class IntElement(NumberElement):
    """Number element whose text is parsed with ``int``."""
    def _init(self):
        # install ``int`` as the text parser
        self._parse_value = int

    def __index__(self):
        # allows use where an integer index is required
        return int(_parseNumber(self))
cdef class FloatElement(NumberElement):
    """Number element whose text is parsed with ``float``."""
    def _init(self):
        # install ``float`` as the text parser
        self._parse_value = float
  630. cdef class StringElement(ObjectifiedDataElement):
  631. """String data class.
  632. Note that this class does *not* support the sequence protocol of strings:
  633. len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported.
  634. Instead, use the .text attribute to get a 'real' string.
  635. """
  636. @property
  637. def pyval(self):
  638. return textOf(self._c_node) or ''
  639. def __repr__(self):
  640. return repr(textOf(self._c_node) or '')
  641. def strlen(self):
  642. text = textOf(self._c_node)
  643. if text is None:
  644. return 0
  645. else:
  646. return len(text)
  647. def __bool__(self):
  648. return bool(textOf(self._c_node))
  649. def __richcmp__(self, other, int op):
  650. return _richcmpPyvals(self, other, op)
  651. def __hash__(self):
  652. return hash(textOf(self._c_node) or '')
  653. def __add__(self, other):
  654. text = _strValueOf(self)
  655. other = _strValueOf(other)
  656. return text + other
  657. def __radd__(self, other):
  658. text = _strValueOf(self)
  659. other = _strValueOf(other)
  660. return other + text
  661. def __mul__(self, other):
  662. if isinstance(self, StringElement):
  663. return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other)
  664. elif isinstance(other, StringElement):
  665. return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '')
  666. else:
  667. return NotImplemented
  668. def __rmul__(self, other):
  669. return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '')
  670. def __mod__(self, other):
  671. return (_strValueOf(self) or '') % other
  672. def __int__(self):
  673. return int(textOf(self._c_node))
  674. def __float__(self):
  675. return float(textOf(self._c_node))
  676. def __complex__(self):
  677. return complex(textOf(self._c_node))
  678. cdef class NoneElement(ObjectifiedDataElement):
  679. def __str__(self):
  680. return "None"
  681. def __repr__(self):
  682. return "None"
  683. def __bool__(self):
  684. return False
  685. def __richcmp__(self, other, int op):
  686. if other is None or self is None:
  687. return python.PyObject_RichCompare(None, None, op)
  688. if isinstance(self, NoneElement):
  689. return python.PyObject_RichCompare(None, other, op)
  690. else:
  691. return python.PyObject_RichCompare(self, None, op)
  692. def __hash__(self):
  693. return hash(None)
  694. @property
  695. def pyval(self):
  696. return None
  697. cdef class BoolElement(IntElement):
  698. """Boolean type base on string values: 'true' or 'false'.
  699. Note that this inherits from IntElement to mimic the behaviour of
  700. Python's bool type.
  701. """
  702. def _init(self):
  703. self._parse_value = _parseBool # wraps as Python callable
  704. def __bool__(self):
  705. return _parseBool(textOf(self._c_node))
  706. def __int__(self):
  707. return 0 + _parseBool(textOf(self._c_node))
  708. def __float__(self):
  709. return 0.0 + _parseBool(textOf(self._c_node))
  710. def __richcmp__(self, other, int op):
  711. return _richcmpPyvals(self, other, op)
  712. def __hash__(self):
  713. return hash(_parseBool(textOf(self._c_node)))
  714. def __str__(self):
  715. return unicode(_parseBool(textOf(self._c_node)))
  716. def __repr__(self):
  717. return repr(_parseBool(textOf(self._c_node)))
  718. @property
  719. def pyval(self):
  720. return _parseBool(textOf(self._c_node))
  721. cdef _checkBool(s):
  722. cdef int value = -1
  723. if s is not None:
  724. value = __parseBoolAsInt(s)
  725. if value == -1:
  726. raise ValueError
  727. cdef bint _parseBool(s) except -1:
  728. cdef int value
  729. if s is None:
  730. return False
  731. value = __parseBoolAsInt(s)
  732. if value == -1:
  733. raise ValueError, f"Invalid boolean value: '{s}'"
  734. return value
  735. cdef inline int __parseBoolAsInt(text) except -2:
  736. if text == 'false':
  737. return 0
  738. elif text == 'true':
  739. return 1
  740. elif text == '0':
  741. return 0
  742. elif text == '1':
  743. return 1
  744. return -1
  745. cdef object _parseNumber(NumberElement element):
  746. return element._parse_value(textOf(element._c_node))
# States of the character-by-character scanner in _checkNumber().
cdef enum NumberParserState:
    NPS_SPACE_PRE = 0    # initial state, also while reading leading whitespace
    NPS_SIGN = 1         # leading '-' or '+' seen
    NPS_DIGITS = 2       # reading the digits before any decimal point
    NPS_POINT_LEAD = 3   # '.' seen before any digit
    NPS_POINT = 4        # '.' seen after digits
    NPS_FRACTION = 5     # reading digits after the decimal point
    NPS_EXP = 6          # exponent marker 'E' seen
    NPS_EXP_SIGN = 7     # sign after the exponent marker seen
    NPS_DIGITS_EXP = 8   # reading exponent digits
    NPS_SPACE_TAIL = 9   # reading trailing whitespace
    NPS_INF1 = 20        # 'i' of "inf" seen
    NPS_INF2 = 21        # "in" seen
    NPS_INF3 = 22        # "inf" complete
    NPS_NAN1 = 23        # first 'n' of "nan" seen
    NPS_NAN2 = 24        # "na" seen
    NPS_NAN3 = 25        # "nan" complete
    NPS_ERROR = 99       # invalid input; scanning stops
# Fused type: lets a function be specialised for both bytes and unicode input.
ctypedef fused bytes_unicode:
    bytes
    unicode
cdef _checkNumber(bytes_unicode s, bint allow_float):
    # Validate that ``s`` spells a number; raise ValueError if it does not.
    #
    # The text is scanned one character at a time through the states of
    # NumberParserState.  With allow_float false, a decimal point, an exponent
    # and the inf/nan spellings are all rejected.  Leading and trailing
    # whitespace is accepted.  Nothing is returned on success.
    cdef Py_UCS4 c
    cdef NumberParserState state = NPS_SPACE_PRE

    for c in s:
        if c in '0123456789':
            if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP):
                # already inside a run of digits
                pass
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_DIGITS
            elif state in (NPS_POINT_LEAD, NPS_POINT):
                state = NPS_FRACTION
            elif state in (NPS_EXP, NPS_EXP_SIGN):
                state = NPS_DIGITS_EXP
            else:
                state = NPS_ERROR
        else:
            if c == '.':
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_POINT_LEAD
                elif state == NPS_DIGITS:
                    state = NPS_POINT
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            elif c in '-+':
                # a sign is valid at the very start or right after the exponent marker
                if state == NPS_SPACE_PRE:
                    state = NPS_SIGN
                elif state == NPS_EXP:
                    state = NPS_EXP_SIGN
                else:
                    state = NPS_ERROR
            # NOTE(review): only an uppercase 'E' is accepted as exponent
            # marker; a lowercase 'e' ends up in the final else branch and is
            # rejected.  Confirm that this is intended.
            elif c == 'E':
                if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION):
                    state = NPS_EXP
                else:
                    state = NPS_ERROR
                if not allow_float:
                    state = NPS_ERROR
            # Allow INF and NaN. XMLSchema requires case, we don't, like Python.
            elif c in 'iI':
                state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR
            elif c in 'fF':
                state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR
            elif c in 'aA':
                state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR
            elif c in 'nN':
                # Python also allows [+-]NaN, so let's accept that.
                if state in (NPS_SPACE_PRE, NPS_SIGN):
                    state = NPS_NAN1 if allow_float else NPS_ERROR
                elif state == NPS_NAN2:
                    state = NPS_NAN3
                elif state == NPS_INF1:
                    state = NPS_INF2
                else:
                    state = NPS_ERROR
            # Allow spaces around text values.
            else:
                # unicode input uses isspace(); bytes input checks an explicit
                # list of whitespace byte values
                if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20':
                    if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL):
                        pass
                    elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3):
                        state = NPS_SPACE_TAIL
                    else:
                        state = NPS_ERROR
                else:
                    state = NPS_ERROR

        if state == NPS_ERROR:
            # no transition leaves the error state, so stop early
            break

    # only states that represent a complete number are acceptable at the end
    if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL):
        raise ValueError
  839. cdef _checkInt(s):
  840. return _checkNumber(<unicode>s, allow_float=False)
  841. cdef _checkFloat(s):
  842. return _checkNumber(<unicode>s, allow_float=True)
  843. cdef object _strValueOf(obj):
  844. if python._isString(obj):
  845. return obj
  846. if isinstance(obj, _Element):
  847. return textOf((<_Element>obj)._c_node) or ''
  848. if obj is None:
  849. return ''
  850. return unicode(obj)
  851. cdef object _numericValueOf(obj):
  852. if isinstance(obj, NumberElement):
  853. return _parseNumber(<NumberElement>obj)
  854. try:
  855. # not always numeric, but Python will raise the right exception
  856. return obj.pyval
  857. except AttributeError:
  858. pass
  859. return obj
  860. cdef _richcmpPyvals(left, right, int op):
  861. left = getattr(left, 'pyval', left)
  862. right = getattr(right, 'pyval', right)
  863. return python.PyObject_RichCompare(left, right, op)
  864. ################################################################################
  865. # Python type registry
  866. cdef class PyType:
  867. """PyType(self, name, type_check, type_class, stringify=None)
  868. User defined type.
  869. Named type that contains a type check function, a type class that
  870. inherits from ObjectifiedDataElement and an optional "stringification"
  871. function. The type check must take a string as argument and raise
  872. ValueError or TypeError if it cannot handle the string value. It may be
  873. None in which case it is not considered for type guessing. For registered
  874. named types, the 'stringify' function (or unicode() if None) is used to
  875. convert a Python object with type name 'name' to the string representation
  876. stored in the XML tree.
  877. Example::
  878. PyType('int', int, MyIntClass).register()
  879. Note that the order in which types are registered matters. The first
  880. matching type will be used.
  881. """
  882. cdef readonly object name
  883. cdef readonly object type_check
  884. cdef readonly object stringify
  885. cdef object _type
  886. cdef list _schema_types
  887. def __init__(self, name, type_check, type_class, stringify=None):
  888. if isinstance(name, bytes):
  889. name = (<bytes>name).decode('ascii')
  890. elif not isinstance(name, unicode):
  891. raise TypeError, "Type name must be a string"
  892. if type_check is not None and not callable(type_check):
  893. raise TypeError, "Type check function must be callable (or None)"
  894. if name != TREE_PYTYPE_NAME and \
  895. not issubclass(type_class, ObjectifiedDataElement):
  896. raise TypeError, \
  897. "Data classes must inherit from ObjectifiedDataElement"
  898. self.name = name
  899. self._type = type_class
  900. self.type_check = type_check
  901. if stringify is None:
  902. stringify = unicode
  903. self.stringify = stringify
  904. self._schema_types = []
  905. def __repr__(self):
  906. return "PyType(%s, %s)" % (self.name, self._type.__name__)
  907. def register(self, before=None, after=None):
  908. """register(self, before=None, after=None)
  909. Register the type.
  910. The additional keyword arguments 'before' and 'after' accept a
  911. sequence of type names that must appear before/after the new type in
  912. the type list. If any of them is not currently known, it is simply
  913. ignored. Raises ValueError if the dependencies cannot be fulfilled.
  914. """
  915. if self.name == TREE_PYTYPE_NAME:
  916. raise ValueError, "Cannot register tree type"
  917. if self.type_check is not None:
  918. for item in _TYPE_CHECKS:
  919. if item[0] is self.type_check:
  920. _TYPE_CHECKS.remove(item)
  921. break
  922. entry = (self.type_check, self)
  923. first_pos = 0
  924. last_pos = -1
  925. if before or after:
  926. if before is None:
  927. before = ()
  928. elif after is None:
  929. after = ()
  930. for i, (check, pytype) in enumerate(_TYPE_CHECKS):
  931. if last_pos == -1 and pytype.name in before:
  932. last_pos = i
  933. if pytype.name in after:
  934. first_pos = i+1
  935. if last_pos == -1:
  936. _TYPE_CHECKS.append(entry)
  937. elif first_pos > last_pos:
  938. raise ValueError, "inconsistent before/after dependencies"
  939. else:
  940. _TYPE_CHECKS.insert(last_pos, entry)
  941. _PYTYPE_DICT[self.name] = self
  942. for xs_type in self._schema_types:
  943. _SCHEMA_TYPE_DICT[xs_type] = self
  944. def unregister(self):
  945. "unregister(self)"
  946. if _PYTYPE_DICT.get(self.name) is self:
  947. del _PYTYPE_DICT[self.name]
  948. for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()):
  949. if pytype is self:
  950. del _SCHEMA_TYPE_DICT[xs_type]
  951. if self.type_check is None:
  952. return
  953. try:
  954. _TYPE_CHECKS.remove( (self.type_check, self) )
  955. except ValueError:
  956. pass
  957. property xmlSchemaTypes:
  958. """The list of XML Schema datatypes this Python type maps to.
  959. Note that this must be set before registering the type!
  960. """
  961. def __get__(self):
  962. return self._schema_types
  963. def __set__(self, types):
  964. self._schema_types = list(map(unicode, types))
# Module-level type registries, filled by PyType.register():
# maps a PyType name to its PyType instance
cdef dict _PYTYPE_DICT = {}
# maps an XML Schema type name to the PyType instance handling it
cdef dict _SCHEMA_TYPE_DICT = {}
# ordered list of (type_check, PyType) pairs used for type guessing
cdef list _TYPE_CHECKS = []
  968. cdef unicode _xml_bool(value):
  969. return "true" if value else "false"
  970. cdef unicode _xml_float(value):
  971. if _float_is_inf(value):
  972. if value > 0:
  973. return "INF"
  974. return "-INF"
  975. if _float_is_nan(value):
  976. return "NaN"
  977. return unicode(repr(value))
  978. cdef _pytypename(obj):
  979. return "str" if python._isString(obj) else _typename(obj)
  980. def pytypename(obj):
  981. """pytypename(obj)
  982. Find the name of the corresponding PyType for a Python object.
  983. """
  984. return _pytypename(obj)
  985. cdef _registerPyTypes():
  986. pytype = PyType('int', _checkInt, IntElement) # wraps functions for Python
  987. pytype.xmlSchemaTypes = ("integer", "int", "short", "byte", "unsignedShort",
  988. "unsignedByte", "nonPositiveInteger",
  989. "negativeInteger", "long", "nonNegativeInteger",
  990. "unsignedLong", "unsignedInt", "positiveInteger",)
  991. pytype.register()
  992. # 'long' type just for backwards compatibility
  993. pytype = PyType('long', None, IntElement)
  994. pytype.register()
  995. pytype = PyType('float', _checkFloat, FloatElement, _xml_float) # wraps functions for Python
  996. pytype.xmlSchemaTypes = ("double", "float")
  997. pytype.register()
  998. pytype = PyType('bool', _checkBool, BoolElement, _xml_bool) # wraps functions for Python
  999. pytype.xmlSchemaTypes = ("boolean",)
  1000. pytype.register()
  1001. pytype = PyType('str', None, StringElement)
  1002. pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language",
  1003. "Name", "NCName", "ID", "IDREF", "ENTITY",
  1004. "NMTOKEN", )
  1005. pytype.register()
  1006. # since lxml 2.0
  1007. pytype = PyType('NoneType', None, NoneElement)
  1008. pytype.register()
  1009. # backwards compatibility
  1010. pytype = PyType('none', None, NoneElement)
  1011. pytype.register()
# non-registered PyType for inner tree elements
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)

# fill the type registries with the built-in types when the module is loaded
_registerPyTypes()
  1015. def getRegisteredTypes():
  1016. """getRegisteredTypes()
  1017. Returns a list of the currently registered PyType objects.
  1018. To add a new type, retrieve this list and call unregister() for all
  1019. entries. Then add the new type at a suitable position (possibly replacing
  1020. an existing one) and call register() for all entries.
  1021. This is necessary if the new type interferes with the type check functions
  1022. of existing ones (normally only int/float/bool) and must the tried before
  1023. other types. To add a type that is not yet parsable by the current type
  1024. check functions, you can simply register() it, which will append it to the
  1025. end of the type list.
  1026. """
  1027. cdef list types = []
  1028. cdef set known = set()
  1029. for check, pytype in _TYPE_CHECKS:
  1030. name = pytype.name
  1031. if name not in known:
  1032. known.add(name)
  1033. types.append(pytype)
  1034. for pytype in _PYTYPE_DICT.values():
  1035. name = pytype.name
  1036. if name not in known:
  1037. known.add(name)
  1038. types.append(pytype)
  1039. return types
  1040. cdef PyType _guessPyType(value, PyType defaulttype):
  1041. if value is None:
  1042. return None
  1043. for type_check, tested_pytype in _TYPE_CHECKS:
  1044. try:
  1045. type_check(value)
  1046. return <PyType>tested_pytype
  1047. except IGNORABLE_ERRORS:
  1048. # could not be parsed as the specified type => ignore
  1049. pass
  1050. return defaulttype
  1051. cdef object _guessElementClass(tree.xmlNode* c_node):
  1052. value = textOf(c_node)
  1053. if value is None:
  1054. return None
  1055. if value == '':
  1056. return StringElement
  1057. for type_check, pytype in _TYPE_CHECKS:
  1058. try:
  1059. type_check(value)
  1060. return (<PyType>pytype)._type
  1061. except IGNORABLE_ERRORS:
  1062. pass
  1063. return None
  1064. ################################################################################
  1065. # adapted ElementMaker supports registered PyTypes
  1066. @cython.final
  1067. @cython.internal
  1068. cdef class _ObjectifyElementMakerCaller:
  1069. cdef object _tag
  1070. cdef object _nsmap
  1071. cdef object _element_factory
  1072. cdef bint _annotate
  1073. def __call__(self, *children, **attrib):
  1074. "__call__(self, *children, **attrib)"
  1075. cdef _ObjectifyElementMakerCaller elementMaker
  1076. cdef _Element element
  1077. cdef _Element childElement
  1078. cdef bint has_children
  1079. cdef bint has_string_value
  1080. if self._element_factory is None:
  1081. element = _makeElement(self._tag, None, attrib, self._nsmap)
  1082. else:
  1083. element = self._element_factory(self._tag, attrib, self._nsmap)
  1084. pytype_name = None
  1085. has_children = False
  1086. has_string_value = False
  1087. for child in children:
  1088. if child is None:
  1089. if len(children) == 1:
  1090. cetree.setAttributeValue(
  1091. element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
  1092. elif python._isString(child):
  1093. _add_text(element, child)
  1094. has_string_value = True
  1095. elif isinstance(child, _Element):
  1096. cetree.appendChildToElement(element, <_Element>child)
  1097. has_children = True
  1098. elif isinstance(child, _ObjectifyElementMakerCaller):
  1099. elementMaker = <_ObjectifyElementMakerCaller>child
  1100. if elementMaker._element_factory is None:
  1101. cetree.makeSubElement(element, elementMaker._tag,
  1102. None, None, None, None)
  1103. else:
  1104. childElement = elementMaker._element_factory(
  1105. elementMaker._tag)
  1106. cetree.appendChildToElement(element, childElement)
  1107. has_children = True
  1108. elif isinstance(child, dict):
  1109. for name, value in child.items():
  1110. # keyword arguments in attrib take precedence
  1111. if name in attrib:
  1112. continue
  1113. pytype = _PYTYPE_DICT.get(_typename(value))
  1114. if pytype is not None:
  1115. value = (<PyType>pytype).stringify(value)
  1116. elif not python._isString(value):
  1117. value = unicode(value)
  1118. cetree.setAttributeValue(element, name, value)
  1119. else:
  1120. if pytype_name is not None:
  1121. # concatenation always makes the result a string
  1122. has_string_value = True
  1123. pytype_name = _typename(child)
  1124. pytype = _PYTYPE_DICT.get(_typename(child))
  1125. if pytype is not None:
  1126. _add_text(element, (<PyType>pytype).stringify(child))
  1127. else:
  1128. has_string_value = True
  1129. child = unicode(child)
  1130. _add_text(element, child)
  1131. if self._annotate and not has_children:
  1132. if has_string_value:
  1133. cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str")
  1134. elif pytype_name is not None:
  1135. cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
  1136. return element
  1137. cdef _add_text(_Element elem, text):
  1138. # add text to the tree in construction, either as element text or
  1139. # tail text, depending on the current tree state
  1140. cdef tree.xmlNode* c_child
  1141. c_child = cetree.findChildBackwards(elem._c_node, 0)
  1142. if c_child is not NULL:
  1143. old = cetree.tailOf(c_child)
  1144. if old is not None:
  1145. text = old + text
  1146. cetree.setTailText(c_child, text)
  1147. else:
  1148. old = cetree.textOf(elem._c_node)
  1149. if old is not None:
  1150. text = old + text
  1151. cetree.setNodeText(elem._c_node, text)
  1152. cdef class ElementMaker:
  1153. """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)
  1154. An ElementMaker that can be used for constructing trees.
  1155. Example::
  1156. >>> M = ElementMaker(annotate=False)
  1157. >>> attributes = {'class': 'par'}
  1158. >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )
  1159. >>> from lxml.etree import tostring
  1160. >>> print(tostring(html, method='html').decode('ascii'))
  1161. <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>
  1162. To create tags that are not valid Python identifiers, call the factory
  1163. directly and pass the tag name as first argument::
  1164. >>> root = M('tricky-tag', 'some text')
  1165. >>> print(root.tag)
  1166. tricky-tag
  1167. >>> print(root.text)
  1168. some text
  1169. Note that this module has a predefined ElementMaker instance called ``E``.
  1170. """
  1171. cdef object _makeelement
  1172. cdef object _namespace
  1173. cdef object _nsmap
  1174. cdef bint _annotate
  1175. cdef dict _cache
  1176. def __init__(self, *, namespace=None, nsmap=None, annotate=True,
  1177. makeelement=None):
  1178. if nsmap is None:
  1179. nsmap = _DEFAULT_NSMAP if annotate else {}
  1180. self._nsmap = nsmap
  1181. self._namespace = None if namespace is None else "{%s}" % namespace
  1182. self._annotate = annotate
  1183. if makeelement is not None:
  1184. if not callable(makeelement):
  1185. raise TypeError(
  1186. f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
  1187. self._makeelement = makeelement
  1188. else:
  1189. self._makeelement = None
  1190. self._cache = {}
  1191. @cython.final
  1192. cdef _build_element_maker(self, tag, bint caching):
  1193. cdef _ObjectifyElementMakerCaller element_maker
  1194. element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
  1195. if self._namespace is not None and tag[0] != "{":
  1196. element_maker._tag = self._namespace + tag
  1197. else:
  1198. element_maker._tag = tag
  1199. element_maker._nsmap = self._nsmap
  1200. element_maker._annotate = self._annotate
  1201. element_maker._element_factory = self._makeelement
  1202. if caching:
  1203. if len(self._cache) > 200:
  1204. self._cache.clear()
  1205. self._cache[tag] = element_maker
  1206. return element_maker
  1207. def __getattr__(self, tag):
  1208. element_maker = self._cache.get(tag)
  1209. if element_maker is None:
  1210. return self._build_element_maker(tag, caching=True)
  1211. return element_maker
  1212. def __call__(self, tag, *args, **kwargs):
  1213. element_maker = self._cache.get(tag)
  1214. if element_maker is None:
  1215. element_maker = self._build_element_maker(
  1216. tag, caching=not is_special_method(tag))
  1217. return element_maker(*args, **kwargs)
  1218. ################################################################################
  1219. # Recursive element dumping
# Module-wide switch that is set by enable_recursive_str().
cdef bint __RECURSIVE_STR = 0 # default: off
  1221. def enable_recursive_str(on=True):
  1222. """enable_recursive_str(on=True)
  1223. Enable a recursively generated tree representation for str(element),
  1224. based on objectify.dump(element).
  1225. """
  1226. global __RECURSIVE_STR
  1227. __RECURSIVE_STR = on
  1228. def dump(_Element element not None):
  1229. """dump(_Element element not None)
  1230. Return a recursively generated string representation of an element.
  1231. """
  1232. return _dump(element, 0)
  1233. cdef object _dump(_Element element, int indent):
  1234. indentstr = " " * indent
  1235. if isinstance(element, ObjectifiedDataElement):
  1236. value = repr(element)
  1237. else:
  1238. value = textOf(element._c_node)
  1239. if value is not None:
  1240. if not value.strip():
  1241. value = None
  1242. else:
  1243. value = repr(value)
  1244. result = f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n"
  1245. xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS
  1246. pytype_ns = "{%s}" % PYTYPE_NAMESPACE
  1247. for name, value in sorted(cetree.iterattributes(element, 3)):
  1248. if '{' in name:
  1249. if name == PYTYPE_ATTRIBUTE:
  1250. if value == TREE_PYTYPE_NAME:
  1251. continue
  1252. else:
  1253. name = name.replace(pytype_ns, 'py:')
  1254. name = name.replace(xsi_ns, 'xsi:')
  1255. result += f"{indentstr} * {name} = {value!r}\n"
  1256. indent += 1
  1257. for child in element.iterchildren():
  1258. result += _dump(child, indent)
  1259. if indent == 1:
  1260. return result[:-1] # strip last '\n'
  1261. else:
  1262. return result
  1263. ################################################################################
  1264. # Pickle support for objectified ElementTree
  1265. def __unpickleElementTree(data):
  1266. return etree.ElementTree(fromstring(data))
  1267. cdef _setupPickle(elementTreeReduceFunction):
  1268. import copyreg
  1269. copyreg.pickle(etree._ElementTree,
  1270. elementTreeReduceFunction, __unpickleElementTree)
  1271. def pickleReduceElementTree(obj):
  1272. return __unpickleElementTree, (etree.tostring(obj),)
# Register the reduce function when the module is loaded, then remove its
# name from the module namespace again.
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree
  1275. ################################################################################
  1276. # Element class lookup
  1277. cdef class ObjectifyElementClassLookup(ElementClassLookup):
  1278. """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
  1279. Element class lookup method that uses the objectify classes.
  1280. """
  1281. cdef object empty_data_class
  1282. cdef object tree_class
  1283. def __init__(self, tree_class=None, empty_data_class=None):
  1284. """Lookup mechanism for objectify.
  1285. The default Element classes can be replaced by passing subclasses of
  1286. ObjectifiedElement and ObjectifiedDataElement as keyword arguments.
  1287. 'tree_class' defines inner tree classes (defaults to
  1288. ObjectifiedElement), 'empty_data_class' defines the default class for
  1289. empty data elements (defaults to StringElement).
  1290. """
  1291. self._lookup_function = _lookupElementClass
  1292. if tree_class is None:
  1293. tree_class = ObjectifiedElement
  1294. self.tree_class = tree_class
  1295. if empty_data_class is None:
  1296. empty_data_class = StringElement
  1297. self.empty_data_class = empty_data_class
  1298. cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
  1299. cdef ObjectifyElementClassLookup lookup
  1300. lookup = <ObjectifyElementClassLookup>state
  1301. # if element has children => no data class
  1302. if cetree.hasChild(c_node):
  1303. return lookup.tree_class
  1304. # if element is defined as xsi:nil, return NoneElement class
  1305. if "true" == cetree.attributeValueFromNsName(
  1306. c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"):
  1307. return NoneElement
  1308. # check for Python type hint
  1309. value = cetree.attributeValueFromNsName(
  1310. c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
  1311. if value is not None:
  1312. if value == TREE_PYTYPE_NAME:
  1313. return lookup.tree_class
  1314. py_type = <PyType>_PYTYPE_DICT.get(value)
  1315. if py_type is not None:
  1316. return py_type._type
  1317. # unknown 'pyval' => try to figure it out ourself, just go on
  1318. # check for XML Schema type hint
  1319. value = cetree.attributeValueFromNsName(
  1320. c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
  1321. if value is not None:
  1322. schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
  1323. if schema_type is None and ':' in value:
  1324. prefix, value = value.split(':', 1)
  1325. schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
  1326. if schema_type is not None:
  1327. return schema_type._type
  1328. # otherwise determine class based on text content type
  1329. el_class = _guessElementClass(c_node)
  1330. if el_class is not None:
  1331. return el_class
  1332. # if element is a root node => default to tree node
  1333. if c_node.parent is NULL or not tree._isElement(c_node.parent):
  1334. return lookup.tree_class
  1335. return lookup.empty_data_class
  1336. ################################################################################
  1337. # Type annotations
  1338. cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
  1339. if pytype is None:
  1340. return None
  1341. value = textOf(c_node)
  1342. try:
  1343. pytype.type_check(value)
  1344. return pytype
  1345. except IGNORABLE_ERRORS:
  1346. # could not be parsed as the specified type => ignore
  1347. pass
  1348. return None
  1349. def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
  1350. empty_pytype=None):
  1351. """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)
  1352. Recursively annotates the elements of an XML tree with 'pytype'
  1353. attributes.
  1354. If the 'ignore_old' keyword argument is True (the default), current 'pytype'
  1355. attributes will be ignored and replaced. Otherwise, they will be checked
  1356. and only replaced if they no longer fit the current text value.
  1357. Setting the keyword argument ``ignore_xsi`` to True makes the function
  1358. additionally ignore existing ``xsi:type`` annotations. The default is to
  1359. use them as a type hint.
  1360. The default annotation of empty elements can be set with the
  1361. ``empty_pytype`` keyword argument. The default is not to annotate empty
  1362. elements. Pass 'str', for example, to make string values the default.
  1363. """
  1364. cdef _Element element
  1365. element = cetree.rootNodeOrRaise(element_or_tree)
  1366. _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
  1367. def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
  1368. empty_type=None):
  1369. """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)
  1370. Recursively annotates the elements of an XML tree with 'xsi:type'
  1371. attributes.
  1372. If the 'ignore_old' keyword argument is True (the default), current
  1373. 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be
  1374. checked and only replaced if they no longer fit the current text value.
  1375. Note that the mapping from Python types to XSI types is usually ambiguous.
  1376. Currently, only the first XSI type name in the corresponding PyType
  1377. definition will be used for annotation. Thus, you should consider naming
  1378. the widest type first if you define additional types.
  1379. Setting the keyword argument ``ignore_pytype`` to True makes the function
  1380. additionally ignore existing ``pytype`` annotations. The default is to
  1381. use them as a type hint.
  1382. The default annotation of empty elements can be set with the
  1383. ``empty_type`` keyword argument. The default is not to annotate empty
  1384. elements. Pass 'string', for example, to make string values the default.
  1385. """
  1386. cdef _Element element
  1387. element = cetree.rootNodeOrRaise(element_or_tree)
  1388. _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
  1389. def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
  1390. empty_pytype=None, empty_type=None, annotate_xsi=0,
  1391. annotate_pytype=1):
  1392. """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)
  1393. Recursively annotates the elements of an XML tree with 'xsi:type'
  1394. and/or 'py:pytype' attributes.
  1395. If the 'ignore_old' keyword argument is True (the default), current
  1396. 'py:pytype' attributes will be ignored for the type annotation. Set to False
  1397. if you want reuse existing 'py:pytype' information (iff appropriate for the
  1398. element text value).
  1399. If the 'ignore_xsi' keyword argument is False (the default), existing
  1400. 'xsi:type' attributes will be used for the type annotation, if they fit the
  1401. element text values.
  1402. Note that the mapping from Python types to XSI types is usually ambiguous.
  1403. Currently, only the first XSI type name in the corresponding PyType
  1404. definition will be used for annotation. Thus, you should consider naming
  1405. the widest type first if you define additional types.
  1406. The default 'py:pytype' annotation of empty elements can be set with the
  1407. ``empty_pytype`` keyword argument. Pass 'str', for example, to make
  1408. string values the default.
  1409. The default 'xsi:type' annotation of empty elements can be set with the
  1410. ``empty_type`` keyword argument. The default is not to annotate empty
  1411. elements. Pass 'string', for example, to make string values the default.
  1412. The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
  1413. (default: 1) control which kind(s) of annotation to use.
  1414. """
  1415. cdef _Element element
  1416. element = cetree.rootNodeOrRaise(element_or_tree)
  1417. _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi,
  1418. ignore_old, empty_type, empty_pytype)
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    # Shared implementation of annotate(), pyannotate() and xsiannotate():
    # resolves the default type for empty elements once, then annotates the
    # element nodes visited by the traversal that starts at 'element'.
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, StrType, NoneType

    # nothing to annotate => nothing to do
    if not annotate_xsi and not annotate_pytype:
        return

    # Resolve the type for empty elements.  An XML Schema type name takes
    # precedence: the pytype name is only looked at if no schema type name
    # was given.
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    # look up the frequently needed types once, outside of the loop
    StrType = <PyType>_PYTYPE_DICT.get('str')
    NoneType = <PyType>_PYTYPE_DICT.get('NoneType')

    doc = element._doc
    c_node = element._c_node
    # the BEGIN/END macro pair brackets the traversal loop body
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, StrType, NoneType)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
  1447. cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
  1448. bint annotate_xsi, bint annotate_pytype,
  1449. bint ignore_xsi, bint ignore_pytype,
  1450. empty_type_name, PyType empty_pytype,
  1451. PyType StrType, PyType NoneType) except -1:
  1452. cdef tree.xmlNs* c_ns
  1453. cdef PyType pytype = None
  1454. typename = None
  1455. istree = 0
  1456. # if element is defined as xsi:nil, represent it as None
  1457. if cetree.attributeValueFromNsName(
  1458. c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
  1459. pytype = NoneType
  1460. if pytype is None and not ignore_xsi:
  1461. # check that old xsi type value is valid
  1462. typename = cetree.attributeValueFromNsName(
  1463. c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
  1464. if typename is not None:
  1465. pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
  1466. if pytype is None and ':' in typename:
  1467. prefix, typename = typename.split(':', 1)
  1468. pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
  1469. if pytype is not None and pytype is not StrType:
  1470. # StrType does not have a typecheck but is the default
  1471. # anyway, so just accept it if given as type
  1472. # information
  1473. pytype = _check_type(c_node, pytype)
  1474. if pytype is None:
  1475. typename = None
  1476. if pytype is None and not ignore_pytype:
  1477. # check that old pytype value is valid
  1478. old_pytypename = cetree.attributeValueFromNsName(
  1479. c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
  1480. if old_pytypename is not None:
  1481. if old_pytypename == TREE_PYTYPE_NAME:
  1482. if not cetree.hasChild(c_node):
  1483. # only case where we should keep it,
  1484. # everything else is clear enough
  1485. pytype = TREE_PYTYPE
  1486. else:
  1487. if old_pytypename == 'none':
  1488. # transition from lxml 1.x
  1489. old_pytypename = "NoneType"
  1490. pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
  1491. if pytype is not None and pytype is not StrType:
  1492. # StrType does not have a typecheck but is the
  1493. # default anyway, so just accept it if given as
  1494. # type information
  1495. pytype = _check_type(c_node, pytype)
  1496. if pytype is None:
  1497. # try to guess type
  1498. if not cetree.hasChild(c_node):
  1499. # element has no children => data class
  1500. pytype = _guessPyType(textOf(c_node), StrType)
  1501. else:
  1502. istree = 1
  1503. if pytype is None:
  1504. # use default type for empty elements
  1505. if cetree.hasText(c_node):
  1506. pytype = StrType
  1507. else:
  1508. pytype = empty_pytype
  1509. if typename is None:
  1510. typename = empty_type_name
  1511. if pytype is not None:
  1512. if typename is None:
  1513. if not istree:
  1514. if pytype._schema_types:
  1515. # pytype->xsi:type is a 1:n mapping
  1516. # simply take the first
  1517. typename = pytype._schema_types[0]
  1518. elif typename not in pytype._schema_types:
  1519. typename = pytype._schema_types[0]
  1520. if annotate_xsi:
  1521. if typename is None or istree:
  1522. cetree.delAttributeFromNsName(
  1523. c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
  1524. else:
  1525. # update or create attribute
  1526. typename_utf8 = cetree.utf8(typename)
  1527. c_ns = cetree.findOrBuildNodeNsPrefix(
  1528. doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
  1529. if c_ns is not NULL:
  1530. if b':' in typename_utf8:
  1531. prefix, name = typename_utf8.split(b':', 1)
  1532. if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
  1533. typename_utf8 = name
  1534. elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
  1535. typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
  1536. elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
  1537. typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
  1538. c_ns = cetree.findOrBuildNodeNsPrefix(
  1539. doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
  1540. tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))
  1541. if annotate_pytype:
  1542. if pytype is None:
  1543. # delete attribute if it exists
  1544. cetree.delAttributeFromNsName(
  1545. c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
  1546. else:
  1547. # update or create attribute
  1548. c_ns = cetree.findOrBuildNodeNsPrefix(
  1549. doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
  1550. pytype_name = cetree.utf8(pytype.name)
  1551. tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
  1552. _xcstr(pytype_name))
  1553. if pytype is NoneType:
  1554. c_ns = cetree.findOrBuildNodeNsPrefix(
  1555. doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
  1556. tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")
  1557. return 0
  # Module-level references to the lxml.etree helpers that deannotate() calls.
  cdef object _strip_attributes
  _strip_attributes = etree.strip_attributes
  cdef object _cleanup_namespaces
  _cleanup_namespaces = etree.cleanup_namespaces
  def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
                 bint xsi_nil=False, bint cleanup_namespaces=False):
      """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

      Recursively strip type annotation attributes from an XML tree.

      Which attributes are removed is selected by keyword-only flags:

      - ``pytype`` (default: True) removes 'py:pytype' attributes.
      - ``xsi`` (default: True) removes 'xsi:type' attributes.
      - ``xsi_nil`` (default: False) removes 'xsi:nil' attributes.

      Namespace declarations are left alone by default.  Pass
      ``cleanup_namespaces=True`` to also remove namespace declarations
      that are no longer used in the tree.
      """
      # Pair each flag with the attribute it controls and keep the selected ones,
      # preserving the order pytype -> xsi:type -> xsi:nil.
      cdef list selected = [
          attr_name
          for enabled, attr_name in (
              (pytype, PYTYPE_ATTRIBUTE),
              (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
              (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
          )
          if enabled
      ]
      _strip_attributes(element_or_tree, *selected)
      if cleanup_namespaces:
          _cleanup_namespaces(element_or_tree)
  ################################################################################
  # Module level parser setup

  # Built-in default parser: removes blank text and resolves element classes
  # through ObjectifyElementClassLookup.
  cdef object __DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
  __DEFAULT_PARSER.set_element_class_lookup(ObjectifyElementClassLookup())

  # Parser currently used by the module-level factories; it starts out as the
  # built-in default and is rebound by set_default_parser().
  cdef object objectify_parser = __DEFAULT_PARSER
  def set_default_parser(new_parser = None):
      """set_default_parser(new_parser = None)

      Replace the parser that objectify uses by default, e.g. in Element()
      and fromstring().

      ``new_parser`` must be an instance of ``etree.XMLParser``; anything
      else raises a TypeError.  Calling the function without an argument
      (or with None) restores the original built-in parser.
      """
      global objectify_parser
      # Reject unsupported parser objects before touching the module state.
      if new_parser is not None and not isinstance(new_parser, etree.XMLParser):
          raise TypeError("parser must inherit from lxml.etree.XMLParser")
      objectify_parser = __DEFAULT_PARSER if new_parser is None else new_parser
  def makeparser(**kw):
      """makeparser(remove_blank_text=True, **kw)

      Build a new XML parser that is set up for objectify trees.

      All keyword arguments are forwarded to ``etree.XMLParser()``.  Unlike
      a plain XMLParser, the result removes blank text unless you pass the
      ``remove_blank_text`` boolean option explicitly.
      """
      # Only supply the default when the caller did not decide for themselves.
      kw.setdefault('remove_blank_text', True)
      new_parser = etree.XMLParser(**kw)
      new_parser.set_element_class_lookup(ObjectifyElementClassLookup())
      return new_parser
  cdef _Element _makeElement(tag, text, attrib, nsmap):
      # Create the element through the lxml.etree C-API, using whichever parser
      # is currently installed as the module default (objectify_parser).
      return cetree.makeElement(
          tag, None, objectify_parser, text, None, attrib, nsmap)
  ################################################################################
  # Module level factory functions

  # Reference to etree.fromstring(), used by fromstring() and XML() below.
  cdef object _fromstring = etree.fromstring

  # SubElement is re-exported unchanged from lxml.etree.
  SubElement = etree.SubElement
  def fromstring(xml, parser=None, *, base_url=None):
      """fromstring(xml, parser=None, base_url=None)

      Objectify specific version of the lxml.etree fromstring() function.
      It parses with the objectify default parser unless a different parser
      is passed as second argument.

      The ``base_url`` keyword argument sets the original base URL of the
      document, which is needed to resolve relative paths when looking up
      external entities (DTD, XInclude, ...).
      """
      # Fall back to the module's current default parser when none is given.
      return _fromstring(
          xml, objectify_parser if parser is None else parser, base_url=base_url)
  def XML(xml, parser=None, *, base_url=None):
      """XML(xml, parser=None, base_url=None)

      Objectify specific version of the lxml.etree XML() literal factory.
      It parses with the objectify default parser unless a different parser
      is passed as second argument.

      The ``base_url`` keyword argument sets the original base URL of the
      document, which is needed to resolve relative paths when looking up
      external entities (DTD, XInclude, ...).
      """
      # Fall back to the module's current default parser when none is given.
      effective_parser = objectify_parser if parser is None else parser
      return _fromstring(xml, effective_parser, base_url=base_url)
  # Reference to etree.parse(), used by parse() below.
  cdef object _parse = etree.parse

  def parse(f, parser=None, *, base_url=None):
      """parse(f, parser=None, base_url=None)

      Parse a file or file-like object with the objectify parser.
      A different parser can be passed as second argument.

      The ``base_url`` keyword sets a URL for the document when parsing
      from a file-like object.  It is needed to resolve relative paths
      when looking up external entities (DTD, XInclude, ...).
      """
      # Fall back to the module's current default parser when none is given.
      return _parse(
          f, objectify_parser if parser is None else parser, base_url=base_url)
  # Prefix -> namespace mapping that Element() and DataElement() fall back to
  # when the caller passes no nsmap.
  cdef dict _DEFAULT_NSMAP = {
      "py": PYTYPE_NAMESPACE,
      "xsi": XML_SCHEMA_INSTANCE_NS,
      "xsd": XML_SCHEMA_NS,
  }

  # Ready-to-use ElementMaker instance with default settings.
  E = ElementMaker()
  def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
      """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

      Objectify specific version of the lxml.etree Element() factory that
      always creates a structural (tree) element.

      Attributes are taken from the ``attrib`` mapping and from additional
      keyword arguments; keyword arguments win on conflicts.  The mapping
      passed as ``attrib`` is never modified.  The 'py:pytype' attribute of
      the new element is set to ``_pytype``, or to the tree type name if
      ``_pytype`` is None.  If ``nsmap`` is None, the default objectify
      namespace map is used.

      NOTE: requires parser based element class lookup activated in lxml.etree!
      """
      if attrib is not None:
          # Always merge into a private copy.  Aliasing the caller's mapping
          # would let the pytype assignment below leak into it - and if that
          # mapping is the live ``.attrib`` of another element, it would
          # modify that element.
          merged = dict(attrib)
          merged.update(_attributes)
          _attributes = merged
      if _pytype is None:
          _pytype = TREE_PYTYPE_NAME
      if nsmap is None:
          nsmap = _DEFAULT_NSMAP
      _attributes[PYTYPE_ATTRIBUTE] = _pytype
      return _makeElement(_tag, None, _attributes, nsmap)
  def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                  **_attributes):
      """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

      Create a new element from a Python value and XML attributes taken from
      keyword arguments or a dictionary passed as second argument.  Keyword
      arguments win over entries in ``attrib``; the mapping passed as
      ``attrib`` is never modified.

      Automatically adds a 'pytype' attribute for the Python type of the value,
      if the type can be identified.  If '_pytype' or '_xsi' are among the
      keyword arguments, they will be used instead.

      If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
      xsi:type and other attributes and nsmap are reused unless they are redefined
      in attrib and/or keyword arguments.

      Raises ValueError if ``_xsi`` is given but cannot be bound to the XML
      Schema namespace through ``nsmap``.  Errors raised by the ``type_check``
      function of a registered pytype are passed through.
      """
      if nsmap is None:
          nsmap = _DEFAULT_NSMAP
      if attrib:
          # Always merge into a private copy: the code below adds xsi:type,
          # xsi:nil and py:pytype entries, which must not leak into the
          # caller's mapping (or into another element's live ``.attrib``).
          merged = dict(attrib)
          merged.update(_attributes)
          _attributes = merged
      if isinstance(_value, ObjectifiedElement):
          if _pytype is None:
              if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP:
                  # special case: no change!
                  return _value.__copy__()
      if isinstance(_value, ObjectifiedDataElement):
          # reuse existing nsmap unless redefined in nsmap parameter
          temp = _value.nsmap
          if temp is not None and temp:
              temp = dict(temp)
              temp.update(nsmap)
              nsmap = temp
          # reuse existing attributes unless redefined in attrib/_attributes
          temp = _value.attrib
          if temp is not None and temp:
              temp = dict(temp)
              temp.update(_attributes)
              _attributes = temp
          # reuse existing xsi:type or py:pytype attributes, unless provided as
          # arguments
          if _xsi is None and _pytype is None:
              _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
              _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

      if _xsi is not None:
          # Normalise the xsi:type value so that it is bound to the XSD
          # namespace; 'name' keeps the unprefixed type name for the lookup below.
          if ':' in _xsi:
              prefix, name = _xsi.split(':', 1)
              ns = nsmap.get(prefix)
              if ns != XML_SCHEMA_NS:
                  raise ValueError("XSD types require the XSD namespace")
          elif nsmap is _DEFAULT_NSMAP:
              name = _xsi
              _xsi = 'xsd:' + _xsi
          else:
              name = _xsi
              # find the prefix that the user supplied nsmap binds to XSD
              for prefix, ns in nsmap.items():
                  if ns == XML_SCHEMA_NS:
                      if prefix is not None and prefix:
                          _xsi = prefix + ':' + _xsi
                      break
              else:
                  raise ValueError("XSD types require the XSD namespace")
          _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
          if _pytype is None:
              # allow using unregistered or even wrong xsi:type names
              py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
              if py_type is None:
                  py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
              if py_type is not None:
                  _pytype = py_type.name

      if _pytype is None:
          _pytype = _pytypename(_value)

      # Derive the text content of the new element from the value.
      if _value is None and _pytype != "str":
          _pytype = _pytype or "NoneType"
          strval = None
      elif python._isString(_value):
          strval = _value
      elif isinstance(_value, bool):
          if _value:
              strval = "true"
          else:
              strval = "false"
      else:
          py_type = <PyType>_PYTYPE_DICT.get(_pytype)
          stringify = unicode if py_type is None else py_type.stringify
          strval = stringify(_value)

      if _pytype is not None:
          if _pytype == "NoneType" or _pytype == "none":
              # None values are represented by xsi:nil instead of text content
              strval = None
              _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"
          else:
              # check if type information from arguments is valid
              py_type = <PyType>_PYTYPE_DICT.get(_pytype)
              if py_type is not None:
                  if py_type.type_check is not None:
                      py_type.type_check(strval)
                  _attributes[PYTYPE_ATTRIBUTE] = _pytype

      return _makeElement("value", strval, _attributes, nsmap)
  1781. ################################################################################
  1782. # ObjectPath
  1783. include "objectpath.pxi"