You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

180 lines
5.9 KiB

  # Shared XPath evaluator selecting every element that has a non-empty 'id'
  # attribute.  It is created lazily by the first call to XMLID().
  cdef object _find_id_attributes


  def XMLID(text, parser=None, *, base_url=None):
      """XMLID(text, parser=None, base_url=None)

      Parse the text and return a tuple (root node, ID dictionary).  The root
      node is the same as returned by the XML() function.  The dictionary
      maps the values of 'id' attributes (strings) to the elements that
      carry them.
      """
      cdef dict elements_by_id
      global _find_id_attributes

      # Build the module-wide XPath evaluator once and reuse it afterwards.
      if _find_id_attributes is None:
          _find_id_attributes = XPath('//*[string(@id)]')

      # ElementTree compatible implementation: parse and look for 'id' attributes
      root = XML(text, parser, base_url=base_url)

      # If the same id value occurs more than once, the element yielded last
      # by the XPath evaluation ends up in the dictionary.
      elements_by_id = {
          element.get('id'): element
          for element in _find_id_attributes(root)
      }
      return root, elements_by_id
  def XMLDTDID(text, parser=None, *, base_url=None):
      """XMLDTDID(text, parser=None, base_url=None)

      Parse the text and return a tuple (root node, ID dictionary).  The root
      node is the same as returned by the XML() function.  The dictionary
      maps the values of ID attributes, as defined by the DTD, to the
      elements that carry them.

      Note that you must not modify the XML tree if you use the ID dictionary.
      The results are undefined.
      """
      cdef _Element root = XML(text, parser, base_url=base_url)

      # xml:id spec compatible implementation: use DTD ID attributes from libxml2
      if root._doc._c_doc.ids is not NULL:
          return root, _IDDict(root)

      # The document has no ID hash table, so hand back a plain empty dict
      # instead of a proxy object.
      return root, {}
  def parseid(source, parser=None, *, base_url=None):
      """parseid(source, parser=None, base_url=None)

      Parses the source into a tuple containing an ElementTree object and an
      ID dictionary.  If no parser is provided as second argument, the default
      parser is used.  The keyword-only ``base_url`` argument is passed on to
      the document parsing step.

      Note that you must not modify the XML tree if you use the ID dictionary.
      The results are undefined.

      The ID dictionary is an ``_IDDict`` proxy; its constructor raises
      ValueError if the parsed document has no ID hash table.
      """
      cdef _Document doc = _parseDocument(source, parser, base_url)
      return _elementTreeFactory(doc, None), _IDDict(doc)
  cdef class _IDDict:
      """IDDict(self, etree)
      A dictionary-like proxy class that maps ID attributes to elements.

      The dictionary must be instantiated with the root element of a parsed XML
      document, otherwise the behaviour is undefined.  Elements and XML trees
      that were created or modified 'by hand' are not supported.

      Lookups go straight to the ID hash table of the underlying libxml2
      document.  The key list and the item list are built on first use and
      then cached on the instance; the cache is never refreshed.
      """
      # Document whose ID hash table backs this proxy.
      cdef _Document _doc
      # Lazily built list of ID names (None until first needed).
      cdef object _keys
      # Lazily built list of (ID name, element) tuples (None until first needed).
      cdef object _items

      def __cinit__(self, etree):
          """Bind the proxy to the document of ``etree``.

          Raises ValueError if that document has no ID hash table.
          """
          cdef _Document doc
          doc = _documentOrRaise(etree)
          if doc._c_doc.ids is NULL:
              raise ValueError("No ID dictionary available.")
          self._doc = doc
          self._keys = None
          self._items = None

      def copy(self):
          """Return a new proxy over the same document (caches are not shared)."""
          return _IDDict(self._doc)

      def __getitem__(self, id_name):
          """Return the element whose ID attribute has the value ``id_name``.

          Raises KeyError if the ID is not in the hash table, or if its entry
          no longer refers to an attribute that is attached to an element.
          """
          cdef tree.xmlHashTable* c_ids
          cdef tree.xmlID* c_id
          cdef xmlAttr* c_attr
          c_ids = self._doc._c_doc.ids
          id_utf = _utf8(id_name)
          c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf))
          if c_id is NULL:
              raise KeyError("key not found.")
          c_attr = c_id.attr
          if c_attr is NULL or c_attr.parent is NULL:
              raise KeyError("ID attribute not found.")
          return _elementFactory(self._doc, c_attr.parent)

      def get(self, id_name):
          """Return the element for ``id_name``.

          NOTE: unlike ``dict.get()``, this takes no default and raises
          KeyError for a missing ID, because it simply delegates to
          ``__getitem__``.  Kept as is for backwards compatibility.
          """
          return self[id_name]

      def __contains__(self, id_name):
          """Return True if ``self[id_name]`` would succeed."""
          cdef tree.xmlID* c_id
          id_utf = _utf8(id_name)
          c_id = <tree.xmlID*>tree.xmlHashLookup(
              self._doc._c_doc.ids, _xcstr(id_utf))
          # Apply the same validity checks as __getitem__() and the key/item
          # collectors, so that "name in d" agrees with d[name] and d.keys().
          return (c_id is not NULL and c_id.attr is not NULL
                  and c_id.attr.parent is not NULL)

      def has_key(self, id_name):
          """Legacy spelling of ``id_name in self``."""
          return id_name in self

      def __repr__(self):
          return repr(dict(self))

      def keys(self):
          """Return a new list of all ID names."""
          if self._keys is None:
              self._keys = self._build_keys()
          # Return a copy so that callers cannot modify the cached list.
          return self._keys[:]

      def __iter__(self):
          """Iterate over the ID names."""
          if self._keys is None:
              self._keys = self._build_keys()
          return iter(self._keys)

      def iterkeys(self):
          """Return an iterator over the ID names."""
          # Return a real iterator (one that supports next()), not the
          # mapping itself.
          return iter(self)

      def __len__(self):
          if self._keys is None:
              self._keys = self._build_keys()
          return len(self._keys)

      def items(self):
          """Return a new list of (ID name, element) tuples."""
          if self._items is None:
              self._items = self._build_items()
          # Return a copy so that callers cannot modify the cached list.
          return self._items[:]

      def iteritems(self):
          """Return an iterator over the (ID name, element) tuples."""
          if self._items is None:
              self._items = self._build_items()
          return iter(self._items)

      def values(self):
          """Return a new list of the elements referenced by the IDs."""
          if self._items is None:
              self._items = self._build_items()
          # Each cached item is a (name, element) tuple; plain indexing lets
          # Cython take care of the reference counting.
          return [item[1] for item in self._items]

      def itervalues(self):
          """Return an iterator over the elements referenced by the IDs."""
          return iter(self.values())

      cdef object _build_keys(self):
          # Scan the document's ID hash table; _collectIdHashKeys appends the
          # name of each usable entry to the list passed as user data.
          keys = []
          tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                           <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys)
          return keys

      cdef object _build_items(self):
          # Scan the document's ID hash table; _collectIdHashItemList receives
          # the (list, document) tuple as user data and appends one
          # (name, element) tuple per usable entry to the list.
          items = []
          context = (items, self._doc)
          tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                           <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context)
          return items
  cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name) noexcept:
      # Hash scanner callback: collect elements from the ID attribute hash table.
      # 'payload' is the xmlID entry, 'context' is a (list, document) tuple and
      # 'name' is the ID value of the entry.
      cdef list collected
      cdef tree.xmlID* c_id = <tree.xmlID*>payload
      # Only entries whose attribute is still attached to an element are used.
      if c_id is not NULL and c_id.attr is not NULL and c_id.attr.parent is not NULL:
          collected, document = <tuple>context
          owner = _elementFactory(document, c_id.attr.parent)
          collected.append((funicode(name), owner))
  cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name) noexcept:
      # Hash scanner callback: append the ID name of each usable entry to the
      # Python list passed in as 'collect_list'.
      cdef tree.xmlID* c_id = <tree.xmlID*>payload
      # Only entries whose attribute is still attached to an element are used.
      if c_id is not NULL and c_id.attr is not NULL and c_id.attr.parent is not NULL:
          id_names = <list>collect_list
          id_names.append(funicode(name))