Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
 
 
 
 

279 wierszy
9.2 KiB

  1. # cython: language_level=2
  2. """
  3. SAX-based adapter to copy trees from/to the Python standard library.
  4. Use the `ElementTreeContentHandler` class to build an ElementTree from
  5. SAX events.
  6. Use the `ElementTreeProducer` class or the `saxify()` function to fire
  7. the SAX events of an ElementTree against a SAX ContentHandler.
  8. See https://lxml.de/sax.html
  9. """
  10. from __future__ import absolute_import
  11. from xml.sax.handler import ContentHandler
  12. from lxml import etree
  13. from lxml.etree import ElementTree, SubElement
  14. from lxml.etree import Comment, ProcessingInstruction
  15. class SaxError(etree.LxmlError):
  16. """General SAX error.
  17. """
  18. def _getNsTag(tag):
  19. if tag[0] == '{':
  20. return tuple(tag[1:].split('}', 1))
  21. else:
  22. return None, tag
  23. class ElementTreeContentHandler(ContentHandler):
  24. """Build an lxml ElementTree from SAX events.
  25. """
  26. def __init__(self, makeelement=None):
  27. ContentHandler.__init__(self)
  28. self._root = None
  29. self._root_siblings = []
  30. self._element_stack = []
  31. self._default_ns = None
  32. self._ns_mapping = { None : [None] }
  33. self._new_mappings = {}
  34. if makeelement is None:
  35. makeelement = etree.Element
  36. self._makeelement = makeelement
  37. def _get_etree(self):
  38. "Contains the generated ElementTree after parsing is finished."
  39. return ElementTree(self._root)
  40. etree = property(_get_etree, doc=_get_etree.__doc__)
  41. def setDocumentLocator(self, locator):
  42. pass
  43. def startDocument(self):
  44. pass
  45. def endDocument(self):
  46. pass
  47. def startPrefixMapping(self, prefix, uri):
  48. self._new_mappings[prefix] = uri
  49. try:
  50. self._ns_mapping[prefix].append(uri)
  51. except KeyError:
  52. self._ns_mapping[prefix] = [uri]
  53. if prefix is None:
  54. self._default_ns = uri
  55. def endPrefixMapping(self, prefix):
  56. ns_uri_list = self._ns_mapping[prefix]
  57. ns_uri_list.pop()
  58. if prefix is None:
  59. self._default_ns = ns_uri_list[-1]
  60. def _buildTag(self, ns_name_tuple):
  61. ns_uri, local_name = ns_name_tuple
  62. if ns_uri:
  63. el_tag = "{%s}%s" % ns_name_tuple
  64. elif self._default_ns:
  65. el_tag = "{%s}%s" % (self._default_ns, local_name)
  66. else:
  67. el_tag = local_name
  68. return el_tag
  69. def startElementNS(self, ns_name, qname, attributes=None):
  70. el_name = self._buildTag(ns_name)
  71. if attributes:
  72. attrs = {}
  73. try:
  74. iter_attributes = attributes.iteritems()
  75. except AttributeError:
  76. iter_attributes = attributes.items()
  77. for name_tuple, value in iter_attributes:
  78. if name_tuple[0]:
  79. attr_name = "{%s}%s" % name_tuple
  80. else:
  81. attr_name = name_tuple[1]
  82. attrs[attr_name] = value
  83. else:
  84. attrs = None
  85. element_stack = self._element_stack
  86. if self._root is None:
  87. element = self._root = \
  88. self._makeelement(el_name, attrs, self._new_mappings)
  89. if self._root_siblings and hasattr(element, 'addprevious'):
  90. for sibling in self._root_siblings:
  91. element.addprevious(sibling)
  92. del self._root_siblings[:]
  93. else:
  94. element = SubElement(element_stack[-1], el_name,
  95. attrs, self._new_mappings)
  96. element_stack.append(element)
  97. self._new_mappings.clear()
  98. def processingInstruction(self, target, data):
  99. pi = ProcessingInstruction(target, data)
  100. if self._root is None:
  101. self._root_siblings.append(pi)
  102. else:
  103. self._element_stack[-1].append(pi)
  104. def endElementNS(self, ns_name, qname):
  105. element = self._element_stack.pop()
  106. el_tag = self._buildTag(ns_name)
  107. if el_tag != element.tag:
  108. raise SaxError("Unexpected element closed: " + el_tag)
  109. def startElement(self, name, attributes=None):
  110. if attributes:
  111. attributes = dict(
  112. [((None, k), v) for k, v in attributes.items()]
  113. )
  114. self.startElementNS((None, name), name, attributes)
  115. def endElement(self, name):
  116. self.endElementNS((None, name), name)
  117. def characters(self, data):
  118. last_element = self._element_stack[-1]
  119. try:
  120. # if there already is a child element, we must append to its tail
  121. last_element = last_element[-1]
  122. last_element.tail = (last_element.tail or '') + data
  123. except IndexError:
  124. # otherwise: append to the text
  125. last_element.text = (last_element.text or '') + data
  126. ignorableWhitespace = characters
  127. class ElementTreeProducer(object):
  128. """Produces SAX events for an element and children.
  129. """
  130. def __init__(self, element_or_tree, content_handler):
  131. try:
  132. element = element_or_tree.getroot()
  133. except AttributeError:
  134. element = element_or_tree
  135. self._element = element
  136. self._content_handler = content_handler
  137. from xml.sax.xmlreader import AttributesNSImpl as attr_class
  138. self._attr_class = attr_class
  139. self._empty_attributes = attr_class({}, {})
  140. def saxify(self):
  141. self._content_handler.startDocument()
  142. element = self._element
  143. if hasattr(element, 'getprevious'):
  144. siblings = []
  145. sibling = element.getprevious()
  146. while getattr(sibling, 'tag', None) is ProcessingInstruction:
  147. siblings.append(sibling)
  148. sibling = sibling.getprevious()
  149. for sibling in siblings[::-1]:
  150. self._recursive_saxify(sibling, {})
  151. self._recursive_saxify(element, {})
  152. if hasattr(element, 'getnext'):
  153. sibling = element.getnext()
  154. while getattr(sibling, 'tag', None) is ProcessingInstruction:
  155. self._recursive_saxify(sibling, {})
  156. sibling = sibling.getnext()
  157. self._content_handler.endDocument()
  158. def _recursive_saxify(self, element, parent_nsmap):
  159. content_handler = self._content_handler
  160. tag = element.tag
  161. if tag is Comment or tag is ProcessingInstruction:
  162. if tag is ProcessingInstruction:
  163. content_handler.processingInstruction(
  164. element.target, element.text)
  165. tail = element.tail
  166. if tail:
  167. content_handler.characters(tail)
  168. return
  169. element_nsmap = element.nsmap
  170. new_prefixes = []
  171. if element_nsmap != parent_nsmap:
  172. # There have been updates to the namespace
  173. for prefix, ns_uri in element_nsmap.items():
  174. if parent_nsmap.get(prefix) != ns_uri:
  175. new_prefixes.append( (prefix, ns_uri) )
  176. attribs = element.items()
  177. if attribs:
  178. attr_values = {}
  179. attr_qnames = {}
  180. for attr_ns_name, value in attribs:
  181. attr_ns_tuple = _getNsTag(attr_ns_name)
  182. attr_values[attr_ns_tuple] = value
  183. attr_qnames[attr_ns_tuple] = self._build_qname(
  184. attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,
  185. preferred_prefix=None, is_attribute=True)
  186. sax_attributes = self._attr_class(attr_values, attr_qnames)
  187. else:
  188. sax_attributes = self._empty_attributes
  189. ns_uri, local_name = _getNsTag(tag)
  190. qname = self._build_qname(
  191. ns_uri, local_name, element_nsmap, element.prefix, is_attribute=False)
  192. for prefix, uri in new_prefixes:
  193. content_handler.startPrefixMapping(prefix, uri)
  194. content_handler.startElementNS(
  195. (ns_uri, local_name), qname, sax_attributes)
  196. text = element.text
  197. if text:
  198. content_handler.characters(text)
  199. for child in element:
  200. self._recursive_saxify(child, element_nsmap)
  201. content_handler.endElementNS((ns_uri, local_name), qname)
  202. for prefix, uri in new_prefixes:
  203. content_handler.endPrefixMapping(prefix)
  204. tail = element.tail
  205. if tail:
  206. content_handler.characters(tail)
  207. def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute):
  208. if ns_uri is None:
  209. return local_name
  210. if not is_attribute and nsmap.get(preferred_prefix) == ns_uri:
  211. prefix = preferred_prefix
  212. else:
  213. # Pick the first matching prefix, in alphabetical order.
  214. candidates = [
  215. pfx for (pfx, uri) in nsmap.items()
  216. if pfx is not None and uri == ns_uri
  217. ]
  218. prefix = (
  219. candidates[0] if len(candidates) == 1
  220. else min(candidates) if candidates
  221. else None
  222. )
  223. if prefix is None:
  224. # Default namespace
  225. return local_name
  226. return prefix + ':' + local_name
  227. def saxify(element_or_tree, content_handler):
  228. """One-shot helper to generate SAX events from an XML tree and fire
  229. them against a SAX ContentHandler.
  230. """
  231. return ElementTreeProducer(element_or_tree, content_handler).saxify()