25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

245 lines
8.4 KiB

  1. #
  2. # ElementTree
  3. # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
  4. #
  5. # limited xinclude support for element trees
  6. #
  7. # history:
  8. # 2003-08-15 fl created
  9. # 2003-11-14 fl fixed default loader
  10. #
  11. # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
  12. #
  13. # fredrik@pythonware.com
  14. # http://www.pythonware.com
  15. #
  16. # --------------------------------------------------------------------
  17. # The ElementTree toolkit is
  18. #
  19. # Copyright (c) 1999-2004 by Fredrik Lundh
  20. #
  21. # By obtaining, using, and/or copying this software and/or its
  22. # associated documentation, you agree that you have read, understood,
  23. # and will comply with the following terms and conditions:
  24. #
  25. # Permission to use, copy, modify, and distribute this software and
  26. # its associated documentation for any purpose and without fee is
  27. # hereby granted, provided that the above copyright notice appears in
  28. # all copies, and that both that copyright notice and this permission
  29. # notice appear in supporting documentation, and that the name of
  30. # Secret Labs AB or the author not be used in advertising or publicity
  31. # pertaining to distribution of the software without specific, written
  32. # prior permission.
  33. #
  34. # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  35. # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  36. # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  37. # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  38. # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  39. # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  40. # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  41. # OF THIS SOFTWARE.
  42. # --------------------------------------------------------------------
  43. """
  44. Limited XInclude support for the ElementTree package.
  45. While lxml.etree has full support for XInclude (see
  46. `etree.ElementTree.xinclude()`), this module provides a simpler, pure
  47. Python, ElementTree compatible implementation that supports a simple
  48. form of custom URL resolvers.
  49. """
  50. from lxml import etree
  51. try:
  52. from urlparse import urljoin
  53. from urllib2 import urlopen
  54. except ImportError:
  55. # Python 3
  56. from urllib.parse import urljoin
  57. from urllib.request import urlopen
  58. XINCLUDE = "{http://www.w3.org/2001/XInclude}"
  59. XINCLUDE_INCLUDE = XINCLUDE + "include"
  60. XINCLUDE_FALLBACK = XINCLUDE + "fallback"
  61. XINCLUDE_ITER_TAG = XINCLUDE + "*"
  62. # For security reasons, the inclusion depth is limited to this read-only value by default.
  63. DEFAULT_MAX_INCLUSION_DEPTH = 6
  64. ##
  65. # Fatal include error.
  66. class FatalIncludeError(etree.LxmlSyntaxError):
  67. pass
  68. class LimitedRecursiveIncludeError(FatalIncludeError):
  69. pass
  70. ##
  71. # ET compatible default loader.
  72. # This loader reads an included resource from disk.
  73. #
  74. # @param href Resource reference.
  75. # @param parse Parse mode. Either "xml" or "text".
  76. # @param encoding Optional text encoding.
  77. # @return The expanded resource. If the parse mode is "xml", this
  78. # is an ElementTree instance. If the parse mode is "text", this
  79. # is a Unicode string. If the loader fails, it can return None
  80. # or raise an IOError exception.
  81. # @throws IOError If the loader fails to load the resource.
  82. def default_loader(href, parse, encoding=None):
  83. file = open(href, 'rb')
  84. if parse == "xml":
  85. data = etree.parse(file).getroot()
  86. else:
  87. data = file.read()
  88. if not encoding:
  89. encoding = 'utf-8'
  90. data = data.decode(encoding)
  91. file.close()
  92. return data
  93. ##
  94. # Default loader used by lxml.etree - handles custom resolvers properly
  95. #
  96. def _lxml_default_loader(href, parse, encoding=None, parser=None):
  97. if parse == "xml":
  98. data = etree.parse(href, parser).getroot()
  99. else:
  100. if "://" in href:
  101. f = urlopen(href)
  102. else:
  103. f = open(href, 'rb')
  104. data = f.read()
  105. f.close()
  106. if not encoding:
  107. encoding = 'utf-8'
  108. data = data.decode(encoding)
  109. return data
  110. ##
  111. # Wrapper for ET compatibility - drops the parser
  112. def _wrap_et_loader(loader):
  113. def load(href, parse, encoding=None, parser=None):
  114. return loader(href, parse, encoding)
  115. return load
  116. ##
  117. # Expand XInclude directives.
  118. #
  119. # @param elem Root element.
  120. # @param loader Optional resource loader. If omitted, it defaults
  121. # to {@link default_loader}. If given, it should be a callable
  122. # that implements the same interface as <b>default_loader</b>.
  123. # @param base_url The base URL of the original file, to resolve
  124. # relative include file references.
  125. # @param max_depth The maximum number of recursive inclusions.
  126. # Limited to reduce the risk of malicious content explosion.
  127. # Pass None to disable the limitation.
  128. # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
  129. # @throws FatalIncludeError If the function fails to include a given
  130. # resource, or if the tree contains malformed XInclude elements.
  131. # @throws IOError If the function fails to load a given resource.
  132. # @returns the node or its replacement if it was an XInclude node
  133. def include(elem, loader=None, base_url=None,
  134. max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
  135. if max_depth is None:
  136. max_depth = -1
  137. elif max_depth < 0:
  138. raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
  139. if base_url is None:
  140. if hasattr(elem, 'getroot'):
  141. tree = elem
  142. elem = elem.getroot()
  143. else:
  144. tree = elem.getroottree()
  145. if hasattr(tree, 'docinfo'):
  146. base_url = tree.docinfo.URL
  147. elif hasattr(elem, 'getroot'):
  148. elem = elem.getroot()
  149. _include(elem, loader, base_url, max_depth)
  150. def _include(elem, loader=None, base_url=None,
  151. max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
  152. if loader is not None:
  153. load_include = _wrap_et_loader(loader)
  154. else:
  155. load_include = _lxml_default_loader
  156. if _parent_hrefs is None:
  157. _parent_hrefs = set()
  158. parser = elem.getroottree().parser
  159. include_elements = list(
  160. elem.iter(XINCLUDE_ITER_TAG))
  161. for e in include_elements:
  162. if e.tag == XINCLUDE_INCLUDE:
  163. # process xinclude directive
  164. href = urljoin(base_url, e.get("href"))
  165. parse = e.get("parse", "xml")
  166. parent = e.getparent()
  167. if parse == "xml":
  168. if href in _parent_hrefs:
  169. raise FatalIncludeError(
  170. "recursive include of %r detected" % href
  171. )
  172. if max_depth == 0:
  173. raise LimitedRecursiveIncludeError(
  174. "maximum xinclude depth reached when including file %s" % href)
  175. node = load_include(href, parse, parser=parser)
  176. if node is None:
  177. raise FatalIncludeError(
  178. "cannot load %r as %r" % (href, parse)
  179. )
  180. node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
  181. if e.tail:
  182. node.tail = (node.tail or "") + e.tail
  183. if parent is None:
  184. return node # replaced the root node!
  185. parent.replace(e, node)
  186. elif parse == "text":
  187. text = load_include(href, parse, encoding=e.get("encoding"))
  188. if text is None:
  189. raise FatalIncludeError(
  190. "cannot load %r as %r" % (href, parse)
  191. )
  192. predecessor = e.getprevious()
  193. if predecessor is not None:
  194. predecessor.tail = (predecessor.tail or "") + text
  195. elif parent is None:
  196. return text # replaced the root node!
  197. else:
  198. parent.text = (parent.text or "") + text + (e.tail or "")
  199. parent.remove(e)
  200. else:
  201. raise FatalIncludeError(
  202. "unknown parse type in xi:include tag (%r)" % parse
  203. )
  204. elif e.tag == XINCLUDE_FALLBACK:
  205. parent = e.getparent()
  206. if parent is not None and parent.tag != XINCLUDE_INCLUDE:
  207. raise FatalIncludeError(
  208. "xi:fallback tag must be child of xi:include (%r)" % e.tag
  209. )
  210. else:
  211. raise FatalIncludeError(
  212. "Invalid element found in XInclude namespace (%r)" % e.tag
  213. )
  214. return elem