You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

958 rivejä
36 KiB

  1. # XSLT
  2. from lxml.includes cimport xslt
cdef class XSLTError(LxmlError):
    """Base class of all XSLT errors.

    Catching this class covers every XSLT specific exception
    that is defined in this file.
    """

cdef class XSLTParseError(XSLTError):
    """Error parsing a stylesheet document.

    Raised when a stylesheet cannot be compiled, and when a document that
    is loaded while parsing the stylesheet cannot be resolved.
    """

cdef class XSLTApplyError(XSLTError):
    """Error running an XSL transformation.

    Raised when applying a stylesheet fails, and when a document that is
    loaded at transformation time cannot be resolved.
    """

class XSLTSaveError(XSLTError, SerialisationError):
    """Error serialising an XSLT result.

    Also a `SerialisationError`, so generic serialisation error handling
    catches it as well.
    """

cdef class XSLTExtensionError(XSLTError):
    """Error registering an XSLT extension.

    Raised, for example, for extensions that have no namespace.
    """
# version information
# Both values are run through __unpackIntVersion() before being exported.
# NOTE(review): presumably LIBXSLT_VERSION is the header constant that lxml
# was compiled against and xsltLibxsltVersion is what the loaded library
# reports - confirm against the libxslt headers.
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
  21. ################################################################################
  22. # Where do we store what?
  23. #
  24. # xsltStylesheet->doc->_private
  25. # == _XSLTResolverContext for XSL stylesheet
  26. #
  27. # xsltTransformContext->_private
  28. # == _XSLTResolverContext for transformed document
  29. #
  30. ################################################################################
  31. ################################################################################
  32. # XSLT document loaders
  33. @cython.final
  34. @cython.internal
  35. cdef class _XSLTResolverContext(_ResolverContext):
  36. cdef xmlDoc* _c_style_doc
  37. cdef _BaseParser _parser
  38. cdef _XSLTResolverContext _copy(self):
  39. cdef _XSLTResolverContext context
  40. context = _XSLTResolverContext()
  41. _initXSLTResolverContext(context, self._parser)
  42. context._c_style_doc = self._c_style_doc
  43. return context
  44. cdef _initXSLTResolverContext(_XSLTResolverContext context,
  45. _BaseParser parser):
  46. _initResolverContext(context, parser.resolvers)
  47. context._parser = parser
  48. context._c_style_doc = NULL
cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
                                       int parse_options, int* error) with gil:
    # Resolve 'c_uri' through the Python level resolvers.
    #
    # 'c_context' must point to an _XSLTResolverContext.  Returns the
    # resolved document, or NULL if nothing was resolved.  'error[0]' is set
    # to 1 if an exception was raised while resolving (the exception is then
    # stored in the context) and to 0 otherwise.  No exception ever leaves
    # this function.
    # call the Python document loaders
    cdef _XSLTResolverContext context
    cdef _ResolverRegistry resolvers
    cdef _InputDocument doc_ref
    cdef xmlDoc* c_doc
    cdef xmlDoc* c_return_doc = NULL

    error[0] = 0
    context = <_XSLTResolverContext>c_context

    # shortcut if we resolve the stylesheet itself
    c_doc = context._c_style_doc
    try:
        if c_doc is not NULL and c_doc.URL is not NULL:
            if tree.xmlStrcmp(c_uri, c_doc.URL) == 0:
                # hand out a copy of the stylesheet document
                c_return_doc = _copyDoc(c_doc, 1)
                return c_return_doc # 'goto', see 'finally' below

        # delegate to the Python resolvers
        resolvers = context._resolvers
        # 26 == length of the prefix that XSLT.__init__ uses for the URLs of
        # stylesheets that have no URL of their own; strip it before lookup
        if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
            c_uri += 26
        uri = _decodeFilename(c_uri)
        doc_ref = resolvers.resolve(uri, None, context)

        if doc_ref is not None:
            # parse the input in whatever form the resolver provided it
            if doc_ref._type == PARSER_DATA_STRING:
                c_return_doc = _parseDoc(
                    doc_ref._data_bytes, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILENAME:
                c_return_doc = _parseDocFromFile(
                    doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_FILE:
                c_return_doc = _parseDocFromFilelike(
                    doc_ref._file, doc_ref._filename, context._parser)
            elif doc_ref._type == PARSER_DATA_EMPTY:
                c_return_doc = _newXMLDoc()
            # make sure the resolved document carries a URL
            if c_return_doc is not NULL and c_return_doc.URL is NULL:
                c_return_doc.URL = tree.xmlStrdup(c_uri)
    except:
        # remember the exception in the context instead of propagating it
        error[0] = 1
        context._store_raised()
    finally:
        return c_return_doc # and swallow any further exceptions
  91. cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
  92. xslt.xsltLoadType c_type) noexcept with gil:
  93. try:
  94. message = f"Cannot resolve URI {_decodeFilename(c_uri)}"
  95. if c_type == xslt.XSLT_LOAD_DOCUMENT:
  96. exception = XSLTApplyError(message)
  97. else:
  98. exception = XSLTParseError(message)
  99. (<_XSLTResolverContext>context)._store_exception(exception)
  100. except BaseException as e:
  101. (<_XSLTResolverContext>context)._store_exception(e)
  102. finally:
  103. return # and swallow any further exceptions
  104. cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
  105. int parse_options, void* c_ctxt,
  106. xslt.xsltLoadType c_type) noexcept nogil:
  107. # nogil => no Python objects here, may be called without thread context !
  108. cdef xmlDoc* c_doc
  109. cdef xmlDoc* result
  110. cdef void* c_pcontext
  111. cdef int error = 0
  112. # find resolver contexts of stylesheet and transformed doc
  113. if c_type == xslt.XSLT_LOAD_DOCUMENT:
  114. # transformation time
  115. c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
  116. elif c_type == xslt.XSLT_LOAD_STYLESHEET:
  117. # include/import resolution while parsing
  118. c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private
  119. else:
  120. c_pcontext = NULL
  121. if c_pcontext is NULL:
  122. # can't call Python without context, fall back to default loader
  123. return XSLT_DOC_DEFAULT_LOADER(
  124. c_uri, c_dict, parse_options, c_ctxt, c_type)
  125. c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
  126. if c_doc is NULL and not error:
  127. c_doc = XSLT_DOC_DEFAULT_LOADER(
  128. c_uri, c_dict, parse_options, c_ctxt, c_type)
  129. if c_doc is NULL:
  130. _xslt_store_resolver_exception(c_uri, c_pcontext, c_type)
  131. if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
  132. c_doc._private = c_pcontext
  133. return c_doc
# Keep a reference to the loader that libxslt provides by default so that
# _xslt_doc_loader() can delegate to it, then install our own loader.
cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
  136. ################################################################################
  137. # XSLT file/network access control
  138. cdef class XSLTAccessControl:
  139. """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)
  140. Access control for XSLT: reading/writing files, directories and
  141. network I/O. Access to a type of resource is granted or denied by
  142. passing any of the following boolean keyword arguments. All of
  143. them default to True to allow access.
  144. - read_file
  145. - write_file
  146. - create_dir
  147. - read_network
  148. - write_network
  149. For convenience, there is also a class member `DENY_ALL` that
  150. provides an XSLTAccessControl instance that is readily configured
  151. to deny everything, and a `DENY_WRITE` member that denies all
  152. write access but allows read access.
  153. See `XSLT`.
  154. """
  155. cdef xslt.xsltSecurityPrefs* _prefs
  156. def __cinit__(self):
  157. self._prefs = xslt.xsltNewSecurityPrefs()
  158. if self._prefs is NULL:
  159. raise MemoryError()
  160. def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
  161. bint read_network=True, bint write_network=True):
  162. self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
  163. self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
  164. self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
  165. self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
  166. self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)
  167. DENY_ALL = XSLTAccessControl(
  168. read_file=False, write_file=False, create_dir=False,
  169. read_network=False, write_network=False)
  170. DENY_WRITE = XSLTAccessControl(
  171. read_file=True, write_file=False, create_dir=False,
  172. read_network=True, write_network=False)
  173. def __dealloc__(self):
  174. if self._prefs is not NULL:
  175. xslt.xsltFreeSecurityPrefs(self._prefs)
  176. @cython.final
  177. cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
  178. cdef xslt.xsltSecurityCheck function
  179. if allow:
  180. function = xslt.xsltSecurityAllow
  181. else:
  182. function = xslt.xsltSecurityForbid
  183. xslt.xsltSetSecurityPrefs(self._prefs, option, function)
  184. @cython.final
  185. cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt) noexcept:
  186. xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
  187. @property
  188. def options(self):
  189. """The access control configuration as a map of options."""
  190. return {
  191. 'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
  192. 'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
  193. 'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
  194. 'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
  195. 'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
  196. }
  197. @cython.final
  198. cdef _optval(self, xslt.xsltSecurityOption option):
  199. cdef xslt.xsltSecurityCheck function
  200. function = xslt.xsltGetSecurityPrefs(self._prefs, option)
  201. if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
  202. return True
  203. elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
  204. return False
  205. else:
  206. return None
  207. def __repr__(self):
  208. items = sorted(self.options.items())
  209. return "%s(%s)" % (
  210. python._fqtypename(self).decode('UTF-8').split('.')[-1],
  211. ', '.join(["%s=%r" % item for item in items]))
  212. ################################################################################
  213. # XSLT
  214. cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf) noexcept:
  215. if ns_utf is None:
  216. return 0
  217. # libxml2 internalises the strings if ctxt has a dict
  218. return xslt.xsltRegisterExtFunction(
  219. <xslt.xsltTransformContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf),
  220. <xslt.xmlXPathFunction>_xpath_function_call)
# Shared placeholder for "no extension elements".  _XSLTContext compares
# against it by identity, so it must never be modified in place.
cdef dict EMPTY_DICT = {}
  222. @cython.final
  223. @cython.internal
  224. cdef class _XSLTContext(_BaseContext):
  225. cdef xslt.xsltTransformContext* _xsltCtxt
  226. cdef _ReadOnlyElementProxy _extension_element_proxy
  227. cdef dict _extension_elements
  228. def __cinit__(self):
  229. self._xsltCtxt = NULL
  230. self._extension_elements = EMPTY_DICT
  231. def __init__(self, namespaces, extensions, error_log, enable_regexp,
  232. build_smart_strings):
  233. if extensions is not None and extensions:
  234. for ns_name_tuple, extension in extensions.items():
  235. if ns_name_tuple[0] is None:
  236. raise XSLTExtensionError, \
  237. "extensions must not have empty namespaces"
  238. if isinstance(extension, XSLTExtension):
  239. if self._extension_elements is EMPTY_DICT:
  240. self._extension_elements = {}
  241. extensions = extensions.copy()
  242. ns_utf = _utf8(ns_name_tuple[0])
  243. name_utf = _utf8(ns_name_tuple[1])
  244. self._extension_elements[(ns_utf, name_utf)] = extension
  245. del extensions[ns_name_tuple]
  246. _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
  247. build_smart_strings)
  248. cdef _BaseContext _copy(self):
  249. cdef _XSLTContext context
  250. context = <_XSLTContext>_BaseContext._copy(self)
  251. context._extension_elements = self._extension_elements
  252. return context
  253. cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
  254. _Document doc):
  255. self._xsltCtxt = xsltCtxt
  256. self._set_xpath_context(xsltCtxt.xpathCtxt)
  257. self._register_context(doc)
  258. self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
  259. self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
  260. _registerXSLTExtensions(xsltCtxt, self._extension_elements)
  261. cdef free_context(self):
  262. self._cleanup_context()
  263. self._release_context()
  264. if self._xsltCtxt is not NULL:
  265. xslt.xsltFreeTransformContext(self._xsltCtxt)
  266. self._xsltCtxt = NULL
  267. self._release_temp_refs()
  268. @cython.final
  269. @cython.internal
  270. @cython.freelist(8)
  271. cdef class _XSLTQuotedStringParam:
  272. """A wrapper class for literal XSLT string parameters that require
  273. quote escaping.
  274. """
  275. cdef bytes strval
  276. def __cinit__(self, strval):
  277. self.strval = _utf8(strval)
  278. @cython.no_gc_clear
  279. cdef class XSLT:
  280. """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
  281. Turn an XSL document into an XSLT object.
  282. Calling this object on a tree or Element will execute the XSLT::
  283. transform = etree.XSLT(xsl_tree)
  284. result = transform(xml_tree)
  285. Keyword arguments of the constructor:
  286. - extensions: a dict mapping ``(namespace, name)`` pairs to
  287. extension functions or extension elements
  288. - regexp: enable exslt regular expression support in XPath
  289. (default: True)
  290. - access_control: access restrictions for network or file
  291. system (see `XSLTAccessControl`)
  292. Keyword arguments of the XSLT call:
  293. - profile_run: enable XSLT profiling and make the profile available
  294. as XML document in ``result.xslt_profile`` (default: False)
  295. Other keyword arguments of the call are passed to the stylesheet
  296. as parameters.
  297. """
  298. cdef _XSLTContext _context
  299. cdef xslt.xsltStylesheet* _c_style
  300. cdef _XSLTResolverContext _xslt_resolver_context
  301. cdef XSLTAccessControl _access_control
  302. cdef _ErrorLog _error_log
  303. def __cinit__(self):
  304. self._c_style = NULL
  305. def __init__(self, xslt_input, *, extensions=None, regexp=True,
  306. access_control=None):
  307. cdef xslt.xsltStylesheet* c_style = NULL
  308. cdef xmlDoc* c_doc
  309. cdef _Document doc
  310. cdef _Element root_node
  311. doc = _documentOrRaise(xslt_input)
  312. root_node = _rootNodeOrRaise(xslt_input)
  313. # set access control or raise TypeError
  314. self._access_control = access_control
  315. # make a copy of the document as stylesheet parsing modifies it
  316. c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
  317. # make sure we always have a stylesheet URL
  318. if c_doc.URL is NULL:
  319. doc_url_utf = python.PyUnicode_AsASCIIString(
  320. f"string://__STRING__XSLT__/{id(self)}.xslt")
  321. c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))
  322. self._error_log = _ErrorLog()
  323. self._xslt_resolver_context = _XSLTResolverContext()
  324. _initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
  325. # keep a copy in case we need to access the stylesheet via 'document()'
  326. self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
  327. c_doc._private = <python.PyObject*>self._xslt_resolver_context
  328. with self._error_log:
  329. orig_loader = _register_document_loader()
  330. c_style = xslt.xsltParseStylesheetDoc(c_doc)
  331. _reset_document_loader(orig_loader)
  332. if c_style is NULL or c_style.errors:
  333. tree.xmlFreeDoc(c_doc)
  334. if c_style is not NULL:
  335. xslt.xsltFreeStylesheet(c_style)
  336. self._xslt_resolver_context._raise_if_stored()
  337. # last error seems to be the most accurate here
  338. if self._error_log.last_error is not None and \
  339. self._error_log.last_error.message:
  340. raise XSLTParseError(self._error_log.last_error.message,
  341. self._error_log)
  342. else:
  343. raise XSLTParseError(
  344. self._error_log._buildExceptionMessage(
  345. "Cannot parse stylesheet"),
  346. self._error_log)
  347. c_doc._private = NULL # no longer used!
  348. self._c_style = c_style
  349. self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)
  350. def __dealloc__(self):
  351. if self._xslt_resolver_context is not None and \
  352. self._xslt_resolver_context._c_style_doc is not NULL:
  353. tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
  354. # this cleans up the doc copy as well
  355. if self._c_style is not NULL:
  356. xslt.xsltFreeStylesheet(self._c_style)
  357. @property
  358. def error_log(self):
  359. """The log of errors and warnings of an XSLT execution."""
  360. return self._error_log.copy()
  361. @staticmethod
  362. def strparam(strval):
  363. """strparam(strval)
  364. Mark an XSLT string parameter that requires quote escaping
  365. before passing it into the transformation. Use it like this::
  366. result = transform(doc, some_strval = XSLT.strparam(
  367. '''it's \"Monty Python's\" ...'''))
  368. Escaped string parameters can be reused without restriction.
  369. """
  370. return _XSLTQuotedStringParam(strval)
  371. @staticmethod
  372. def set_global_max_depth(int max_depth):
  373. """set_global_max_depth(max_depth)
  374. The maximum traversal depth that the stylesheet engine will allow.
  375. This does not only count the template recursion depth but also takes
  376. the number of variables/parameters into account. The required setting
  377. for a run depends on both the stylesheet and the input data.
  378. Example::
  379. XSLT.set_global_max_depth(5000)
  380. Note that this is currently a global, module-wide setting because
  381. libxslt does not support it at a per-stylesheet level.
  382. """
  383. if max_depth < 0:
  384. raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
  385. xslt.xsltMaxDepth = max_depth
  386. def tostring(self, _ElementTree result_tree):
  387. """tostring(self, result_tree)
  388. Save result doc to string based on stylesheet output method.
  389. :deprecated: use str(result_tree) instead.
  390. """
  391. return str(result_tree)
  392. def __deepcopy__(self, memo):
  393. return self.__copy__()
  394. def __copy__(self):
  395. return _copyXSLT(self)
  396. def __call__(self, _input, *, profile_run=False, **kw):
  397. """__call__(self, _input, profile_run=False, **kw)
  398. Execute the XSL transformation on a tree or Element.
  399. Pass the ``profile_run`` option to get profile information
  400. about the XSLT. The result of the XSLT will have a property
  401. xslt_profile that holds an XML tree with profiling data.
  402. """
  403. cdef _XSLTContext context = None
  404. cdef _XSLTResolverContext resolver_context
  405. cdef _Document input_doc
  406. cdef _Element root_node
  407. cdef _Document result_doc
  408. cdef _Document profile_doc = None
  409. cdef xmlDoc* c_profile_doc
  410. cdef xslt.xsltTransformContext* transform_ctxt
  411. cdef xmlDoc* c_result = NULL
  412. cdef xmlDoc* c_doc
  413. cdef tree.xmlDict* c_dict
  414. cdef const_char** params = NULL
  415. assert self._c_style is not NULL, "XSLT stylesheet not initialised"
  416. input_doc = _documentOrRaise(_input)
  417. root_node = _rootNodeOrRaise(_input)
  418. c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
  419. transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
  420. if transform_ctxt is NULL:
  421. _destroyFakeDoc(input_doc._c_doc, c_doc)
  422. raise MemoryError()
  423. # using the stylesheet dict is safer than using a possibly
  424. # unrelated dict from the current thread. Almost all
  425. # non-input tag/attr names will come from the stylesheet
  426. # anyway.
  427. if transform_ctxt.dict is not NULL:
  428. xmlparser.xmlDictFree(transform_ctxt.dict)
  429. if kw:
  430. # parameter values are stored in the dict
  431. # => avoid unnecessarily cluttering the global dict
  432. transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
  433. if transform_ctxt.dict is NULL:
  434. xslt.xsltFreeTransformContext(transform_ctxt)
  435. raise MemoryError()
  436. else:
  437. transform_ctxt.dict = self._c_style.doc.dict
  438. xmlparser.xmlDictReference(transform_ctxt.dict)
  439. xslt.xsltSetCtxtParseOptions(
  440. transform_ctxt, input_doc._parser._parse_options)
  441. if profile_run:
  442. transform_ctxt.profile = 1
  443. try:
  444. context = self._context._copy()
  445. context.register_context(transform_ctxt, input_doc)
  446. resolver_context = self._xslt_resolver_context._copy()
  447. transform_ctxt._private = <python.PyObject*>resolver_context
  448. _convert_xslt_parameters(transform_ctxt, kw, &params)
  449. c_result = self._run_transform(
  450. c_doc, params, context, transform_ctxt)
  451. if params is not NULL:
  452. # deallocate space for parameters
  453. python.lxml_free(params)
  454. if transform_ctxt.state != xslt.XSLT_STATE_OK:
  455. if c_result is not NULL:
  456. tree.xmlFreeDoc(c_result)
  457. c_result = NULL
  458. if transform_ctxt.profile:
  459. c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
  460. if c_profile_doc is not NULL:
  461. profile_doc = _documentFactory(
  462. c_profile_doc, input_doc._parser)
  463. finally:
  464. if context is not None:
  465. context.free_context()
  466. _destroyFakeDoc(input_doc._c_doc, c_doc)
  467. try:
  468. if resolver_context is not None and resolver_context._has_raised():
  469. if c_result is not NULL:
  470. tree.xmlFreeDoc(c_result)
  471. c_result = NULL
  472. resolver_context._raise_if_stored()
  473. if context._exc._has_raised():
  474. if c_result is not NULL:
  475. tree.xmlFreeDoc(c_result)
  476. c_result = NULL
  477. context._exc._raise_if_stored()
  478. if c_result is NULL:
  479. # last error seems to be the most accurate here
  480. error = self._error_log.last_error
  481. if error is not None and error.message:
  482. if error.line > 0:
  483. message = f"{error.message}, line {error.line}"
  484. else:
  485. message = error.message
  486. elif error is not None and error.line > 0:
  487. message = f"Error applying stylesheet, line {error.line}"
  488. else:
  489. message = "Error applying stylesheet"
  490. raise XSLTApplyError(message, self._error_log)
  491. finally:
  492. if resolver_context is not None:
  493. resolver_context.clear()
  494. result_doc = _documentFactory(c_result, input_doc._parser)
  495. c_dict = c_result.dict
  496. xmlparser.xmlDictReference(c_dict)
  497. __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
  498. if c_dict is not c_result.dict or \
  499. self._c_style.doc.dict is not c_result.dict or \
  500. input_doc._c_doc.dict is not c_result.dict:
  501. with nogil:
  502. if c_dict is not c_result.dict:
  503. fixThreadDictNames(<xmlNode*>c_result,
  504. c_dict, c_result.dict)
  505. if self._c_style.doc.dict is not c_result.dict:
  506. fixThreadDictNames(<xmlNode*>c_result,
  507. self._c_style.doc.dict, c_result.dict)
  508. if input_doc._c_doc.dict is not c_result.dict:
  509. fixThreadDictNames(<xmlNode*>c_result,
  510. input_doc._c_doc.dict, c_result.dict)
  511. xmlparser.xmlDictFree(c_dict)
  512. return _xsltResultTreeFactory(result_doc, self, profile_doc)
  513. cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
  514. const_char** params, _XSLTContext context,
  515. xslt.xsltTransformContext* transform_ctxt):
  516. cdef xmlDoc* c_result
  517. xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
  518. <xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
  519. if self._access_control is not None:
  520. self._access_control._register_in_context(transform_ctxt)
  521. with self._error_log, nogil:
  522. orig_loader = _register_document_loader()
  523. c_result = xslt.xsltApplyStylesheetUser(
  524. self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
  525. _reset_document_loader(orig_loader)
  526. return c_result
  527. cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
  528. dict parameters, const_char*** params_ptr):
  529. cdef Py_ssize_t i, parameter_count
  530. cdef const_char** params
  531. cdef tree.xmlDict* c_dict = transform_ctxt.dict
  532. params_ptr[0] = NULL
  533. parameter_count = len(parameters)
  534. if parameter_count == 0:
  535. return
  536. # allocate space for parameters
  537. # * 2 as we want an entry for both key and value,
  538. # and + 1 as array is NULL terminated
  539. params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
  540. if not params:
  541. raise MemoryError()
  542. try:
  543. i = 0
  544. for key, value in parameters.iteritems():
  545. k = _utf8(key)
  546. if isinstance(value, _XSLTQuotedStringParam):
  547. v = (<_XSLTQuotedStringParam>value).strval
  548. xslt.xsltQuoteOneUserParam(
  549. transform_ctxt, _xcstr(k), _xcstr(v))
  550. else:
  551. if isinstance(value, XPath):
  552. v = (<XPath>value)._path
  553. else:
  554. v = _utf8(value)
  555. c_len = len(k)
  556. if c_len > limits.INT_MAX:
  557. raise ValueError("Parameter name too long")
  558. params[i] = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(k), <int> c_len)
  559. i += 1
  560. c_len = len(v)
  561. if c_len > limits.INT_MAX:
  562. raise ValueError("Parameter value too long")
  563. params[i] = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(v), <int> c_len)
  564. i += 1
  565. except:
  566. python.lxml_free(params)
  567. raise
  568. params[i] = NULL
  569. params_ptr[0] = params
  570. cdef XSLT _copyXSLT(XSLT stylesheet):
  571. cdef XSLT new_xslt
  572. cdef xmlDoc* c_doc
  573. assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
  574. new_xslt = XSLT.__new__(XSLT)
  575. new_xslt._access_control = stylesheet._access_control
  576. new_xslt._error_log = _ErrorLog()
  577. new_xslt._context = stylesheet._context._copy()
  578. new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
  579. new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
  580. stylesheet._xslt_resolver_context._c_style_doc, 1)
  581. c_doc = _copyDoc(stylesheet._c_style.doc, 1)
  582. new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
  583. if new_xslt._c_style is NULL:
  584. tree.xmlFreeDoc(c_doc)
  585. raise MemoryError()
  586. return new_xslt
  587. @cython.final
  588. cdef class _XSLTResultTree(_ElementTree):
  589. """The result of an XSLT evaluation.
  590. Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
  591. and the ``.write_output()`` method to write serialise to a file.
  592. """
  593. cdef XSLT _xslt
  594. cdef _Document _profile
  595. cdef xmlChar* _buffer
  596. cdef Py_ssize_t _buffer_len
  597. cdef Py_ssize_t _buffer_refcnt
  598. def write_output(self, file, *, compression=0):
  599. """write_output(self, file, *, compression=0)
  600. Serialise the XSLT output to a file or file-like object.
  601. As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
  602. the result as defined by the ``<xsl:output>`` tag.
  603. """
  604. cdef _FilelikeWriter writer = None
  605. cdef _Document doc
  606. cdef int r, rclose, c_compression
  607. cdef const_xmlChar* c_encoding = NULL
  608. cdef tree.xmlOutputBuffer* c_buffer
  609. if self._context_node is not None:
  610. doc = self._context_node._doc
  611. else:
  612. doc = None
  613. if doc is None:
  614. doc = self._doc
  615. if doc is None:
  616. raise XSLTSaveError("No document to serialise")
  617. c_compression = compression or 0
  618. xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
  619. writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
  620. if writer is None:
  621. with nogil:
  622. r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
  623. rclose = tree.xmlOutputBufferClose(c_buffer)
  624. else:
  625. r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
  626. rclose = tree.xmlOutputBufferClose(c_buffer)
  627. if writer is not None:
  628. writer._exc_context._raise_if_stored()
  629. if r < 0 or rclose == -1:
  630. python.PyErr_SetFromErrno(IOError) # raises IOError
  631. cdef _saveToStringAndSize(self, xmlChar** s, int* l):
  632. cdef _Document doc
  633. cdef int r
  634. if self._context_node is not None:
  635. doc = self._context_node._doc
  636. else:
  637. doc = None
  638. if doc is None:
  639. doc = self._doc
  640. if doc is None:
  641. s[0] = NULL
  642. return
  643. with nogil:
  644. r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
  645. self._xslt._c_style)
  646. if r == -1:
  647. raise MemoryError()
  648. def __str__(self):
  649. cdef xmlChar* encoding
  650. cdef xmlChar* s = NULL
  651. cdef int l = 0
  652. self._saveToStringAndSize(&s, &l)
  653. if s is NULL:
  654. return ''
  655. encoding = self._xslt._c_style.encoding
  656. try:
  657. if encoding is NULL:
  658. result = s[:l].decode('UTF-8')
  659. else:
  660. result = s[:l].decode(encoding)
  661. finally:
  662. tree.xmlFree(s)
  663. return _stripEncodingDeclaration(result)
  664. def __getbuffer__(self, Py_buffer* buffer, int flags):
  665. cdef int l = 0
  666. if buffer is NULL:
  667. return
  668. if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
  669. self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
  670. buffer.len = l
  671. if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
  672. self._buffer = <xmlChar*>buffer.buf
  673. self._buffer_len = l
  674. self._buffer_refcnt = 1
  675. else:
  676. buffer.buf = self._buffer
  677. buffer.len = self._buffer_len
  678. self._buffer_refcnt += 1
  679. if flags & python.PyBUF_WRITABLE:
  680. buffer.readonly = 0
  681. else:
  682. buffer.readonly = 1
  683. if flags & python.PyBUF_FORMAT:
  684. buffer.format = "B"
  685. else:
  686. buffer.format = NULL
  687. buffer.ndim = 0
  688. buffer.shape = NULL
  689. buffer.strides = NULL
  690. buffer.suboffsets = NULL
  691. buffer.itemsize = 1
  692. buffer.internal = NULL
  693. if buffer.obj is not self: # set by Cython?
  694. buffer.obj = self
  695. def __releasebuffer__(self, Py_buffer* buffer):
  696. if buffer is NULL:
  697. return
  698. if <xmlChar*>buffer.buf is self._buffer:
  699. self._buffer_refcnt -= 1
  700. if self._buffer_refcnt == 0:
  701. tree.xmlFree(<char*>self._buffer)
  702. self._buffer = NULL
  703. else:
  704. tree.xmlFree(<char*>buffer.buf)
  705. buffer.buf = NULL
  706. property xslt_profile:
  707. """Return an ElementTree with profiling data for the stylesheet run.
  708. """
  709. def __get__(self):
  710. cdef object root
  711. if self._profile is None:
  712. return None
  713. root = self._profile.getroot()
  714. if root is None:
  715. return None
  716. return ElementTree(root)
  717. def __del__(self):
  718. self._profile = None
  719. cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
  720. cdef _XSLTResultTree result
  721. result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
  722. result._xslt = xslt
  723. result._profile = profile
  724. return result
# Module level setup: both calls run once, when this module is initialised.

# functions like "output" and "write" are a potential security risk, but we
# rely on the user to configure XSLTAccessControl as needed
xslt.xsltRegisterAllExtras()

# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
  730. ################################################################################
  731. # XSLT PI support
# Matches the 'href' pseudo-attribute in the text of a processing
# instruction.  The value ends up in group 1 if it is single quoted and in
# group 2 if it is double quoted.  At least one whitespace character is
# required in front of 'href'.
cdef object _RE_PI_HREF = re.compile(r'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub

# XPath object used by _findStylesheetByID(), created on first use
cdef XPath __findStylesheetByID = None
  736. cdef _findStylesheetByID(_Document doc, id):
  737. global __findStylesheetByID
  738. if __findStylesheetByID is None:
  739. __findStylesheetByID = XPath(
  740. "//xsl:stylesheet[@xml:id = $id]",
  741. namespaces={"xsl" : "http://www.w3.org/1999/XSL/Transform"})
  742. return __findStylesheetByID(doc, id=id)
  743. cdef class _XSLTProcessingInstruction(PIBase):
  744. def parseXSL(self, parser=None):
  745. """parseXSL(self, parser=None)
  746. Try to parse the stylesheet referenced by this PI and return
  747. an ElementTree for it. If the stylesheet is embedded in the
  748. same document (referenced via xml:id), find and return an
  749. ElementTree for the stylesheet Element.
  750. The optional ``parser`` keyword argument can be passed to specify the
  751. parser used to read from external stylesheet URLs.
  752. """
  753. cdef _Document result_doc
  754. cdef _Element result_node
  755. cdef bytes href_utf
  756. cdef const_xmlChar* c_href
  757. cdef xmlAttr* c_attr
  758. _assertValidNode(self)
  759. if self._c_node.content is NULL:
  760. raise ValueError, "PI lacks content"
  761. hrefs = _FIND_PI_HREF(' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
  762. if len(hrefs) != 1:
  763. raise ValueError, "malformed PI attributes"
  764. hrefs = hrefs[0]
  765. href_utf = utf8(hrefs[0] or hrefs[1])
  766. c_href = _xcstr(href_utf)
  767. if c_href[0] != c'#':
  768. # normal URL, try to parse from it
  769. c_href = tree.xmlBuildURI(
  770. c_href,
  771. tree.xmlNodeGetBase(self._c_node.doc, self._c_node))
  772. if c_href is not NULL:
  773. try:
  774. href_utf = <unsigned char*>c_href
  775. finally:
  776. tree.xmlFree(<char*>c_href)
  777. result_doc = _parseDocumentFromURL(href_utf, parser)
  778. return _elementTreeFactory(result_doc, None)
  779. # ID reference to embedded stylesheet
  780. # try XML:ID lookup
  781. _assertValidDoc(self._doc)
  782. c_href += 1 # skip leading '#'
  783. c_attr = tree.xmlGetID(self._c_node.doc, c_href)
  784. if c_attr is not NULL and c_attr.doc is self._c_node.doc:
  785. result_node = _elementFactory(self._doc, c_attr.parent)
  786. return _elementTreeFactory(result_node._doc, result_node)
  787. # try XPath search
  788. root = _findStylesheetByID(self._doc, funicode(c_href))
  789. if not root:
  790. raise ValueError, "reference to non-existing embedded stylesheet"
  791. elif len(root) > 1:
  792. raise ValueError, "ambiguous reference to embedded stylesheet"
  793. result_node = root[0]
  794. return _elementTreeFactory(result_node._doc, result_node)
  795. def set(self, key, value):
  796. """set(self, key, value)
  797. Supports setting the 'href' pseudo-attribute in the text of
  798. the processing instruction.
  799. """
  800. if key != "href":
  801. raise AttributeError, \
  802. "only setting the 'href' attribute is supported on XSLT-PIs"
  803. if value is None:
  804. attrib = ""
  805. elif '"' in value or '>' in value:
  806. raise ValueError, "Invalid URL, must not contain '\"' or '>'"
  807. else:
  808. attrib = f' href="{value}"'
  809. text = ' ' + self.text
  810. if _FIND_PI_HREF(text):
  811. self.text = _REPLACE_PI_HREF(attrib, text)
  812. else:
  813. self.text = text + attrib