You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

566 line
18 KiB

  1. # read-only tree implementation
  2. @cython.internal
  3. cdef class _ReadOnlyProxy:
  4. "A read-only proxy class suitable for PIs/Comments (for internal use only!)."
  5. cdef bint _free_after_use
  6. cdef xmlNode* _c_node
  7. cdef _ReadOnlyProxy _source_proxy
  8. cdef list _dependent_proxies
  9. def __cinit__(self):
  10. self._c_node = NULL
  11. self._free_after_use = 0
  12. cdef int _assertNode(self) except -1:
  13. """This is our way of saying: this proxy is invalid!
  14. """
  15. if not self._c_node:
  16. raise ReferenceError("Proxy invalidated!")
  17. return 0
  18. cdef int _raise_unsupported_type(self) except -1:
  19. raise TypeError(f"Unsupported node type: {self._c_node.type}")
  20. cdef void free_after_use(self) noexcept:
  21. """Should the xmlNode* be freed when releasing the proxy?
  22. """
  23. self._free_after_use = 1
  24. @property
  25. def tag(self):
  26. """Element tag
  27. """
  28. self._assertNode()
  29. if self._c_node.type == tree.XML_ELEMENT_NODE:
  30. return _namespacedName(self._c_node)
  31. elif self._c_node.type == tree.XML_PI_NODE:
  32. return ProcessingInstruction
  33. elif self._c_node.type == tree.XML_COMMENT_NODE:
  34. return Comment
  35. elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
  36. return Entity
  37. else:
  38. self._raise_unsupported_type()
  39. @property
  40. def text(self):
  41. """Text before the first subelement. This is either a string or
  42. the value None, if there was no text.
  43. """
  44. self._assertNode()
  45. if self._c_node.type == tree.XML_ELEMENT_NODE:
  46. return _collectText(self._c_node.children)
  47. elif self._c_node.type in (tree.XML_PI_NODE,
  48. tree.XML_COMMENT_NODE):
  49. if self._c_node.content is NULL:
  50. return ''
  51. else:
  52. return funicode(self._c_node.content)
  53. elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
  54. return f'&{funicode(self._c_node.name)};'
  55. else:
  56. self._raise_unsupported_type()
  57. @property
  58. def tail(self):
  59. """Text after this element's end tag, but before the next sibling
  60. element's start tag. This is either a string or the value None, if
  61. there was no text.
  62. """
  63. self._assertNode()
  64. return _collectText(self._c_node.next)
  65. @property
  66. def sourceline(self):
  67. """Original line number as found by the parser or None if unknown.
  68. """
  69. cdef long line
  70. self._assertNode()
  71. line = tree.xmlGetLineNo(self._c_node)
  72. if line > 0:
  73. return line
  74. else:
  75. return None
  76. def __repr__(self):
  77. self._assertNode()
  78. if self._c_node.type == tree.XML_ELEMENT_NODE:
  79. return "<Element %s at 0x%x>" % (self.tag, id(self))
  80. elif self._c_node.type == tree.XML_COMMENT_NODE:
  81. return "<!--%s-->" % self.text
  82. elif self._c_node.type == tree.XML_ENTITY_NODE:
  83. return "&%s;" % funicode(self._c_node.name)
  84. elif self._c_node.type == tree.XML_PI_NODE:
  85. text = self.text
  86. if text:
  87. return "<?%s %s?>" % (self.target, text)
  88. else:
  89. return "<?%s?>" % self.target
  90. else:
  91. self._raise_unsupported_type()
  92. def __getitem__(self, x):
  93. """Returns the subelement at the given position or the requested
  94. slice.
  95. """
  96. cdef xmlNode* c_node = NULL
  97. cdef Py_ssize_t step = 0, slicelength = 0
  98. cdef Py_ssize_t c, i
  99. cdef _node_to_node_function next_element
  100. cdef list result
  101. self._assertNode()
  102. if isinstance(x, slice):
  103. # slicing
  104. if _isFullSlice(<slice>x):
  105. return _collectChildren(self)
  106. _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
  107. if c_node is NULL:
  108. return []
  109. if step > 0:
  110. next_element = _nextElement
  111. else:
  112. step = -step
  113. next_element = _previousElement
  114. result = []
  115. c = 0
  116. while c_node is not NULL and c < slicelength:
  117. result.append(_newReadOnlyProxy(self._source_proxy, c_node))
  118. result.append(_elementFactory(self._doc, c_node))
  119. c = c + 1
  120. for i from 0 <= i < step:
  121. c_node = next_element(c_node)
  122. return result
  123. else:
  124. # indexing
  125. c_node = _findChild(self._c_node, x)
  126. if c_node is NULL:
  127. raise IndexError, "list index out of range"
  128. return _newReadOnlyProxy(self._source_proxy, c_node)
  129. def __len__(self):
  130. """Returns the number of subelements.
  131. """
  132. cdef Py_ssize_t c
  133. cdef xmlNode* c_node
  134. self._assertNode()
  135. c = 0
  136. c_node = self._c_node.children
  137. while c_node is not NULL:
  138. if tree._isElement(c_node):
  139. c = c + 1
  140. c_node = c_node.next
  141. return c
  142. def __bool__(self):
  143. cdef xmlNode* c_node
  144. self._assertNode()
  145. c_node = _findChildBackwards(self._c_node, 0)
  146. return c_node != NULL
  147. def __deepcopy__(self, memo):
  148. "__deepcopy__(self, memo)"
  149. return self.__copy__()
  150. cpdef __copy__(self):
  151. "__copy__(self)"
  152. cdef xmlDoc* c_doc
  153. cdef xmlNode* c_node
  154. cdef _Document new_doc
  155. if self._c_node is NULL:
  156. return self
  157. c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
  158. new_doc = _documentFactory(c_doc, None)
  159. root = new_doc.getroot()
  160. if root is not None:
  161. return root
  162. # Comment/PI
  163. c_node = c_doc.children
  164. while c_node is not NULL and c_node.type != self._c_node.type:
  165. c_node = c_node.next
  166. if c_node is NULL:
  167. return None
  168. return _elementFactory(new_doc, c_node)
  169. def __iter__(self):
  170. return iter(self.getchildren())
  171. def iterchildren(self, tag=None, *, reversed=False):
  172. """iterchildren(self, tag=None, reversed=False)
  173. Iterate over the children of this element.
  174. """
  175. children = self.getchildren()
  176. if tag is not None and tag != '*':
  177. children = [ el for el in children if el.tag == tag ]
  178. if reversed:
  179. children = children[::-1]
  180. return iter(children)
  181. cpdef getchildren(self):
  182. """Returns all subelements. The elements are returned in document
  183. order.
  184. """
  185. cdef xmlNode* c_node
  186. cdef list result
  187. self._assertNode()
  188. result = []
  189. c_node = self._c_node.children
  190. while c_node is not NULL:
  191. if tree._isElement(c_node):
  192. result.append(_newReadOnlyProxy(self._source_proxy, c_node))
  193. c_node = c_node.next
  194. return result
  195. def getparent(self):
  196. """Returns the parent of this element or None for the root element.
  197. """
  198. cdef xmlNode* c_parent
  199. self._assertNode()
  200. c_parent = self._c_node.parent
  201. if c_parent is NULL or not tree._isElement(c_parent):
  202. return None
  203. else:
  204. return _newReadOnlyProxy(self._source_proxy, c_parent)
  205. def getnext(self):
  206. """Returns the following sibling of this element or None.
  207. """
  208. cdef xmlNode* c_node
  209. self._assertNode()
  210. c_node = _nextElement(self._c_node)
  211. if c_node is not NULL:
  212. return _newReadOnlyProxy(self._source_proxy, c_node)
  213. return None
  214. def getprevious(self):
  215. """Returns the preceding sibling of this element or None.
  216. """
  217. cdef xmlNode* c_node
  218. self._assertNode()
  219. c_node = _previousElement(self._c_node)
  220. if c_node is not NULL:
  221. return _newReadOnlyProxy(self._source_proxy, c_node)
  222. return None
  223. @cython.final
  224. @cython.internal
  225. cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
  226. """A read-only proxy for processing instructions (for internal use only!)"""
  227. @property
  228. def target(self):
  229. self._assertNode()
  230. return funicode(self._c_node.name)
  231. @cython.final
  232. @cython.internal
  233. cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
  234. """A read-only proxy for entity references (for internal use only!)"""
  235. property name:
  236. def __get__(self):
  237. return funicode(self._c_node.name)
  238. def __set__(self, value):
  239. value_utf = _utf8(value)
  240. if '&' in value or ';' in value:
  241. raise ValueError(f"Invalid entity name '{value}'")
  242. tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
  243. @property
  244. def text(self):
  245. return f'&{funicode(self._c_node.name)};'
  246. @cython.internal
  247. cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
  248. """The main read-only Element proxy class (for internal use only!)."""
  249. @property
  250. def attrib(self):
  251. self._assertNode()
  252. return dict(_collectAttributes(self._c_node, 3))
  253. @property
  254. def prefix(self):
  255. """Namespace prefix or None.
  256. """
  257. self._assertNode()
  258. if self._c_node.ns is not NULL:
  259. if self._c_node.ns.prefix is not NULL:
  260. return funicode(self._c_node.ns.prefix)
  261. return None
  262. @property
  263. def nsmap(self):
  264. """Namespace prefix->URI mapping known in the context of this
  265. Element. This includes all namespace declarations of the
  266. parents.
  267. Note that changing the returned dict has no effect on the Element.
  268. """
  269. self._assertNode()
  270. return _build_nsmap(self._c_node)
  271. def get(self, key, default=None):
  272. """Gets an element attribute.
  273. """
  274. self._assertNode()
  275. return _getNodeAttributeValue(self._c_node, key, default)
  276. def keys(self):
  277. """Gets a list of attribute names. The names are returned in an
  278. arbitrary order (just like for an ordinary Python dictionary).
  279. """
  280. self._assertNode()
  281. return _collectAttributes(self._c_node, 1)
  282. def values(self):
  283. """Gets element attributes, as a sequence. The attributes are returned
  284. in an arbitrary order.
  285. """
  286. self._assertNode()
  287. return _collectAttributes(self._c_node, 2)
  288. def items(self):
  289. """Gets element attributes, as a sequence. The attributes are returned
  290. in an arbitrary order.
  291. """
  292. self._assertNode()
  293. return _collectAttributes(self._c_node, 3)
  294. cdef _ReadOnlyProxy _newReadOnlyProxy(
  295. _ReadOnlyProxy source_proxy, xmlNode* c_node):
  296. cdef _ReadOnlyProxy el
  297. if c_node.type == tree.XML_ELEMENT_NODE:
  298. el = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
  299. elif c_node.type == tree.XML_PI_NODE:
  300. el = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
  301. elif c_node.type in (tree.XML_COMMENT_NODE,
  302. tree.XML_ENTITY_REF_NODE):
  303. el = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
  304. else:
  305. raise TypeError(f"Unsupported element type: {c_node.type}")
  306. el._c_node = c_node
  307. _initReadOnlyProxy(el, source_proxy)
  308. return el
  309. cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
  310. _ReadOnlyProxy source_proxy):
  311. if source_proxy is None:
  312. el._source_proxy = el
  313. el._dependent_proxies = [el]
  314. else:
  315. el._source_proxy = source_proxy
  316. source_proxy._dependent_proxies.append(el)
  317. cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
  318. cdef xmlNode* c_node
  319. cdef _ReadOnlyProxy el
  320. if sourceProxy is None:
  321. return
  322. if sourceProxy._dependent_proxies is None:
  323. return
  324. for el in sourceProxy._dependent_proxies:
  325. c_node = el._c_node
  326. el._c_node = NULL
  327. if el._free_after_use:
  328. tree.xmlFreeNode(c_node)
  329. del sourceProxy._dependent_proxies[:]
  330. # opaque wrapper around non-element nodes, e.g. the document node
  331. #
  332. # This class does not imply any restrictions on modifiability or
  333. # read-only status of the node, so use with caution.
  334. @cython.internal
  335. cdef class _OpaqueNodeWrapper:
  336. cdef tree.xmlNode* _c_node
  337. def __init__(self):
  338. raise TypeError, "This type cannot be instantiated from Python"
  339. @cython.final
  340. @cython.internal
  341. cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
  342. cdef int _assertNode(self) except -1:
  343. """This is our way of saying: this proxy is invalid!
  344. """
  345. assert self._c_node is not NULL, "Proxy invalidated!"
  346. return 0
  347. cpdef append(self, other_element):
  348. """Append a copy of an Element to the list of children.
  349. """
  350. cdef xmlNode* c_next
  351. cdef xmlNode* c_node
  352. self._assertNode()
  353. c_node = _roNodeOf(other_element)
  354. if c_node.type == tree.XML_ELEMENT_NODE:
  355. if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL:
  356. raise ValueError, "cannot append, document already has a root element"
  357. elif c_node.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
  358. raise TypeError, f"unsupported element type for top-level node: {c_node.type}"
  359. c_node = _copyNodeToDoc(c_node, <tree.xmlDoc*>self._c_node)
  360. c_next = c_node.next
  361. tree.xmlAddChild(self._c_node, c_node)
  362. _moveTail(c_next, c_node)
  363. def extend(self, elements):
  364. """Append a copy of all Elements from a sequence to the list of
  365. children.
  366. """
  367. self._assertNode()
  368. for element in elements:
  369. self.append(element)
  370. cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
  371. cdef _OpaqueNodeWrapper node
  372. if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
  373. node = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
  374. else:
  375. node = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
  376. node._c_node = c_node
  377. return node
  378. # element proxies that allow restricted modification
  379. @cython.internal
  380. cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
  381. """A read-only proxy that allows changing the text content.
  382. """
  383. property text:
  384. def __get__(self):
  385. self._assertNode()
  386. if self._c_node.content is NULL:
  387. return ''
  388. else:
  389. return funicode(self._c_node.content)
  390. def __set__(self, value):
  391. cdef tree.xmlDict* c_dict
  392. self._assertNode()
  393. if value is None:
  394. c_text = <const_xmlChar*>NULL
  395. else:
  396. value = _utf8(value)
  397. c_text = _xcstr(value)
  398. tree.xmlNodeSetContent(self._c_node, c_text)
  399. @cython.final
  400. @cython.internal
  401. cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
  402. """A read-only proxy that allows changing the text/target content of a
  403. processing instruction.
  404. """
  405. property target:
  406. def __get__(self):
  407. self._assertNode()
  408. return funicode(self._c_node.name)
  409. def __set__(self, value):
  410. self._assertNode()
  411. value = _utf8(value)
  412. c_text = _xcstr(value)
  413. tree.xmlNodeSetName(self._c_node, c_text)
  414. @cython.final
  415. @cython.internal
  416. cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
  417. "A read-only proxy for entity references (for internal use only!)"
  418. property name:
  419. def __get__(self):
  420. return funicode(self._c_node.name)
  421. def __set__(self, value):
  422. value = _utf8(value)
  423. assert '&' not in value and ';' not in value, \
  424. f"Invalid entity name '{value}'"
  425. c_text = _xcstr(value)
  426. tree.xmlNodeSetName(self._c_node, c_text)
  427. @cython.final
  428. @cython.internal
  429. cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
  430. """A read-only element that allows adding children and changing the
  431. text content (i.e. everything that adds to the subtree).
  432. """
  433. cpdef append(self, other_element):
  434. """Append a copy of an Element to the list of children.
  435. """
  436. cdef xmlNode* c_next
  437. cdef xmlNode* c_node
  438. self._assertNode()
  439. c_node = _roNodeOf(other_element)
  440. c_node = _copyNodeToDoc(c_node, self._c_node.doc)
  441. c_next = c_node.next
  442. tree.xmlAddChild(self._c_node, c_node)
  443. _moveTail(c_next, c_node)
  444. def extend(self, elements):
  445. """Append a copy of all Elements from a sequence to the list of
  446. children.
  447. """
  448. self._assertNode()
  449. for element in elements:
  450. self.append(element)
  451. property text:
  452. """Text before the first subelement. This is either a string or the
  453. value None, if there was no text.
  454. """
  455. def __get__(self):
  456. self._assertNode()
  457. return _collectText(self._c_node.children)
  458. def __set__(self, value):
  459. self._assertNode()
  460. if isinstance(value, QName):
  461. value = _resolveQNameText(self, value).decode('utf8')
  462. _setNodeText(self._c_node, value)
  463. cdef _ReadOnlyProxy _newAppendOnlyProxy(
  464. _ReadOnlyProxy source_proxy, xmlNode* c_node):
  465. cdef _ReadOnlyProxy el
  466. if c_node.type == tree.XML_ELEMENT_NODE:
  467. el = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
  468. elif c_node.type == tree.XML_PI_NODE:
  469. el = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
  470. elif c_node.type == tree.XML_COMMENT_NODE:
  471. el = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
  472. else:
  473. raise TypeError(f"Unsupported element type: {c_node.type}")
  474. el._c_node = c_node
  475. _initReadOnlyProxy(el, source_proxy)
  476. return el
  477. cdef xmlNode* _roNodeOf(element) except NULL:
  478. cdef xmlNode* c_node
  479. if isinstance(element, _Element):
  480. c_node = (<_Element>element)._c_node
  481. elif isinstance(element, _ReadOnlyProxy):
  482. c_node = (<_ReadOnlyProxy>element)._c_node
  483. elif isinstance(element, _OpaqueNodeWrapper):
  484. c_node = (<_OpaqueNodeWrapper>element)._c_node
  485. else:
  486. raise TypeError, f"invalid argument type {type(element)}"
  487. if c_node is NULL:
  488. raise TypeError, "invalid element"
  489. return c_node
  490. cdef xmlNode* _nonRoNodeOf(element) except NULL:
  491. cdef xmlNode* c_node
  492. if isinstance(element, _Element):
  493. c_node = (<_Element>element)._c_node
  494. elif isinstance(element, _AppendOnlyElementProxy):
  495. c_node = (<_AppendOnlyElementProxy>element)._c_node
  496. elif isinstance(element, _OpaqueNodeWrapper):
  497. c_node = (<_OpaqueNodeWrapper>element)._c_node
  498. else:
  499. raise TypeError, f"invalid argument type {type(element)}"
  500. if c_node is NULL:
  501. raise TypeError, "invalid element"
  502. return c_node