您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

620 行
23 KiB

  1. # Proxy functions and low level node allocation stuff
  2. # Proxies represent elements, their reference is stored in the C
  3. # structure of the respective node to avoid multiple instantiation of
  4. # the Python class.
  @cython.linetrace(False)
  @cython.profile(False)
  cdef inline _Element getProxy(xmlNode* c_node):
      """Return the proxy object stored on a C node, if any.

      The proxy reference is read from ``c_node._private``.  ``None`` is
      returned when ``c_node`` itself is NULL or when no proxy has been
      stored on it.
      """
      if c_node is NULL or c_node._private is NULL:
          return None
      return <_Element>c_node._private
  @cython.linetrace(False)
  @cython.profile(False)
  cdef inline bint hasProxy(xmlNode* c_node):
      """Tell whether a proxy reference is stored in ``c_node._private``.

      ``c_node`` is dereferenced without a NULL check, so callers must
      pass a valid node.
      """
      return c_node._private is not NULL
  @cython.linetrace(False)
  @cython.profile(False)
  cdef inline int _registerProxy(_Element proxy, _Document doc,
                                 xmlNode* c_node) except -1:
      """Bind ``proxy`` to ``c_node`` and to its owning ``doc``.

      The proxy remembers the document and the C node, and the C node's
      ``_private`` slot is pointed back at the proxy.  Asserts that the
      node does not carry a proxy yet.  Returns 0.
      """
      assert not hasProxy(c_node), "double registering proxy!"
      # link the C node back to its Python proxy ...
      c_node._private = <void*>proxy
      # ... and let the proxy know its node and document
      proxy._c_node = c_node
      proxy._doc = doc
      return 0
  @cython.linetrace(False)
  @cython.profile(False)
  cdef inline int _unregisterProxy(_Element proxy) except -1:
      """Detach ``proxy`` from the C node it is registered on.

      Asserts that the node's ``_private`` slot really refers to this
      proxy, then clears the slot.  ``proxy._c_node`` itself is left
      untouched.  Returns 0.
      """
      cdef xmlNode* c_proxied_node = proxy._c_node
      assert c_proxied_node._private is <void*>proxy, "Tried to unregister unknown proxy"
      c_proxied_node._private = NULL
      return 0
  42. ################################################################################
  43. # temporarily make a node the root node of its document
  cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
      """Build a temporary document rooted at ``c_node``.

      Thin wrapper around ``_plainFakeRootDoc()`` with ``with_siblings``
      switched on.
      """
      return _plainFakeRootDoc(c_base_doc, c_node, True)
  cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
                                 bint with_siblings) except NULL:
      """Build a temporary document that has the given node as root node.

      If ``c_node`` already is the root element of ``c_base_doc`` (and
      either ``with_siblings`` is set or the node has no siblings), the
      base document itself is returned.  Otherwise, a shallow copy of the
      document and of the node is created, and the *original* children of
      ``c_node`` are hooked into the copied root by diverting their parent
      pointers.  The original node is remembered in ``c_doc._private``.

      Note that copy and original must not be modified during the lifetime
      of the fake document!  Always call ``_destroyFakeDoc()`` after use!

      Raises MemoryError if the root node cannot be copied.
      """
      cdef xmlNode* c_child
      cdef xmlNode* c_root
      cdef xmlNode* c_new_root
      cdef xmlDoc* c_doc
      if with_siblings or (c_node.prev is NULL and c_node.next is NULL):
          c_root = tree.xmlDocGetRootElement(c_base_doc)
          if c_root is c_node:
              # already the root node, no siblings
              return c_base_doc

      c_doc = _copyDoc(c_base_doc, 0)  # non recursive!
      c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2)  # non recursive!
      if c_new_root is NULL:
          # xmlDocCopyNode() returns NULL on failure; do not leak the
          # temporary document and do not dereference the NULL root below
          tree.xmlFreeDoc(c_doc)
          raise MemoryError()
      tree.xmlDocSetRootElement(c_doc, c_new_root)
      _copyParentNamespaces(c_node, c_new_root)

      # share the original subtree with the copied root node
      c_new_root.children = c_node.children
      c_new_root.last = c_node.last
      c_new_root.next = c_new_root.prev = NULL

      # store original node
      c_doc._private = c_node

      # divert parent pointers of children
      c_child = c_new_root.children
      while c_child is not NULL:
          c_child.parent = c_new_root
          c_child = c_child.next

      c_doc.children = c_new_root
      return c_doc
  cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc) noexcept:
      """Dispose of a temporary document.

      Nothing happens when ``c_doc`` is the base document itself.
      Otherwise, the children hanging below the fake root are handed back
      to the node stored in ``c_doc._private``, the fake root is emptied
      and the temporary document is freed.
      """
      cdef xmlNode* c_fake_root
      cdef xmlNode* c_original
      cdef xmlNode* c_current
      if c_doc is c_base_doc:
          return

      c_fake_root = tree.xmlDocGetRootElement(c_doc)
      c_original = <xmlNode*>c_doc._private

      # restore parent pointers of children
      c_current = c_fake_root.children
      while c_current is not NULL:
          c_current.parent = c_original
          c_current = c_current.next

      # prevent recursive removal of children
      c_fake_root.children = c_fake_root.last = NULL
      tree.xmlFreeDoc(c_doc)
  cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
      """Element factory for nodes that may live in a fake root document.

      Instantiating the fake root node itself would turn bad once the fake
      document is destroyed.  So, when ``c_element`` belongs to a foreign
      document that carries a ``_private`` pointer and is that document's
      first child, the node stored in ``_private`` is instantiated in its
      place.  Every other node is passed on unchanged.
      """
      cdef xmlDoc* c_owner_doc = c_element.doc
      if (c_owner_doc is not doc._c_doc
              and c_owner_doc._private is not NULL
              and c_element is c_owner_doc.children):
          # asked for the fake root => substitute the original node
          c_element = <xmlNode*>c_owner_doc._private
      return _elementFactory(doc, c_element)
  107. ################################################################################
  108. # support for freeing tree elements when proxy objects are destroyed
  cdef int attemptDeallocation(xmlNode* c_node) noexcept:
      """Try to free ``c_node`` or the top of the subtree it lives in.

      Returns 1 if a subtree was freed, 0 otherwise.
      """
      cdef xmlNode* c_subtree_top
      if c_node is NULL:
          # could be we actually aren't referring to the tree at all
          return 0
      c_subtree_top = getDeallocationTop(c_node)
      if c_subtree_top is NULL:
          return 0
      _removeText(c_subtree_top.next)  # tail
      tree.xmlFreeNode(c_subtree_top)
      return 1
  cdef xmlNode* getDeallocationTop(xmlNode* c_node) noexcept:
      """Find the top-most node of a subtree that may be freed, or NULL.

      NULL is returned as soon as a proxy is found on ``c_node``, on one
      of its ancestors, below the top node, or on/below one of the top
      node's element siblings - and also when the subtree is still
      attached to an XML or HTML document node.
      """
      cdef xmlNode* c_sibling
      if hasProxy(c_node):
          return NULL

      # climb up to the top of the subtree
      while c_node.parent is not NULL:
          c_node = c_node.parent
          if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
              # still attached to a document => don't deallocate
              return NULL
          if hasProxy(c_node):
              return NULL

      # any proxies left below the top node?
      if not canDeallocateChildNodes(c_node):
          return NULL

      # any proxies left on or below the preceding siblings?
      c_sibling = c_node.prev
      while c_sibling is not NULL:
          if _isElement(c_sibling) and (
                  hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling)):
              return NULL
          c_sibling = c_sibling.prev

      # ... or on or below the following siblings?
      c_sibling = c_node.next
      while c_sibling is not NULL:
          if _isElement(c_sibling) and (
                  hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling)):
              return NULL
          c_sibling = c_sibling.next

      return c_node
  cdef int canDeallocateChildNodes(xmlNode* c_parent) noexcept:
      """Return 0 if a visited node below ``c_parent`` has a proxy, else 1.

      The nodes are visited by the BEGIN/END_FOR_EACH_ELEMENT_FROM macro
      pair, starting at the first child of ``c_parent`` (presumably a
      traversal of all elements in the subtree - see the macro definition).
      """
      cdef xmlNode* c_node = c_parent.children
      tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
      if hasProxy(c_node):
          return 0
      tree.END_FOR_EACH_ELEMENT_FROM(c_node)
      return 1
  168. ################################################################################
  169. # fix _Document references and namespaces when a node changes documents
  cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) noexcept nogil:
      """Declare the namespaces of all ancestors of c_from_node on c_to_node.

      Walks up the parent chain for as long as the ancestors are elements,
      XInclude nodes or an XML document node, and re-declares each of
      their ``nsDef`` entries on ``c_to_node``.
      """
      cdef xmlNode* c_ancestor = c_from_node.parent
      cdef xmlNs* c_decl
      while c_ancestor and (tree._isElementOrXInclude(c_ancestor) or
                            c_ancestor.type == tree.XML_DOCUMENT_NODE):
          c_decl = c_ancestor.nsDef
          while c_decl:
              # libxml2 will check if the prefix is already defined
              tree.xmlNewNs(c_to_node, c_decl.href, c_decl.prefix)
              c_decl = c_decl.next
          c_ancestor = c_ancestor.parent
  # One entry of the namespace cache: pairs a namespace that nodes of a
  # moved subtree refer to ("old") with the namespace that should be used
  # in its place ("new").  Both may be the same xmlNs when a declaration
  # is kept as is.
  ctypedef struct _ns_update_map:
      xmlNs* old
      xmlNs* new
  # Growable array of namespace mappings.
  ctypedef struct _nscache:
      _ns_update_map* ns_map  # heap allocated entries, NULL while unallocated
      size_t size             # number of entries that 'ns_map' has room for
      size_t last             # number of entries in use == next free index
  cdef int _growNsCache(_nscache* c_ns_cache) except -1:
      """Enlarge the entry array of the namespace cache.

      An empty cache gets room for 20 entries, otherwise the capacity is
      doubled.  The new capacity is only recorded once the reallocation
      succeeded.  On allocation failure, the old array is released, the
      cache is reset to its empty state (NULL array, zero size, zero
      entries) and MemoryError is raised.

      Returns 0 on success.
      """
      cdef _ns_update_map* ns_map_ptr
      cdef size_t new_size
      if c_ns_cache.size == 0:
          new_size = 20
      else:
          new_size = c_ns_cache.size * 2
      ns_map_ptr = <_ns_update_map*> python.lxml_realloc(
          c_ns_cache.ns_map, new_size, sizeof(_ns_update_map))
      if not ns_map_ptr:
          python.lxml_free(c_ns_cache.ns_map)
          c_ns_cache.ns_map = NULL
          # keep the bookkeeping in sync with the (now missing) array
          c_ns_cache.size = 0
          c_ns_cache.last = 0
          raise MemoryError()
      c_ns_cache.ns_map = ns_map_ptr
      c_ns_cache.size = new_size
      return 0
  cdef inline int _appendToNsCache(_nscache* c_ns_cache,
                                   xmlNs* c_old_ns, xmlNs* c_new_ns) except -1:
      """Add the mapping ``c_old_ns`` -> ``c_new_ns`` to the namespace cache.

      Grows the entry array first when all slots are in use, which may
      raise MemoryError.  Returns 0 on success.
      """
      cdef size_t slot = c_ns_cache.last
      if slot >= c_ns_cache.size:
          _growNsCache(c_ns_cache)
      c_ns_cache.ns_map[slot] = _ns_update_map(old=c_old_ns, new=c_new_ns)
      c_ns_cache.last = slot + 1
      return 0
  cdef int _stripRedundantNamespaceDeclarations(xmlNode* c_element, _nscache* c_ns_cache,
                                                xmlNs** c_del_ns_list) except -1:
      """Unlink the namespace declarations of an element whose href is
      already declared by one of its parents.

      The removed xmlNs structs are not freed, they are pushed onto the
      front of ``c_del_ns_list``.  Every declaration gets an entry in the
      namespace cache: kept ones map to themselves, removed ones map to
      the declaration that was found on the parents.
      """
      # Points at the link (nsDef or some xmlNs.next) that refers to the
      # declaration under inspection, so that unlinking is a plain store.
      cdef xmlNs** c_link = &c_element.nsDef
      cdef xmlNs* c_current
      cdef xmlNs* c_known
      while c_link[0] is not NULL:
          c_current = c_link[0]
          c_known = tree.xmlSearchNsByHref(
              c_element.doc, c_element.parent, c_current.href)
          if c_known is not NULL:
              # known namespace href => cache mapping and strip old ns
              _appendToNsCache(c_ns_cache, c_current, c_known)
              # unlink the declaration ...
              c_link[0] = c_current.next
              # ... and prepend it to the garbage chain
              c_current.next = c_del_ns_list[0]
              c_del_ns_list[0] = c_current
          else:
              # new namespace href => keep and cache the ns declaration
              _appendToNsCache(c_ns_cache, c_current, c_current)
              c_link = &c_current.next
      return 0
  cdef void _cleanUpFromNamespaceAdaptation(xmlNode* c_start_node,
                                            _nscache* c_ns_cache, xmlNs* c_del_ns_list) noexcept:
      """Try to recover from exceptions with really bad timing.

      We were in the middle of ripping out xmlNs-es and likely ran out of
      memory.  Try to fix up the tree by re-adding the original xmlNs
      declarations (which might still be used in some places) to the end
      of the ``nsDef`` list of ``c_start_node``.

      The entry array of the namespace cache is released and the cache is
      reset to its empty state, so that the caller's struct does not keep
      a dangling pointer that could be read or freed a second time.
      """
      cdef xmlNs* c_ns
      if c_ns_cache.ns_map:
          python.lxml_free(c_ns_cache.ns_map)
          c_ns_cache.ns_map = NULL
          c_ns_cache.size = 0
          c_ns_cache.last = 0
      if c_del_ns_list:
          if not c_start_node.nsDef:
              c_start_node.nsDef = c_del_ns_list
          else:
              # append the stripped declarations behind the existing ones
              c_ns = c_start_node.nsDef
              while c_ns.next:
                  c_ns = c_ns.next
              c_ns.next = c_del_ns_list
  cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
                              xmlNode* c_element) except -1:
      """Fix the xmlNs pointers of a node and its subtree that were moved.

      Originally copied from libxml2's xmlReconciliateNs().  Expects
      libxml2 doc pointers of node to be correct already, but fixes
      _Document references.

      For each node in the subtree, we do this:

      1) Remove redundant declarations of namespace that are already
         defined in its parents.

      2) Replace namespaces that are *not* defined on the node or its
         parents by the equivalent namespace declarations that *are*
         defined on the node or its parents (possibly using a different
         prefix).  If a namespace is unknown, declare a new one on the
         node.

      3) Reassign the names of tags and attribute from the dict of the
         target document *iff* it is different from the dict used in the
         source subtree.

      4) Set the Document reference to the new Document (if different).
         This is done on backtracking to keep the original Document
         alive as long as possible, until all its elements are updated.

      Note that the namespace declarations are removed from the tree in
      step 1), but freed only after the complete subtree was traversed
      and all occurrences were replaced by tree-internal pointers.

      Nodes that are neither elements nor XInclude nodes are left alone.
      Returns 0 on success.  Exceptions raised while adapting namespaces
      are propagated after an attempt to restore the stripped namespace
      declarations.
      """
      cdef xmlNode* c_start_node
      cdef xmlNode* c_node
      cdef xmlDoc* c_doc = doc._c_doc
      cdef _nscache c_ns_cache = [NULL, 0, 0]
      cdef xmlNs* c_del_ns_list = NULL
      # plain C counter: no Python integer arithmetic during the traversal
      cdef size_t proxy_count = 0
      cdef _Element proxy

      if not tree._isElementOrXInclude(c_element):
          return 0

      c_start_node = c_element

      tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
      if tree._isElementOrXInclude(c_element):
          if hasProxy(c_element):
              proxy_count += 1

          # 1) cut out namespaces defined here that are already known by
          #    the ancestors
          if c_element.nsDef is not NULL:
              try:
                  _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list)
              except:
                  _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list)
                  raise

          # 2) make sure the namespaces of an element and its attributes
          #    are declared in this document (i.e. on the node or its parents)
          if c_element.ns is not NULL:
              _fixCNs(doc, c_start_node, c_element, &c_ns_cache, c_del_ns_list)

          c_node = <xmlNode*>c_element.properties
          while c_node is not NULL:
              if c_node.ns is not NULL:
                  _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list)
              c_node = c_node.next

      tree.END_FOR_EACH_FROM(c_element)

      # free now unused namespace declarations
      if c_del_ns_list is not NULL:
          tree.xmlFreeNsList(c_del_ns_list)

      # cleanup
      if c_ns_cache.ns_map is not NULL:
          python.lxml_free(c_ns_cache.ns_map)

      # 3) fix the names in the tree if we moved it from a different thread
      if doc._c_doc.dict is not c_source_doc.dict:
          fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)

      # 4) fix _Document references
      #    (and potentially deallocate the source document)
      if proxy_count > 0:
          if proxy_count == 1 and c_start_node._private is not NULL:
              # shortcut: the only proxy in the subtree sits on the start node
              proxy = getProxy(c_start_node)
              if proxy is not None:
                  if proxy._doc is not doc:
                      proxy._doc = doc
              else:
                  fixElementDocument(c_start_node, doc, proxy_count)
          else:
              fixElementDocument(c_start_node, doc, proxy_count)

      return 0
  cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc) noexcept:
      """Iterative variant of libxml2's 'xmlSetTreeDoc()' that re-links a
      whole subtree to ``c_doc``.

      It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42

      For element nodes, the attributes and their child nodes are
      re-linked as well; ID attributes are removed from the ID table of
      the document that the element still refers to at that point.
      """
      cdef tree.xmlAttr* c_attr
      tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1)
      if c_node.type == tree.XML_ELEMENT_NODE:
          c_attr = <tree.xmlAttr*>c_node.properties
          while c_attr is not NULL:
              if c_attr.atype == tree.XML_ATTRIBUTE_ID:
                  # c_node.doc is still the old document here
                  tree.xmlRemoveID(c_node.doc, c_attr)
              c_attr.doc = c_doc
              _fixDocChildren(c_attr.children, c_doc)
              c_attr = c_attr.next
      # Set doc link for all nodes, not only elements.
      c_node.doc = c_doc
      tree.END_FOR_EACH_FROM(c_node)
  cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc) noexcept:
      """Set the ``doc`` pointer of ``c_child``, of all its following
      siblings and, recursively, of all their descendants to ``c_doc``.
      """
      while c_child is not NULL:
          c_child.doc = c_doc
          if c_child.children is not NULL:
              _fixDocChildren(c_child.children, c_doc)
          c_child = c_child.next
  cdef int _fixCNs(_Document doc, xmlNode* c_start_node, xmlNode* c_node,
                   _nscache* c_ns_cache, xmlNs* c_del_ns_list) except -1:
      """Replace the namespace of ``c_node`` (an element or attribute with
      a non-NULL ``ns``) by one that is declared in the target document.

      The namespace cache is consulted first.  On a cache miss, a
      namespace is looked up or declared via ``doc._findOrBuildNodeNs()``
      starting at ``c_start_node``, and the mapping from the *original*
      namespace of the node to its replacement is added to the cache, so
      that other nodes referring to the same original xmlNs can be served
      from the cache.

      If anything fails on the slow path, the stripped namespace
      declarations are restored via _cleanUpFromNamespaceAdaptation()
      before the exception is re-raised.  Returns 0 on success.
      """
      cdef xmlNs* c_ns = NULL
      # remember the original namespace: c_node.ns gets overwritten below,
      # but the cache must be keyed on what the other nodes still point to
      cdef xmlNs* c_old_ns = c_node.ns
      cdef bint is_prefixed_attr = (c_node.type == tree.XML_ATTRIBUTE_NODE and c_node.ns.prefix)

      for ns_map in c_ns_cache.ns_map[:c_ns_cache.last]:
          if c_old_ns is ns_map.old:
              if is_prefixed_attr and not ns_map.new.prefix:
                  # avoid dropping prefix from attributes
                  continue
              c_ns = ns_map.new
              break

      if c_ns:
          c_node.ns = c_ns
      else:
          # not in cache or not acceptable
          # => find a replacement from this document
          try:
              c_ns = doc._findOrBuildNodeNs(
                  c_start_node, c_old_ns.href, c_old_ns.prefix,
                  c_node.type == tree.XML_ATTRIBUTE_NODE)
              c_node.ns = c_ns
              _appendToNsCache(c_ns_cache, c_old_ns, c_ns)
          except:
              _cleanUpFromNamespaceAdaptation(c_start_node, c_ns_cache, c_del_ns_list)
              raise
      return 0
  cdef int fixElementDocument(xmlNode* c_element, _Document doc,
                              size_t proxy_count) except -1:
      """Point the proxies found in the subtree of ``c_element`` at ``doc``.

      ``proxy_count`` is the number of proxies the caller expects in the
      subtree; the function returns as soon as that many have been seen.
      Returns 0.
      """
      cdef xmlNode* c_node = c_element
      cdef _Element proxy = None # init-to-None required due to fake-loop below
      tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
      if c_node._private is not NULL:
          proxy = getProxy(c_node)
          if proxy is not None:
              if proxy._doc is not doc:
                  proxy._doc = doc
              proxy_count -= 1
              if proxy_count == 0:
                  # all expected proxies were updated
                  return 0
      tree.END_FOR_EACH_FROM(c_node)
      return 0
  cdef void fixThreadDictNames(xmlNode* c_element,
                               tree.xmlDict* c_src_dict,
                               tree.xmlDict* c_dict) noexcept nogil:
      """Re-assign the names of tags and attributes to ``c_dict``.

      This should only be called when the element is based on a
      different libxml2 tag name dictionary.

      For a document node, its namespace declarations, its external and
      internal DTD subsets and all of its children are processed.  For an
      element or XInclude node, only its own subtree is processed.  Other
      node types are ignored.
      """
      cdef xmlNode* c_child
      if c_element.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
          # may define "xml" namespace
          fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
          if c_element.doc.extSubset:
              fixThreadDictNamesForDtd(c_element.doc.extSubset, c_src_dict, c_dict)
          if c_element.doc.intSubset:
              fixThreadDictNamesForDtd(c_element.doc.intSubset, c_src_dict, c_dict)
          c_child = c_element.children
          while c_child is not NULL:
              fixThreadDictNamesForNode(c_child, c_src_dict, c_dict)
              c_child = c_child.next
      elif tree._isElementOrXInclude(c_element):
          fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
  cdef inline void _fixThreadDictPtr(const_xmlChar** c_ptr,
                                     tree.xmlDict* c_src_dict,
                                     tree.xmlDict* c_dict) noexcept nogil:
      """Swap the string that ``c_ptr`` refers to for its counterpart in
      ``c_dict``, provided that it is owned by ``c_src_dict``.

      The pointer is left unchanged if it is NULL, if there is no source
      dict, if the source dict does not own the string, or if the lookup
      in ``c_dict`` fails.
      """
      cdef const_xmlChar* c_old = c_ptr[0]
      cdef const_xmlChar* c_new
      if not c_old or not c_src_dict:
          return
      if not tree.xmlDictOwns(c_src_dict, c_old):
          return
      # return value can be NULL on memory error, but we don't handle that here
      c_new = tree.xmlDictLookup(c_dict, c_old, -1)
      if c_new:
          c_ptr[0] = c_new
  cdef void fixThreadDictNamesForNode(xmlNode* c_element,
                                      tree.xmlDict* c_src_dict,
                                      tree.xmlDict* c_dict) noexcept nogil:
      """Move the dict-owned strings of the nodes visited from
      ``c_element`` on over to ``c_dict``.

      Text nodes get their content fixed, comment nodes are skipped, and
      all other nodes get their name fixed.  Element and XInclude start
      nodes additionally get their attributes and namespace declarations
      fixed before their name.
      """
      cdef xmlNode* c_node = c_element
      tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
      if c_node.type == tree.XML_TEXT_NODE:
          # libxml2's SAX2 parser interns some indentation space
          fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
      elif c_node.type == tree.XML_COMMENT_NODE:
          pass  # don't touch c_node.name
      else:
          if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START):
              fixThreadDictNamesForAttributes(
                  c_node.properties, c_src_dict, c_dict)
              fixThreadDictNsForNode(c_node, c_src_dict, c_dict)
          _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
      tree.END_FOR_EACH_FROM(c_node)
  cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
                                                   tree.xmlDict* c_src_dict,
                                                   tree.xmlDict* c_dict) noexcept nogil:
      """Fix the names and the value nodes of ``c_attr`` and of all
      attributes that follow it in the list.
      """
      cdef xmlNode* c_current = <xmlNode*>c_attr
      cdef xmlNode* c_value
      while c_current is not NULL:
          if c_current.type not in (tree.XML_TEXT_NODE, tree.XML_COMMENT_NODE):
              _fixThreadDictPtr(&c_current.name, c_src_dict, c_dict)
          # libxml2 keeps some (!) attribute values in the dict
          c_value = c_current.children
          while c_value is not NULL:
              fixThreadDictContentForNode(c_value, c_src_dict, c_dict)
              c_value = c_value.next
          c_current = c_current.next
  cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
                                               tree.xmlDict* c_src_dict,
                                               tree.xmlDict* c_dict) noexcept nogil:
      """Replace the content of ``c_node`` by the equivalent string from
      ``c_dict`` if the current content is owned by ``c_src_dict``.

      Nothing is done when the node has no content, when the content
      pointer refers to the node's own ``properties`` field, or when
      there is no source dict.
      """
      if c_node.content is not NULL and \
              c_node.content is not <xmlChar*>&c_node.properties:
          # xmlDictOwns() reports an error (-1, i.e. a true value) for a
          # NULL dict, so rule that out first - same as _fixThreadDictPtr()
          if c_src_dict and tree.xmlDictOwns(c_src_dict, c_node.content):
              # result can be NULL on memory error, but we don't handle that here
              c_node.content = <xmlChar*>tree.xmlDictLookup(c_dict, c_node.content, -1)
  cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
                                          tree.xmlDict* c_src_dict,
                                          tree.xmlDict* c_dict) noexcept nogil:
      """Fix the href and prefix strings of all namespace declarations
      in the ``nsDef`` list of ``c_node``.
      """
      cdef xmlNs* c_decl = c_node.nsDef
      while c_decl is not NULL:
          _fixThreadDictPtr(&c_decl.href, c_src_dict, c_dict)
          _fixThreadDictPtr(&c_decl.prefix, c_src_dict, c_dict)
          c_decl = c_decl.next
  cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd,
                                     tree.xmlDict* c_src_dict,
                                     tree.xmlDict* c_dict) noexcept nogil:
      """Fix the dict-owned strings of the declarations in a DTD.

      Handles the direct children of ``c_dtd``: for element declarations,
      the name and prefix of the content model's top node plus the
      strings of the attribute declarations chained via ``nexth``; for
      entity declarations, their name, IDs and content.  Other
      declaration types are skipped.
      """
      cdef xmlNode* c_decl = c_dtd.children
      cdef tree.xmlElement* c_element
      cdef tree.xmlAttribute* c_attribute
      cdef tree.xmlEntity* c_entity
      while c_decl is not NULL:
          if c_decl.type == tree.XML_ELEMENT_DECL:
              c_element = <tree.xmlElement*>c_decl
              if c_element.content is not NULL:
                  _fixThreadDictPtr(&c_element.content.name, c_src_dict, c_dict)
                  _fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict)
              c_attribute = c_element.attributes
              while c_attribute is not NULL:
                  _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict)
                  _fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict)
                  _fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict)
                  _fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict)
                  c_attribute = c_attribute.nexth
          elif c_decl.type == tree.XML_ENTITY_DECL:
              c_entity = <tree.xmlEntity*>c_decl
              _fixThreadDictPtr(&c_entity.name, c_src_dict, c_dict)
              _fixThreadDictPtr(&c_entity.ExternalID, c_src_dict, c_dict)
              _fixThreadDictPtr(&c_entity.SystemID, c_src_dict, c_dict)
              _fixThreadDictPtr(<const_xmlChar**>&c_entity.content, c_src_dict, c_dict)
          c_decl = c_decl.next
  503. ################################################################################
  504. # adopt an xmlDoc from an external libxml2 document source
  cdef _Document _adoptForeignDoc(xmlDoc* c_doc, _BaseParser parser=None, bint is_owned=True):
      """Wrap an xmlDoc that was produced outside of lxml.

      With ``is_owned`` set, the document is taken over as is and the
      '_private' pointer of every visited node is reset to NULL to
      prevent accidental dereference into lxml proxy objects.  Otherwise,
      a deep copy is made and wrapped in place of the original.

      Raises ValueError for a NULL document or for a node that is neither
      an XML nor an HTML document (an owned one is freed before raising),
      and MemoryError if the copy fails.
      """
      cdef xmlNode* c_node = <xmlNode*>c_doc

      if c_doc is NULL:
          raise ValueError("Illegal document provided: NULL")
      if c_doc.type not in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
          # read the type before the document is potentially freed
          doc_type = c_doc.type
          if is_owned:
              tree.xmlFreeDoc(c_doc)
          raise ValueError(f"Illegal document provided: expected XML or HTML, found {doc_type}")

      if not is_owned:
          # create a fresh copy that lxml owns
          c_doc = tree.xmlCopyDoc(c_doc, 1)
          if c_doc is NULL:
              raise MemoryError()
      else:
          tree.BEGIN_FOR_EACH_FROM(<xmlNode*>c_doc, c_node, 1)
          c_node._private = NULL
          tree.END_FOR_EACH_FROM(c_node)

      return _documentFactory(c_doc, parser)