You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

166 lines
6.2 KiB

  1. # support for RelaxNG validation
  2. from lxml.includes cimport relaxng
  3. cdef object _rnc2rng
  4. try:
  5. import rnc2rng as _rnc2rng
  6. except ImportError:
  7. _rnc2rng = None
  8. cdef int _require_rnc2rng() except -1:
  9. if _rnc2rng is None:
  10. raise RelaxNGParseError(
  11. 'compact syntax not supported (please install rnc2rng)')
  12. return 0
  13. cdef class RelaxNGError(LxmlError):
  14. """Base class for RelaxNG errors.
  15. """
  16. cdef class RelaxNGParseError(RelaxNGError):
  17. """Error while parsing an XML document as RelaxNG.
  18. """
  19. cdef class RelaxNGValidateError(RelaxNGError):
  20. """Error while validating an XML document with a RelaxNG schema.
  21. """
  22. ################################################################################
  23. # RelaxNG
  24. cdef class RelaxNG(_Validator):
  25. """RelaxNG(self, etree=None, file=None)
  26. Turn a document into a Relax NG validator.
  27. Either pass a schema as Element or ElementTree, or pass a file or
  28. filename through the ``file`` keyword argument.
  29. """
  30. cdef relaxng.xmlRelaxNG* _c_schema
  31. def __cinit__(self):
  32. self._c_schema = NULL
  33. def __init__(self, etree=None, *, file=None):
  34. cdef _Document doc
  35. cdef _Element root_node
  36. cdef xmlDoc* fake_c_doc = NULL
  37. cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt = NULL
  38. _Validator.__init__(self)
  39. if etree is not None:
  40. doc = _documentOrRaise(etree)
  41. root_node = _rootNodeOrRaise(etree)
  42. fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
  43. parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(fake_c_doc)
  44. elif file is not None:
  45. if _isString(file):
  46. if file[-4:].lower() == '.rnc':
  47. _require_rnc2rng()
  48. rng_data_utf8 = _utf8(_rnc2rng.dumps(_rnc2rng.load(file)))
  49. doc = _parseMemoryDocument(rng_data_utf8, parser=None, url=file)
  50. parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc)
  51. else:
  52. doc = None
  53. filename = _encodeFilename(file)
  54. with self._error_log:
  55. orig_loader = _register_document_loader()
  56. parser_ctxt = relaxng.xmlRelaxNGNewParserCtxt(_cstr(filename))
  57. _reset_document_loader(orig_loader)
  58. elif (_getFilenameForFile(file) or '')[-4:].lower() == '.rnc':
  59. _require_rnc2rng()
  60. rng_data_utf8 = _utf8(_rnc2rng.dumps(_rnc2rng.load(file)))
  61. doc = _parseMemoryDocument(
  62. rng_data_utf8, parser=None, url=_getFilenameForFile(file))
  63. parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc)
  64. else:
  65. doc = _parseDocument(file, parser=None, base_url=None)
  66. parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc)
  67. else:
  68. raise RelaxNGParseError, "No tree or file given"
  69. if parser_ctxt is NULL:
  70. if fake_c_doc is not NULL:
  71. _destroyFakeDoc(doc._c_doc, fake_c_doc)
  72. raise RelaxNGParseError(
  73. self._error_log._buildExceptionMessage(
  74. "Document is not parsable as Relax NG"),
  75. self._error_log)
  76. # Need a cast here because older libxml2 releases do not use 'const' in the functype.
  77. relaxng.xmlRelaxNGSetParserStructuredErrors(
  78. parser_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
  79. _connectGenericErrorLog(self._error_log, xmlerror.XML_FROM_RELAXNGP)
  80. self._c_schema = relaxng.xmlRelaxNGParse(parser_ctxt)
  81. _connectGenericErrorLog(None)
  82. relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt)
  83. if self._c_schema is NULL:
  84. if fake_c_doc is not NULL:
  85. _destroyFakeDoc(doc._c_doc, fake_c_doc)
  86. raise RelaxNGParseError(
  87. self._error_log._buildExceptionMessage(
  88. "Document is not valid Relax NG"),
  89. self._error_log)
  90. if fake_c_doc is not NULL:
  91. _destroyFakeDoc(doc._c_doc, fake_c_doc)
  92. def __dealloc__(self):
  93. relaxng.xmlRelaxNGFree(self._c_schema)
  94. def __call__(self, etree):
  95. """__call__(self, etree)
  96. Validate doc using Relax NG.
  97. Returns true if document is valid, false if not."""
  98. cdef _Document doc
  99. cdef _Element root_node
  100. cdef xmlDoc* c_doc
  101. cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt
  102. cdef int ret
  103. assert self._c_schema is not NULL, "RelaxNG instance not initialised"
  104. doc = _documentOrRaise(etree)
  105. root_node = _rootNodeOrRaise(etree)
  106. valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema)
  107. if valid_ctxt is NULL:
  108. raise MemoryError()
  109. try:
  110. self._error_log.clear()
  111. # Need a cast here because older libxml2 releases do not use 'const' in the functype.
  112. relaxng.xmlRelaxNGSetValidStructuredErrors(
  113. valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
  114. _connectGenericErrorLog(self._error_log, xmlerror.XML_FROM_RELAXNGV)
  115. c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
  116. with nogil:
  117. ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc)
  118. _destroyFakeDoc(doc._c_doc, c_doc)
  119. finally:
  120. _connectGenericErrorLog(None)
  121. relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt)
  122. if ret == -1:
  123. raise RelaxNGValidateError(
  124. "Internal error in Relax NG validation",
  125. self._error_log)
  126. if ret == 0:
  127. return True
  128. else:
  129. return False
  130. @classmethod
  131. def from_rnc_string(cls, src, base_url=None):
  132. """Parse a RelaxNG schema in compact syntax from a text string
  133. Requires the rnc2rng package to be installed.
  134. Passing the source URL or file path of the source as 'base_url'
  135. will enable resolving resource references relative to the source.
  136. """
  137. _require_rnc2rng()
  138. rng_str = utf8(_rnc2rng.dumps(_rnc2rng.loads(src)))
  139. return cls(_parseMemoryDocument(rng_str, parser=None, url=base_url))