Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 

102 linhas
3.2 KiB

  1. """CSS Selectors based on XPath.
  2. This module supports selecting XML/HTML tags based on CSS selectors.
  3. See the `CSSSelector` class for details.
  4. This is a thin wrapper around cssselect 0.7 or later.
  5. """
  6. from . import etree
  7. try:
  8. import cssselect as external_cssselect
  9. except ImportError:
  10. raise ImportError(
  11. 'cssselect does not seem to be installed. '
  12. 'See https://pypi.org/project/cssselect/')
  13. SelectorSyntaxError = external_cssselect.SelectorSyntaxError
  14. ExpressionError = external_cssselect.ExpressionError
  15. SelectorError = external_cssselect.SelectorError
  16. __all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
  17. 'CSSSelector']
  18. class LxmlTranslator(external_cssselect.GenericTranslator):
  19. """
  20. A custom CSS selector to XPath translator with lxml-specific extensions.
  21. """
  22. def xpath_contains_function(self, xpath, function):
  23. # Defined there, removed in later drafts:
  24. # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
  25. if function.argument_types() not in (['STRING'], ['IDENT']):
  26. raise ExpressionError(
  27. "Expected a single string or ident for :contains(), got %r"
  28. % function.arguments)
  29. value = function.arguments[0].value
  30. return xpath.add_condition(
  31. 'contains(__lxml_internal_css:lower-case(string(.)), %s)'
  32. % self.xpath_literal(value.lower()))
  33. class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
  34. """
  35. lxml extensions + HTML support.
  36. """
  37. def _make_lower_case(context, s):
  38. return s.lower()
  39. ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
  40. ns.prefix = '__lxml_internal_css'
  41. ns['lower-case'] = _make_lower_case
  42. class CSSSelector(etree.XPath):
  43. """A CSS selector.
  44. Usage::
  45. >>> from lxml import etree, cssselect
  46. >>> select = cssselect.CSSSelector("a tag > child")
  47. >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
  48. >>> [ el.tag for el in select(root) ]
  49. ['child']
  50. To use CSS namespaces, you need to pass a prefix-to-namespace
  51. mapping as ``namespaces`` keyword argument::
  52. >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
  53. >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
  54. ... namespaces={'rdf': rdfns})
  55. >>> rdf = etree.XML((
  56. ... '<root xmlns:rdf="%s">'
  57. ... '<rdf:Description>blah</rdf:Description>'
  58. ... '</root>') % rdfns)
  59. >>> [(el.tag, el.text) for el in select_ns(rdf)]
  60. [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]
  61. """
  62. def __init__(self, css, namespaces=None, translator='xml'):
  63. if translator == 'xml':
  64. translator = LxmlTranslator()
  65. elif translator == 'html':
  66. translator = LxmlHTMLTranslator()
  67. elif translator == 'xhtml':
  68. translator = LxmlHTMLTranslator(xhtml=True)
  69. path = translator.css_to_xpath(css)
  70. super().__init__(path, namespaces=namespaces)
  71. self.css = css
  72. def __repr__(self):
  73. return '<%s %x for %r>' % (
  74. self.__class__.__name__,
  75. abs(id(self)),
  76. self.css)