You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

148 lines
4.3 KiB

  1. import codecs
  2. import re
  3. from typing import (IO, Iterator, Match, NamedTuple, Optional, Pattern, # noqa
  4. Sequence, Text)
  5. from .compat import to_text
  6. def make_regex(string, extra_flags=0):
  7. # type: (str, int) -> Pattern[Text]
  8. return re.compile(to_text(string), re.UNICODE | extra_flags)
  9. _whitespace = make_regex(r"\s*", extra_flags=re.MULTILINE)
  10. _export = make_regex(r"(?:export[^\S\r\n]+)?")
  11. _single_quoted_key = make_regex(r"'([^']+)'")
  12. _unquoted_key = make_regex(r"([^=\#\s]+)")
  13. _equal_sign = make_regex(r"[^\S\r\n]*=[^\S\r\n]*")
  14. _single_quoted_value = make_regex(r"'((?:\\'|[^'])*)'")
  15. _double_quoted_value = make_regex(r'"((?:\\"|[^"])*)"')
  16. _unquoted_value_part = make_regex(r"([^ \r\n]*)")
  17. _comment = make_regex(r"(?:\s*#[^\r\n]*)?")
  18. _end_of_line = make_regex(r"[^\S\r\n]*(?:\r\n|\n|\r)?")
  19. _rest_of_line = make_regex(r"[^\r\n]*(?:\r|\n|\r\n)?")
  20. _double_quote_escapes = make_regex(r"\\[\\'\"abfnrtv]")
  21. _single_quote_escapes = make_regex(r"\\[\\']")
  22. Binding = NamedTuple("Binding", [("key", Optional[Text]),
  23. ("value", Optional[Text]),
  24. ("original", Text)])
  25. class Error(Exception):
  26. pass
  27. class Reader:
  28. def __init__(self, stream):
  29. # type: (IO[Text]) -> None
  30. self.string = stream.read()
  31. self.position = 0
  32. self.mark = 0
  33. def has_next(self):
  34. # type: () -> bool
  35. return self.position < len(self.string)
  36. def set_mark(self):
  37. # type: () -> None
  38. self.mark = self.position
  39. def get_marked(self):
  40. # type: () -> Text
  41. return self.string[self.mark:self.position]
  42. def peek(self, count):
  43. # type: (int) -> Text
  44. return self.string[self.position:self.position + count]
  45. def read(self, count):
  46. # type: (int) -> Text
  47. result = self.string[self.position:self.position + count]
  48. if len(result) < count:
  49. raise Error("read: End of string")
  50. self.position += count
  51. return result
  52. def read_regex(self, regex):
  53. # type: (Pattern[Text]) -> Sequence[Text]
  54. match = regex.match(self.string, self.position)
  55. if match is None:
  56. raise Error("read_regex: Pattern not found")
  57. self.position = match.end()
  58. return match.groups()
  59. def decode_escapes(regex, string):
  60. # type: (Pattern[Text], Text) -> Text
  61. def decode_match(match):
  62. # type: (Match[Text]) -> Text
  63. return codecs.decode(match.group(0), 'unicode-escape') # type: ignore
  64. return regex.sub(decode_match, string)
  65. def parse_key(reader):
  66. # type: (Reader) -> Text
  67. char = reader.peek(1)
  68. if char == "'":
  69. (key,) = reader.read_regex(_single_quoted_key)
  70. else:
  71. (key,) = reader.read_regex(_unquoted_key)
  72. return key
  73. def parse_unquoted_value(reader):
  74. # type: (Reader) -> Text
  75. value = u""
  76. while True:
  77. (part,) = reader.read_regex(_unquoted_value_part)
  78. value += part
  79. after = reader.peek(2)
  80. if len(after) < 2 or after[0] in u"\r\n" or after[1] in u" #\r\n":
  81. return value
  82. value += reader.read(2)
  83. def parse_value(reader):
  84. # type: (Reader) -> Text
  85. char = reader.peek(1)
  86. if char == u"'":
  87. (value,) = reader.read_regex(_single_quoted_value)
  88. return decode_escapes(_single_quote_escapes, value)
  89. elif char == u'"':
  90. (value,) = reader.read_regex(_double_quoted_value)
  91. return decode_escapes(_double_quote_escapes, value)
  92. elif char in (u"", u"\n", u"\r"):
  93. return u""
  94. else:
  95. return parse_unquoted_value(reader)
  96. def parse_binding(reader):
  97. # type: (Reader) -> Binding
  98. reader.set_mark()
  99. try:
  100. reader.read_regex(_whitespace)
  101. reader.read_regex(_export)
  102. key = parse_key(reader)
  103. reader.read_regex(_equal_sign)
  104. value = parse_value(reader)
  105. reader.read_regex(_comment)
  106. reader.read_regex(_end_of_line)
  107. return Binding(key=key, value=value, original=reader.get_marked())
  108. except Error:
  109. reader.read_regex(_rest_of_line)
  110. return Binding(key=None, value=None, original=reader.get_marked())
  111. def parse_stream(stream):
  112. # type:(IO[Text]) -> Iterator[Binding]
  113. reader = Reader(stream)
  114. while reader.has_next():
  115. try:
  116. yield parse_binding(reader)
  117. except Error:
  118. return