You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

207 lines
7.1 KiB

  1. # Code to read HTTP data
  2. #
  3. # Strategy: each reader is a callable which takes a ReceiveBuffer object, and
  4. # either:
  5. # 1) consumes some of it and returns an Event
  6. # 2) raises a LocalProtocolError (for consistency -- e.g. we call validate()
  7. # and it might raise a LocalProtocolError, so simpler just to always use
  8. # this)
  9. # 3) returns None, meaning "I need more data"
  10. #
  11. # If they have a .read_eof attribute, then this will be called if an EOF is
  12. # received -- but this is optional. Either way, the actual ConnectionClosed
  13. # event will be generated afterwards.
  14. #
  15. # READERS is a dict describing how to pick a reader. It maps states to either:
  16. # - a reader
  17. # - or, for body readers, a dict of per-framing reader factories
  18. import re
  19. from ._util import LocalProtocolError, RemoteProtocolError, validate
  20. from ._state import *
  21. from ._events import *
  22. from ._abnf import header_field, request_line, status_line, chunk_header
  # Only the dispatch table is exported by a star-import of this module.
  __all__ = ["READERS"]

  # Compiled form of the header-field grammar; used to validate each header
  # line after obsolete line folding has been undone.
  header_field_re = re.compile(header_field.encode("ascii"))
  25. # Remember that this has to run in O(n) time -- so e.g. the bytearray cast is
  26. # critical.
  27. obs_fold_re = re.compile(br"[ \t]+")
  28. def _obsolete_line_fold(lines):
  29. it = iter(lines)
  30. last = None
  31. for line in it:
  32. match = obs_fold_re.match(line)
  33. if match:
  34. if last is None:
  35. raise LocalProtocolError(
  36. "continuation line at start of headers")
  37. if not isinstance(last, bytearray):
  38. last = bytearray(last)
  39. last += b" "
  40. last += line[match.end():]
  41. else:
  42. if last is not None:
  43. yield last
  44. last = line
  45. if last is not None:
  46. yield last
  def _decode_header_lines(lines):
      """Yield ``(field_name, field_value)`` for each logical header line.

      Continuation lines are merged by _obsolete_line_fold first; every
      resulting line is then checked against header_field_re via validate().
      """
      for folded in _obsolete_line_fold(lines):
          # _obsolete_line_fold hands back bytes or bytearray objects. On
          # Python 3 validate() accepts both and returns bytes matches, but on
          # Python 2 the matches can come back as bytearrays -- so normalise
          # the input to bytes up front.
          parsed = validate(header_field_re, bytes(folded))
          name = parsed["field_name"]
          value = parsed["field_value"]
          yield (name, value)
  request_line_re = re.compile(request_line.encode("ascii"))


  def maybe_read_from_IDLE_client(buf):
      """Try to read a complete request head from ``buf``.

      Returns None when ``buf.maybe_extract_lines()`` has nothing to hand over
      yet, otherwise a Request built from the request line and the decoded
      header lines. Raises LocalProtocolError if the extracted block contains
      no lines at all.
      """
      lines = buf.maybe_extract_lines()
      if lines is None:
          return None
      if not lines:
          raise LocalProtocolError("no request line received")
      # The first line is the request line; everything after it is headers.
      request_fields = validate(request_line_re, lines[0])
      header_pairs = list(_decode_header_lines(lines[1:]))
      return Request(headers=header_pairs, _parsed=True, **request_fields)
  status_line_re = re.compile(status_line.encode("ascii"))


  def maybe_read_from_SEND_RESPONSE_server(buf):
      """Try to read a complete response head from ``buf``.

      Returns None when ``buf.maybe_extract_lines()`` has nothing to hand over
      yet. Otherwise returns an InformationalResponse for status codes below
      200 and a Response for everything else. Raises LocalProtocolError if the
      extracted block contains no lines at all.
      """
      lines = buf.maybe_extract_lines()
      if lines is None:
          return None
      if not lines:
          raise LocalProtocolError("no response line received")
      status_fields = validate(status_line_re, lines[0])
      # Tolerate missing reason phrases
      if status_fields["reason"] is None:
          status_fields["reason"] = b""
      # The event constructors receive the status code as an int.
      code = int(status_fields["status_code"])
      status_fields["status_code"] = code
      if code < 200:
          event_type = InformationalResponse
      else:
          event_type = Response
      header_pairs = list(_decode_header_lines(lines[1:]))
      return event_type(headers=header_pairs, _parsed=True, **status_fields)
  class ContentLengthReader(object):
      """Body reader for messages framed by a known byte count.

      Declared with an explicit ``object`` base like the other reader classes
      in this module, so it is a new-style class on Python 2 as well.
      """

      def __init__(self, length):
          # Total number of body bytes announced for this message.
          self._length = length
          # Body bytes that have not been handed out yet.
          self._remaining = length

      def __call__(self, buf):
          """Return the next event for this body, or None if more data is needed.

          Yields Data events until ``length`` bytes have been delivered, then
          EndOfMessage.
          """
          if self._remaining == 0:
              return EndOfMessage()
          data = buf.maybe_extract_at_most(self._remaining)
          if data is None:
              return None
          self._remaining -= len(data)
          return Data(data=data)

      def read_eof(self):
          """Report an EOF that arrived before the whole body was received.

          Always raises RemoteProtocolError.
          """
          raise RemoteProtocolError(
              "peer closed connection without sending complete message body "
              "(received {} bytes, expected {})"
              .format(self._length - self._remaining, self._length))
  # Compiled form of the chunk-header grammar; ChunkedReader uses it to pull
  # the hexadecimal chunk size out of each chunk header line.
  chunk_header_re = re.compile(chunk_header.encode("ascii"))
  class ChunkedReader(object):
      """Body reader for chunk-framed message bodies.

      Each call consumes some bytes from the receive buffer and returns one of:

      - a Data event carrying (part of) a chunk's payload,
      - an EndOfMessage event carrying the trailer headers, once the zero-size
        chunk and the trailer lines have been read,
      - None, meaning more data is needed.

      The terminator that follows every chunk payload is verified rather than
      blindly skipped; anything other than ``\\r\\n`` raises LocalProtocolError.
      """

      # Byte sequence that must follow every chunk's payload.
      _CHUNK_TERMINATOR = b"\r\n"

      def __init__(self):
          # Payload bytes of the current chunk that are still to be read.
          self._bytes_in_chunk = 0
          # After reading a chunk, we have to consume the trailing \r\n; if
          # this is >0 then that many terminator bytes are still outstanding
          # before regular de-chunkification resumes.
          self._bytes_to_discard = 0
          # Set once the zero-size chunk has been seen.
          self._reading_trailer = False

      def __call__(self, buf):
          """Return the next event for this body, or None if more data is needed.

          Raises LocalProtocolError if a chunk payload is not followed by the
          expected terminator bytes.
          """
          if self._reading_trailer:
              lines = buf.maybe_extract_lines()
              if lines is None:
                  return None
              return EndOfMessage(headers=list(_decode_header_lines(lines)))
          if self._bytes_to_discard > 0:
              data = buf.maybe_extract_at_most(self._bytes_to_discard)
              if data is None:
                  return None
              # The terminator may arrive split across reads, so compare the
              # received bytes against the matching slice of the terminator.
              terminator = self._CHUNK_TERMINATOR
              consumed = len(terminator) - self._bytes_to_discard
              expected = terminator[consumed:consumed + len(data)]
              if data != expected:
                  raise LocalProtocolError(
                      "malformed chunk footer: {!r} (expected {!r})"
                      .format(bytes(data), expected))
              self._bytes_to_discard -= len(data)
              if self._bytes_to_discard > 0:
                  return None
              # else, fall through and read some more
          assert self._bytes_to_discard == 0
          if self._bytes_in_chunk == 0:
              # We need to refill our chunk count
              header_line = buf.maybe_extract_until_next(b"\r\n")
              if header_line is None:
                  return None
              matches = validate(chunk_header_re, header_line)
              # XX FIXME: we discard chunk extensions. Does anyone care?
              # We convert to bytes because Python 2's `int()` function doesn't
              # work properly on bytearray objects.
              self._bytes_in_chunk = int(bytes(matches["chunk_size"]), base=16)
              if self._bytes_in_chunk == 0:
                  # Zero-size chunk: switch to trailer mode and let the
                  # trailer branch above handle the rest of this call.
                  self._reading_trailer = True
                  return self(buf)
              chunk_start = True
          else:
              chunk_start = False
          assert self._bytes_in_chunk > 0
          data = buf.maybe_extract_at_most(self._bytes_in_chunk)
          if data is None:
              return None
          self._bytes_in_chunk -= len(data)
          if self._bytes_in_chunk == 0:
              # Payload complete: its terminator has to be consumed next.
              self._bytes_to_discard = len(self._CHUNK_TERMINATOR)
              chunk_end = True
          else:
              chunk_end = False
          return Data(data=data, chunk_start=chunk_start, chunk_end=chunk_end)

      def read_eof(self):
          """Report an EOF that arrived in the middle of a chunked body.

          Always raises RemoteProtocolError.
          """
          raise RemoteProtocolError(
              "peer closed connection without sending complete message body "
              "(incomplete chunked read)")
  class Http10Reader(object):
      """Body reader for bodies that simply run until the connection closes."""

      def __call__(self, buf):
          """Return whatever is buffered as a Data event, or None if nothing is."""
          # The huge limit effectively means "everything currently available".
          chunk = buf.maybe_extract_at_most(999999999)
          return None if chunk is None else Data(data=chunk)

      def read_eof(self):
          """EOF is how this kind of body ends, so it maps to EndOfMessage."""
          return EndOfMessage()
  def expect_nothing(buf):
      """Reader for states in which no further bytes may arrive.

      Returns None while ``buf`` is empty (falsy) and raises
      LocalProtocolError as soon as it holds any data.
      """
      if not buf:
          return None
      raise LocalProtocolError("Got data when expecting EOF")
  # Dispatch table used to pick a reader. Keys are either a (role, state) pair
  # mapping straight to a reader callable, or SEND_BODY, which maps to a dict
  # of per-framing reader factories.
  READERS = {
      (CLIENT, IDLE): maybe_read_from_IDLE_client,
      (SERVER, IDLE): maybe_read_from_SEND_RESPONSE_server,
      (SERVER, SEND_RESPONSE): maybe_read_from_SEND_RESPONSE_server,
  }

  # In every "finished" state, for either role, incoming data is an error.
  READERS.update(
      ((role, state), expect_nothing)
      for role in (CLIENT, SERVER)
      for state in (DONE, MUST_CLOSE, CLOSED)
  )

  # Body readers are stored as classes rather than instances.
  READERS[SEND_BODY] = {
      "chunked": ChunkedReader,
      "content-length": ContentLengthReader,
      "http/1.0": Http10Reader,
  }
  181. }