You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

428 rivejä
15 KiB

  1. from __future__ import annotations
  2. import dataclasses
  3. import os
  4. import re
  5. import sys
  6. import warnings
  7. from collections.abc import Generator
  8. from typing import Callable
  9. from .datastructures import Headers
  10. from .exceptions import SecurityError
  11. from .version import version as websockets_version
  12. __all__ = [
  13. "SERVER",
  14. "USER_AGENT",
  15. "Request",
  16. "Response",
  17. ]
  18. PYTHON_VERSION = "{}.{}".format(*sys.version_info)
  19. # User-Agent header for HTTP requests.
  20. USER_AGENT = os.environ.get(
  21. "WEBSOCKETS_USER_AGENT",
  22. f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
  23. )
  24. # Server header for HTTP responses.
  25. SERVER = os.environ.get(
  26. "WEBSOCKETS_SERVER",
  27. f"Python/{PYTHON_VERSION} websockets/{websockets_version}",
  28. )
  29. # Maximum total size of headers is around 128 * 8 KiB = 1 MiB.
  30. MAX_NUM_HEADERS = int(os.environ.get("WEBSOCKETS_MAX_NUM_HEADERS", "128"))
  31. # Limit request line and header lines. 8KiB is the most common default
  32. # configuration of popular HTTP servers.
  33. MAX_LINE_LENGTH = int(os.environ.get("WEBSOCKETS_MAX_LINE_LENGTH", "8192"))
  34. # Support for HTTP response bodies is intended to read an error message
  35. # returned by a server. It isn't designed to perform large file transfers.
  36. MAX_BODY_SIZE = int(os.environ.get("WEBSOCKETS_MAX_BODY_SIZE", "1_048_576")) # 1 MiB
  37. def d(value: bytes) -> str:
  38. """
  39. Decode a bytestring for interpolating into an error message.
  40. """
  41. return value.decode(errors="backslashreplace")
  42. # See https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.
  43. # Regex for validating header names.
  44. _token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
  45. # Regex for validating header values.
  46. # We don't attempt to support obsolete line folding.
  47. # Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
  48. # The ABNF is complicated because it attempts to express that optional
  49. # whitespace is ignored. We strip whitespace and don't revalidate that.
  50. # See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
  51. _value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
  52. @dataclasses.dataclass
  53. class Request:
  54. """
  55. WebSocket handshake request.
  56. Attributes:
  57. path: Request path, including optional query.
  58. headers: Request headers.
  59. """
  60. path: str
  61. headers: Headers
  62. # body isn't useful is the context of this library.
  63. _exception: Exception | None = None
  64. @property
  65. def exception(self) -> Exception | None: # pragma: no cover
  66. warnings.warn( # deprecated in 10.3 - 2022-04-17
  67. "Request.exception is deprecated; use ServerProtocol.handshake_exc instead",
  68. DeprecationWarning,
  69. )
  70. return self._exception
  71. @classmethod
  72. def parse(
  73. cls,
  74. read_line: Callable[[int], Generator[None, None, bytes]],
  75. ) -> Generator[None, None, Request]:
  76. """
  77. Parse a WebSocket handshake request.
  78. This is a generator-based coroutine.
  79. The request path isn't URL-decoded or validated in any way.
  80. The request path and headers are expected to contain only ASCII
  81. characters. Other characters are represented with surrogate escapes.
  82. :meth:`parse` doesn't attempt to read the request body because
  83. WebSocket handshake requests don't have one. If the request contains a
  84. body, it may be read from the data stream after :meth:`parse` returns.
  85. Args:
  86. read_line: Generator-based coroutine that reads a LF-terminated
  87. line or raises an exception if there isn't enough data
  88. Raises:
  89. EOFError: If the connection is closed without a full HTTP request.
  90. SecurityError: If the request exceeds a security limit.
  91. ValueError: If the request isn't well formatted.
  92. """
  93. # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1
  94. # Parsing is simple because fixed values are expected for method and
  95. # version and because path isn't checked. Since WebSocket software tends
  96. # to implement HTTP/1.1 strictly, there's little need for lenient parsing.
  97. try:
  98. request_line = yield from parse_line(read_line)
  99. except EOFError as exc:
  100. raise EOFError("connection closed while reading HTTP request line") from exc
  101. try:
  102. method, raw_path, protocol = request_line.split(b" ", 2)
  103. except ValueError: # not enough values to unpack (expected 3, got 1-2)
  104. raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
  105. if protocol != b"HTTP/1.1":
  106. raise ValueError(
  107. f"unsupported protocol; expected HTTP/1.1: {d(request_line)}"
  108. )
  109. if method != b"GET":
  110. raise ValueError(f"unsupported HTTP method; expected GET; got {d(method)}")
  111. path = raw_path.decode("ascii", "surrogateescape")
  112. headers = yield from parse_headers(read_line)
  113. # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
  114. if "Transfer-Encoding" in headers:
  115. raise NotImplementedError("transfer codings aren't supported")
  116. if "Content-Length" in headers:
  117. raise ValueError("unsupported request body")
  118. return cls(path, headers)
  119. def serialize(self) -> bytes:
  120. """
  121. Serialize a WebSocket handshake request.
  122. """
  123. # Since the request line and headers only contain ASCII characters,
  124. # we can keep this simple.
  125. request = f"GET {self.path} HTTP/1.1\r\n".encode()
  126. request += self.headers.serialize()
  127. return request
  128. @dataclasses.dataclass
  129. class Response:
  130. """
  131. WebSocket handshake response.
  132. Attributes:
  133. status_code: Response code.
  134. reason_phrase: Response reason.
  135. headers: Response headers.
  136. body: Response body.
  137. """
  138. status_code: int
  139. reason_phrase: str
  140. headers: Headers
  141. body: bytes = b""
  142. _exception: Exception | None = None
  143. @property
  144. def exception(self) -> Exception | None: # pragma: no cover
  145. warnings.warn( # deprecated in 10.3 - 2022-04-17
  146. "Response.exception is deprecated; "
  147. "use ClientProtocol.handshake_exc instead",
  148. DeprecationWarning,
  149. )
  150. return self._exception
  151. @classmethod
  152. def parse(
  153. cls,
  154. read_line: Callable[[int], Generator[None, None, bytes]],
  155. read_exact: Callable[[int], Generator[None, None, bytes]],
  156. read_to_eof: Callable[[int], Generator[None, None, bytes]],
  157. include_body: bool = True,
  158. ) -> Generator[None, None, Response]:
  159. """
  160. Parse a WebSocket handshake response.
  161. This is a generator-based coroutine.
  162. The reason phrase and headers are expected to contain only ASCII
  163. characters. Other characters are represented with surrogate escapes.
  164. Args:
  165. read_line: Generator-based coroutine that reads a LF-terminated
  166. line or raises an exception if there isn't enough data.
  167. read_exact: Generator-based coroutine that reads the requested
  168. bytes or raises an exception if there isn't enough data.
  169. read_to_eof: Generator-based coroutine that reads until the end
  170. of the stream.
  171. Raises:
  172. EOFError: If the connection is closed without a full HTTP response.
  173. SecurityError: If the response exceeds a security limit.
  174. LookupError: If the response isn't well formatted.
  175. ValueError: If the response isn't well formatted.
  176. """
  177. # https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.2
  178. try:
  179. status_line = yield from parse_line(read_line)
  180. except EOFError as exc:
  181. raise EOFError("connection closed while reading HTTP status line") from exc
  182. try:
  183. protocol, raw_status_code, raw_reason = status_line.split(b" ", 2)
  184. except ValueError: # not enough values to unpack (expected 3, got 1-2)
  185. raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
  186. if protocol != b"HTTP/1.1":
  187. raise ValueError(
  188. f"unsupported protocol; expected HTTP/1.1: {d(status_line)}"
  189. )
  190. try:
  191. status_code = int(raw_status_code)
  192. except ValueError: # invalid literal for int() with base 10
  193. raise ValueError(
  194. f"invalid status code; expected integer; got {d(raw_status_code)}"
  195. ) from None
  196. if not 100 <= status_code < 600:
  197. raise ValueError(
  198. f"invalid status code; expected 100–599; got {d(raw_status_code)}"
  199. )
  200. if not _value_re.fullmatch(raw_reason):
  201. raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
  202. reason = raw_reason.decode("ascii", "surrogateescape")
  203. headers = yield from parse_headers(read_line)
  204. if include_body:
  205. body = yield from read_body(
  206. status_code, headers, read_line, read_exact, read_to_eof
  207. )
  208. else:
  209. body = b""
  210. return cls(status_code, reason, headers, body)
  211. def serialize(self) -> bytes:
  212. """
  213. Serialize a WebSocket handshake response.
  214. """
  215. # Since the status line and headers only contain ASCII characters,
  216. # we can keep this simple.
  217. response = f"HTTP/1.1 {self.status_code} {self.reason_phrase}\r\n".encode()
  218. response += self.headers.serialize()
  219. response += self.body
  220. return response
  221. def parse_line(
  222. read_line: Callable[[int], Generator[None, None, bytes]],
  223. ) -> Generator[None, None, bytes]:
  224. """
  225. Parse a single line.
  226. CRLF is stripped from the return value.
  227. Args:
  228. read_line: Generator-based coroutine that reads a LF-terminated line
  229. or raises an exception if there isn't enough data.
  230. Raises:
  231. EOFError: If the connection is closed without a CRLF.
  232. SecurityError: If the response exceeds a security limit.
  233. """
  234. try:
  235. line = yield from read_line(MAX_LINE_LENGTH)
  236. except RuntimeError:
  237. raise SecurityError("line too long")
  238. # Not mandatory but safe - https://datatracker.ietf.org/doc/html/rfc7230#section-3.5
  239. if not line.endswith(b"\r\n"):
  240. raise EOFError("line without CRLF")
  241. return line[:-2]
  242. def parse_headers(
  243. read_line: Callable[[int], Generator[None, None, bytes]],
  244. ) -> Generator[None, None, Headers]:
  245. """
  246. Parse HTTP headers.
  247. Non-ASCII characters are represented with surrogate escapes.
  248. Args:
  249. read_line: Generator-based coroutine that reads a LF-terminated line
  250. or raises an exception if there isn't enough data.
  251. Raises:
  252. EOFError: If the connection is closed without complete headers.
  253. SecurityError: If the request exceeds a security limit.
  254. ValueError: If the request isn't well formatted.
  255. """
  256. # https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
  257. # We don't attempt to support obsolete line folding.
  258. headers = Headers()
  259. for _ in range(MAX_NUM_HEADERS + 1):
  260. try:
  261. line = yield from parse_line(read_line)
  262. except EOFError as exc:
  263. raise EOFError("connection closed while reading HTTP headers") from exc
  264. if line == b"":
  265. break
  266. try:
  267. raw_name, raw_value = line.split(b":", 1)
  268. except ValueError: # not enough values to unpack (expected 2, got 1)
  269. raise ValueError(f"invalid HTTP header line: {d(line)}") from None
  270. if not _token_re.fullmatch(raw_name):
  271. raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
  272. raw_value = raw_value.strip(b" \t")
  273. if not _value_re.fullmatch(raw_value):
  274. raise ValueError(f"invalid HTTP header value: {d(raw_value)}")
  275. name = raw_name.decode("ascii") # guaranteed to be ASCII at this point
  276. value = raw_value.decode("ascii", "surrogateescape")
  277. headers[name] = value
  278. else:
  279. raise SecurityError("too many HTTP headers")
  280. return headers
  281. def read_body(
  282. status_code: int,
  283. headers: Headers,
  284. read_line: Callable[[int], Generator[None, None, bytes]],
  285. read_exact: Callable[[int], Generator[None, None, bytes]],
  286. read_to_eof: Callable[[int], Generator[None, None, bytes]],
  287. ) -> Generator[None, None, bytes]:
  288. # https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
  289. # Since websockets only does GET requests (no HEAD, no CONNECT), all
  290. # responses except 1xx, 204, and 304 include a message body.
  291. if 100 <= status_code < 200 or status_code == 204 or status_code == 304:
  292. return b""
  293. # MultipleValuesError is sufficiently unlikely that we don't attempt to
  294. # handle it when accessing headers. Instead we document that its parent
  295. # class, LookupError, may be raised.
  296. # Conversions from str to int are protected by sys.set_int_max_str_digits..
  297. elif (coding := headers.get("Transfer-Encoding")) is not None:
  298. if coding != "chunked":
  299. raise NotImplementedError(f"transfer coding {coding} isn't supported")
  300. body = b""
  301. while True:
  302. chunk_size_line = yield from parse_line(read_line)
  303. raw_chunk_size = chunk_size_line.split(b";", 1)[0]
  304. # Set a lower limit than default_max_str_digits; 1 EB is plenty.
  305. if len(raw_chunk_size) > 15:
  306. str_chunk_size = raw_chunk_size.decode(errors="backslashreplace")
  307. raise SecurityError(f"chunk too large: 0x{str_chunk_size} bytes")
  308. chunk_size = int(raw_chunk_size, 16)
  309. if chunk_size == 0:
  310. break
  311. if len(body) + chunk_size > MAX_BODY_SIZE:
  312. raise SecurityError(
  313. f"chunk too large: {chunk_size} bytes after {len(body)} bytes"
  314. )
  315. body += yield from read_exact(chunk_size)
  316. if (yield from read_exact(2)) != b"\r\n":
  317. raise ValueError("chunk without CRLF")
  318. # Read the trailer.
  319. yield from parse_headers(read_line)
  320. return body
  321. elif (raw_content_length := headers.get("Content-Length")) is not None:
  322. # Set a lower limit than default_max_str_digits; 1 EiB is plenty.
  323. if len(raw_content_length) > 18:
  324. raise SecurityError(f"body too large: {raw_content_length} bytes")
  325. content_length = int(raw_content_length)
  326. if content_length > MAX_BODY_SIZE:
  327. raise SecurityError(f"body too large: {content_length} bytes")
  328. return (yield from read_exact(content_length))
  329. else:
  330. try:
  331. return (yield from read_to_eof(MAX_BODY_SIZE))
  332. except RuntimeError:
  333. raise SecurityError(f"body too large: over {MAX_BODY_SIZE} bytes")