|
- """
- :mod:`websockets.http` module provides basic HTTP/1.1 support. It is merely
- :adequate for WebSocket handshake messages.
-
- These APIs cannot be imported from :mod:`websockets`. They must be imported
- from :mod:`websockets.http`.
-
- """
-
- import asyncio
- import re
- import sys
- from typing import (
- Any,
- Dict,
- Iterable,
- Iterator,
- List,
- Mapping,
- MutableMapping,
- Tuple,
- Union,
- )
-
- from .version import version as websockets_version
-
-
- __all__ = [
- "read_request",
- "read_response",
- "Headers",
- "MultipleValuesError",
- "USER_AGENT",
- ]
-
- MAX_HEADERS = 256
- MAX_LINE = 4096
-
- USER_AGENT = f"Python/{sys.version[:3]} websockets/{websockets_version}"
-
-
- def d(value: bytes) -> str:
- """
- Decode a bytestring for interpolating into an error message.
-
- """
- return value.decode(errors="backslashreplace")
-
-
- # See https://tools.ietf.org/html/rfc7230#appendix-B.
-
- # Regex for validating header names.
-
- _token_re = re.compile(rb"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
-
- # Regex for validating header values.
-
- # We don't attempt to support obsolete line folding.
-
- # Include HTAB (\x09), SP (\x20), VCHAR (\x21-\x7e), obs-text (\x80-\xff).
-
- # The ABNF is complicated because it attempts to express that optional
- # whitespace is ignored. We strip whitespace and don't revalidate that.
-
- # See also https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
-
- _value_re = re.compile(rb"[\x09\x20-\x7e\x80-\xff]*")
-
-
- async def read_request(stream: asyncio.StreamReader) -> Tuple[str, "Headers"]:
- """
- Read an HTTP/1.1 GET request and returns ``(path, headers)``.
-
- ``path`` isn't URL-decoded or validated in any way.
-
- ``path`` and ``headers`` are expected to contain only ASCII characters.
- Other characters are represented with surrogate escapes.
-
- :func:`read_request` doesn't attempt to read the request body because
- WebSocket handshake requests don't have one. If the request contains a
- body, it may be read from ``stream`` after this coroutine returns.
-
- :param stream: input to read the request from
- :raises EOFError: if the connection is closed without a full HTTP request
- :raises SecurityError: if the request exceeds a security limit
- :raises ValueError: if the request isn't well formatted
-
- """
- # https://tools.ietf.org/html/rfc7230#section-3.1.1
-
- # Parsing is simple because fixed values are expected for method and
- # version and because path isn't checked. Since WebSocket software tends
- # to implement HTTP/1.1 strictly, there's little need for lenient parsing.
-
- try:
- request_line = await read_line(stream)
- except EOFError as exc:
- raise EOFError("connection closed while reading HTTP request line") from exc
-
- try:
- method, raw_path, version = request_line.split(b" ", 2)
- except ValueError: # not enough values to unpack (expected 3, got 1-2)
- raise ValueError(f"invalid HTTP request line: {d(request_line)}") from None
-
- if method != b"GET":
- raise ValueError(f"unsupported HTTP method: {d(method)}")
- if version != b"HTTP/1.1":
- raise ValueError(f"unsupported HTTP version: {d(version)}")
- path = raw_path.decode("ascii", "surrogateescape")
-
- headers = await read_headers(stream)
-
- return path, headers
-
-
- async def read_response(stream: asyncio.StreamReader) -> Tuple[int, str, "Headers"]:
- """
- Read an HTTP/1.1 response and returns ``(status_code, reason, headers)``.
-
- ``reason`` and ``headers`` are expected to contain only ASCII characters.
- Other characters are represented with surrogate escapes.
-
- :func:`read_request` doesn't attempt to read the response body because
- WebSocket handshake responses don't have one. If the response contains a
- body, it may be read from ``stream`` after this coroutine returns.
-
- :param stream: input to read the response from
- :raises EOFError: if the connection is closed without a full HTTP response
- :raises SecurityError: if the response exceeds a security limit
- :raises ValueError: if the response isn't well formatted
-
- """
- # https://tools.ietf.org/html/rfc7230#section-3.1.2
-
- # As in read_request, parsing is simple because a fixed value is expected
- # for version, status_code is a 3-digit number, and reason can be ignored.
-
- try:
- status_line = await read_line(stream)
- except EOFError as exc:
- raise EOFError("connection closed while reading HTTP status line") from exc
-
- try:
- version, raw_status_code, raw_reason = status_line.split(b" ", 2)
- except ValueError: # not enough values to unpack (expected 3, got 1-2)
- raise ValueError(f"invalid HTTP status line: {d(status_line)}") from None
-
- if version != b"HTTP/1.1":
- raise ValueError(f"unsupported HTTP version: {d(version)}")
- try:
- status_code = int(raw_status_code)
- except ValueError: # invalid literal for int() with base 10
- raise ValueError(f"invalid HTTP status code: {d(raw_status_code)}") from None
- if not 100 <= status_code < 1000:
- raise ValueError(f"unsupported HTTP status code: {d(raw_status_code)}")
- if not _value_re.fullmatch(raw_reason):
- raise ValueError(f"invalid HTTP reason phrase: {d(raw_reason)}")
- reason = raw_reason.decode()
-
- headers = await read_headers(stream)
-
- return status_code, reason, headers
-
-
- async def read_headers(stream: asyncio.StreamReader) -> "Headers":
- """
- Read HTTP headers from ``stream``.
-
- Non-ASCII characters are represented with surrogate escapes.
-
- """
- # https://tools.ietf.org/html/rfc7230#section-3.2
-
- # We don't attempt to support obsolete line folding.
-
- headers = Headers()
- for _ in range(MAX_HEADERS + 1):
- try:
- line = await read_line(stream)
- except EOFError as exc:
- raise EOFError("connection closed while reading HTTP headers") from exc
- if line == b"":
- break
-
- try:
- raw_name, raw_value = line.split(b":", 1)
- except ValueError: # not enough values to unpack (expected 2, got 1)
- raise ValueError(f"invalid HTTP header line: {d(line)}") from None
- if not _token_re.fullmatch(raw_name):
- raise ValueError(f"invalid HTTP header name: {d(raw_name)}")
- raw_value = raw_value.strip(b" \t")
- if not _value_re.fullmatch(raw_value):
- raise ValueError(f"invalid HTTP header value: {d(raw_value)}")
-
- name = raw_name.decode("ascii") # guaranteed to be ASCII at this point
- value = raw_value.decode("ascii", "surrogateescape")
- headers[name] = value
-
- else:
- raise websockets.exceptions.SecurityError("too many HTTP headers")
-
- return headers
-
-
- async def read_line(stream: asyncio.StreamReader) -> bytes:
- """
- Read a single line from ``stream``.
-
- CRLF is stripped from the return value.
-
- """
- # Security: this is bounded by the StreamReader's limit (default = 32 KiB).
- line = await stream.readline()
- # Security: this guarantees header values are small (hard-coded = 4 KiB)
- if len(line) > MAX_LINE:
- raise websockets.exceptions.SecurityError("line too long")
- # Not mandatory but safe - https://tools.ietf.org/html/rfc7230#section-3.5
- if not line.endswith(b"\r\n"):
- raise EOFError("line without CRLF")
- return line[:-2]
-
-
- class MultipleValuesError(LookupError):
- """
- Exception raised when :class:`Headers` has more than one value for a key.
-
- """
-
- def __str__(self) -> str:
- # Implement the same logic as KeyError_str in Objects/exceptions.c.
- if len(self.args) == 1:
- return repr(self.args[0])
- return super().__str__()
-
-
- class Headers(MutableMapping[str, str]):
- """
- Efficient data structure for manipulating HTTP headers.
-
- A :class:`list` of ``(name, values)`` is inefficient for lookups.
-
- A :class:`dict` doesn't suffice because header names are case-insensitive
- and multiple occurrences of headers with the same name are possible.
-
- :class:`Headers` stores HTTP headers in a hybrid data structure to provide
- efficient insertions and lookups while preserving the original data.
-
- In order to account for multiple values with minimal hassle,
- :class:`Headers` follows this logic:
-
- - When getting a header with ``headers[name]``:
- - if there's no value, :exc:`KeyError` is raised;
- - if there's exactly one value, it's returned;
- - if there's more than one value, :exc:`MultipleValuesError` is raised.
-
- - When setting a header with ``headers[name] = value``, the value is
- appended to the list of values for that header.
-
- - When deleting a header with ``del headers[name]``, all values for that
- header are removed (this is slow).
-
- Other methods for manipulating headers are consistent with this logic.
-
- As long as no header occurs multiple times, :class:`Headers` behaves like
- :class:`dict`, except keys are lower-cased to provide case-insensitivity.
-
- Two methods support support manipulating multiple values explicitly:
-
- - :meth:`get_all` returns a list of all values for a header;
- - :meth:`raw_items` returns an iterator of ``(name, values)`` pairs.
-
- """
-
- __slots__ = ["_dict", "_list"]
-
- def __init__(self, *args: Any, **kwargs: str) -> None:
- self._dict: Dict[str, List[str]] = {}
- self._list: List[Tuple[str, str]] = []
- # MutableMapping.update calls __setitem__ for each (name, value) pair.
- self.update(*args, **kwargs)
-
- def __str__(self) -> str:
- return "".join(f"{key}: {value}\r\n" for key, value in self._list) + "\r\n"
-
- def __repr__(self) -> str:
- return f"{self.__class__.__name__}({self._list!r})"
-
- def copy(self) -> "Headers":
- copy = self.__class__()
- copy._dict = self._dict.copy()
- copy._list = self._list.copy()
- return copy
-
- # Collection methods
-
- def __contains__(self, key: object) -> bool:
- return isinstance(key, str) and key.lower() in self._dict
-
- def __iter__(self) -> Iterator[str]:
- return iter(self._dict)
-
- def __len__(self) -> int:
- return len(self._dict)
-
- # MutableMapping methods
-
- def __getitem__(self, key: str) -> str:
- value = self._dict[key.lower()]
- if len(value) == 1:
- return value[0]
- else:
- raise MultipleValuesError(key)
-
- def __setitem__(self, key: str, value: str) -> None:
- self._dict.setdefault(key.lower(), []).append(value)
- self._list.append((key, value))
-
- def __delitem__(self, key: str) -> None:
- key_lower = key.lower()
- self._dict.__delitem__(key_lower)
- # This is inefficent. Fortunately deleting HTTP headers is uncommon.
- self._list = [(k, v) for k, v in self._list if k.lower() != key_lower]
-
- def __eq__(self, other: Any) -> bool:
- if not isinstance(other, Headers):
- return NotImplemented
- return self._list == other._list
-
- def clear(self) -> None:
- """
- Remove all headers.
-
- """
- self._dict = {}
- self._list = []
-
- # Methods for handling multiple values
-
- def get_all(self, key: str) -> List[str]:
- """
- Return the (possibly empty) list of all values for a header.
-
- :param key: header name
-
- """
- return self._dict.get(key.lower(), [])
-
- def raw_items(self) -> Iterator[Tuple[str, str]]:
- """
- Return an iterator of all values as ``(name, value)`` pairs.
-
- """
- return iter(self._list)
-
-
- HeadersLike = Union[Headers, Mapping[str, str], Iterable[Tuple[str, str]]]
-
-
- # at the bottom to allow circular import, because AbortHandshake depends on HeadersLike
- import websockets.exceptions # isort:skip # noqa
|