Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.
 
 
 
 

587 Zeilen
16 KiB

  1. from __future__ import annotations
  2. import base64
  3. import binascii
  4. import ipaddress
  5. import re
  6. from collections.abc import Sequence
  7. from typing import Callable, TypeVar, cast
  8. from .exceptions import InvalidHeaderFormat, InvalidHeaderValue
  9. from .typing import (
  10. ConnectionOption,
  11. ExtensionHeader,
  12. ExtensionName,
  13. ExtensionParameter,
  14. Subprotocol,
  15. UpgradeProtocol,
  16. )
# Names exported by ``from <module> import *``: the header builders, parsers,
# and validators defined in this module.
__all__ = [
    "build_host",
    "parse_connection",
    "parse_upgrade",
    "parse_extension",
    "build_extension",
    "parse_subprotocol",
    "build_subprotocol",
    "validate_subprotocols",
    "build_www_authenticate_basic",
    "parse_authorization_basic",
    "build_authorization_basic",
]

# Generic item type produced by the ``parse_item`` callback of parse_list().
T = TypeVar("T")
  31. def build_host(
  32. host: str,
  33. port: int,
  34. secure: bool,
  35. *,
  36. always_include_port: bool = False,
  37. ) -> str:
  38. """
  39. Build a ``Host`` header.
  40. """
  41. # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
  42. # IPv6 addresses must be enclosed in brackets.
  43. try:
  44. address = ipaddress.ip_address(host)
  45. except ValueError:
  46. # host is a hostname
  47. pass
  48. else:
  49. # host is an IP address
  50. if address.version == 6:
  51. host = f"[{host}]"
  52. if always_include_port or port != (443 if secure else 80):
  53. host = f"{host}:{port}"
  54. return host
  55. # To avoid a dependency on a parsing library, we implement manually the ABNF
  56. # described in https://datatracker.ietf.org/doc/html/rfc6455#section-9.1 and
  57. # https://datatracker.ietf.org/doc/html/rfc7230#appendix-B.
  58. def peek_ahead(header: str, pos: int) -> str | None:
  59. """
  60. Return the next character from ``header`` at the given position.
  61. Return :obj:`None` at the end of ``header``.
  62. We never need to peek more than one character ahead.
  63. """
  64. return None if pos == len(header) else header[pos]
  65. _OWS_re = re.compile(r"[\t ]*")
  66. def parse_OWS(header: str, pos: int) -> int:
  67. """
  68. Parse optional whitespace from ``header`` at the given position.
  69. Return the new position.
  70. The whitespace itself isn't returned because it isn't significant.
  71. """
  72. # There's always a match, possibly empty, whose content doesn't matter.
  73. match = _OWS_re.match(header, pos)
  74. assert match is not None
  75. return match.end()
  76. _token_re = re.compile(r"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
  77. def parse_token(header: str, pos: int, header_name: str) -> tuple[str, int]:
  78. """
  79. Parse a token from ``header`` at the given position.
  80. Return the token value and the new position.
  81. Raises:
  82. InvalidHeaderFormat: On invalid inputs.
  83. """
  84. match = _token_re.match(header, pos)
  85. if match is None:
  86. raise InvalidHeaderFormat(header_name, "expected token", header, pos)
  87. return match.group(), match.end()
  88. _quoted_string_re = re.compile(
  89. r'"(?:[\x09\x20-\x21\x23-\x5b\x5d-\x7e]|\\[\x09\x20-\x7e\x80-\xff])*"'
  90. )
  91. _unquote_re = re.compile(r"\\([\x09\x20-\x7e\x80-\xff])")
  92. def parse_quoted_string(header: str, pos: int, header_name: str) -> tuple[str, int]:
  93. """
  94. Parse a quoted string from ``header`` at the given position.
  95. Return the unquoted value and the new position.
  96. Raises:
  97. InvalidHeaderFormat: On invalid inputs.
  98. """
  99. match = _quoted_string_re.match(header, pos)
  100. if match is None:
  101. raise InvalidHeaderFormat(header_name, "expected quoted string", header, pos)
  102. return _unquote_re.sub(r"\1", match.group()[1:-1]), match.end()
  103. _quotable_re = re.compile(r"[\x09\x20-\x7e\x80-\xff]*")
  104. _quote_re = re.compile(r"([\x22\x5c])")
  105. def build_quoted_string(value: str) -> str:
  106. """
  107. Format ``value`` as a quoted string.
  108. This is the reverse of :func:`parse_quoted_string`.
  109. """
  110. match = _quotable_re.fullmatch(value)
  111. if match is None:
  112. raise ValueError("invalid characters for quoted-string encoding")
  113. return '"' + _quote_re.sub(r"\\\1", value) + '"'
  114. def parse_list(
  115. parse_item: Callable[[str, int, str], tuple[T, int]],
  116. header: str,
  117. pos: int,
  118. header_name: str,
  119. ) -> list[T]:
  120. """
  121. Parse a comma-separated list from ``header`` at the given position.
  122. This is appropriate for parsing values with the following grammar:
  123. 1#item
  124. ``parse_item`` parses one item.
  125. ``header`` is assumed not to start or end with whitespace.
  126. (This function is designed for parsing an entire header value and
  127. :func:`~websockets.http.read_headers` strips whitespace from values.)
  128. Return a list of items.
  129. Raises:
  130. InvalidHeaderFormat: On invalid inputs.
  131. """
  132. # Per https://datatracker.ietf.org/doc/html/rfc7230#section-7, "a recipient
  133. # MUST parse and ignore a reasonable number of empty list elements";
  134. # hence while loops that remove extra delimiters.
  135. # Remove extra delimiters before the first item.
  136. while peek_ahead(header, pos) == ",":
  137. pos = parse_OWS(header, pos + 1)
  138. items = []
  139. while True:
  140. # Loop invariant: a item starts at pos in header.
  141. item, pos = parse_item(header, pos, header_name)
  142. items.append(item)
  143. pos = parse_OWS(header, pos)
  144. # We may have reached the end of the header.
  145. if pos == len(header):
  146. break
  147. # There must be a delimiter after each element except the last one.
  148. if peek_ahead(header, pos) == ",":
  149. pos = parse_OWS(header, pos + 1)
  150. else:
  151. raise InvalidHeaderFormat(header_name, "expected comma", header, pos)
  152. # Remove extra delimiters before the next item.
  153. while peek_ahead(header, pos) == ",":
  154. pos = parse_OWS(header, pos + 1)
  155. # We may have reached the end of the header.
  156. if pos == len(header):
  157. break
  158. # Since we only advance in the header by one character with peek_ahead()
  159. # or with the end position of a regex match, we can't overshoot the end.
  160. assert pos == len(header)
  161. return items
  162. def parse_connection_option(
  163. header: str, pos: int, header_name: str
  164. ) -> tuple[ConnectionOption, int]:
  165. """
  166. Parse a Connection option from ``header`` at the given position.
  167. Return the protocol value and the new position.
  168. Raises:
  169. InvalidHeaderFormat: On invalid inputs.
  170. """
  171. item, pos = parse_token(header, pos, header_name)
  172. return cast(ConnectionOption, item), pos
  173. def parse_connection(header: str) -> list[ConnectionOption]:
  174. """
  175. Parse a ``Connection`` header.
  176. Return a list of HTTP connection options.
  177. Args
  178. header: value of the ``Connection`` header.
  179. Raises:
  180. InvalidHeaderFormat: On invalid inputs.
  181. """
  182. return parse_list(parse_connection_option, header, 0, "Connection")
  183. _protocol_re = re.compile(
  184. r"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+(?:/[-!#$%&\'*+.^_`|~0-9a-zA-Z]+)?"
  185. )
  186. def parse_upgrade_protocol(
  187. header: str, pos: int, header_name: str
  188. ) -> tuple[UpgradeProtocol, int]:
  189. """
  190. Parse an Upgrade protocol from ``header`` at the given position.
  191. Return the protocol value and the new position.
  192. Raises:
  193. InvalidHeaderFormat: On invalid inputs.
  194. """
  195. match = _protocol_re.match(header, pos)
  196. if match is None:
  197. raise InvalidHeaderFormat(header_name, "expected protocol", header, pos)
  198. return cast(UpgradeProtocol, match.group()), match.end()
  199. def parse_upgrade(header: str) -> list[UpgradeProtocol]:
  200. """
  201. Parse an ``Upgrade`` header.
  202. Return a list of HTTP protocols.
  203. Args:
  204. header: Value of the ``Upgrade`` header.
  205. Raises:
  206. InvalidHeaderFormat: On invalid inputs.
  207. """
  208. return parse_list(parse_upgrade_protocol, header, 0, "Upgrade")
  209. def parse_extension_item_param(
  210. header: str, pos: int, header_name: str
  211. ) -> tuple[ExtensionParameter, int]:
  212. """
  213. Parse a single extension parameter from ``header`` at the given position.
  214. Return a ``(name, value)`` pair and the new position.
  215. Raises:
  216. InvalidHeaderFormat: On invalid inputs.
  217. """
  218. # Extract parameter name.
  219. name, pos = parse_token(header, pos, header_name)
  220. pos = parse_OWS(header, pos)
  221. # Extract parameter value, if there is one.
  222. value: str | None = None
  223. if peek_ahead(header, pos) == "=":
  224. pos = parse_OWS(header, pos + 1)
  225. if peek_ahead(header, pos) == '"':
  226. pos_before = pos # for proper error reporting below
  227. value, pos = parse_quoted_string(header, pos, header_name)
  228. # https://datatracker.ietf.org/doc/html/rfc6455#section-9.1 says:
  229. # the value after quoted-string unescaping MUST conform to
  230. # the 'token' ABNF.
  231. if _token_re.fullmatch(value) is None:
  232. raise InvalidHeaderFormat(
  233. header_name, "invalid quoted header content", header, pos_before
  234. )
  235. else:
  236. value, pos = parse_token(header, pos, header_name)
  237. pos = parse_OWS(header, pos)
  238. return (name, value), pos
  239. def parse_extension_item(
  240. header: str, pos: int, header_name: str
  241. ) -> tuple[ExtensionHeader, int]:
  242. """
  243. Parse an extension definition from ``header`` at the given position.
  244. Return an ``(extension name, parameters)`` pair, where ``parameters`` is a
  245. list of ``(name, value)`` pairs, and the new position.
  246. Raises:
  247. InvalidHeaderFormat: On invalid inputs.
  248. """
  249. # Extract extension name.
  250. name, pos = parse_token(header, pos, header_name)
  251. pos = parse_OWS(header, pos)
  252. # Extract all parameters.
  253. parameters = []
  254. while peek_ahead(header, pos) == ";":
  255. pos = parse_OWS(header, pos + 1)
  256. parameter, pos = parse_extension_item_param(header, pos, header_name)
  257. parameters.append(parameter)
  258. return (cast(ExtensionName, name), parameters), pos
  259. def parse_extension(header: str) -> list[ExtensionHeader]:
  260. """
  261. Parse a ``Sec-WebSocket-Extensions`` header.
  262. Return a list of WebSocket extensions and their parameters in this format::
  263. [
  264. (
  265. 'extension name',
  266. [
  267. ('parameter name', 'parameter value'),
  268. ....
  269. ]
  270. ),
  271. ...
  272. ]
  273. Parameter values are :obj:`None` when no value is provided.
  274. Raises:
  275. InvalidHeaderFormat: On invalid inputs.
  276. """
  277. return parse_list(parse_extension_item, header, 0, "Sec-WebSocket-Extensions")
  278. parse_extension_list = parse_extension # alias for backwards compatibility
  279. def build_extension_item(
  280. name: ExtensionName, parameters: Sequence[ExtensionParameter]
  281. ) -> str:
  282. """
  283. Build an extension definition.
  284. This is the reverse of :func:`parse_extension_item`.
  285. """
  286. return "; ".join(
  287. [cast(str, name)]
  288. + [
  289. # Quoted strings aren't necessary because values are always tokens.
  290. name if value is None else f"{name}={value}"
  291. for name, value in parameters
  292. ]
  293. )
  294. def build_extension(extensions: Sequence[ExtensionHeader]) -> str:
  295. """
  296. Build a ``Sec-WebSocket-Extensions`` header.
  297. This is the reverse of :func:`parse_extension`.
  298. """
  299. return ", ".join(
  300. build_extension_item(name, parameters) for name, parameters in extensions
  301. )
  302. build_extension_list = build_extension # alias for backwards compatibility
  303. def parse_subprotocol_item(
  304. header: str, pos: int, header_name: str
  305. ) -> tuple[Subprotocol, int]:
  306. """
  307. Parse a subprotocol from ``header`` at the given position.
  308. Return the subprotocol value and the new position.
  309. Raises:
  310. InvalidHeaderFormat: On invalid inputs.
  311. """
  312. item, pos = parse_token(header, pos, header_name)
  313. return cast(Subprotocol, item), pos
  314. def parse_subprotocol(header: str) -> list[Subprotocol]:
  315. """
  316. Parse a ``Sec-WebSocket-Protocol`` header.
  317. Return a list of WebSocket subprotocols.
  318. Raises:
  319. InvalidHeaderFormat: On invalid inputs.
  320. """
  321. return parse_list(parse_subprotocol_item, header, 0, "Sec-WebSocket-Protocol")
  322. parse_subprotocol_list = parse_subprotocol # alias for backwards compatibility
  323. def build_subprotocol(subprotocols: Sequence[Subprotocol]) -> str:
  324. """
  325. Build a ``Sec-WebSocket-Protocol`` header.
  326. This is the reverse of :func:`parse_subprotocol`.
  327. """
  328. return ", ".join(subprotocols)
  329. build_subprotocol_list = build_subprotocol # alias for backwards compatibility
  330. def validate_subprotocols(subprotocols: Sequence[Subprotocol]) -> None:
  331. """
  332. Validate that ``subprotocols`` is suitable for :func:`build_subprotocol`.
  333. """
  334. if not isinstance(subprotocols, Sequence):
  335. raise TypeError("subprotocols must be a list")
  336. if isinstance(subprotocols, str):
  337. raise TypeError("subprotocols must be a list, not a str")
  338. for subprotocol in subprotocols:
  339. if not _token_re.fullmatch(subprotocol):
  340. raise ValueError(f"invalid subprotocol: {subprotocol}")
  341. def build_www_authenticate_basic(realm: str) -> str:
  342. """
  343. Build a ``WWW-Authenticate`` header for HTTP Basic Auth.
  344. Args:
  345. realm: Identifier of the protection space.
  346. """
  347. # https://datatracker.ietf.org/doc/html/rfc7617#section-2
  348. realm = build_quoted_string(realm)
  349. charset = build_quoted_string("UTF-8")
  350. return f"Basic realm={realm}, charset={charset}"
  351. _token68_re = re.compile(r"[A-Za-z0-9-._~+/]+=*")
  352. def parse_token68(header: str, pos: int, header_name: str) -> tuple[str, int]:
  353. """
  354. Parse a token68 from ``header`` at the given position.
  355. Return the token value and the new position.
  356. Raises:
  357. InvalidHeaderFormat: On invalid inputs.
  358. """
  359. match = _token68_re.match(header, pos)
  360. if match is None:
  361. raise InvalidHeaderFormat(header_name, "expected token68", header, pos)
  362. return match.group(), match.end()
  363. def parse_end(header: str, pos: int, header_name: str) -> None:
  364. """
  365. Check that parsing reached the end of header.
  366. """
  367. if pos < len(header):
  368. raise InvalidHeaderFormat(header_name, "trailing data", header, pos)
  369. def parse_authorization_basic(header: str) -> tuple[str, str]:
  370. """
  371. Parse an ``Authorization`` header for HTTP Basic Auth.
  372. Return a ``(username, password)`` tuple.
  373. Args:
  374. header: Value of the ``Authorization`` header.
  375. Raises:
  376. InvalidHeaderFormat: On invalid inputs.
  377. InvalidHeaderValue: On unsupported inputs.
  378. """
  379. # https://datatracker.ietf.org/doc/html/rfc7235#section-2.1
  380. # https://datatracker.ietf.org/doc/html/rfc7617#section-2
  381. scheme, pos = parse_token(header, 0, "Authorization")
  382. if scheme.lower() != "basic":
  383. raise InvalidHeaderValue(
  384. "Authorization",
  385. f"unsupported scheme: {scheme}",
  386. )
  387. if peek_ahead(header, pos) != " ":
  388. raise InvalidHeaderFormat(
  389. "Authorization", "expected space after scheme", header, pos
  390. )
  391. pos += 1
  392. basic_credentials, pos = parse_token68(header, pos, "Authorization")
  393. parse_end(header, pos, "Authorization")
  394. try:
  395. user_pass = base64.b64decode(basic_credentials.encode()).decode()
  396. except binascii.Error:
  397. raise InvalidHeaderValue(
  398. "Authorization",
  399. "expected base64-encoded credentials",
  400. ) from None
  401. try:
  402. username, password = user_pass.split(":", 1)
  403. except ValueError:
  404. raise InvalidHeaderValue(
  405. "Authorization",
  406. "expected username:password credentials",
  407. ) from None
  408. return username, password
  409. def build_authorization_basic(username: str, password: str) -> str:
  410. """
  411. Build an ``Authorization`` header for HTTP Basic Auth.
  412. This is the reverse of :func:`parse_authorization_basic`.
  413. """
  414. # https://datatracker.ietf.org/doc/html/rfc7617#section-2
  415. assert ":" not in username
  416. user_pass = f"{username}:{password}"
  417. basic_credentials = base64.b64encode(user_pass.encode()).decode()
  418. return "Basic " + basic_credentials