您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

514 行
15 KiB

  1. """
  2. :mod:`websockets.headers` provides parsers and serializers for HTTP headers
  3. used in WebSocket handshake messages.
  4. These APIs cannot be imported from :mod:`websockets`. They must be imported
  5. from :mod:`websockets.headers`.
  6. """
  7. import base64
  8. import binascii
  9. import re
  10. from typing import Callable, List, NewType, Optional, Sequence, Tuple, TypeVar, cast
  11. from .exceptions import InvalidHeaderFormat, InvalidHeaderValue
  12. from .typing import ExtensionHeader, ExtensionParameter, Subprotocol
  13. __all__ = [
  14. "parse_connection",
  15. "parse_upgrade",
  16. "parse_extension",
  17. "build_extension",
  18. "parse_subprotocol",
  19. "build_subprotocol",
  20. "build_www_authenticate_basic",
  21. "parse_authorization_basic",
  22. "build_authorization_basic",
  23. ]
  24. T = TypeVar("T")
  25. ConnectionOption = NewType("ConnectionOption", str)
  26. UpgradeProtocol = NewType("UpgradeProtocol", str)
  27. # To avoid a dependency on a parsing library, we implement manually the ABNF
  28. # described in https://tools.ietf.org/html/rfc6455#section-9.1 with the
  29. # definitions from https://tools.ietf.org/html/rfc7230#appendix-B.
  30. def peek_ahead(header: str, pos: int) -> Optional[str]:
  31. """
  32. Return the next character from ``header`` at the given position.
  33. Return ``None`` at the end of ``header``.
  34. We never need to peek more than one character ahead.
  35. """
  36. return None if pos == len(header) else header[pos]
  37. _OWS_re = re.compile(r"[\t ]*")
  38. def parse_OWS(header: str, pos: int) -> int:
  39. """
  40. Parse optional whitespace from ``header`` at the given position.
  41. Return the new position.
  42. The whitespace itself isn't returned because it isn't significant.
  43. """
  44. # There's always a match, possibly empty, whose content doesn't matter.
  45. match = _OWS_re.match(header, pos)
  46. assert match is not None
  47. return match.end()
  48. _token_re = re.compile(r"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+")
  49. def parse_token(header: str, pos: int, header_name: str) -> Tuple[str, int]:
  50. """
  51. Parse a token from ``header`` at the given position.
  52. Return the token value and the new position.
  53. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  54. """
  55. match = _token_re.match(header, pos)
  56. if match is None:
  57. raise InvalidHeaderFormat(header_name, "expected token", header, pos)
  58. return match.group(), match.end()
  59. _quoted_string_re = re.compile(
  60. r'"(?:[\x09\x20-\x21\x23-\x5b\x5d-\x7e]|\\[\x09\x20-\x7e\x80-\xff])*"'
  61. )
  62. _unquote_re = re.compile(r"\\([\x09\x20-\x7e\x80-\xff])")
  63. def parse_quoted_string(header: str, pos: int, header_name: str) -> Tuple[str, int]:
  64. """
  65. Parse a quoted string from ``header`` at the given position.
  66. Return the unquoted value and the new position.
  67. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  68. """
  69. match = _quoted_string_re.match(header, pos)
  70. if match is None:
  71. raise InvalidHeaderFormat(header_name, "expected quoted string", header, pos)
  72. return _unquote_re.sub(r"\1", match.group()[1:-1]), match.end()
  73. _quotable_re = re.compile(r"[\x09\x20-\x7e\x80-\xff]*")
  74. _quote_re = re.compile(r"([\x22\x5c])")
  75. def build_quoted_string(value: str) -> str:
  76. """
  77. Format ``value`` as a quoted string.
  78. This is the reverse of :func:`parse_quoted_string`.
  79. """
  80. match = _quotable_re.fullmatch(value)
  81. if match is None:
  82. raise ValueError("invalid characters for quoted-string encoding")
  83. return '"' + _quote_re.sub(r"\\\1", value) + '"'
  84. def parse_list(
  85. parse_item: Callable[[str, int, str], Tuple[T, int]],
  86. header: str,
  87. pos: int,
  88. header_name: str,
  89. ) -> List[T]:
  90. """
  91. Parse a comma-separated list from ``header`` at the given position.
  92. This is appropriate for parsing values with the following grammar:
  93. 1#item
  94. ``parse_item`` parses one item.
  95. ``header`` is assumed not to start or end with whitespace.
  96. (This function is designed for parsing an entire header value and
  97. :func:`~websockets.http.read_headers` strips whitespace from values.)
  98. Return a list of items.
  99. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  100. """
  101. # Per https://tools.ietf.org/html/rfc7230#section-7, "a recipient MUST
  102. # parse and ignore a reasonable number of empty list elements"; hence
  103. # while loops that remove extra delimiters.
  104. # Remove extra delimiters before the first item.
  105. while peek_ahead(header, pos) == ",":
  106. pos = parse_OWS(header, pos + 1)
  107. items = []
  108. while True:
  109. # Loop invariant: a item starts at pos in header.
  110. item, pos = parse_item(header, pos, header_name)
  111. items.append(item)
  112. pos = parse_OWS(header, pos)
  113. # We may have reached the end of the header.
  114. if pos == len(header):
  115. break
  116. # There must be a delimiter after each element except the last one.
  117. if peek_ahead(header, pos) == ",":
  118. pos = parse_OWS(header, pos + 1)
  119. else:
  120. raise InvalidHeaderFormat(header_name, "expected comma", header, pos)
  121. # Remove extra delimiters before the next item.
  122. while peek_ahead(header, pos) == ",":
  123. pos = parse_OWS(header, pos + 1)
  124. # We may have reached the end of the header.
  125. if pos == len(header):
  126. break
  127. # Since we only advance in the header by one character with peek_ahead()
  128. # or with the end position of a regex match, we can't overshoot the end.
  129. assert pos == len(header)
  130. return items
  131. def parse_connection_option(
  132. header: str, pos: int, header_name: str
  133. ) -> Tuple[ConnectionOption, int]:
  134. """
  135. Parse a Connection option from ``header`` at the given position.
  136. Return the protocol value and the new position.
  137. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  138. """
  139. item, pos = parse_token(header, pos, header_name)
  140. return cast(ConnectionOption, item), pos
  141. def parse_connection(header: str) -> List[ConnectionOption]:
  142. """
  143. Parse a ``Connection`` header.
  144. Return a list of HTTP connection options.
  145. :param header: value of the ``Connection`` header
  146. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  147. """
  148. return parse_list(parse_connection_option, header, 0, "Connection")
  149. _protocol_re = re.compile(
  150. r"[-!#$%&\'*+.^_`|~0-9a-zA-Z]+(?:/[-!#$%&\'*+.^_`|~0-9a-zA-Z]+)?"
  151. )
  152. def parse_upgrade_protocol(
  153. header: str, pos: int, header_name: str
  154. ) -> Tuple[UpgradeProtocol, int]:
  155. """
  156. Parse an Upgrade protocol from ``header`` at the given position.
  157. Return the protocol value and the new position.
  158. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  159. """
  160. match = _protocol_re.match(header, pos)
  161. if match is None:
  162. raise InvalidHeaderFormat(header_name, "expected protocol", header, pos)
  163. return cast(UpgradeProtocol, match.group()), match.end()
  164. def parse_upgrade(header: str) -> List[UpgradeProtocol]:
  165. """
  166. Parse an ``Upgrade`` header.
  167. Return a list of HTTP protocols.
  168. :param header: value of the ``Upgrade`` header
  169. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  170. """
  171. return parse_list(parse_upgrade_protocol, header, 0, "Upgrade")
  172. def parse_extension_item_param(
  173. header: str, pos: int, header_name: str
  174. ) -> Tuple[ExtensionParameter, int]:
  175. """
  176. Parse a single extension parameter from ``header`` at the given position.
  177. Return a ``(name, value)`` pair and the new position.
  178. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  179. """
  180. # Extract parameter name.
  181. name, pos = parse_token(header, pos, header_name)
  182. pos = parse_OWS(header, pos)
  183. # Extract parameter value, if there is one.
  184. value: Optional[str] = None
  185. if peek_ahead(header, pos) == "=":
  186. pos = parse_OWS(header, pos + 1)
  187. if peek_ahead(header, pos) == '"':
  188. pos_before = pos # for proper error reporting below
  189. value, pos = parse_quoted_string(header, pos, header_name)
  190. # https://tools.ietf.org/html/rfc6455#section-9.1 says: the value
  191. # after quoted-string unescaping MUST conform to the 'token' ABNF.
  192. if _token_re.fullmatch(value) is None:
  193. raise InvalidHeaderFormat(
  194. header_name, "invalid quoted header content", header, pos_before
  195. )
  196. else:
  197. value, pos = parse_token(header, pos, header_name)
  198. pos = parse_OWS(header, pos)
  199. return (name, value), pos
  200. def parse_extension_item(
  201. header: str, pos: int, header_name: str
  202. ) -> Tuple[ExtensionHeader, int]:
  203. """
  204. Parse an extension definition from ``header`` at the given position.
  205. Return an ``(extension name, parameters)`` pair, where ``parameters`` is a
  206. list of ``(name, value)`` pairs, and the new position.
  207. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  208. """
  209. # Extract extension name.
  210. name, pos = parse_token(header, pos, header_name)
  211. pos = parse_OWS(header, pos)
  212. # Extract all parameters.
  213. parameters = []
  214. while peek_ahead(header, pos) == ";":
  215. pos = parse_OWS(header, pos + 1)
  216. parameter, pos = parse_extension_item_param(header, pos, header_name)
  217. parameters.append(parameter)
  218. return (name, parameters), pos
  219. def parse_extension(header: str) -> List[ExtensionHeader]:
  220. """
  221. Parse a ``Sec-WebSocket-Extensions`` header.
  222. Return a list of WebSocket extensions and their parameters in this format::
  223. [
  224. (
  225. 'extension name',
  226. [
  227. ('parameter name', 'parameter value'),
  228. ....
  229. ]
  230. ),
  231. ...
  232. ]
  233. Parameter values are ``None`` when no value is provided.
  234. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  235. """
  236. return parse_list(parse_extension_item, header, 0, "Sec-WebSocket-Extensions")
  237. parse_extension_list = parse_extension # alias for backwards compatibility
  238. def build_extension_item(name: str, parameters: List[ExtensionParameter]) -> str:
  239. """
  240. Build an extension definition.
  241. This is the reverse of :func:`parse_extension_item`.
  242. """
  243. return "; ".join(
  244. [name]
  245. + [
  246. # Quoted strings aren't necessary because values are always tokens.
  247. name if value is None else f"{name}={value}"
  248. for name, value in parameters
  249. ]
  250. )
  251. def build_extension(extensions: Sequence[ExtensionHeader]) -> str:
  252. """
  253. Build a ``Sec-WebSocket-Extensions`` header.
  254. This is the reverse of :func:`parse_extension`.
  255. """
  256. return ", ".join(
  257. build_extension_item(name, parameters) for name, parameters in extensions
  258. )
  259. build_extension_list = build_extension # alias for backwards compatibility
  260. def parse_subprotocol_item(
  261. header: str, pos: int, header_name: str
  262. ) -> Tuple[Subprotocol, int]:
  263. """
  264. Parse a subprotocol from ``header`` at the given position.
  265. Return the subprotocol value and the new position.
  266. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  267. """
  268. item, pos = parse_token(header, pos, header_name)
  269. return cast(Subprotocol, item), pos
  270. def parse_subprotocol(header: str) -> List[Subprotocol]:
  271. """
  272. Parse a ``Sec-WebSocket-Protocol`` header.
  273. Return a list of WebSocket subprotocols.
  274. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  275. """
  276. return parse_list(parse_subprotocol_item, header, 0, "Sec-WebSocket-Protocol")
  277. parse_subprotocol_list = parse_subprotocol # alias for backwards compatibility
  278. def build_subprotocol(protocols: Sequence[Subprotocol]) -> str:
  279. """
  280. Build a ``Sec-WebSocket-Protocol`` header.
  281. This is the reverse of :func:`parse_subprotocol`.
  282. """
  283. return ", ".join(protocols)
  284. build_subprotocol_list = build_subprotocol # alias for backwards compatibility
  285. def build_www_authenticate_basic(realm: str) -> str:
  286. """
  287. Build a ``WWW-Authenticate`` header for HTTP Basic Auth.
  288. :param realm: authentication realm
  289. """
  290. # https://tools.ietf.org/html/rfc7617#section-2
  291. realm = build_quoted_string(realm)
  292. charset = build_quoted_string("UTF-8")
  293. return f"Basic realm={realm}, charset={charset}"
  294. _token68_re = re.compile(r"[A-Za-z0-9-._~+/]+=*")
  295. def parse_token68(header: str, pos: int, header_name: str) -> Tuple[str, int]:
  296. """
  297. Parse a token68 from ``header`` at the given position.
  298. Return the token value and the new position.
  299. :raises ~websockets.exceptions.InvalidHeaderFormat: on invalid inputs.
  300. """
  301. match = _token68_re.match(header, pos)
  302. if match is None:
  303. raise InvalidHeaderFormat(header_name, "expected token68", header, pos)
  304. return match.group(), match.end()
  305. def parse_end(header: str, pos: int, header_name: str) -> None:
  306. """
  307. Check that parsing reached the end of header.
  308. """
  309. if pos < len(header):
  310. raise InvalidHeaderFormat(header_name, "trailing data", header, pos)
  311. def parse_authorization_basic(header: str) -> Tuple[str, str]:
  312. """
  313. Parse an ``Authorization`` header for HTTP Basic Auth.
  314. Return a ``(username, password)`` tuple.
  315. :param header: value of the ``Authorization`` header
  316. :raises InvalidHeaderFormat: on invalid inputs
  317. :raises InvalidHeaderValue: on unsupported inputs
  318. """
  319. # https://tools.ietf.org/html/rfc7235#section-2.1
  320. # https://tools.ietf.org/html/rfc7617#section-2
  321. scheme, pos = parse_token(header, 0, "Authorization")
  322. if scheme.lower() != "basic":
  323. raise InvalidHeaderValue("Authorization", f"unsupported scheme: {scheme}")
  324. if peek_ahead(header, pos) != " ":
  325. raise InvalidHeaderFormat(
  326. "Authorization", "expected space after scheme", header, pos
  327. )
  328. pos += 1
  329. basic_credentials, pos = parse_token68(header, pos, "Authorization")
  330. parse_end(header, pos, "Authorization")
  331. try:
  332. user_pass = base64.b64decode(basic_credentials.encode()).decode()
  333. except binascii.Error:
  334. raise InvalidHeaderValue(
  335. "Authorization", "expected base64-encoded credentials"
  336. ) from None
  337. try:
  338. username, password = user_pass.split(":", 1)
  339. except ValueError:
  340. raise InvalidHeaderValue(
  341. "Authorization", "expected username:password credentials"
  342. ) from None
  343. return username, password
  344. def build_authorization_basic(username: str, password: str) -> str:
  345. """
  346. Build an ``Authorization`` header for HTTP Basic Auth.
  347. This is the reverse of :func:`parse_authorization_basic`.
  348. """
  349. # https://tools.ietf.org/html/rfc7617#section-2
  350. assert ":" not in username
  351. user_pass = f"{username}:{password}"
  352. basic_credentials = base64.b64encode(user_pass.encode()).decode()
  353. return "Basic " + basic_credentials