Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.
 
 
 
 

226 righe
6.8 KiB

  1. from __future__ import annotations
  2. import dataclasses
  3. import urllib.parse
  4. import urllib.request
  5. from .exceptions import InvalidProxy, InvalidURI
  6. __all__ = ["parse_uri", "WebSocketURI"]
  7. # All characters from the gen-delims and sub-delims sets in RFC 3987.
  8. DELIMS = ":/?#[]@!$&'()*+,;="
  9. @dataclasses.dataclass
  10. class WebSocketURI:
  11. """
  12. WebSocket URI.
  13. Attributes:
  14. secure: :obj:`True` for a ``wss`` URI, :obj:`False` for a ``ws`` URI.
  15. host: Normalized to lower case.
  16. port: Always set even if it's the default.
  17. path: May be empty.
  18. query: May be empty if the URI doesn't include a query component.
  19. username: Available when the URI contains `User Information`_.
  20. password: Available when the URI contains `User Information`_.
  21. .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1
  22. """
  23. secure: bool
  24. host: str
  25. port: int
  26. path: str
  27. query: str
  28. username: str | None = None
  29. password: str | None = None
  30. @property
  31. def resource_name(self) -> str:
  32. if self.path:
  33. resource_name = self.path
  34. else:
  35. resource_name = "/"
  36. if self.query:
  37. resource_name += "?" + self.query
  38. return resource_name
  39. @property
  40. def user_info(self) -> tuple[str, str] | None:
  41. if self.username is None:
  42. return None
  43. assert self.password is not None
  44. return (self.username, self.password)
  45. def parse_uri(uri: str) -> WebSocketURI:
  46. """
  47. Parse and validate a WebSocket URI.
  48. Args:
  49. uri: WebSocket URI.
  50. Returns:
  51. Parsed WebSocket URI.
  52. Raises:
  53. InvalidURI: If ``uri`` isn't a valid WebSocket URI.
  54. """
  55. parsed = urllib.parse.urlparse(uri)
  56. if parsed.scheme not in ["ws", "wss"]:
  57. raise InvalidURI(uri, "scheme isn't ws or wss")
  58. if parsed.hostname is None:
  59. raise InvalidURI(uri, "hostname isn't provided")
  60. if parsed.fragment != "":
  61. raise InvalidURI(uri, "fragment identifier is meaningless")
  62. secure = parsed.scheme == "wss"
  63. host = parsed.hostname
  64. port = parsed.port or (443 if secure else 80)
  65. path = parsed.path
  66. query = parsed.query
  67. username = parsed.username
  68. password = parsed.password
  69. # urllib.parse.urlparse accepts URLs with a username but without a
  70. # password. This doesn't make sense for HTTP Basic Auth credentials.
  71. if username is not None and password is None:
  72. raise InvalidURI(uri, "username provided without password")
  73. try:
  74. uri.encode("ascii")
  75. except UnicodeEncodeError:
  76. # Input contains non-ASCII characters.
  77. # It must be an IRI. Convert it to a URI.
  78. host = host.encode("idna").decode()
  79. path = urllib.parse.quote(path, safe=DELIMS)
  80. query = urllib.parse.quote(query, safe=DELIMS)
  81. if username is not None:
  82. assert password is not None
  83. username = urllib.parse.quote(username, safe=DELIMS)
  84. password = urllib.parse.quote(password, safe=DELIMS)
  85. return WebSocketURI(secure, host, port, path, query, username, password)
  86. @dataclasses.dataclass
  87. class Proxy:
  88. """
  89. Proxy.
  90. Attributes:
  91. scheme: ``"socks5h"``, ``"socks5"``, ``"socks4a"``, ``"socks4"``,
  92. ``"https"``, or ``"http"``.
  93. host: Normalized to lower case.
  94. port: Always set even if it's the default.
  95. username: Available when the proxy address contains `User Information`_.
  96. password: Available when the proxy address contains `User Information`_.
  97. .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1
  98. """
  99. scheme: str
  100. host: str
  101. port: int
  102. username: str | None = None
  103. password: str | None = None
  104. @property
  105. def user_info(self) -> tuple[str, str] | None:
  106. if self.username is None:
  107. return None
  108. assert self.password is not None
  109. return (self.username, self.password)
  110. def parse_proxy(proxy: str) -> Proxy:
  111. """
  112. Parse and validate a proxy.
  113. Args:
  114. proxy: proxy.
  115. Returns:
  116. Parsed proxy.
  117. Raises:
  118. InvalidProxy: If ``proxy`` isn't a valid proxy.
  119. """
  120. parsed = urllib.parse.urlparse(proxy)
  121. if parsed.scheme not in ["socks5h", "socks5", "socks4a", "socks4", "https", "http"]:
  122. raise InvalidProxy(proxy, f"scheme {parsed.scheme} isn't supported")
  123. if parsed.hostname is None:
  124. raise InvalidProxy(proxy, "hostname isn't provided")
  125. if parsed.path not in ["", "/"]:
  126. raise InvalidProxy(proxy, "path is meaningless")
  127. if parsed.query != "":
  128. raise InvalidProxy(proxy, "query is meaningless")
  129. if parsed.fragment != "":
  130. raise InvalidProxy(proxy, "fragment is meaningless")
  131. scheme = parsed.scheme
  132. host = parsed.hostname
  133. port = parsed.port or (443 if parsed.scheme == "https" else 80)
  134. username = parsed.username
  135. password = parsed.password
  136. # urllib.parse.urlparse accepts URLs with a username but without a
  137. # password. This doesn't make sense for HTTP Basic Auth credentials.
  138. if username is not None and password is None:
  139. raise InvalidProxy(proxy, "username provided without password")
  140. try:
  141. proxy.encode("ascii")
  142. except UnicodeEncodeError:
  143. # Input contains non-ASCII characters.
  144. # It must be an IRI. Convert it to a URI.
  145. host = host.encode("idna").decode()
  146. if username is not None:
  147. assert password is not None
  148. username = urllib.parse.quote(username, safe=DELIMS)
  149. password = urllib.parse.quote(password, safe=DELIMS)
  150. return Proxy(scheme, host, port, username, password)
  151. def get_proxy(uri: WebSocketURI) -> str | None:
  152. """
  153. Return the proxy to use for connecting to the given WebSocket URI, if any.
  154. """
  155. if urllib.request.proxy_bypass(f"{uri.host}:{uri.port}"):
  156. return None
  157. # According to the _Proxy Usage_ section of RFC 6455, use a SOCKS5 proxy if
  158. # available, else favor the proxy for HTTPS connections over the proxy for
  159. # HTTP connections.
  160. # The priority of a proxy for WebSocket connections is unspecified. We give
  161. # it the highest priority. This makes it easy to configure a specific proxy
  162. # for websockets.
  163. # getproxies() may return SOCKS proxies as {"socks": "http://host:port"} or
  164. # as {"https": "socks5h://host:port"} depending on whether they're declared
  165. # in the operating system or in environment variables.
  166. proxies = urllib.request.getproxies()
  167. if uri.secure:
  168. schemes = ["wss", "socks", "https"]
  169. else:
  170. schemes = ["ws", "socks", "https", "http"]
  171. for scheme in schemes:
  172. proxy = proxies.get(scheme)
  173. if proxy is not None:
  174. if scheme == "socks" and proxy.startswith("http://"):
  175. proxy = "socks5h://" + proxy[7:]
  176. return proxy
  177. else:
  178. return None