# This contains the main Connection class. Everything in h11 revolves around
# this.

# Import all event types
from ._events import *
# Import all state sentinels
from ._state import *
# Import the internal things we need
from ._util import LocalProtocolError, RemoteProtocolError, make_sentinel
from ._state import ConnectionState, _SWITCH_UPGRADE, _SWITCH_CONNECT
from ._headers import (
    get_comma_header, set_comma_header, has_expect_100_continue,
)
from ._receivebuffer import ReceiveBuffer
from ._readers import READERS
from ._writers import WRITERS

# Everything in __all__ gets re-exported as part of the h11 public API.
__all__ = ["Connection", "NEED_DATA", "PAUSED"]

NEED_DATA = make_sentinel("NEED_DATA")
PAUSED = make_sentinel("PAUSED")

# If we ever have this much buffered without it making a complete parseable
# event, we error out. The only time we really buffer is when reading the
# request/response line + headers together, so this is effectively the limit on
# the size of that.
#
# Some precedents for defaults:
# - node.js: 80 * 1024
# - tomcat: 8 * 1024
# - IIS: 16 * 1024
# - Apache: <8 KiB per line>
DEFAULT_MAX_INCOMPLETE_EVENT_SIZE = 16 * 1024
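
# This constant is only a default; callers can raise or lower the limit
# per-connection. A minimal sketch (the 64 KiB value here is illustrative,
# not a recommendation):
#
#     import h11
#     conn = h11.Connection(our_role=h11.SERVER,
#                           max_incomplete_event_size=64 * 1024)
#
# If the limit is exceeded, next_event() raises RemoteProtocolError (with
# error_status_hint=431, "Request header fields too large").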

# RFC 7230's rules for connection lifecycles:
# - If either side says they want to close the connection, then the connection
#   must close.
# - HTTP/1.1 defaults to keep-alive unless someone says Connection: close
# - HTTP/1.0 defaults to close unless both sides say Connection: keep-alive
#   (and even this is a mess -- e.g. if you're implementing a proxy then
#   sending Connection: keep-alive is forbidden).
#
# We simplify life by not supporting keep-alive with HTTP/1.0 peers at all. So
# our rule is:
# - If someone says Connection: close, we will close
# - If someone uses HTTP/1.0, we will close.
def _keep_alive(event):
    connection = get_comma_header(event.headers, b"connection")
    if b"close" in connection:
        return False
    if getattr(event, "http_version", b"1.1") < b"1.1":
        return False
    return True
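
# Illustrative sketch of how the rule above plays out (these calls aren't made
# anywhere in this module; the header values are invented for the example):
#
#     _keep_alive(Request(method="GET", target="/",
#                         headers=[("Host", "a"), ("Connection", "close")]))
#     # -> False (explicit close)
#     _keep_alive(Response(status_code=200, headers=[], http_version="1.0"))
#     # -> False (HTTP/1.0 peer)
#     _keep_alive(Request(method="GET", target="/", headers=[("Host", "a")]))
#     # -> True (HTTP/1.1 defaults to keep-alive)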


def _body_framing(request_method, event):
    # Called when we enter SEND_BODY to figure out framing information for
    # this body.
    #
    # These are the only two events that can trigger a SEND_BODY state:
    assert type(event) in (Request, Response)

    # Returns one of:
    #
    #    ("content-length", count)
    #    ("chunked", ())
    #    ("http/1.0", ())
    #
    # which are (lookup key, *args) for constructing body reader/writer
    # objects.
    #
    # Reference: https://tools.ietf.org/html/rfc7230#section-3.3.3
    #
    # Step 1: some responses always have an empty body, regardless of what the
    # headers say.
    if type(event) is Response:
        if (event.status_code in (204, 304)
                or request_method == b"HEAD"
                or (request_method == b"CONNECT"
                    and 200 <= event.status_code < 300)):
            return ("content-length", (0,))
        # Section 3.3.3 also lists another case -- responses with status_code
        # < 200. For us these are InformationalResponses, not Responses, so
        # they can't get into this function in the first place.
        assert event.status_code >= 200

    # Step 2: check for Transfer-Encoding (T-E beats C-L):
    transfer_encodings = get_comma_header(event.headers, b"transfer-encoding")
    if transfer_encodings:
        assert transfer_encodings == [b"chunked"]
        return ("chunked", ())

    # Step 3: check for Content-Length
    content_lengths = get_comma_header(event.headers, b"content-length")
    if content_lengths:
        return ("content-length", (int(content_lengths[0]),))

    # Step 4: no applicable headers; fallback/default depends on type
    if type(event) is Request:
        return ("content-length", (0,))
    else:
        return ("http/1.0", ())
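
# A quick sketch of the four steps above (purely illustrative values):
#
#     _body_framing(b"GET", Response(status_code=304, headers=[]))
#     # -> ("content-length", (0,))     # Step 1: 304 never has a body
#     _body_framing(b"GET", Response(status_code=200,
#                                    headers=[("Transfer-Encoding", "chunked")]))
#     # -> ("chunked", ())              # Step 2: Transfer-Encoding wins
#     _body_framing(b"GET", Response(status_code=200,
#                                    headers=[("Content-Length", "100")]))
#     # -> ("content-length", (100,))   # Step 3
#     _body_framing(b"GET", Response(status_code=200, headers=[]))
#     # -> ("http/1.0", ())             # Step 4: body runs until EOF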


################################################################
#
# The main Connection class
#
################################################################

class Connection(object):
    """An object encapsulating the state of an HTTP connection.

    Args:
        our_role: If you're implementing a client, pass :data:`h11.CLIENT`. If
            you're implementing a server, pass :data:`h11.SERVER`.

        max_incomplete_event_size (int):
            The maximum number of bytes we're willing to buffer of an
            incomplete event. In practice this mostly sets a limit on the
            maximum size of the request/response line + headers. If this is
            exceeded, then :meth:`next_event` will raise
            :exc:`RemoteProtocolError`.

    """

    def __init__(self,
                 our_role,
                 max_incomplete_event_size=DEFAULT_MAX_INCOMPLETE_EVENT_SIZE):
        self._max_incomplete_event_size = max_incomplete_event_size
        # State and role tracking
        if our_role not in (CLIENT, SERVER):
            raise ValueError(
                "expected CLIENT or SERVER, not {!r}".format(our_role))
        self.our_role = our_role
        if our_role is CLIENT:
            self.their_role = SERVER
        else:
            self.their_role = CLIENT
        self._cstate = ConnectionState()

        # Callables for converting data->events or vice-versa given the
        # current state
        self._writer = self._get_io_object(self.our_role, None, WRITERS)
        self._reader = self._get_io_object(self.their_role, None, READERS)

        # Holds any unprocessed received data
        self._receive_buffer = ReceiveBuffer()
        # If this is true, then it indicates that the incoming connection was
        # closed *after* the end of whatever's in self._receive_buffer:
        self._receive_buffer_closed = False

        # Extra bits of state that don't fit into the state machine.
        #
        # These two are only used to interpret framing headers for figuring
        # out how to read/write response bodies. their_http_version is also
        # made available as a convenient public API.
        self.their_http_version = None
        self._request_method = None
        # This is pure flow-control and doesn't at all affect the set of legal
        # transitions, so no need to bother ConnectionState with it:
        self.client_is_waiting_for_100_continue = False
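
    # A minimal construction sketch, from the perspective of someone using the
    # public h11 package:
    #
    #     import h11
    #     client = h11.Connection(our_role=h11.CLIENT)
    #     server = h11.Connection(our_role=h11.SERVER)
    #     client.our_state    # h11.IDLE
    #     client.their_state  # h11.IDLE
    #
    # Anything other than CLIENT or SERVER raises ValueError, per the check
    # above.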

    @property
    def states(self):
        """A dictionary like::

           {CLIENT: <client state>, SERVER: <server state>}

        See :ref:`state-machine` for details.

        """
        return dict(self._cstate.states)

    @property
    def our_state(self):
        """The current state of whichever role we are playing. See
        :ref:`state-machine` for details.
        """
        return self._cstate.states[self.our_role]

    @property
    def their_state(self):
        """The current state of whichever role we are NOT playing. See
        :ref:`state-machine` for details.
        """
        return self._cstate.states[self.their_role]

    @property
    def they_are_waiting_for_100_continue(self):
        return (self.their_role is CLIENT
                and self.client_is_waiting_for_100_continue)

    def start_next_cycle(self):
        """Attempt to reset our connection state for a new request/response
        cycle.

        If both client and server are in :data:`DONE` state, then resets them
        both to :data:`IDLE` state in preparation for a new request/response
        cycle on this same connection. Otherwise, raises a
        :exc:`LocalProtocolError`.

        See :ref:`keepalive-and-pipelining`.

        """
        old_states = dict(self._cstate.states)
        self._cstate.start_next_cycle()
        self._request_method = None
        # self.their_http_version gets left alone, since it presumably lasts
        # beyond a single request/response cycle
        assert not self.client_is_waiting_for_100_continue
        self._respond_to_state_changes(old_states)
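
    # Keep-alive sketch: once a full request/response cycle has completed,
    # both sides sit in DONE and next_event() returns PAUSED until the
    # connection is recycled. A typical caller (with `conn` being this
    # Connection) does something like:
    #
    #     if conn.our_state is h11.DONE and conn.their_state is h11.DONE:
    #         conn.start_next_cycle()   # both roles go back to IDLE
    #     else:
    #         ...                       # give up and close the socket instead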

    def _process_error(self, role):
        old_states = dict(self._cstate.states)
        self._cstate.process_error(role)
        self._respond_to_state_changes(old_states)

    def _server_switch_event(self, event):
        if type(event) is InformationalResponse and event.status_code == 101:
            return _SWITCH_UPGRADE
        if type(event) is Response:
            if (_SWITCH_CONNECT in self._cstate.pending_switch_proposals
                    and 200 <= event.status_code < 300):
                return _SWITCH_CONNECT
        return None

    # All events go through here
    def _process_event(self, role, event):
        # First, pass the event through the state machine to make sure it
        # succeeds.
        old_states = dict(self._cstate.states)
        if role is CLIENT and type(event) is Request:
            if event.method == b"CONNECT":
                self._cstate.process_client_switch_proposal(_SWITCH_CONNECT)
            if get_comma_header(event.headers, b"upgrade"):
                self._cstate.process_client_switch_proposal(_SWITCH_UPGRADE)
        server_switch_event = None
        if role is SERVER:
            server_switch_event = self._server_switch_event(event)
        self._cstate.process_event(role, type(event), server_switch_event)

        # Then perform the updates triggered by it.

        # self._request_method
        if type(event) is Request:
            self._request_method = event.method

        # self.their_http_version
        if (role is self.their_role
                and type(event) in (Request, Response, InformationalResponse)):
            self.their_http_version = event.http_version

        # Keep alive handling
        #
        # RFC 7230 doesn't really say what one should do if Connection: close
        # shows up on a 1xx InformationalResponse. I think the idea is that
        # this is not supposed to happen. In any case, if it does happen, we
        # ignore it.
        if type(event) in (Request, Response) and not _keep_alive(event):
            self._cstate.process_keep_alive_disabled()

        # 100-continue
        if type(event) is Request and has_expect_100_continue(event):
            self.client_is_waiting_for_100_continue = True
        if type(event) in (InformationalResponse, Response):
            self.client_is_waiting_for_100_continue = False
        if role is CLIENT and type(event) in (Data, EndOfMessage):
            self.client_is_waiting_for_100_continue = False

        self._respond_to_state_changes(old_states, event)

    def _get_io_object(self, role, event, io_dict):
        # event may be None; it's only used when entering SEND_BODY
        state = self._cstate.states[role]
        if state is SEND_BODY:
            # Special case: the io_dict has a dict of reader/writer factories
            # that depend on the request/response framing.
            framing_type, args = _body_framing(self._request_method, event)
            return io_dict[SEND_BODY][framing_type](*args)
        else:
            # General case: the io_dict just has the appropriate reader/writer
            # for this state
            return io_dict.get((role, state))

    # This must be called after any action that might have caused
    # self._cstate.states to change.
    def _respond_to_state_changes(self, old_states, event=None):
        # Update reader/writer
        if self.our_state != old_states[self.our_role]:
            self._writer = self._get_io_object(self.our_role, event, WRITERS)
        if self.their_state != old_states[self.their_role]:
            self._reader = self._get_io_object(self.their_role, event, READERS)

    @property
    def trailing_data(self):
        """Data that has been received, but not yet processed, represented as
        a tuple with two elements, where the first is a byte-string containing
        the unprocessed data itself, and the second is a bool that is True if
        the receive connection was closed.

        See :ref:`switching-protocols` for discussion of why you'd want this.
        """
        return (bytes(self._receive_buffer), self._receive_buffer_closed)
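
    # Sketch of the intended use after a successful protocol switch (CONNECT
    # tunnel or 101 Upgrade): hand the already-received-but-unparsed bytes to
    # whatever speaks the new protocol (`new_protocol` is hypothetical):
    #
    #     leftover_bytes, was_closed = conn.trailing_data
    #     new_protocol.feed(leftover_bytes)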

    def receive_data(self, data):
        """Add data to our internal receive buffer.

        This does not actually do any processing on the data, just stores
        it. To trigger processing, you have to call :meth:`next_event`.

        Args:
            data (:term:`bytes-like object`):
                The new data that was just received.

                Special case: If *data* is an empty byte-string like ``b""``,
                then this indicates that the remote side has closed the
                connection (end of file). Normally this is convenient, because
                standard Python APIs like :meth:`file.read` or
                :meth:`socket.recv` use ``b""`` to indicate end-of-file, while
                other failures to read are indicated using other mechanisms
                like raising :exc:`TimeoutError`. When using such an API you
                can just blindly pass through whatever you get from ``read``
                to :meth:`receive_data`, and everything will work.

                But, if you have an API where reading an empty string is a
                valid non-EOF condition, then you need to be aware of this and
                make sure to check for such strings and avoid passing them to
                :meth:`receive_data`.

        Returns:
            Nothing, but after calling this you should call :meth:`next_event`
            to parse the newly received data.

        Raises:
            RuntimeError:
                Raised if you pass an empty *data*, indicating EOF, and then
                pass a non-empty *data*, indicating more data that somehow
                arrived after the EOF.

                (Calling ``receive_data(b"")`` multiple times is fine,
                and equivalent to calling it once.)

        """
        if data:
            if self._receive_buffer_closed:
                raise RuntimeError(
                    "received close, then received more data?")
            self._receive_buffer += data
        else:
            self._receive_buffer_closed = True
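
    # Typical socket-driven usage (a sketch; `conn` is this Connection and
    # `sock` an assumed connected socket object):
    #
    #     data = sock.recv(4096)
    #     conn.receive_data(data)   # b"" from recv() correctly signals EOF
    #     event = conn.next_event()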

    def _extract_next_receive_event(self):
        state = self.their_state
        # We don't pause immediately when they enter DONE, because even in
        # DONE state we can still process a ConnectionClosed() event. But
        # if we have data in our buffer, then we definitely aren't getting
        # a ConnectionClosed() immediately and we need to pause.
        if state is DONE and self._receive_buffer:
            return PAUSED
        if state is MIGHT_SWITCH_PROTOCOL or state is SWITCHED_PROTOCOL:
            return PAUSED
        assert self._reader is not None
        event = self._reader(self._receive_buffer)
        if event is None:
            if not self._receive_buffer and self._receive_buffer_closed:
                # In some unusual cases (basically just HTTP/1.0 bodies), EOF
                # triggers an actual protocol event; in that case, we want to
                # return that event, and then the state will change and we'll
                # get called again to generate the actual ConnectionClosed().
                if hasattr(self._reader, "read_eof"):
                    event = self._reader.read_eof()
                else:
                    event = ConnectionClosed()
        if event is None:
            event = NEED_DATA
        return event

    def next_event(self):
        """Parse the next event out of our receive buffer, update our internal
        state, and return it.

        This is a mutating operation -- think of it like calling :func:`next`
        on an iterator.

        Returns:
            : One of three things:

            1) An event object -- see :ref:`events`.

            2) The special constant :data:`NEED_DATA`, which indicates that
               you need to read more data from your socket and pass it to
               :meth:`receive_data` before this method will be able to return
               any more events.

            3) The special constant :data:`PAUSED`, which indicates that we
               are not in a state where we can process incoming data (usually
               because the peer has finished their part of the current
               request/response cycle, and you have not yet called
               :meth:`start_next_cycle`). See :ref:`flow-control` for details.

        Raises:
            RemoteProtocolError:
                The peer has misbehaved. You should close the connection
                (possibly after sending some kind of 4xx response).

        Once this method returns :class:`ConnectionClosed` once, then all
        subsequent calls will also return :class:`ConnectionClosed`.

        If this method raises any exception besides :exc:`RemoteProtocolError`
        then that's a bug -- if it happens please file a bug report!

        If this method raises any exception then it also sets
        :attr:`Connection.their_state` to :data:`ERROR` -- see
        :ref:`error-handling` for discussion.

        """
        if self.their_state is ERROR:
            raise RemoteProtocolError(
                "Can't receive data when peer state is ERROR")
        try:
            event = self._extract_next_receive_event()
            if event not in [NEED_DATA, PAUSED]:
                self._process_event(self.their_role, event)
                self._receive_buffer.compress()
            if event is NEED_DATA:
                if len(self._receive_buffer) > self._max_incomplete_event_size:
                    # 431 is "Request header fields too large" which is pretty
                    # much the only situation where we can get here
                    raise RemoteProtocolError("Receive buffer too long",
                                              error_status_hint=431)
                if self._receive_buffer_closed:
                    # We're still trying to complete some event, but that's
                    # never going to happen because no more data is coming
                    raise RemoteProtocolError(
                        "peer unexpectedly closed connection")
            return event
        except BaseException as exc:
            self._process_error(self.their_role)
            if isinstance(exc, LocalProtocolError):
                exc._reraise_as_remote_protocol_error()
            else:
                raise
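
    # The canonical receive loop is roughly (a sketch; `conn` is this
    # Connection, `sock` an assumed connected socket, and `handle` a
    # hypothetical per-event callback):
    #
    #     while True:
    #         event = conn.next_event()
    #         if event is h11.NEED_DATA:
    #             conn.receive_data(sock.recv(4096))
    #         elif event is h11.PAUSED:
    #             break                 # wait for start_next_cycle()
    #         elif type(event) is h11.EndOfMessage:
    #             break                 # this message is complete
    #         else:
    #             handle(event)         # Request/Response/Data/ConnectionClosed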

    def send(self, event):
        """Convert a high-level event into bytes that can be sent to the peer,
        while updating our internal state machine.

        Args:
            event: The :ref:`event <events>` to send.

        Returns:
            If ``type(event) is ConnectionClosed``, then returns
            ``None``. Otherwise, returns a :term:`bytes-like object`.

        Raises:
            LocalProtocolError:
                Sending this event at this time would violate our
                understanding of the HTTP/1.1 protocol.

        If this method raises any exception then it also sets
        :attr:`Connection.our_state` to :data:`ERROR` -- see
        :ref:`error-handling` for discussion.

        """
        data_list = self.send_with_data_passthrough(event)
        if data_list is None:
            return None
        else:
            return b"".join(data_list)
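
    # Client-side sending sketch (assumes `conn` is a CLIENT-role Connection
    # and `sock` a connected socket):
    #
    #     bytes_to_send = conn.send(h11.Request(
    #         method="GET", target="/", headers=[("Host", "example.com")]))
    #     bytes_to_send += conn.send(h11.EndOfMessage())
    #     sock.sendall(bytes_to_send)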

    def send_with_data_passthrough(self, event):
        """Identical to :meth:`send`, except that in situations where
        :meth:`send` returns a single :term:`bytes-like object`, this instead
        returns a list of them -- and when sending a :class:`Data` event, this
        list is guaranteed to contain the exact object you passed in as
        :attr:`Data.data`. See :ref:`sendfile` for discussion.

        """
        if self.our_state is ERROR:
            raise LocalProtocolError(
                "Can't send data when our state is ERROR")
        try:
            if type(event) is Response:
                self._clean_up_response_headers_for_sending(event)
            # We want to call _process_event before calling the writer,
            # because if someone tries to do something invalid then this will
            # give a sensible error message, while our writers all just assume
            # they will only receive valid events. But, _process_event might
            # change self._writer. So we have to do a little dance:
            writer = self._writer
            self._process_event(self.our_role, event)
            if type(event) is ConnectionClosed:
                return None
            else:
                # In any situation where writer is None, process_event should
                # have raised ProtocolError
                assert writer is not None
                data_list = []
                writer(event, data_list.append)
                return data_list
        except:
            self._process_error(self.our_role)
            raise
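
    # Zero-copy-ish sketch: when streaming a large body, send the pieces
    # individually instead of joining them (`conn`, `sock`, and the bytes-like
    # `chunk` are assumed):
    #
    #     for piece in conn.send_with_data_passthrough(h11.Data(data=chunk)):
    #         sock.sendall(piece)
    #
    # For a Data event, `chunk` itself appears in that list unmodified, which
    # is what makes sendfile()-style optimizations possible.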

    def send_failed(self):
        """Notify the state machine that we failed to send the data it gave
        us.

        This causes :attr:`Connection.our_state` to immediately become
        :data:`ERROR` -- see :ref:`error-handling` for discussion.

        """
        self._process_error(self.our_role)

    # When sending a Response, we take responsibility for a few things:
    #
    # - Sometimes you MUST set Connection: close. We take care of those
    #   times. (You can also set it yourself if you want, and if you do then
    #   we'll respect that and close the connection at the right time. But you
    #   don't have to worry about that unless you want to.)
    #
    # - The user has to set Content-Length if they want it. Otherwise, for
    #   responses that have bodies (e.g. not HEAD), we will automatically
    #   select the right mechanism for streaming a body of unknown length,
    #   which depends on the peer's HTTP version.
    #
    # This function's *only* responsibility is making sure headers are set up
    # right -- everything downstream just looks at the headers. There are no
    # side channels. It mutates the response event in-place (but not the
    # response.headers list object).
    def _clean_up_response_headers_for_sending(self, response):
        assert type(response) is Response

        headers = list(response.headers)
        need_close = False

        # HEAD requests need some special handling: they always act like they
        # have Content-Length: 0, and that's how _body_framing treats
        # them. But their headers are supposed to match what we would send if
        # the request was a GET. (Technically there is one deviation allowed:
        # we're allowed to leave out the framing headers -- see
        # https://tools.ietf.org/html/rfc7231#section-4.3.2 . But it's just as
        # easy to get them right.)
        method_for_choosing_headers = self._request_method
        if method_for_choosing_headers == b"HEAD":
            method_for_choosing_headers = b"GET"
        framing_type, _ = _body_framing(method_for_choosing_headers, response)
        if framing_type in ("chunked", "http/1.0"):
            # This response has a body of unknown length.
            # If our peer is HTTP/1.1, we use Transfer-Encoding: chunked
            # If our peer is HTTP/1.0, we use no framing headers, and close the
            # connection afterwards.
            #
            # Make sure to clear Content-Length (in principle user could have
            # set both and then we ignored Content-Length b/c
            # Transfer-Encoding overwrote it -- this would be naughty of them,
            # but the HTTP spec says that if our peer does this then we have
            # to fix it instead of erroring out, so we'll accord the user the
            # same respect).
            set_comma_header(headers, b"content-length", [])
            if (self.their_http_version is None
                    or self.their_http_version < b"1.1"):
                # Either we never got a valid request and are sending back an
                # error (their_http_version is None), so we assume the worst;
                # or else we did get a valid HTTP/1.0 request, so we know that
                # they don't understand chunked encoding.
                set_comma_header(headers, b"transfer-encoding", [])
                # This is actually redundant ATM, since currently we
                # unconditionally disable keep-alive when talking to HTTP/1.0
                # peers. But let's be defensive just in case we add
                # Connection: keep-alive support later:
                if self._request_method != b"HEAD":
                    need_close = True
            else:
                set_comma_header(headers, b"transfer-encoding", ["chunked"])

        if not self._cstate.keep_alive or need_close:
            # Make sure Connection: close is set
            connection = set(get_comma_header(headers, b"connection"))
            connection.discard(b"keep-alive")
            connection.add(b"close")
            set_comma_header(headers, b"connection", sorted(connection))

        response.headers = headers
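
# End-to-end sketch of how the header fixups above behave (illustrative only):
# a server that replies with
#
#     conn.send(h11.Response(status_code=200,
#                            headers=[("Content-Type", "text/plain")]))
#     conn.send(h11.Data(data=b"hello"))
#     conn.send(h11.EndOfMessage())
#
# without setting Content-Length gets Transfer-Encoding: chunked added
# automatically when the peer speaks HTTP/1.1, or no framing headers plus
# Connection: close when the peer is HTTP/1.0 (or unknown).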