No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 
 

113 líneas
3.8 KiB

  1. import sys
  2. __all__ = ["ReceiveBuffer"]
  3. # Operations we want to support:
  4. # - find next \r\n or \r\n\r\n, or wait until there is one
  5. # - read at-most-N bytes
  6. # Goals:
  7. # - on average, do this fast
  8. # - worst case, do this in O(n) where n is the number of bytes processed
  9. # Plan:
  10. # - store bytearray, offset, how far we've searched for a separator token
  11. # - use the how-far-we've-searched data to avoid rescanning
  12. # - while doing a stream of uninterrupted processing, advance offset instead
  13. # of constantly copying
  14. # WARNING:
  15. # - I haven't benchmarked or profiled any of this yet.
  16. #
  17. # Note that starting in Python 3.4, deleting the initial n bytes from a
  18. # bytearray is amortized O(n), thanks to some excellent work by Antoine
  19. # Pitrou:
  20. #
  21. # https://bugs.python.org/issue19087
  22. #
  23. # This means that if we only supported 3.4+, we could get rid of the code here
  24. # involving self._start and self.compress, because it's doing exactly the same
  25. # thing that bytearray now does internally.
  26. #
  27. # BUT unfortunately, we still support 2.7, and reading short segments out of a
  28. # long buffer MUST be O(bytes read) to avoid DoS issues, so we can't actually
  29. # delete this code. Yet:
  30. #
  31. # https://pythonclock.org/
  32. #
  33. # (Two things to double-check first though: make sure PyPy also has the
  34. # optimization, and benchmark to make sure it's a win, since we do have a
  35. # slightly clever thing where we delay calling compress() until we've
  36. # processed a whole event, which could in theory be slightly more efficient
  37. # than the internal bytearray support.)
  38. class ReceiveBuffer(object):
  39. def __init__(self):
  40. self._data = bytearray()
  41. # These are both absolute offsets into self._data:
  42. self._start = 0
  43. self._looked_at = 0
  44. self._looked_for = b""
  45. def __bool__(self):
  46. return bool(len(self))
  47. # for @property unprocessed_data
  48. def __bytes__(self):
  49. return bytes(self._data[self._start:])
  50. if sys.version_info[0] < 3: # version specific: Python 2
  51. __str__ = __bytes__
  52. __nonzero__ = __bool__
  53. def __len__(self):
  54. return len(self._data) - self._start
  55. def compress(self):
  56. # Heuristic: only compress if it lets us reduce size by a factor
  57. # of 2
  58. if self._start > len(self._data) // 2:
  59. del self._data[:self._start]
  60. self._looked_at -= self._start
  61. self._start -= self._start
  62. def __iadd__(self, byteslike):
  63. self._data += byteslike
  64. return self
  65. def maybe_extract_at_most(self, count):
  66. out = self._data[self._start:self._start + count]
  67. if not out:
  68. return None
  69. self._start += len(out)
  70. return out
  71. def maybe_extract_until_next(self, needle):
  72. # Returns extracted bytes on success (advancing offset), or None on
  73. # failure
  74. if self._looked_for == needle:
  75. search_start = max(self._start, self._looked_at - len(needle) + 1)
  76. else:
  77. search_start = self._start
  78. offset = self._data.find(needle, search_start)
  79. if offset == -1:
  80. self._looked_at = len(self._data)
  81. self._looked_for = needle
  82. return None
  83. new_start = offset + len(needle)
  84. out = self._data[self._start:new_start]
  85. self._start = new_start
  86. return out
  87. # HTTP/1.1 has a number of constructs where you keep reading lines until
  88. # you see a blank one. This does that, and then returns the lines.
  89. def maybe_extract_lines(self):
  90. if self._data[self._start:self._start + 2] == b"\r\n":
  91. self._start += 2
  92. return []
  93. else:
  94. data = self.maybe_extract_until_next(b"\r\n\r\n")
  95. if data is None:
  96. return None
  97. lines = data.split(b"\r\n")
  98. assert lines[-2] == lines[-1] == b""
  99. del lines[-2:]
  100. return lines