25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

122 lines
4.1 KiB

  # We use native strings for all the re patterns, to take advantage of string
# formatting, and then convert to bytestrings when compiling the final re
# objects.

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace
#   OWS            = *( SP / HTAB )
#                  ; optional whitespace
OWS = r"[ \t]*"

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators
#   token          = 1*tchar
#
#   tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                  / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                  / DIGIT / ALPHA
#                  ; any VCHAR, except delimiters
token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+"

# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields
#   field-name     = token
field_name = token

# The standard says:
#
#   field-value    = *( field-content / obs-fold )
#   field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
#   field-vchar    = VCHAR / obs-text
#   obs-fold       = CRLF 1*( SP / HTAB )
#                  ; obsolete line folding
#                  ; see Section 3.2.4
#
# https://tools.ietf.org/html/rfc5234#appendix-B.1
#
#   VCHAR          =  %x21-7E
#                  ; visible (printing) characters
#
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string
#   obs-text       = %x80-FF
#
# However, the standard definition of field-content is WRONG! It disallows
# fields containing a single visible character surrounded by whitespace,
# e.g. "foo a bar".
#
# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
#
# So our definition of field_content attempts to fix it up...
#
# Note the gap at 0x7F: DEL is a control character and belongs to neither
# VCHAR (%x21-7E) nor obs-text (%x80-FF), so the class is written as two
# ranges rather than a single \x21-\xff span.
vchar_or_obs_text = r"[\x21-\x7e\x80-\xff]"
field_vchar = vchar_or_obs_text
# One or more runs of field-vchar, separated (never led or trailed) by SP/HTAB.
field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(
    field_vchar=field_vchar
)

# We handle obs-fold at a different level, and our fixed-up field_content
# already grows to swallow the whole value, so ? instead of *
field_value = r"({field_content})?".format(field_content=field_content)

#   header-field   = field-name ":" OWS field-value OWS
#
# Exposes the named groups "field_name" and "field_value"; the optional
# whitespace on either side of the value is matched outside the value group.
# The substitutions are passed explicitly so the pattern depends only on the
# rules it actually names.
header_field = (
    r"(?P<field_name>{field_name})"
    r":"
    r"{OWS}"
    r"(?P<field_value>{field_value})"
    r"{OWS}"
).format(field_name=field_name, OWS=OWS, field_value=field_value)
  # https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line
#
#   request-line   = method SP request-target SP HTTP-version CRLF
#   method         = token
#   HTTP-version   = HTTP-name "/" DIGIT "." DIGIT
#   HTTP-name      = %x48.54.54.50 ; "HTTP", case-sensitive
#
# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full
# URL, host+port (for connect), or even "*", but in any case we are guaranteed
# that it contains no whitespace (see sec 3.1.1).
#
# Excluding only SP is not enough: it would let NUL, HTAB, bare CR/LF, DEL and
# the other ASCII control characters through as part of the target. None of
# them can occur in any request-target form, so the class rejects %x00-20 and
# %x7F. Bytes %x80-FF are still accepted, exactly as before.
method = token
request_target = r"[^\x00-\x20\x7f]+"
http_version = r"HTTP/(?P<http_version>[0-9]\.[0-9])"

# Exposes the named groups "method", "target" and "http_version". The
# trailing CRLF is not part of this pattern.
request_line = (
    r"(?P<method>{method})"
    r" "
    r"(?P<target>{request_target})"
    r" "
    r"{http_version}"
).format(method=method, request_target=request_target, http_version=http_version)
  # https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line
#
#   status-line    = HTTP-version SP status-code SP reason-phrase CRLF
#   status-code    = 3DIGIT
#   reason-phrase  = *( HTAB / SP / VCHAR / obs-text )
status_code = r"[0-9]{3}"
reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(
    vchar_or_obs_text=vchar_or_obs_text
)

# Assembled piece by piece; exposes the named groups "http_version" (via the
# http_version fragment), "status_code" and "reason".
status_line = "".join(
    [
        r"{http_version}",
        r" ",
        r"(?P<status_code>{status_code})",
        # However, there are apparently a few too many servers out there that
        # just leave out the reason phrase:
        #   https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036
        #   https://github.com/seanmonstar/httparse/issues/29
        # so make it optional. ?: is a non-capturing group.
        r"(?: (?P<reason>{reason_phrase}))?",
    ]
).format(
    http_version=http_version,
    status_code=status_code,
    reason_phrase=reason_phrase,
)
  # A single hexadecimal digit, either case.
HEXDIG = r"[0-9A-Fa-f]"

# Actually
#
#   chunk-size     = 1*HEXDIG
#
# but we impose an upper-limit to avoid ridiculosity. len(str(2**64)) == 20
# (The doubled braces survive str.format() as a literal {1,20} quantifier.)
chunk_size = r"({HEXDIG}){{1,20}}".format(HEXDIG=HEXDIG)

# Actually
#
#   chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
#
# but we aren't parsing the things so we don't really care.
chunk_ext = r";.*"

# Chunk header line: size, optional extensions, then the terminating CRLF.
# Exposes the named groups "chunk_size" and "chunk_ext".
chunk_header = "".join(
    [
        r"(?P<chunk_size>{chunk_size})",
        r"(?P<chunk_ext>{chunk_ext})?",
        r"\r\n",
    ]
).format(chunk_size=chunk_size, chunk_ext=chunk_ext)