|
- #cython: language_level=3
-
- from __future__ import print_function
- from typing import Optional
-
- from cpython.mem cimport PyMem_Malloc, PyMem_Free
- from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \
- Py_buffer, PyBytes_AsString
-
- from .python cimport PyMemoryView_Check, PyMemoryView_GET_BUFFER
-
-
- from .errors import (HttpParserError,
- HttpParserCallbackError,
- HttpParserInvalidStatusError,
- HttpParserInvalidMethodError,
- HttpParserInvalidURLError,
- HttpParserUpgrade)
-
- cimport cython
- from . cimport cparser
-
-
# Names exported by ``from <module> import *``.
__all__ = (
    'HttpRequestParser',
    'HttpResponseParser',
)
-
-
@cython.internal
cdef class HttpParser:
    """Common base of the request and response parsers.

    Owns a heap-allocated ``llhttp_t`` / ``llhttp_settings_t`` pair and
    relays llhttp callbacks to the optional ``on_*`` attributes looked up on
    the protocol object passed to ``_init``.
    """

    cdef:
        # llhttp parser state and its callback table (allocated in __cinit__).
        cparser.llhttp_t* _cparser
        cparser.llhttp_settings_t* _csettings

        # Header being accumulated; both are None when nothing is pending.
        bytes _current_header_name
        bytes _current_header_value

        # Protocol callbacks; an entry is None when the protocol lacks it.
        object _proto_on_url, _proto_on_status, _proto_on_body
        object _proto_on_header, _proto_on_headers_complete
        object _proto_on_message_complete, _proto_on_chunk_header
        object _proto_on_chunk_complete, _proto_on_message_begin

        # Exception captured inside a callback, re-attached in feed_data().
        object _last_error

        # Scratch buffer used by feed_data() for non-memoryview input.
        Py_buffer py_buf

    def __cinit__(self):
        # Allocate the parser first, then the settings table; either
        # allocation failing surfaces as MemoryError.
        self._cparser = <cparser.llhttp_t*> PyMem_Malloc(
            sizeof(cparser.llhttp_t))
        if self._cparser is NULL:
            raise MemoryError()

        self._csettings = <cparser.llhttp_settings_t*> PyMem_Malloc(
            sizeof(cparser.llhttp_settings_t))
        if self._csettings is NULL:
            raise MemoryError()

    def __dealloc__(self):
        # Release both structs obtained in __cinit__.
        PyMem_Free(self._cparser)
        PyMem_Free(self._csettings)

    cdef _init(self, protocol, cparser.llhttp_type_t mode):
        """Initialise llhttp for *mode* and bind *protocol*'s callbacks.

        Each ``on_*`` attribute is fetched with ``getattr(..., None)``.
        The header-complete and chunk callbacks are always installed; the
        remaining C callbacks are installed only when the matching protocol
        attribute exists.
        """
        cdef cparser.llhttp_settings_t* settings = self._csettings

        cparser.llhttp_settings_init(settings)
        cparser.llhttp_init(self._cparser, mode, settings)

        # Lets the C callbacks recover this object from the llhttp struct.
        self._cparser.data = <void*>self

        self._current_header_name = None
        self._current_header_value = None

        on_header = getattr(protocol, 'on_header', None)
        self._proto_on_header = on_header
        if on_header is not None:
            settings.on_header_field = cb_on_header_field
            settings.on_header_value = cb_on_header_value

        self._proto_on_headers_complete = getattr(
            protocol, 'on_headers_complete', None)
        settings.on_headers_complete = cb_on_headers_complete

        on_body = getattr(protocol, 'on_body', None)
        self._proto_on_body = on_body
        if on_body is not None:
            settings.on_body = cb_on_body

        on_message_begin = getattr(protocol, 'on_message_begin', None)
        self._proto_on_message_begin = on_message_begin
        if on_message_begin is not None:
            settings.on_message_begin = cb_on_message_begin

        on_message_complete = getattr(protocol, 'on_message_complete', None)
        self._proto_on_message_complete = on_message_complete
        if on_message_complete is not None:
            settings.on_message_complete = cb_on_message_complete

        self._proto_on_chunk_header = getattr(
            protocol, 'on_chunk_header', None)
        settings.on_chunk_header = cb_on_chunk_header

        self._proto_on_chunk_complete = getattr(
            protocol, 'on_chunk_complete', None)
        settings.on_chunk_complete = cb_on_chunk_complete

        self._last_error = None

    cdef _maybe_call_on_header(self):
        """Flush the pending header, if any, to the protocol's on_header."""
        if self._current_header_value is None:
            return

        name = self._current_header_name
        value = self._current_header_value

        # Clear the pending state before invoking user code.
        self._current_header_name = None
        self._current_header_value = None

        handler = self._proto_on_header
        if handler is not None:
            handler(name, value)

    cdef _on_header_field(self, bytes field):
        """Accumulate a header-name fragment, flushing any finished header."""
        self._maybe_call_on_header()
        if self._current_header_name is not None:
            self._current_header_name = self._current_header_name + field
        else:
            self._current_header_name = field

    cdef _on_header_value(self, bytes val):
        """Accumulate a header-value fragment."""
        if self._current_header_value is not None:
            # Rare: header values normally arrive in a single fragment.
            self._current_header_value = self._current_header_value + val
        else:
            self._current_header_value = val

    cdef _on_headers_complete(self):
        """Flush the last pending header, then notify the protocol."""
        self._maybe_call_on_header()

        handler = self._proto_on_headers_complete
        if handler is not None:
            handler()

    cdef _on_chunk_header(self):
        """Notify the protocol that a chunk header was parsed.

        Raises HttpParserError when a header name or value is still pending.
        """
        if not (self._current_header_value is None and
                self._current_header_name is None):
            raise HttpParserError('invalid headers state')

        handler = self._proto_on_chunk_header
        if handler is not None:
            handler()

    cdef _on_chunk_complete(self):
        """Flush any pending header, then notify the protocol."""
        self._maybe_call_on_header()

        handler = self._proto_on_chunk_complete
        if handler is not None:
            handler()

    ### Public API ###

    def set_dangerous_leniencies(
        self,
        lenient_headers: Optional[bool] = None,
        lenient_chunked_length: Optional[bool] = None,
        lenient_keep_alive: Optional[bool] = None,
        lenient_transfer_encoding: Optional[bool] = None,
        lenient_version: Optional[bool] = None,
        lenient_data_after_close: Optional[bool] = None,
        lenient_optional_lf_after_cr: Optional[bool] = None,
        lenient_optional_cr_before_lf: Optional[bool] = None,
        lenient_optional_crlf_after_chunk: Optional[bool] = None,
        lenient_spaces_after_chunk_size: Optional[bool] = None,
    ):
        """Toggle individual llhttp leniency flags.

        Every argument left as ``None`` is skipped; any other value is
        passed to the corresponding ``llhttp_set_lenient_*`` function.
        """
        cdef cparser.llhttp_t* p = self._cparser

        if lenient_headers is not None:
            cparser.llhttp_set_lenient_headers(p, lenient_headers)

        if lenient_chunked_length is not None:
            cparser.llhttp_set_lenient_chunked_length(
                p, lenient_chunked_length)

        if lenient_keep_alive is not None:
            cparser.llhttp_set_lenient_keep_alive(p, lenient_keep_alive)

        if lenient_transfer_encoding is not None:
            cparser.llhttp_set_lenient_transfer_encoding(
                p, lenient_transfer_encoding)

        if lenient_version is not None:
            cparser.llhttp_set_lenient_version(p, lenient_version)

        if lenient_data_after_close is not None:
            cparser.llhttp_set_lenient_data_after_close(
                p, lenient_data_after_close)

        if lenient_optional_lf_after_cr is not None:
            cparser.llhttp_set_lenient_optional_lf_after_cr(
                p, lenient_optional_lf_after_cr)

        if lenient_optional_cr_before_lf is not None:
            cparser.llhttp_set_lenient_optional_cr_before_lf(
                p, lenient_optional_cr_before_lf)

        if lenient_optional_crlf_after_chunk is not None:
            cparser.llhttp_set_lenient_optional_crlf_after_chunk(
                p, lenient_optional_crlf_after_chunk)

        if lenient_spaces_after_chunk_size is not None:
            cparser.llhttp_set_lenient_spaces_after_chunk_size(
                p, lenient_spaces_after_chunk_size)

    def get_http_version(self):
        """Return the parsed HTTP version as a ``"major.minor"`` string."""
        cdef cparser.llhttp_t* p = self._cparser
        major = p.http_major
        minor = p.http_minor
        return '{}.{}'.format(major, minor)

    def should_keep_alive(self):
        """Return llhttp's keep-alive verdict as a bool."""
        keep_alive = cparser.llhttp_should_keep_alive(self._cparser)
        return bool(keep_alive)

    def should_upgrade(self):
        """Return the parser's ``upgrade`` flag as a bool."""
        return bool(self._cparser.upgrade)

    def feed_data(self, data):
        """Run the parser over *data*.

        *data* is either a memoryview (its buffer is borrowed directly) or
        any object accepted by ``PyObject_GetBuffer(..., PyBUF_SIMPLE)``.

        Raises HttpParserUpgrade, carrying the offset into *data* reported
        by llhttp, when the parser pauses for an upgrade; otherwise raises
        the exception built by ``parser_error_from_errno`` when llhttp
        reports anything other than ``HPE_OK``.
        """
        cdef:
            size_t data_len
            cparser.llhttp_errno_t err
            Py_buffer *buf
            bint owning_buf = False
            const char* err_pos

        if PyMemoryView_Check(data):
            # Borrow the memoryview's own buffer; nothing to release later.
            buf = PyMemoryView_GET_BUFFER(data)
        else:
            # Acquire a buffer we own and must release in the finally below.
            buf = &self.py_buf
            PyObject_GetBuffer(data, buf, PyBUF_SIMPLE)
            owning_buf = True

        data_len = <size_t>buf.len
        err = cparser.llhttp_execute(self._cparser, <char*>buf.buf, data_len)

        try:
            if (self._cparser.upgrade == 1 and
                    err == cparser.HPE_PAUSED_UPGRADE):
                err_pos = cparser.llhttp_get_error_pos(self._cparser)

                # Leave the paused "error" state right away, mimicking the
                # older http-parser behaviour: there has never been a public
                # "resume after upgrade" API here, and resuming is a very
                # rare need.
                cparser.llhttp_resume_after_upgrade(self._cparser)

                # err_pos is only meaningful relative to this input buffer.
                # Adopting llhttp's behaviour (re-raising HttpParserUpgrade
                # on later feed_data() calls until an explicit resume) would
                # require storing this result and tracking state ourselves.
                raise HttpParserUpgrade(err_pos - <char*>buf.buf)
        finally:
            if owning_buf:
                PyBuffer_Release(buf)

        if err == cparser.HPE_OK:
            return

        ex = parser_error_from_errno(
            self._cparser,
            <cparser.llhttp_errno_t> self._cparser.error)

        # Chain the exception captured inside the failing callback, if any.
        if (isinstance(ex, HttpParserCallbackError) and
                self._last_error is not None):
            ex.__context__ = self._last_error
            self._last_error = None

        raise ex
-
-
cdef class HttpRequestParser(HttpParser):
    """HTTP request parser that reports events to a protocol object."""

    def __init__(self, protocol):
        # Shared setup in request mode, then the request-only URL callback.
        self._init(protocol, cparser.HTTP_REQUEST)

        on_url = getattr(protocol, 'on_url', None)
        self._proto_on_url = on_url
        if on_url is not None:
            self._csettings.on_url = cb_on_url

    def get_method(self):
        """Return llhttp's name for the parsed request method."""
        return cparser.llhttp_method_name(
            <cparser.llhttp_method_t> self._cparser.method)
-
-
cdef class HttpResponseParser(HttpParser):
    """HTTP response parser that reports events to a protocol object."""

    def __init__(self, protocol):
        # Shared setup in response mode, then the response-only status
        # callback.
        self._init(protocol, cparser.HTTP_RESPONSE)

        on_status = getattr(protocol, 'on_status', None)
        self._proto_on_status = on_status
        if on_status is not None:
            self._csettings.on_status = cb_on_status

    def get_status_code(self):
        """Return the status code stored on the llhttp parser struct."""
        return self._cparser.status_code
-
-
cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1:
    # llhttp callback: invoke the protocol's on_message_begin.
    # Any exception is stored on the owning parser and reported as -1.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._proto_on_message_begin()
    except BaseException as exc:
        owner._last_error = exc
        return -1
    return 0
-
-
cdef int cb_on_url(cparser.llhttp_t* parser,
                   const char *at, size_t length) except -1:
    # llhttp data callback: hand the `length` bytes at `at` to on_url.
    # On failure, record the exception, set the llhttp error reason and
    # return HPE_USER.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._proto_on_url(at[:length])
    except BaseException as exc:
        cparser.llhttp_set_error_reason(parser, "`on_url` callback error")
        owner._last_error = exc
        return cparser.HPE_USER
    return 0
-
-
cdef int cb_on_status(cparser.llhttp_t* parser,
                      const char *at, size_t length) except -1:
    # llhttp data callback: hand the `length` bytes at `at` to on_status.
    # On failure, record the exception, set the llhttp error reason and
    # return HPE_USER.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._proto_on_status(at[:length])
    except BaseException as exc:
        cparser.llhttp_set_error_reason(parser, "`on_status` callback error")
        owner._last_error = exc
        return cparser.HPE_USER
    return 0
-
-
cdef int cb_on_header_field(cparser.llhttp_t* parser,
                            const char *at, size_t length) except -1:
    # llhttp data callback: pass a header-name fragment to the owning
    # parser's _on_header_field. On failure, record the exception, set the
    # llhttp error reason and return HPE_USER.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._on_header_field(at[:length])
    except BaseException as exc:
        cparser.llhttp_set_error_reason(parser, "`on_header_field` callback error")
        owner._last_error = exc
        return cparser.HPE_USER
    return 0
-
-
cdef int cb_on_header_value(cparser.llhttp_t* parser,
                            const char *at, size_t length) except -1:
    # llhttp data callback: pass a header-value fragment to the owning
    # parser's _on_header_value. On failure, record the exception, set the
    # llhttp error reason and return HPE_USER.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._on_header_value(at[:length])
    except BaseException as exc:
        cparser.llhttp_set_error_reason(parser, "`on_header_value` callback error")
        owner._last_error = exc
        return cparser.HPE_USER
    return 0
-
-
cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
    # llhttp callback: run the owning parser's _on_headers_complete.
    # Returns -1 after recording an exception; otherwise 1 when the
    # parser's upgrade flag is set and 0 when it is not.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._on_headers_complete()
    except BaseException as exc:
        owner._last_error = exc
        return -1
    return 1 if owner._cparser.upgrade else 0
-
-
cdef int cb_on_body(cparser.llhttp_t* parser,
                    const char *at, size_t length) except -1:
    # llhttp data callback: hand the `length` bytes at `at` to on_body.
    # On failure, record the exception, set the llhttp error reason and
    # return HPE_USER.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._proto_on_body(at[:length])
    except BaseException as exc:
        cparser.llhttp_set_error_reason(parser, "`on_body` callback error")
        owner._last_error = exc
        return cparser.HPE_USER
    return 0
-
-
cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1:
    # llhttp callback: invoke the protocol's on_message_complete.
    # Any exception is stored on the owning parser and reported as -1.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._proto_on_message_complete()
    except BaseException as exc:
        owner._last_error = exc
        return -1
    return 0
-
-
cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1:
    # llhttp callback: run the owning parser's _on_chunk_header.
    # Any exception is stored on the owning parser and reported as -1.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._on_chunk_header()
    except BaseException as exc:
        owner._last_error = exc
        return -1
    return 0
-
-
cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1:
    # llhttp callback: run the owning parser's _on_chunk_complete.
    # Any exception is stored on the owning parser and reported as -1.
    cdef HttpParser owner = <HttpParser>parser.data
    try:
        owner._on_chunk_complete()
    except BaseException as exc:
        owner._last_error = exc
        return -1
    return 0
-
-
cdef parser_error_from_errno(cparser.llhttp_t* parser, cparser.llhttp_errno_t errno):
    # Build (but do not raise) the exception matching an llhttp error code.
    # The message is the parser's current error reason decoded as latin-1.
    cdef bytes reason = cparser.llhttp_get_error_reason(parser)

    if errno == cparser.HPE_INVALID_STATUS:
        error_type = HttpParserInvalidStatusError

    elif errno == cparser.HPE_INVALID_METHOD:
        error_type = HttpParserInvalidMethodError

    elif errno == cparser.HPE_INVALID_URL:
        error_type = HttpParserInvalidURLError

    elif (errno == cparser.HPE_CB_MESSAGE_BEGIN or
            errno == cparser.HPE_CB_HEADERS_COMPLETE or
            errno == cparser.HPE_CB_MESSAGE_COMPLETE or
            errno == cparser.HPE_CB_CHUNK_HEADER or
            errno == cparser.HPE_CB_CHUNK_COMPLETE or
            errno == cparser.HPE_USER):
        # Failures that originate in a callback.
        error_type = HttpParserCallbackError

    else:
        error_type = HttpParserError

    return error_type(reason.decode('latin-1'))
|