Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.
 
 
 
 

207 righe
5.2 KiB

/* C implementation of performance-sensitive functions. */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdint.h> /* uint32_t, uint64_t */
#include <string.h> /* memcpy */
#if __SSE2__
#include <emmintrin.h>
#endif
  8. static const Py_ssize_t MASK_LEN = 4;
  9. /* Similar to PyBytes_AsStringAndSize, but accepts more types */
  10. static int
  11. _PyBytesLike_AsStringAndSize(PyObject *obj, char **buffer, Py_ssize_t *length)
  12. {
  13. // This supports bytes, bytearrays, and C-contiguous memoryview objects,
  14. // which are the most useful data structures for handling byte streams.
  15. // websockets.framing.prepare_data() returns only values of these types.
  16. // Any object implementing the buffer protocol could be supported, however
  17. // that would require allocation or copying memory, which is expensive.
  18. if (PyBytes_Check(obj))
  19. {
  20. *buffer = PyBytes_AS_STRING(obj);
  21. *length = PyBytes_GET_SIZE(obj);
  22. }
  23. else if (PyByteArray_Check(obj))
  24. {
  25. *buffer = PyByteArray_AS_STRING(obj);
  26. *length = PyByteArray_GET_SIZE(obj);
  27. }
  28. else if (PyMemoryView_Check(obj))
  29. {
  30. Py_buffer *mv_buf;
  31. mv_buf = PyMemoryView_GET_BUFFER(obj);
  32. if (PyBuffer_IsContiguous(mv_buf, 'C'))
  33. {
  34. *buffer = mv_buf->buf;
  35. *length = mv_buf->len;
  36. }
  37. else
  38. {
  39. PyErr_Format(
  40. PyExc_TypeError,
  41. "expected a contiguous memoryview");
  42. return -1;
  43. }
  44. }
  45. else
  46. {
  47. PyErr_Format(
  48. PyExc_TypeError,
  49. "expected a bytes-like object, %.200s found",
  50. Py_TYPE(obj)->tp_name);
  51. return -1;
  52. }
  53. return 0;
  54. }
  55. /* C implementation of websockets.utils.apply_mask */
  56. static PyObject *
  57. apply_mask(PyObject *self, PyObject *args, PyObject *kwds)
  58. {
  59. // In order to support various bytes-like types, accept any Python object.
  60. static char *kwlist[] = {"data", "mask", NULL};
  61. PyObject *input_obj;
  62. PyObject *mask_obj;
  63. // A pointer to a char * + length will be extracted from the data and mask
  64. // arguments, possibly via a Py_buffer.
  65. char *input;
  66. Py_ssize_t input_len;
  67. char *mask;
  68. Py_ssize_t mask_len;
  69. // Initialize a PyBytesObject then get a pointer to the underlying char *
  70. // in order to avoid an extra memory copy in PyBytes_FromStringAndSize.
  71. PyObject *result;
  72. char *output;
  73. // Other variables.
  74. Py_ssize_t i = 0;
  75. // Parse inputs.
  76. if (!PyArg_ParseTupleAndKeywords(
  77. args, kwds, "OO", kwlist, &input_obj, &mask_obj))
  78. {
  79. return NULL;
  80. }
  81. if (_PyBytesLike_AsStringAndSize(input_obj, &input, &input_len) == -1)
  82. {
  83. return NULL;
  84. }
  85. if (_PyBytesLike_AsStringAndSize(mask_obj, &mask, &mask_len) == -1)
  86. {
  87. return NULL;
  88. }
  89. if (mask_len != MASK_LEN)
  90. {
  91. PyErr_SetString(PyExc_ValueError, "mask must contain 4 bytes");
  92. return NULL;
  93. }
  94. // Create output.
  95. result = PyBytes_FromStringAndSize(NULL, input_len);
  96. if (result == NULL)
  97. {
  98. return NULL;
  99. }
  100. // Since we juste created result, we don't need error checks.
  101. output = PyBytes_AS_STRING(result);
  102. // Perform the masking operation.
  103. // Apparently GCC cannot figure out the following optimizations by itself.
  104. // We need a new scope for MSVC 2010 (non C99 friendly)
  105. {
  106. #if __SSE2__
  107. // With SSE2 support, XOR by blocks of 16 bytes = 128 bits.
  108. // Since we cannot control the 16-bytes alignment of input and output
  109. // buffers, we rely on loadu/storeu rather than load/store.
  110. Py_ssize_t input_len_128 = input_len & ~15;
  111. __m128i mask_128 = _mm_set1_epi32(*(uint32_t *)mask);
  112. for (; i < input_len_128; i += 16)
  113. {
  114. __m128i in_128 = _mm_loadu_si128((__m128i *)(input + i));
  115. __m128i out_128 = _mm_xor_si128(in_128, mask_128);
  116. _mm_storeu_si128((__m128i *)(output + i), out_128);
  117. }
  118. #else
  119. // Without SSE2 support, XOR by blocks of 8 bytes = 64 bits.
  120. // We assume the memory allocator aligns everything on 8 bytes boundaries.
  121. Py_ssize_t input_len_64 = input_len & ~7;
  122. uint32_t mask_32 = *(uint32_t *)mask;
  123. uint64_t mask_64 = ((uint64_t)mask_32 << 32) | (uint64_t)mask_32;
  124. for (; i < input_len_64; i += 8)
  125. {
  126. *(uint64_t *)(output + i) = *(uint64_t *)(input + i) ^ mask_64;
  127. }
  128. #endif
  129. }
  130. // XOR the remainder of the input byte by byte.
  131. for (; i < input_len; i++)
  132. {
  133. output[i] = input[i] ^ mask[i & (MASK_LEN - 1)];
  134. }
  135. return result;
  136. }
  137. static PyMethodDef speedups_methods[] = {
  138. {
  139. "apply_mask",
  140. (PyCFunction)apply_mask,
  141. METH_VARARGS | METH_KEYWORDS,
  142. "Apply masking to websocket message.",
  143. },
  144. {NULL, NULL, 0, NULL}, /* Sentinel */
  145. };
  146. static struct PyModuleDef speedups_module = {
  147. PyModuleDef_HEAD_INIT,
  148. "websocket.speedups", /* m_name */
  149. "C implementation of performance sensitive functions.",
  150. /* m_doc */
  151. -1, /* m_size */
  152. speedups_methods, /* m_methods */
  153. NULL,
  154. NULL,
  155. NULL,
  156. NULL
  157. };
  158. PyMODINIT_FUNC
  159. PyInit_speedups(void)
  160. {
  161. return PyModule_Create(&speedups_module);
  162. }