You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

397 lines
13 KiB

  1. import sys
  2. import math
  3. from collections.abc import Mapping, Sequence, Set
  4. from datetime import datetime
  5. from sentry_sdk.utils import (
  6. AnnotatedValue,
  7. capture_internal_exception,
  8. disable_capture_event,
  9. format_timestamp,
  10. safe_repr,
  11. strip_string,
  12. )
  13. from typing import TYPE_CHECKING
  14. if TYPE_CHECKING:
  15. from types import TracebackType
  16. from typing import Any
  17. from typing import Callable
  18. from typing import ContextManager
  19. from typing import Dict
  20. from typing import List
  21. from typing import Optional
  22. from typing import Type
  23. from typing import Union
  24. from sentry_sdk._types import NotImplementedType
  25. Span = Dict[str, Any]
  26. ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]]
  27. Segment = Union[str, int]
# Bytes are technically not strings in Python 3, but we can serialize them.
# Values of these types are never walked element-by-element as sequences by
# the serializer, even though some of them are Sequence instances.
serializable_str_types = (str, bytes, bytearray, memoryview)

# Maximum length of JSON-serialized event payloads that can be safely sent
# before the server may reject the event due to its size. This is not intended
# to reflect actual values defined server-side, but rather only be an upper
# bound for events sent by the SDK.
#
# Can be overwritten if wanting to send more bytes, e.g. with a custom server.
# When changing this, keep in mind that events may be a little bit larger than
# this value due to attached metadata, so keep the number conservative.
MAX_EVENT_BYTES = 10**6

# Maximum depth and breadth of databags. Excess data will be trimmed. If
# max_request_body_size is "always", request bodies won't be trimmed.
MAX_DATABAG_DEPTH = 5
MAX_DATABAG_BREADTH = 10

# Placeholder emitted by serialize() instead of a value that is already being
# serialized further up the current path (i.e. a reference cycle).
CYCLE_MARKER = "<cyclic>"
# Processors consulted by serialize() for databag values, in registration
# order. Extend via add_global_repr_processor().
global_repr_processors = []  # type: List[ReprProcessor]


def add_global_repr_processor(processor):
    # type: (ReprProcessor) -> None
    """
    Register a repr processor for all subsequent serialize() calls.

    While serializing databag values, each registered processor is called as
    ``processor(obj, hints)``; the first result that is not ``NotImplemented``
    is used as the serialized value.

    :param processor: The callable to append to the global processor list.
    """
    global_repr_processors.append(processor)
# Types whose instances serialize() walks element-by-element and emits as
# lists (unless they are one of serializable_str_types). Extend via
# add_repr_sequence_type().
sequence_types = [Sequence, Set]  # type: List[type]


def add_repr_sequence_type(ty):
    # type: (type) -> None
    """
    Register an additional type that serialize() should treat as a sequence.

    :param ty: The type to append to the global list of sequence types.
    """
    sequence_types.append(ty)
  52. class Memo:
  53. __slots__ = ("_ids", "_objs")
  54. def __init__(self):
  55. # type: () -> None
  56. self._ids = {} # type: Dict[int, Any]
  57. self._objs = [] # type: List[Any]
  58. def memoize(self, obj):
  59. # type: (Any) -> ContextManager[bool]
  60. self._objs.append(obj)
  61. return self
  62. def __enter__(self):
  63. # type: () -> bool
  64. obj = self._objs[-1]
  65. if id(obj) in self._ids:
  66. return True
  67. else:
  68. self._ids[id(obj)] = obj
  69. return False
  70. def __exit__(
  71. self,
  72. ty, # type: Optional[Type[BaseException]]
  73. value, # type: Optional[BaseException]
  74. tb, # type: Optional[TracebackType]
  75. ):
  76. # type: (...) -> None
  77. self._ids.pop(id(self._objs.pop()), None)
def serialize(event, **kwargs):
    # type: (Dict[str, Any], **Any) -> Dict[str, Any]
    """
    A very smart serializer that takes a dict and emits a json-friendly dict.
    Currently used for serializing the final Event and also prematurely while fetching the stack
    local variables for each frame in a stacktrace.

    It works internally with 'databags' which are arbitrary data structures like Mapping, Sequence and Set.
    The algorithm itself is a recursive graph walk down the data structures it encounters.

    It has the following responsibilities:
    * Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
    * Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
    * Annotating the payload with the _meta field whenever trimming happens.

    :param max_request_body_size: If set to "always", will never trim request bodies.
    :param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
    :param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.
    :param custom_repr: A custom repr function that runs before safe_repr on the object to be serialized. If it returns None (or any other falsy value, such as an empty string) or throws internally, we will fall back to safe_repr.

    Any keyword arguments other than the four above are forwarded unchanged to
    the internal ``_serialize_node`` call for the root ``event``.

    :returns: The serialized event. When not serializing vars, if any metadata
        was recorded and the result is a dict, the metadata is attached under
        the ``"_meta"`` key.
    """
    memo = Memo()
    # Segments (mapping keys / sequence indices) from the root to the node
    # that is currently being serialized.
    path = []  # type: List[Segment]
    # Stack of metadata dicts mirroring `path`; index 0 is the root metadata.
    meta_stack = []  # type: List[Dict[str, Any]]

    keep_request_bodies = (
        kwargs.pop("max_request_body_size", None) == "always"
    )  # type: bool
    max_value_length = kwargs.pop("max_value_length", None)  # type: Optional[int]
    is_vars = kwargs.pop("is_vars", False)
    custom_repr = kwargs.pop("custom_repr", None)  # type: Callable[..., Optional[str]]

    def _safe_repr_wrapper(value):
        # type: (Any) -> str
        """
        Return custom_repr(value) if a custom repr is configured and yields a
        truthy result; otherwise (including when it raises) safe_repr(value).
        """
        try:
            repr_value = None
            if custom_repr is not None:
                repr_value = custom_repr(value)
            # NOTE: `or` means any falsy result (None, "") falls back.
            return repr_value or safe_repr(value)
        except Exception:
            return safe_repr(value)

    def _annotate(**meta):
        # type: (**Any) -> None
        """
        Merge `meta` into the metadata node for the current `path`, creating
        intermediate metadata nodes on demand.
        """
        # Grow meta_stack until it has one node per path segment plus the root.
        while len(meta_stack) <= len(path):
            try:
                segment = path[len(meta_stack) - 1]
                node = meta_stack[-1].setdefault(str(segment), {})
            except IndexError:
                # meta_stack is empty: create the root node.
                node = {}

            meta_stack.append(node)

        # The node's own metadata lives under the empty-string key.
        meta_stack[-1].setdefault("", {}).update(meta)

    def _is_databag():
        # type: () -> Optional[bool]
        """
        A databag is any value that we need to trim.
        True for stuff like vars, request bodies, breadcrumbs and extra.

        :returns: `True` for "yes", `False` for "no", `None` for "maybe soon".
        """
        try:
            if is_vars:
                return True

            is_request_body = _is_request_body()
            if is_request_body in (True, None):
                return is_request_body

            p0 = path[0]
            if p0 == "breadcrumbs" and path[1] == "values":
                # Evaluated only for its IndexError: the path must be at least
                # three segments long for this to count as a databag.
                path[2]
                return True

            if p0 == "extra":
                return True

        except IndexError:
            # Path is too short to decide yet.
            return None

        return False

    def _is_request_body():
        # type: () -> Optional[bool]
        """
        :returns: `True` if the current path starts with "request", "data";
            `None` if the path is too short to tell; `False` otherwise.
        """
        try:
            if path[0] == "request" and path[1] == "data":
                return True
        except IndexError:
            return None

        return False

    def _serialize_node(
        obj,  # type: Any
        is_databag=None,  # type: Optional[bool]
        is_request_body=None,  # type: Optional[bool]
        should_repr_strings=None,  # type: Optional[bool]
        segment=None,  # type: Optional[Segment]
        remaining_breadth=None,  # type: Optional[Union[int, float]]
        remaining_depth=None,  # type: Optional[Union[int, float]]
    ):
        # type: (...) -> Any
        """
        Serialize one node: maintain `path`/`meta_stack` around the call,
        short-circuit reference cycles, and turn any failure into a
        placeholder instead of propagating it.
        """
        if segment is not None:
            path.append(segment)

        try:
            with memo.memoize(obj) as result:
                if result:
                    # obj is already being serialized higher up the path.
                    return CYCLE_MARKER

                return _serialize_node_impl(
                    obj,
                    is_databag=is_databag,
                    is_request_body=is_request_body,
                    should_repr_strings=should_repr_strings,
                    remaining_depth=remaining_depth,
                    remaining_breadth=remaining_breadth,
                )
        except BaseException:
            # Serialization errors are reported internally and replaced by a
            # placeholder (databags) or None (everything else).
            capture_internal_exception(sys.exc_info())

            if is_databag:
                return "<failed to serialize, use init(debug=True) to see error logs>"

            return None
        finally:
            if segment is not None:
                path.pop()
                # Drop metadata nodes that belonged to the segment just left.
                del meta_stack[len(path) + 1 :]

    def _flatten_annotated(obj):
        # type: (Any) -> Any
        """
        If `obj` is an AnnotatedValue, record its metadata for the current
        path and return the wrapped value; otherwise return `obj` unchanged.
        """
        if isinstance(obj, AnnotatedValue):
            _annotate(**obj.metadata)
            obj = obj.value
        return obj

    def _serialize_node_impl(
        obj,
        is_databag,
        is_request_body,
        should_repr_strings,
        remaining_depth,
        remaining_breadth,
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[Union[float, int]], Optional[Union[float, int]]) -> Any
        """
        Type-dispatching core of the walk; see serialize() for the contract.
        `None` for any of the flags/limits means "not decided yet".
        """
        if isinstance(obj, AnnotatedValue):
            should_repr_strings = False
        if should_repr_strings is None:
            should_repr_strings = is_vars

        if is_databag is None:
            is_databag = _is_databag()

        if is_request_body is None:
            is_request_body = _is_request_body()

        if is_databag:
            if is_request_body and keep_request_bodies:
                # Request bodies are kept untrimmed when configured so.
                remaining_depth = float("inf")
                remaining_breadth = float("inf")
            else:
                if remaining_depth is None:
                    remaining_depth = MAX_DATABAG_DEPTH
                if remaining_breadth is None:
                    remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        if remaining_depth is not None and remaining_depth <= 0:
            # Depth budget exhausted: annotate and stop descending.
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(
                    strip_string(_safe_repr_wrapper(obj), max_length=max_value_length)
                )
            return None

        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                # First processor that handles the object wins.
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        # Looked up on the type, not the instance.
        sentry_repr = getattr(type(obj), "__sentry_repr__", None)

        if obj is None or isinstance(obj, (bool, int, float)):
            # inf/nan are always repr()'d rather than passed through as floats.
            if should_repr_strings or (
                isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
            ):
                return _safe_repr_wrapper(obj)
            else:
                return obj

        elif callable(sentry_repr):
            return sentry_repr(obj)

        elif isinstance(obj, datetime):
            return (
                str(format_timestamp(obj))
                if not should_repr_strings
                else _safe_repr_wrapper(obj)
            )

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(obj.items())

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in obj.items():
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget exhausted: record the original length.
                    _annotate(len=len(obj))
                    break

                str_k = str(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    is_request_body=is_request_body,
                    remaining_depth=(
                        remaining_depth - 1 if remaining_depth is not None else None
                    ),
                    remaining_breadth=remaining_breadth,
                )
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(
            obj, tuple(sequence_types)
        ):
            rv_list = []

            for i, v in enumerate(obj):
                if remaining_breadth is not None and i >= remaining_breadth:
                    # Breadth budget exhausted: record the original length.
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        is_request_body=is_request_body,
                        remaining_depth=(
                            remaining_depth - 1 if remaining_depth is not None else None
                        ),
                        remaining_breadth=remaining_breadth,
                    )
                )

            return rv_list

        # Everything else is turned into a string.
        if should_repr_strings:
            obj = _safe_repr_wrapper(obj)
        else:
            if isinstance(obj, bytes) or isinstance(obj, bytearray):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, str):
                obj = _safe_repr_wrapper(obj)

        # Span descriptions (path "spans", <segment>, "description") are
        # returned without being passed through strip_string.
        is_span_description = (
            len(path) == 3 and path[0] == "spans" and path[-1] == "description"
        )
        if is_span_description:
            return obj

        return _flatten_annotated(strip_string(obj, max_length=max_value_length))

    #
    # Start of serialize() function
    #
    # NOTE(review): presumably this flag keeps new events from being captured
    # while serialization is running -- confirm against sentry_sdk.utils.
    disable_capture_event.set(True)
    try:
        serialized_event = _serialize_node(event, **kwargs)
        if not is_vars and meta_stack and isinstance(serialized_event, dict):
            serialized_event["_meta"] = meta_stack[0]

        return serialized_event
    finally:
        disable_capture_event.set(False)