You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

249 lines
7.5 KiB

  1. """
  2. Functions to parse datetime objects.
  3. We're using regular expressions rather than time.strptime because:
  4. - They provide both validation and parsing.
  5. - They're more flexible for datetimes.
  6. - The date/datetime/time constructors produce friendlier error messages.
  7. Stolen from https://raw.githubusercontent.com/django/django/master/django/utils/dateparse.py at
  8. 9718fa2e8abe430c3526a9278dd976443d4ae3c6
  9. Changed to:
  10. * use standard python datetime types not django.utils.timezone
  11. * raise ValueError when regex doesn't match rather than returning None
  12. * support parsing unix timestamps for dates and datetimes
  13. """
  14. import re
  15. from datetime import date, datetime, time, timedelta, timezone
  16. from typing import Dict, Optional, Type, Union
  17. from . import errors
  # --- Patterns for calendar dates, times of day and combined datetimes ---

# Calendar date: a four-digit year followed by a one- or two-digit month and day.
date_expr = r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})'

# Time of day: hour and minute are mandatory; the seconds, the fractional part
# and the trailing zone designator ("Z" or a signed HH, HHMM or HH:MM offset)
# are optional. At most six fractional digits are captured; up to six further
# digits are matched but thrown away. The expression ends with its own "$".
time_expr = (
    r'(?P<hour>\d{1,2}):(?P<minute>\d{1,2})'
    r'(?::(?P<second>\d{1,2})(?:\.(?P<microsecond>\d{1,6})\d{0,6})?)?'
    r'(?P<tzinfo>Z|[+-]\d{2}(?::?\d{2})?)?$'
)

# Compiled forms. None of them has a leading "^"; they are meant to be used
# with ``.match()``, which already anchors at the start of the string.
time_re = re.compile(time_expr)
date_re = re.compile(f'{date_expr}$')
# A datetime is a date and a time separated by either "T" or a single space.
datetime_re = re.compile(f'{date_expr}[T ]{time_expr}')
  # --- Pattern for the "[D day[s], ][[HH:]MM:]SS[.ffffff]" duration format ---
# Each numeric component may carry its own leading minus sign. Only the seconds
# are mandatory. At most six fractional digits are captured; up to six further
# digits are matched but thrown away.
standard_duration_re = re.compile(
    r'^'
    # Day count, followed by a space and an optional "day, " / "days, " label.
    r'(?:(?P<days>-?\d+) (days?, )?)?'
    # Hours are only recognised when a "MM:SS" pair follows (lookahead).
    r'((?:(?P<hours>-?\d+):)(?=\d+:\d+))?'
    r'(?:(?P<minutes>-?\d+):)?'
    r'(?P<seconds>-?\d+)'
    r'(?:\.(?P<microseconds>\d{1,6})\d{0,6})?'
    r'$'
)
  # Support the sections of the ISO 8601 duration representation that are accepted by timedelta.
# Shape: an optional sign, the literal "P", an optional day count, then an
# optional "T" section holding hours, minutes and seconds. Every number may
# carry a decimal fraction.
# The fraction separator is an escaped, literal dot: an unescaped "." would
# match any character, letting input such as "P1x5D" through to float().
iso8601_duration_re = re.compile(
    r'^(?P<sign>[-+]?)'
    r'P'
    r'(?:(?P<days>\d+(\.\d+)?)D)?'
    r'(?:T'
    r'(?:(?P<hours>\d+(\.\d+)?)H)?'
    r'(?:(?P<minutes>\d+(\.\d+)?)M)?'
    r'(?:(?P<seconds>\d+(\.\d+)?)S)?'
    r')?'
    r'$'
)
  # --- Constants and aliases used when treating numbers as unix timestamps ---

# Every raw input type the parsers accept besides the native date/time objects.
StrBytesIntFloat = Union[str, bytes, int, float]

# Start of the unix epoch, as a naive datetime.
EPOCH = datetime(1970, 1, 1)

# if greater than this, the number is in ms, if less than or equal it's in seconds
# (in seconds this is 11th October 2603, in ms it's 20th August 1970)
MS_WATERSHED = 2 * 10 ** 10

# slightly more than datetime.max in ns - (datetime.max - EPOCH).total_seconds() * 1e9
MAX_NUMBER = 3 * 10 ** 20
  # Coerce a raw input into a number where possible.
def get_numeric(value: StrBytesIntFloat, native_expected_type: str) -> Union[None, int, float]:
    """
    Return ``value`` as a number, or ``None`` when it is not numeric.

    Ints and floats are handed back untouched. Anything else is passed to
    ``float()``: a ``ValueError`` from that call means "not a number" and
    yields ``None``.

    :param value: the raw input to inspect
    :param native_expected_type: name of the native type the caller accepts,
        used only to build the error message
    :raises TypeError: when ``float()`` rejects the type of ``value``
    """
    if not isinstance(value, (int, float)):
        try:
            number: Union[None, int, float] = float(value)
        except ValueError:
            # Not numeric text; the caller is expected to try another format.
            number = None
        except TypeError:
            raise TypeError(f'invalid type; expected {native_expected_type}, string, bytes, int or float')
        return number
    return value
  # Turn a unix timestamp into a datetime.
def from_unix_seconds(seconds: Union[int, float]) -> datetime:
    """
    Convert a unix timestamp to a UTC-aware datetime.

    Numbers beyond ``MAX_NUMBER`` in either direction are clamped to
    ``datetime.max`` / ``datetime.min``; those two values are returned as they
    are, without the UTC tzinfo attached on the normal path.

    Otherwise the number is divided by 1000 until its magnitude is no larger
    than ``MS_WATERSHED`` and is then added to ``EPOCH`` as seconds.

    :param seconds: the timestamp to convert
    :return: the corresponding datetime
    """
    if seconds > MAX_NUMBER:
        return datetime.max
    if seconds < -MAX_NUMBER:
        return datetime.min

    scaled = seconds
    while abs(scaled) > MS_WATERSHED:
        scaled = scaled / 1000

    moment = EPOCH + timedelta(seconds=scaled)
    return moment.replace(tzinfo=timezone.utc)
  # Translate the text of a zone designator into a tzinfo object.
def _parse_timezone(value: Optional[str], error: Type[Exception]) -> Union[None, int, timezone]:
    """
    Build a timezone from a zone designator.

    :param value: ``None`` (no zone given), ``'Z'`` for UTC, or a signed offset
        whose characters 1-2 are the hours and, when the string is longer than
        three characters, whose last two characters are the minutes
    :param error: exception class raised, with no arguments, when the offset
        is rejected by ``timezone``
    :return: ``None`` when ``value`` is ``None``, otherwise a ``timezone``
    """
    if value is None:
        return None
    if value == 'Z':
        return timezone.utc

    minutes_part = int(value[-2:]) if len(value) > 3 else 0
    total_minutes = 60 * int(value[1:3]) + minutes_part
    if value[0] == '-':
        total_minutes = -total_minutes

    try:
        return timezone(timedelta(minutes=total_minutes))
    except ValueError:
        raise error()
  # Public entry point for date parsing.
def parse_date(value: Union[date, StrBytesIntFloat]) -> date:
    """
    Parse a date/int/float/string and return a datetime.date.

    A ``datetime`` is reduced to its date part and a ``date`` is returned
    unchanged. Numeric input (including numeric text) is read as a unix
    timestamp. Any other text must match ``date_re``.

    Raise errors.DateError if the input isn't well formatted, or if it is well
    formatted but not a valid date.
    """
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value

    timestamp = get_numeric(value, 'date')
    if timestamp is not None:
        return from_unix_seconds(timestamp).date()

    text = value.decode() if isinstance(value, bytes) else value
    found = date_re.match(text)  # type: ignore
    if found is None:
        raise errors.DateError()

    parts = {name: int(digits) for name, digits in found.groupdict().items()}
    try:
        return date(**parts)
    except ValueError:
        # The pattern matched but the values are out of range (e.g. month 13).
        raise errors.DateError()
  # Public entry point for time-of-day parsing.
def parse_time(value: Union[time, StrBytesIntFloat]) -> time:
    """
    Parse a time/int/float/string and return a datetime.time.

    A ``time`` is returned unchanged. Numeric input (including numeric text) is
    read as the number of seconds since midnight and must lie in the range
    ``0 <= n < 86400``. Any other text must match ``time_re``; a trailing ``Z``
    or UTC offset is accepted and becomes the ``tzinfo`` of the result.

    Raise errors.TimeError if the input isn't well formatted, if it is well
    formatted but not a valid time, or if a number is outside a single day.
    """
    if isinstance(value, time):
        return value

    number = get_numeric(value, 'time')
    if number is not None:
        # Only a number of seconds within one day maps onto a time of day.
        # Larger values would wrap around to 0, and negative values would make
        # ``datetime.min + timedelta`` raise OverflowError; NaN fails the
        # comparison as well, so all of them are rejected here.
        if not 0 <= number < 86400:
            raise errors.TimeError()
        return (datetime.min + timedelta(seconds=number)).time()

    if isinstance(value, bytes):
        value = value.decode()

    match = time_re.match(value)  # type: ignore
    if match is None:
        raise errors.TimeError()

    kw = match.groupdict()
    if kw['microsecond']:
        # Right-pad the fraction so that e.g. ".5" means 500000 microseconds.
        kw['microsecond'] = kw['microsecond'].ljust(6, '0')

    tzinfo = _parse_timezone(kw.pop('tzinfo'), errors.TimeError)
    kw_: Dict[str, Union[None, int, timezone]] = {k: int(v) for k, v in kw.items() if v is not None}
    kw_['tzinfo'] = tzinfo

    try:
        return time(**kw_)  # type: ignore
    except ValueError:
        # The pattern matched but the values are out of range (e.g. hour 25).
        raise errors.TimeError()
  # Public entry point for datetime parsing.
def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:
    """
    Parse a datetime/int/float/string and return a datetime.datetime.

    A ``datetime`` is returned unchanged. Numeric input (including numeric
    text) is read as a unix timestamp. Any other text must match
    ``datetime_re``.

    This function supports time zone offsets. When the input contains one,
    the output uses a timezone with a fixed offset from UTC.

    Raise errors.DateTimeError if the input isn't well formatted, or if it is
    well formatted but not a valid datetime.
    """
    if isinstance(value, datetime):
        return value

    timestamp = get_numeric(value, 'datetime')
    if timestamp is not None:
        return from_unix_seconds(timestamp)

    text = value.decode() if isinstance(value, bytes) else value
    found = datetime_re.match(text)  # type: ignore
    if found is None:
        raise errors.DateTimeError()

    fields = found.groupdict()
    fraction = fields['microsecond']
    if fraction:
        # Right-pad the fraction so that e.g. ".5" means 500000 microseconds.
        fields['microsecond'] = fraction.ljust(6, '0')

    zone = _parse_timezone(fields.pop('tzinfo'), errors.DateTimeError)
    kw_: Dict[str, Union[None, int, timezone]] = {
        name: int(digits) for name, digits in fields.items() if digits is not None
    }
    kw_['tzinfo'] = zone

    try:
        return datetime(**kw_)  # type: ignore
    except ValueError:
        # The pattern matched but the values are out of range (e.g. day 32).
        raise errors.DateTimeError()
  # Public entry point for duration parsing.
def parse_duration(value: StrBytesIntFloat) -> timedelta:
    """
    Parse a duration int/float/string and return a datetime.timedelta.

    A ``timedelta`` is returned unchanged. Ints and floats are rendered as
    text and then parsed like any other string, so a bare number is a number
    of seconds.

    The preferred format for durations in Django is '%d %H:%M:%S.%f'.

    Also supports ISO 8601 representation.

    Raise errors.DurationError if the input matches neither format, or if it
    matches but cannot be turned into a timedelta.
    Raise TypeError if the input is of an unsupported type.
    """
    if isinstance(value, timedelta):
        return value

    if isinstance(value, float):
        # The code below requires a string. Fixed-point formatting is used
        # because str() switches to exponent notation for very small or very
        # large floats (e.g. '1e-07'), which neither pattern accepts.
        value = f'{value:f}'
    elif isinstance(value, int):
        value = str(value)
    elif isinstance(value, bytes):
        value = value.decode()

    try:
        match = standard_duration_re.match(value) or iso8601_duration_re.match(value)
    except TypeError:
        raise TypeError('invalid type; expected timedelta, string, bytes, int or float')

    if not match:
        raise errors.DurationError()

    kw = match.groupdict()
    # Only the ISO 8601 pattern has a "sign" group; default to positive.
    sign = -1 if kw.pop('sign', '+') == '-' else 1
    if kw.get('microseconds'):
        # Right-pad the fraction so that e.g. ".5" means 500000 microseconds.
        kw['microseconds'] = kw['microseconds'].ljust(6, '0')

    if kw.get('seconds') and kw.get('microseconds') and kw['seconds'].startswith('-'):
        # The fraction belongs to a negative number of seconds.
        kw['microseconds'] = '-' + kw['microseconds']

    try:
        kw_ = {k: float(v) for k, v in kw.items() if v is not None}
        return sign * timedelta(**kw_)
    except (ValueError, OverflowError):
        # A matched component that float() cannot convert, or a value outside
        # the range of timedelta, is reported like any other invalid duration.
        raise errors.DurationError()