You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

885 line
31 KiB

  1. """
  2. passlib.utils.binary - binary data encoding/decoding/manipulation
  3. """
  4. #=============================================================================
  5. # imports
  6. #=============================================================================
  7. # core
  8. from __future__ import absolute_import, division, print_function
  9. from base64 import (
  10. b64encode,
  11. b64decode,
  12. b32decode as _b32decode,
  13. b32encode as _b32encode,
  14. )
  15. from binascii import b2a_base64, a2b_base64, Error as _BinAsciiError
  16. import logging
  17. log = logging.getLogger(__name__)
  18. # site
  19. # pkg
  20. from passlib import exc
  21. from passlib.utils.compat import (
  22. PY3, bascii_to_str,
  23. irange, imap, iter_byte_chars, join_byte_values, join_byte_elems,
  24. nextgetter, suppress_cause,
  25. u, unicode, unicode_or_bytes_types,
  26. )
  27. from passlib.utils.decor import memoized_property
  28. # from passlib.utils import BASE64_CHARS, HASH64_CHARS
  29. # local
  30. __all__ = [
  31. # constants
  32. "BASE64_CHARS", "PADDED_BASE64_CHARS",
  33. "AB64_CHARS",
  34. "HASH64_CHARS",
  35. "BCRYPT_CHARS",
  36. "HEX_CHARS", "LOWER_HEX_CHARS", "UPPER_HEX_CHARS",
  37. "ALL_BYTE_VALUES",
  38. # misc
  39. "compile_byte_translation",
  40. # base64
  41. 'ab64_encode', 'ab64_decode',
  42. 'b64s_encode', 'b64s_decode',
  43. # base32
  44. "b32encode", "b32decode",
  45. # custom encodings
  46. 'Base64Engine',
  47. 'LazyBase64Engine',
  48. 'h64',
  49. 'h64big',
  50. 'bcrypt64',
  51. ]
  52. #=============================================================================
  53. # constant strings
  54. #=============================================================================
  55. #-------------------------------------------------------------
  56. # common salt_chars & checksum_chars values
  57. #-------------------------------------------------------------
  58. #: standard base64 charmap
  59. BASE64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
  60. #: alt base64 charmap -- "." instead of "+"
  61. AB64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789./")
  62. #: charmap used by HASH64 encoding.
  63. HASH64_CHARS = u("./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
  64. #: charmap used by BCrypt
  65. BCRYPT_CHARS = u("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
  66. #: std base64 chars + padding char
  67. PADDED_BASE64_CHARS = BASE64_CHARS + u("=")
  68. #: all hex chars
  69. HEX_CHARS = u("0123456789abcdefABCDEF")
  70. #: upper case hex chars
  71. UPPER_HEX_CHARS = u("0123456789ABCDEF")
  72. #: lower case hex chars
  73. LOWER_HEX_CHARS = u("0123456789abcdef")
  74. #-------------------------------------------------------------
  75. # byte strings
  76. #-------------------------------------------------------------
  77. #: special byte string containing all possible byte values
  78. #: NOTE: for efficiency, this is treated as singleton by some of the code
  79. ALL_BYTE_VALUES = join_byte_values(irange(256))
  80. #: some string constants we reuse
  81. B_EMPTY = b''
  82. B_NULL = b'\x00'
  83. B_EQUAL = b'='
  84. #=============================================================================
  85. # byte translation
  86. #=============================================================================
  87. #: base list used to compile byte translations
  88. _TRANSLATE_SOURCE = list(iter_byte_chars(ALL_BYTE_VALUES))
  89. def compile_byte_translation(mapping, source=None):
  90. """
  91. return a 256-byte string for translating bytes using specified mapping.
  92. bytes not specified by mapping will be left alone.
  93. :param mapping:
  94. dict mapping input byte (str or int) -> output byte (str or int).
  95. :param source:
  96. optional existing byte translation string to use as base.
  97. (must be 255-length byte string). defaults to identity mapping.
  98. :returns:
  99. 255-length byte string for passing to bytes().translate.
  100. """
  101. if source is None:
  102. target = _TRANSLATE_SOURCE[:]
  103. else:
  104. assert isinstance(source, bytes) and len(source) == 255
  105. target = list(iter_byte_chars(source))
  106. for k, v in mapping.items():
  107. if isinstance(k, unicode_or_bytes_types):
  108. k = ord(k)
  109. assert isinstance(k, int) and 0 <= k < 256
  110. if isinstance(v, unicode):
  111. v = v.encode("ascii")
  112. assert isinstance(v, bytes) and len(v) == 1
  113. target[k] = v
  114. return B_EMPTY.join(target)
  115. #=============================================================================
  116. # unpadding / stripped base64 encoding
  117. #=============================================================================
  118. def b64s_encode(data):
  119. """
  120. encode using shortened base64 format which omits padding & whitespace.
  121. uses default ``+/`` altchars.
  122. """
  123. return b2a_base64(data).rstrip(_BASE64_STRIP)
  124. def b64s_decode(data):
  125. """
  126. decode from shortened base64 format which omits padding & whitespace.
  127. uses default ``+/`` altchars.
  128. """
  129. if isinstance(data, unicode):
  130. # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
  131. try:
  132. data = data.encode("ascii")
  133. except UnicodeEncodeError:
  134. raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
  135. off = len(data) & 3
  136. if off == 0:
  137. pass
  138. elif off == 2:
  139. data += _BASE64_PAD2
  140. elif off == 3:
  141. data += _BASE64_PAD1
  142. else: # off == 1
  143. raise ValueError("invalid base64 input")
  144. try:
  145. return a2b_base64(data)
  146. except _BinAsciiError as err:
  147. raise suppress_cause(TypeError(err))
  148. #=============================================================================
  149. # adapted-base64 encoding
  150. #=============================================================================
  151. _BASE64_STRIP = b"=\n"
  152. _BASE64_PAD1 = b"="
  153. _BASE64_PAD2 = b"=="
# XXX: Passlib 1.8/1.9 -- deprecate everything that's using ab64_encode(),
#      have it start outputting b64s_encode() instead? can use ab64_decode() to retain backwards compat.
  156. def ab64_encode(data):
  157. """
  158. encode using shortened base64 format which omits padding & whitespace.
  159. uses custom ``./`` altchars.
  160. it is primarily used by Passlib's custom pbkdf2 hashes.
  161. """
  162. return b64s_encode(data).replace(b"+", b".")
  163. def ab64_decode(data):
  164. """
  165. decode from shortened base64 format which omits padding & whitespace.
  166. uses custom ``./`` altchars, but supports decoding normal ``+/`` altchars as well.
  167. it is primarily used by Passlib's custom pbkdf2 hashes.
  168. """
  169. if isinstance(data, unicode):
  170. # needs bytes for replace() call, but want to accept ascii-unicode ala a2b_base64()
  171. try:
  172. data = data.encode("ascii")
  173. except UnicodeEncodeError:
  174. raise suppress_cause(ValueError("string argument should contain only ASCII characters"))
  175. return b64s_decode(data.replace(b".", b"+"))
  176. #=============================================================================
  177. # base32 codec
  178. #=============================================================================
  179. def b32encode(source):
  180. """
  181. wrapper around :func:`base64.b32encode` which strips padding,
  182. and returns a native string.
  183. """
  184. # NOTE: using upper case by default here, since 'I & L' are less
  185. # visually ambiguous than 'i & l'
  186. return bascii_to_str(_b32encode(source).rstrip(B_EQUAL))
  187. #: byte translation map to replace common mistyped base32 chars.
  188. #: XXX: could correct '1' -> 'I', but could be a mistyped lower-case 'l', so leaving it alone.
  189. _b32_translate = compile_byte_translation({"8": "B", "0": "O"})
  190. #: helper to add padding
  191. _b32_decode_pad = B_EQUAL * 8
  192. def b32decode(source):
  193. """
  194. wrapper around :func:`base64.b32decode`
  195. which handles common mistyped chars.
  196. padding optional, ignored if present.
  197. """
  198. # encode & correct for typos
  199. if isinstance(source, unicode):
  200. source = source.encode("ascii")
  201. source = source.translate(_b32_translate)
  202. # pad things so final string is multiple of 8
  203. remainder = len(source) & 0x7
  204. if remainder:
  205. source += _b32_decode_pad[:-remainder]
  206. # XXX: py27 stdlib's version of this has some inefficiencies,
  207. # could look into using optimized version.
  208. return _b32decode(source, True)
  209. #=============================================================================
  210. # base64-variant encoding
  211. #=============================================================================
  212. class Base64Engine(object):
  213. """Provides routines for encoding/decoding base64 data using
  214. arbitrary character mappings, selectable endianness, etc.
  215. :arg charmap:
  216. A string of 64 unique characters,
  217. which will be used to encode successive 6-bit chunks of data.
  218. A character's position within the string should correspond
  219. to its 6-bit value.
  220. :param big:
  221. Whether the encoding should be big-endian (default False).
  222. .. note::
  223. This class does not currently handle base64's padding characters
  224. in any way what so ever.
  225. Raw Bytes <-> Encoded Bytes
  226. ===========================
  227. The following methods convert between raw bytes,
  228. and strings encoded using the engine's specific base64 variant:
  229. .. automethod:: encode_bytes
  230. .. automethod:: decode_bytes
  231. .. automethod:: encode_transposed_bytes
  232. .. automethod:: decode_transposed_bytes
  233. ..
  234. .. automethod:: check_repair_unused
  235. .. automethod:: repair_unused
  236. Integers <-> Encoded Bytes
  237. ==========================
  238. The following methods allow encoding and decoding
  239. unsigned integers to and from the engine's specific base64 variant.
  240. Endianess is determined by the engine's ``big`` constructor keyword.
  241. .. automethod:: encode_int6
  242. .. automethod:: decode_int6
  243. .. automethod:: encode_int12
  244. .. automethod:: decode_int12
  245. .. automethod:: encode_int24
  246. .. automethod:: decode_int24
  247. .. automethod:: encode_int64
  248. .. automethod:: decode_int64
  249. Informational Attributes
  250. ========================
  251. .. attribute:: charmap
  252. unicode string containing list of characters used in encoding;
  253. position in string matches 6bit value of character.
  254. .. attribute:: bytemap
  255. bytes version of :attr:`charmap`
  256. .. attribute:: big
  257. boolean flag indicating this using big-endian encoding.
  258. """
  259. #===================================================================
  260. # instance attrs
  261. #===================================================================
  262. # public config
  263. bytemap = None # charmap as bytes
  264. big = None # little or big endian
  265. # filled in by init based on charmap.
  266. # (byte elem: single byte under py2, 8bit int under py3)
  267. _encode64 = None # maps 6bit value -> byte elem
  268. _decode64 = None # maps byte elem -> 6bit value
  269. # helpers filled in by init based on endianness
  270. _encode_bytes = None # throws IndexError if bad value (shouldn't happen)
  271. _decode_bytes = None # throws KeyError if bad char.
  272. #===================================================================
  273. # init
  274. #===================================================================
  275. def __init__(self, charmap, big=False):
  276. # validate charmap, generate encode64/decode64 helper functions.
  277. if isinstance(charmap, unicode):
  278. charmap = charmap.encode("latin-1")
  279. elif not isinstance(charmap, bytes):
  280. raise exc.ExpectedStringError(charmap, "charmap")
  281. if len(charmap) != 64:
  282. raise ValueError("charmap must be 64 characters in length")
  283. if len(set(charmap)) != 64:
  284. raise ValueError("charmap must not contain duplicate characters")
  285. self.bytemap = charmap
  286. self._encode64 = charmap.__getitem__
  287. lookup = dict((value, idx) for idx, value in enumerate(charmap))
  288. self._decode64 = lookup.__getitem__
  289. # validate big, set appropriate helper functions.
  290. self.big = big
  291. if big:
  292. self._encode_bytes = self._encode_bytes_big
  293. self._decode_bytes = self._decode_bytes_big
  294. else:
  295. self._encode_bytes = self._encode_bytes_little
  296. self._decode_bytes = self._decode_bytes_little
  297. # TODO: support padding character
  298. ##if padding is not None:
  299. ## if isinstance(padding, unicode):
  300. ## padding = padding.encode("latin-1")
  301. ## elif not isinstance(padding, bytes):
  302. ## raise TypeError("padding char must be unicode or bytes")
  303. ## if len(padding) != 1:
  304. ## raise ValueError("padding must be single character")
  305. ##self.padding = padding
  306. @property
  307. def charmap(self):
  308. """charmap as unicode"""
  309. return self.bytemap.decode("latin-1")
  310. #===================================================================
  311. # encoding byte strings
  312. #===================================================================
  313. def encode_bytes(self, source):
  314. """encode bytes to base64 string.
  315. :arg source: byte string to encode.
  316. :returns: byte string containing encoded data.
  317. """
  318. if not isinstance(source, bytes):
  319. raise TypeError("source must be bytes, not %s" % (type(source),))
  320. chunks, tail = divmod(len(source), 3)
  321. if PY3:
  322. next_value = nextgetter(iter(source))
  323. else:
  324. next_value = nextgetter(ord(elem) for elem in source)
  325. gen = self._encode_bytes(next_value, chunks, tail)
  326. out = join_byte_elems(imap(self._encode64, gen))
  327. ##if tail:
  328. ## padding = self.padding
  329. ## if padding:
  330. ## out += padding * (3-tail)
  331. return out
  332. def _encode_bytes_little(self, next_value, chunks, tail):
  333. """helper used by encode_bytes() to handle little-endian encoding"""
  334. #
  335. # output bit layout:
  336. #
  337. # first byte: v1 543210
  338. #
  339. # second byte: v1 ....76
  340. # +v2 3210..
  341. #
  342. # third byte: v2 ..7654
  343. # +v3 10....
  344. #
  345. # fourth byte: v3 765432
  346. #
  347. idx = 0
  348. while idx < chunks:
  349. v1 = next_value()
  350. v2 = next_value()
  351. v3 = next_value()
  352. yield v1 & 0x3f
  353. yield ((v2 & 0x0f)<<2)|(v1>>6)
  354. yield ((v3 & 0x03)<<4)|(v2>>4)
  355. yield v3>>2
  356. idx += 1
  357. if tail:
  358. v1 = next_value()
  359. if tail == 1:
  360. # note: 4 msb of last byte are padding
  361. yield v1 & 0x3f
  362. yield v1>>6
  363. else:
  364. assert tail == 2
  365. # note: 2 msb of last byte are padding
  366. v2 = next_value()
  367. yield v1 & 0x3f
  368. yield ((v2 & 0x0f)<<2)|(v1>>6)
  369. yield v2>>4
  370. def _encode_bytes_big(self, next_value, chunks, tail):
  371. """helper used by encode_bytes() to handle big-endian encoding"""
  372. #
  373. # output bit layout:
  374. #
  375. # first byte: v1 765432
  376. #
  377. # second byte: v1 10....
  378. # +v2 ..7654
  379. #
  380. # third byte: v2 3210..
  381. # +v3 ....76
  382. #
  383. # fourth byte: v3 543210
  384. #
  385. idx = 0
  386. while idx < chunks:
  387. v1 = next_value()
  388. v2 = next_value()
  389. v3 = next_value()
  390. yield v1>>2
  391. yield ((v1&0x03)<<4)|(v2>>4)
  392. yield ((v2&0x0f)<<2)|(v3>>6)
  393. yield v3 & 0x3f
  394. idx += 1
  395. if tail:
  396. v1 = next_value()
  397. if tail == 1:
  398. # note: 4 lsb of last byte are padding
  399. yield v1>>2
  400. yield (v1&0x03)<<4
  401. else:
  402. assert tail == 2
  403. # note: 2 lsb of last byte are padding
  404. v2 = next_value()
  405. yield v1>>2
  406. yield ((v1&0x03)<<4)|(v2>>4)
  407. yield ((v2&0x0f)<<2)
  408. #===================================================================
  409. # decoding byte strings
  410. #===================================================================
  411. def decode_bytes(self, source):
  412. """decode bytes from base64 string.
  413. :arg source: byte string to decode.
  414. :returns: byte string containing decoded data.
  415. """
  416. if not isinstance(source, bytes):
  417. raise TypeError("source must be bytes, not %s" % (type(source),))
  418. ##padding = self.padding
  419. ##if padding:
  420. ## # TODO: add padding size check?
  421. ## source = source.rstrip(padding)
  422. chunks, tail = divmod(len(source), 4)
  423. if tail == 1:
  424. # only 6 bits left, can't encode a whole byte!
  425. raise ValueError("input string length cannot be == 1 mod 4")
  426. next_value = nextgetter(imap(self._decode64, source))
  427. try:
  428. return join_byte_values(self._decode_bytes(next_value, chunks, tail))
  429. except KeyError as err:
  430. raise ValueError("invalid character: %r" % (err.args[0],))
  431. def _decode_bytes_little(self, next_value, chunks, tail):
  432. """helper used by decode_bytes() to handle little-endian encoding"""
  433. #
  434. # input bit layout:
  435. #
  436. # first byte: v1 ..543210
  437. # +v2 10......
  438. #
  439. # second byte: v2 ....5432
  440. # +v3 3210....
  441. #
  442. # third byte: v3 ......54
  443. # +v4 543210..
  444. #
  445. idx = 0
  446. while idx < chunks:
  447. v1 = next_value()
  448. v2 = next_value()
  449. v3 = next_value()
  450. v4 = next_value()
  451. yield v1 | ((v2 & 0x3) << 6)
  452. yield (v2>>2) | ((v3 & 0xF) << 4)
  453. yield (v3>>4) | (v4<<2)
  454. idx += 1
  455. if tail:
  456. # tail is 2 or 3
  457. v1 = next_value()
  458. v2 = next_value()
  459. yield v1 | ((v2 & 0x3) << 6)
  460. # NOTE: if tail == 2, 4 msb of v2 are ignored (should be 0)
  461. if tail == 3:
  462. # NOTE: 2 msb of v3 are ignored (should be 0)
  463. v3 = next_value()
  464. yield (v2>>2) | ((v3 & 0xF) << 4)
  465. def _decode_bytes_big(self, next_value, chunks, tail):
  466. """helper used by decode_bytes() to handle big-endian encoding"""
  467. #
  468. # input bit layout:
  469. #
  470. # first byte: v1 543210..
  471. # +v2 ......54
  472. #
  473. # second byte: v2 3210....
  474. # +v3 ....5432
  475. #
  476. # third byte: v3 10......
  477. # +v4 ..543210
  478. #
  479. idx = 0
  480. while idx < chunks:
  481. v1 = next_value()
  482. v2 = next_value()
  483. v3 = next_value()
  484. v4 = next_value()
  485. yield (v1<<2) | (v2>>4)
  486. yield ((v2&0xF)<<4) | (v3>>2)
  487. yield ((v3&0x3)<<6) | v4
  488. idx += 1
  489. if tail:
  490. # tail is 2 or 3
  491. v1 = next_value()
  492. v2 = next_value()
  493. yield (v1<<2) | (v2>>4)
  494. # NOTE: if tail == 2, 4 lsb of v2 are ignored (should be 0)
  495. if tail == 3:
  496. # NOTE: 2 lsb of v3 are ignored (should be 0)
  497. v3 = next_value()
  498. yield ((v2&0xF)<<4) | (v3>>2)
  499. #===================================================================
  500. # encode/decode helpers
  501. #===================================================================
  502. # padmap2/3 - dict mapping last char of string ->
  503. # equivalent char with no padding bits set.
  504. def __make_padset(self, bits):
  505. """helper to generate set of valid last chars & bytes"""
  506. pset = set(c for i,c in enumerate(self.bytemap) if not i & bits)
  507. pset.update(c for i,c in enumerate(self.charmap) if not i & bits)
  508. return frozenset(pset)
  509. @memoized_property
  510. def _padinfo2(self):
  511. """mask to clear padding bits, and valid last bytes (for strings 2 % 4)"""
  512. # 4 bits of last char unused (lsb for big, msb for little)
  513. bits = 15 if self.big else (15<<2)
  514. return ~bits, self.__make_padset(bits)
  515. @memoized_property
  516. def _padinfo3(self):
  517. """mask to clear padding bits, and valid last bytes (for strings 3 % 4)"""
  518. # 2 bits of last char unused (lsb for big, msb for little)
  519. bits = 3 if self.big else (3<<4)
  520. return ~bits, self.__make_padset(bits)
  521. def check_repair_unused(self, source):
  522. """helper to detect & clear invalid unused bits in last character.
  523. :arg source:
  524. encoded data (as ascii bytes or unicode).
  525. :returns:
  526. `(True, result)` if the string was repaired,
  527. `(False, source)` if the string was ok as-is.
  528. """
  529. # figure out how many padding bits there are in last char.
  530. tail = len(source) & 3
  531. if tail == 2:
  532. mask, padset = self._padinfo2
  533. elif tail == 3:
  534. mask, padset = self._padinfo3
  535. elif not tail:
  536. return False, source
  537. else:
  538. raise ValueError("source length must != 1 mod 4")
  539. # check if last char is ok (padset contains bytes & unicode versions)
  540. last = source[-1]
  541. if last in padset:
  542. return False, source
  543. # we have dirty bits - repair the string by decoding last char,
  544. # clearing the padding bits via <mask>, and encoding new char.
  545. if isinstance(source, unicode):
  546. cm = self.charmap
  547. last = cm[cm.index(last) & mask]
  548. assert last in padset, "failed to generate valid padding char"
  549. else:
  550. # NOTE: this assumes ascii-compat encoding, and that
  551. # all chars used by encoding are 7-bit ascii.
  552. last = self._encode64(self._decode64(last) & mask)
  553. assert last in padset, "failed to generate valid padding char"
  554. if PY3:
  555. last = bytes([last])
  556. return True, source[:-1] + last
  557. def repair_unused(self, source):
  558. return self.check_repair_unused(source)[1]
  559. ##def transcode(self, source, other):
  560. ## return ''.join(
  561. ## other.charmap[self.charmap.index(char)]
  562. ## for char in source
  563. ## )
  564. ##def random_encoded_bytes(self, size, random=None, unicode=False):
  565. ## "return random encoded string of given size"
  566. ## data = getrandstr(random or rng,
  567. ## self.charmap if unicode else self.bytemap, size)
  568. ## return self.repair_unused(data)
  569. #===================================================================
  570. # transposed encoding/decoding
  571. #===================================================================
  572. def encode_transposed_bytes(self, source, offsets):
  573. """encode byte string, first transposing source using offset list"""
  574. if not isinstance(source, bytes):
  575. raise TypeError("source must be bytes, not %s" % (type(source),))
  576. tmp = join_byte_elems(source[off] for off in offsets)
  577. return self.encode_bytes(tmp)
  578. def decode_transposed_bytes(self, source, offsets):
  579. """decode byte string, then reverse transposition described by offset list"""
  580. # NOTE: if transposition does not use all bytes of source,
  581. # the original can't be recovered... and join_byte_elems() will throw
  582. # an error because 1+ values in <buf> will be None.
  583. tmp = self.decode_bytes(source)
  584. buf = [None] * len(offsets)
  585. for off, char in zip(offsets, tmp):
  586. buf[off] = char
  587. return join_byte_elems(buf)
  588. #===================================================================
  589. # integer decoding helpers - mainly used by des_crypt family
  590. #===================================================================
  591. def _decode_int(self, source, bits):
  592. """decode base64 string -> integer
  593. :arg source: base64 string to decode.
  594. :arg bits: number of bits in resulting integer.
  595. :raises ValueError:
  596. * if the string contains invalid base64 characters.
  597. * if the string is not long enough - it must be at least
  598. ``int(ceil(bits/6))`` in length.
  599. :returns:
  600. a integer in the range ``0 <= n < 2**bits``
  601. """
  602. if not isinstance(source, bytes):
  603. raise TypeError("source must be bytes, not %s" % (type(source),))
  604. big = self.big
  605. pad = -bits % 6
  606. chars = (bits+pad)/6
  607. if len(source) != chars:
  608. raise ValueError("source must be %d chars" % (chars,))
  609. decode = self._decode64
  610. out = 0
  611. try:
  612. for c in source if big else reversed(source):
  613. out = (out<<6) + decode(c)
  614. except KeyError:
  615. raise ValueError("invalid character in string: %r" % (c,))
  616. if pad:
  617. # strip padding bits
  618. if big:
  619. out >>= pad
  620. else:
  621. out &= (1<<bits)-1
  622. return out
  623. #---------------------------------------------------------------
  624. # optimized versions for common integer sizes
  625. #---------------------------------------------------------------
  626. def decode_int6(self, source):
  627. """decode single character -> 6 bit integer"""
  628. if not isinstance(source, bytes):
  629. raise TypeError("source must be bytes, not %s" % (type(source),))
  630. if len(source) != 1:
  631. raise ValueError("source must be exactly 1 byte")
  632. if PY3:
  633. # convert to 8bit int before doing lookup
  634. source = source[0]
  635. try:
  636. return self._decode64(source)
  637. except KeyError:
  638. raise ValueError("invalid character")
  639. def decode_int12(self, source):
  640. """decodes 2 char string -> 12-bit integer"""
  641. if not isinstance(source, bytes):
  642. raise TypeError("source must be bytes, not %s" % (type(source),))
  643. if len(source) != 2:
  644. raise ValueError("source must be exactly 2 bytes")
  645. decode = self._decode64
  646. try:
  647. if self.big:
  648. return decode(source[1]) + (decode(source[0])<<6)
  649. else:
  650. return decode(source[0]) + (decode(source[1])<<6)
  651. except KeyError:
  652. raise ValueError("invalid character")
  653. def decode_int24(self, source):
  654. """decodes 4 char string -> 24-bit integer"""
  655. if not isinstance(source, bytes):
  656. raise TypeError("source must be bytes, not %s" % (type(source),))
  657. if len(source) != 4:
  658. raise ValueError("source must be exactly 4 bytes")
  659. decode = self._decode64
  660. try:
  661. if self.big:
  662. return decode(source[3]) + (decode(source[2])<<6)+ \
  663. (decode(source[1])<<12) + (decode(source[0])<<18)
  664. else:
  665. return decode(source[0]) + (decode(source[1])<<6)+ \
  666. (decode(source[2])<<12) + (decode(source[3])<<18)
  667. except KeyError:
  668. raise ValueError("invalid character")
  669. def decode_int30(self, source):
  670. """decode 5 char string -> 30 bit integer"""
  671. return self._decode_int(source, 30)
  672. def decode_int64(self, source):
  673. """decode 11 char base64 string -> 64-bit integer
  674. this format is used primarily by des-crypt & variants to encode
  675. the DES output value used as a checksum.
  676. """
  677. return self._decode_int(source, 64)
  678. #===================================================================
  679. # integer encoding helpers - mainly used by des_crypt family
  680. #===================================================================
  681. def _encode_int(self, value, bits):
  682. """encode integer into base64 format
  683. :arg value: non-negative integer to encode
  684. :arg bits: number of bits to encode
  685. :returns:
  686. a string of length ``int(ceil(bits/6.0))``.
  687. """
  688. assert value >= 0, "caller did not sanitize input"
  689. pad = -bits % 6
  690. bits += pad
  691. if self.big:
  692. itr = irange(bits-6, -6, -6)
  693. # shift to add lsb padding.
  694. value <<= pad
  695. else:
  696. itr = irange(0, bits, 6)
  697. # padding is msb, so no change needed.
  698. return join_byte_elems(imap(self._encode64,
  699. ((value>>off) & 0x3f for off in itr)))
  700. #---------------------------------------------------------------
  701. # optimized versions for common integer sizes
  702. #---------------------------------------------------------------
  703. def encode_int6(self, value):
  704. """encodes 6-bit integer -> single hash64 character"""
  705. if value < 0 or value > 63:
  706. raise ValueError("value out of range")
  707. if PY3:
  708. return self.bytemap[value:value+1]
  709. else:
  710. return self._encode64(value)
  711. def encode_int12(self, value):
  712. """encodes 12-bit integer -> 2 char string"""
  713. if value < 0 or value > 0xFFF:
  714. raise ValueError("value out of range")
  715. raw = [value & 0x3f, (value>>6) & 0x3f]
  716. if self.big:
  717. raw = reversed(raw)
  718. return join_byte_elems(imap(self._encode64, raw))
  719. def encode_int24(self, value):
  720. """encodes 24-bit integer -> 4 char string"""
  721. if value < 0 or value > 0xFFFFFF:
  722. raise ValueError("value out of range")
  723. raw = [value & 0x3f, (value>>6) & 0x3f,
  724. (value>>12) & 0x3f, (value>>18) & 0x3f]
  725. if self.big:
  726. raw = reversed(raw)
  727. return join_byte_elems(imap(self._encode64, raw))
  728. def encode_int30(self, value):
  729. """decode 5 char string -> 30 bit integer"""
  730. if value < 0 or value > 0x3fffffff:
  731. raise ValueError("value out of range")
  732. return self._encode_int(value, 30)
  733. def encode_int64(self, value):
  734. """encode 64-bit integer -> 11 char hash64 string
  735. this format is used primarily by des-crypt & variants to encode
  736. the DES output value used as a checksum.
  737. """
  738. if value < 0 or value > 0xffffffffffffffff:
  739. raise ValueError("value out of range")
  740. return self._encode_int(value, 64)
  741. #===================================================================
  742. # eof
  743. #===================================================================
  744. class LazyBase64Engine(Base64Engine):
  745. """Base64Engine which delays initialization until it's accessed"""
  746. _lazy_opts = None
  747. def __init__(self, *args, **kwds):
  748. self._lazy_opts = (args, kwds)
  749. def _lazy_init(self):
  750. args, kwds = self._lazy_opts
  751. super(LazyBase64Engine, self).__init__(*args, **kwds)
  752. del self._lazy_opts
  753. self.__class__ = Base64Engine
  754. def __getattribute__(self, attr):
  755. if not attr.startswith("_"):
  756. self._lazy_init()
  757. return object.__getattribute__(self, attr)
  758. #-------------------------------------------------------------
  759. # common variants
  760. #-------------------------------------------------------------
  761. h64 = LazyBase64Engine(HASH64_CHARS)
  762. h64big = LazyBase64Engine(HASH64_CHARS, big=True)
  763. bcrypt64 = LazyBase64Engine(BCRYPT_CHARS, big=True)
  764. #=============================================================================
  765. # eof
  766. #=============================================================================