You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

268 lines
7.2 KiB

  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. //go:build (!amd64 && !arm64) || appengine || !gc || noasm
  6. // +build !amd64,!arm64 appengine !gc noasm
  7. package s2
  8. import (
  9. "fmt"
  10. "strconv"
  11. )
  12. // decode writes the decoding of src to dst. It assumes that the varint-encoded
  13. // length of the decompressed bytes has already been read, and that len(dst)
  14. // equals that length.
  15. //
  16. // It returns 0 on success or a decodeErrCodeXxx error code on failure.
  17. func s2Decode(dst, src []byte) int {
  18. const debug = false
  19. if debug {
  20. fmt.Println("Starting decode, dst len:", len(dst))
  21. }
  22. var d, s, length int
  23. offset := 0
  24. // As long as we can read at least 5 bytes...
  25. for s < len(src)-5 {
  26. switch src[s] & 0x03 {
  27. case tagLiteral:
  28. x := uint32(src[s] >> 2)
  29. switch {
  30. case x < 60:
  31. s++
  32. case x == 60:
  33. s += 2
  34. x = uint32(src[s-1])
  35. case x == 61:
  36. s += 3
  37. x = uint32(src[s-2]) | uint32(src[s-1])<<8
  38. case x == 62:
  39. s += 4
  40. x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
  41. case x == 63:
  42. s += 5
  43. x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
  44. }
  45. length = int(x) + 1
  46. if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
  47. return decodeErrCodeCorrupt
  48. }
  49. if debug {
  50. fmt.Println("literals, length:", length, "d-after:", d+length)
  51. }
  52. copy(dst[d:], src[s:s+length])
  53. d += length
  54. s += length
  55. continue
  56. case tagCopy1:
  57. s += 2
  58. length = int(src[s-2]) >> 2 & 0x7
  59. toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
  60. if toffset == 0 {
  61. if debug {
  62. fmt.Print("(repeat) ")
  63. }
  64. // keep last offset
  65. switch length {
  66. case 5:
  67. s += 1
  68. length = int(uint32(src[s-1])) + 4
  69. case 6:
  70. s += 2
  71. length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
  72. case 7:
  73. s += 3
  74. length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
  75. default: // 0-> 4
  76. }
  77. } else {
  78. offset = toffset
  79. }
  80. length += 4
  81. case tagCopy2:
  82. s += 3
  83. length = 1 + int(src[s-3])>>2
  84. offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
  85. case tagCopy4:
  86. s += 5
  87. length = 1 + int(src[s-5])>>2
  88. offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
  89. }
  90. if offset <= 0 || d < offset || length > len(dst)-d {
  91. return decodeErrCodeCorrupt
  92. }
  93. if debug {
  94. fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
  95. }
  96. // Copy from an earlier sub-slice of dst to a later sub-slice.
  97. // If no overlap, use the built-in copy:
  98. if offset > length {
  99. copy(dst[d:d+length], dst[d-offset:])
  100. d += length
  101. continue
  102. }
  103. // Unlike the built-in copy function, this byte-by-byte copy always runs
  104. // forwards, even if the slices overlap. Conceptually, this is:
  105. //
  106. // d += forwardCopy(dst[d:d+length], dst[d-offset:])
  107. //
  108. // We align the slices into a and b and show the compiler they are the same size.
  109. // This allows the loop to run without bounds checks.
  110. a := dst[d : d+length]
  111. b := dst[d-offset:]
  112. b = b[:len(a)]
  113. for i := range a {
  114. a[i] = b[i]
  115. }
  116. d += length
  117. }
  118. // Remaining with extra checks...
  119. for s < len(src) {
  120. switch src[s] & 0x03 {
  121. case tagLiteral:
  122. x := uint32(src[s] >> 2)
  123. switch {
  124. case x < 60:
  125. s++
  126. case x == 60:
  127. s += 2
  128. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  129. return decodeErrCodeCorrupt
  130. }
  131. x = uint32(src[s-1])
  132. case x == 61:
  133. s += 3
  134. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  135. return decodeErrCodeCorrupt
  136. }
  137. x = uint32(src[s-2]) | uint32(src[s-1])<<8
  138. case x == 62:
  139. s += 4
  140. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  141. return decodeErrCodeCorrupt
  142. }
  143. x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
  144. case x == 63:
  145. s += 5
  146. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  147. return decodeErrCodeCorrupt
  148. }
  149. x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
  150. }
  151. length = int(x) + 1
  152. if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
  153. return decodeErrCodeCorrupt
  154. }
  155. if debug {
  156. fmt.Println("literals, length:", length, "d-after:", d+length)
  157. }
  158. copy(dst[d:], src[s:s+length])
  159. d += length
  160. s += length
  161. continue
  162. case tagCopy1:
  163. s += 2
  164. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  165. return decodeErrCodeCorrupt
  166. }
  167. length = int(src[s-2]) >> 2 & 0x7
  168. toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
  169. if toffset == 0 {
  170. if debug {
  171. fmt.Print("(repeat) ")
  172. }
  173. // keep last offset
  174. switch length {
  175. case 5:
  176. s += 1
  177. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  178. return decodeErrCodeCorrupt
  179. }
  180. length = int(uint32(src[s-1])) + 4
  181. case 6:
  182. s += 2
  183. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  184. return decodeErrCodeCorrupt
  185. }
  186. length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
  187. case 7:
  188. s += 3
  189. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  190. return decodeErrCodeCorrupt
  191. }
  192. length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
  193. default: // 0-> 4
  194. }
  195. } else {
  196. offset = toffset
  197. }
  198. length += 4
  199. case tagCopy2:
  200. s += 3
  201. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  202. return decodeErrCodeCorrupt
  203. }
  204. length = 1 + int(src[s-3])>>2
  205. offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
  206. case tagCopy4:
  207. s += 5
  208. if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
  209. return decodeErrCodeCorrupt
  210. }
  211. length = 1 + int(src[s-5])>>2
  212. offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
  213. }
  214. if offset <= 0 || d < offset || length > len(dst)-d {
  215. return decodeErrCodeCorrupt
  216. }
  217. if debug {
  218. fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
  219. }
  220. // Copy from an earlier sub-slice of dst to a later sub-slice.
  221. // If no overlap, use the built-in copy:
  222. if offset > length {
  223. copy(dst[d:d+length], dst[d-offset:])
  224. d += length
  225. continue
  226. }
  227. // Unlike the built-in copy function, this byte-by-byte copy always runs
  228. // forwards, even if the slices overlap. Conceptually, this is:
  229. //
  230. // d += forwardCopy(dst[d:d+length], dst[d-offset:])
  231. //
  232. // We align the slices into a and b and show the compiler they are the same size.
  233. // This allows the loop to run without bounds checks.
  234. a := dst[d : d+length]
  235. b := dst[d-offset:]
  236. b = b[:len(a)]
  237. for i := range a {
  238. a[i] = b[i]
  239. }
  240. d += length
  241. }
  242. if d != len(dst) {
  243. return decodeErrCodeCorrupt
  244. }
  245. return 0
  246. }