Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

432 строки
11 KiB

  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "math/bits"
  8. )
  9. // hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
  10. // Preferably h should be a constant and should always be <32.
  11. func hash4(u uint64, h uint8) uint32 {
  12. const prime4bytes = 2654435761
  13. return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
  14. }
  15. // hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
  16. // Preferably h should be a constant and should always be <64.
  17. func hash5(u uint64, h uint8) uint32 {
  18. const prime5bytes = 889523592379
  19. return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
  20. }
  21. // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
  22. // Preferably h should be a constant and should always be <64.
  23. func hash7(u uint64, h uint8) uint32 {
  24. const prime7bytes = 58295818150454627
  25. return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
  26. }
  27. // hash8 returns the hash of u to fit in a hash table with h bits.
  28. // Preferably h should be a constant and should always be <64.
  29. func hash8(u uint64, h uint8) uint32 {
  30. const prime8bytes = 0xcf1bbcdcb7a56463
  31. return uint32((u * prime8bytes) >> ((64 - h) & 63))
  32. }
  33. // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
  34. // assumes that the varint-encoded length of the decompressed bytes has already
  35. // been written.
  36. //
  37. // It also assumes that:
  38. // len(dst) >= MaxEncodedLen(len(src)) &&
  39. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  40. func encodeBlockBetterGo(dst, src []byte) (d int) {
  41. // sLimit is when to stop looking for offset/length copies. The inputMargin
  42. // lets us use a fast path for emitLiteral in the main loop, while we are
  43. // looking for copies.
  44. sLimit := len(src) - inputMargin
  45. if len(src) < minNonLiteralBlockSize {
  46. return 0
  47. }
  48. // Initialize the hash tables.
  49. const (
  50. // Long hash matches.
  51. lTableBits = 16
  52. maxLTableSize = 1 << lTableBits
  53. // Short hash matches.
  54. sTableBits = 14
  55. maxSTableSize = 1 << sTableBits
  56. )
  57. var lTable [maxLTableSize]uint32
  58. var sTable [maxSTableSize]uint32
  59. // Bail if we can't compress to at least this.
  60. dstLimit := len(src) - len(src)>>5 - 6
  61. // nextEmit is where in src the next emitLiteral should start from.
  62. nextEmit := 0
  63. // The encoded form must start with a literal, as there are no previous
  64. // bytes to copy, so we start looking for hash matches at s == 1.
  65. s := 1
  66. cv := load64(src, s)
  67. // We initialize repeat to 0, so we never match on first attempt
  68. repeat := 0
  69. for {
  70. candidateL := 0
  71. nextS := 0
  72. for {
  73. // Next src position to check
  74. nextS = s + (s-nextEmit)>>7 + 1
  75. if nextS > sLimit {
  76. goto emitRemainder
  77. }
  78. hashL := hash7(cv, lTableBits)
  79. hashS := hash4(cv, sTableBits)
  80. candidateL = int(lTable[hashL])
  81. candidateS := int(sTable[hashS])
  82. lTable[hashL] = uint32(s)
  83. sTable[hashS] = uint32(s)
  84. // Check repeat at offset checkRep.
  85. const checkRep = 1
  86. if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  87. base := s + checkRep
  88. // Extend back
  89. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  90. i--
  91. base--
  92. }
  93. d += emitLiteral(dst[d:], src[nextEmit:base])
  94. // Extend forward
  95. candidate := s - repeat + 4 + checkRep
  96. s += 4 + checkRep
  97. for s < len(src) {
  98. if len(src)-s < 8 {
  99. if src[s] == src[candidate] {
  100. s++
  101. candidate++
  102. continue
  103. }
  104. break
  105. }
  106. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  107. s += bits.TrailingZeros64(diff) >> 3
  108. break
  109. }
  110. s += 8
  111. candidate += 8
  112. }
  113. if nextEmit > 0 {
  114. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  115. d += emitRepeat(dst[d:], repeat, s-base)
  116. } else {
  117. // First match, cannot be repeat.
  118. d += emitCopy(dst[d:], repeat, s-base)
  119. }
  120. nextEmit = s
  121. if s >= sLimit {
  122. goto emitRemainder
  123. }
  124. cv = load64(src, s)
  125. continue
  126. }
  127. if uint32(cv) == load32(src, candidateL) {
  128. break
  129. }
  130. // Check our short candidate
  131. if uint32(cv) == load32(src, candidateS) {
  132. // Try a long candidate at s+1
  133. hashL = hash7(cv>>8, lTableBits)
  134. candidateL = int(lTable[hashL])
  135. lTable[hashL] = uint32(s + 1)
  136. if uint32(cv>>8) == load32(src, candidateL) {
  137. s++
  138. break
  139. }
  140. // Use our short candidate.
  141. candidateL = candidateS
  142. break
  143. }
  144. cv = load64(src, nextS)
  145. s = nextS
  146. }
  147. // Extend backwards
  148. for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
  149. candidateL--
  150. s--
  151. }
  152. // Bail if we exceed the maximum size.
  153. if d+(s-nextEmit) > dstLimit {
  154. return 0
  155. }
  156. base := s
  157. offset := base - candidateL
  158. // Extend the 4-byte match as long as possible.
  159. s += 4
  160. candidateL += 4
  161. for s < len(src) {
  162. if len(src)-s < 8 {
  163. if src[s] == src[candidateL] {
  164. s++
  165. candidateL++
  166. continue
  167. }
  168. break
  169. }
  170. if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
  171. s += bits.TrailingZeros64(diff) >> 3
  172. break
  173. }
  174. s += 8
  175. candidateL += 8
  176. }
  177. if offset > 65535 && s-base <= 5 && repeat != offset {
  178. // Bail if the match is equal or worse to the encoding.
  179. s = nextS + 1
  180. if s >= sLimit {
  181. goto emitRemainder
  182. }
  183. cv = load64(src, s)
  184. continue
  185. }
  186. d += emitLiteral(dst[d:], src[nextEmit:base])
  187. if repeat == offset {
  188. d += emitRepeat(dst[d:], offset, s-base)
  189. } else {
  190. d += emitCopy(dst[d:], offset, s-base)
  191. repeat = offset
  192. }
  193. nextEmit = s
  194. if s >= sLimit {
  195. goto emitRemainder
  196. }
  197. if d > dstLimit {
  198. // Do we have space for more, if not bail.
  199. return 0
  200. }
  201. // Index match start+1 (long) and start+2 (short)
  202. index0 := base + 1
  203. // Index match end-2 (long) and end-1 (short)
  204. index1 := s - 2
  205. cv0 := load64(src, index0)
  206. cv1 := load64(src, index1)
  207. cv = load64(src, s)
  208. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  209. lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
  210. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  211. lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
  212. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  213. sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
  214. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  215. }
  216. emitRemainder:
  217. if nextEmit < len(src) {
  218. // Bail if we exceed the maximum size.
  219. if d+len(src)-nextEmit > dstLimit {
  220. return 0
  221. }
  222. d += emitLiteral(dst[d:], src[nextEmit:])
  223. }
  224. return d
  225. }
  226. // encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
  227. // assumes that the varint-encoded length of the decompressed bytes has already
  228. // been written.
  229. //
  230. // It also assumes that:
  231. // len(dst) >= MaxEncodedLen(len(src)) &&
  232. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  233. func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
  234. // sLimit is when to stop looking for offset/length copies. The inputMargin
  235. // lets us use a fast path for emitLiteral in the main loop, while we are
  236. // looking for copies.
  237. sLimit := len(src) - inputMargin
  238. if len(src) < minNonLiteralBlockSize {
  239. return 0
  240. }
  241. // Initialize the hash tables.
  242. const (
  243. // Long hash matches.
  244. lTableBits = 16
  245. maxLTableSize = 1 << lTableBits
  246. // Short hash matches.
  247. sTableBits = 14
  248. maxSTableSize = 1 << sTableBits
  249. )
  250. var lTable [maxLTableSize]uint32
  251. var sTable [maxSTableSize]uint32
  252. // Bail if we can't compress to at least this.
  253. dstLimit := len(src) - len(src)>>5 - 6
  254. // nextEmit is where in src the next emitLiteral should start from.
  255. nextEmit := 0
  256. // The encoded form must start with a literal, as there are no previous
  257. // bytes to copy, so we start looking for hash matches at s == 1.
  258. s := 1
  259. cv := load64(src, s)
  260. // We initialize repeat to 0, so we never match on first attempt
  261. repeat := 0
  262. const maxSkip = 100
  263. for {
  264. candidateL := 0
  265. nextS := 0
  266. for {
  267. // Next src position to check
  268. nextS = (s-nextEmit)>>7 + 1
  269. if nextS > maxSkip {
  270. nextS = s + maxSkip
  271. } else {
  272. nextS += s
  273. }
  274. if nextS > sLimit {
  275. goto emitRemainder
  276. }
  277. hashL := hash7(cv, lTableBits)
  278. hashS := hash4(cv, sTableBits)
  279. candidateL = int(lTable[hashL])
  280. candidateS := int(sTable[hashS])
  281. lTable[hashL] = uint32(s)
  282. sTable[hashS] = uint32(s)
  283. if uint32(cv) == load32(src, candidateL) {
  284. break
  285. }
  286. // Check our short candidate
  287. if uint32(cv) == load32(src, candidateS) {
  288. // Try a long candidate at s+1
  289. hashL = hash7(cv>>8, lTableBits)
  290. candidateL = int(lTable[hashL])
  291. lTable[hashL] = uint32(s + 1)
  292. if uint32(cv>>8) == load32(src, candidateL) {
  293. s++
  294. break
  295. }
  296. // Use our short candidate.
  297. candidateL = candidateS
  298. break
  299. }
  300. cv = load64(src, nextS)
  301. s = nextS
  302. }
  303. // Extend backwards
  304. for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
  305. candidateL--
  306. s--
  307. }
  308. // Bail if we exceed the maximum size.
  309. if d+(s-nextEmit) > dstLimit {
  310. return 0
  311. }
  312. base := s
  313. offset := base - candidateL
  314. // Extend the 4-byte match as long as possible.
  315. s += 4
  316. candidateL += 4
  317. for s < len(src) {
  318. if len(src)-s < 8 {
  319. if src[s] == src[candidateL] {
  320. s++
  321. candidateL++
  322. continue
  323. }
  324. break
  325. }
  326. if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
  327. s += bits.TrailingZeros64(diff) >> 3
  328. break
  329. }
  330. s += 8
  331. candidateL += 8
  332. }
  333. if offset > 65535 && s-base <= 5 && repeat != offset {
  334. // Bail if the match is equal or worse to the encoding.
  335. s = nextS + 1
  336. if s >= sLimit {
  337. goto emitRemainder
  338. }
  339. cv = load64(src, s)
  340. continue
  341. }
  342. d += emitLiteral(dst[d:], src[nextEmit:base])
  343. d += emitCopyNoRepeat(dst[d:], offset, s-base)
  344. repeat = offset
  345. nextEmit = s
  346. if s >= sLimit {
  347. goto emitRemainder
  348. }
  349. if d > dstLimit {
  350. // Do we have space for more, if not bail.
  351. return 0
  352. }
  353. // Index match start+1 (long) and start+2 (short)
  354. index0 := base + 1
  355. // Index match end-2 (long) and end-1 (short)
  356. index1 := s - 2
  357. cv0 := load64(src, index0)
  358. cv1 := load64(src, index1)
  359. cv = load64(src, s)
  360. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  361. lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
  362. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  363. lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
  364. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  365. sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
  366. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  367. }
  368. emitRemainder:
  369. if nextEmit < len(src) {
  370. // Bail if we exceed the maximum size.
  371. if d+len(src)-nextEmit > dstLimit {
  372. return 0
  373. }
  374. d += emitLiteral(dst[d:], src[nextEmit:])
  375. }
  376. return d
  377. }