// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"fmt"
	"math/bits"
)

// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64
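	// Combined, the two tables above hold roughly 4.5 MiB of state per call:
	// 1<<19 and 1<<16 entries of 8 bytes each.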

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	const lowbitMask = 0xffffffff
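	// Each table entry packs two candidate positions into one uint64: the most
	// recently stored position in the low 32 bits and the one before it in the
	// high 32 bits (updates below shift the old value up by 32).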
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset int
			s      int
			length int
			score  int
			rep    bool
		}
		var best match
		for {
			// Next src position to check
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			score := func(m match) int {
				// Matches that are longer forward are penalized since we must emit it as a literal.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset
				if m.rep {
					return score - emitRepeatSize(offset, m.length)
				}
				return score - emitCopySize(offset, m.length)
			}
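			// Illustrative example (not from the original source): a 20-byte
			// match starting at m.s == nextEmit == 100 whose offset needs a
			// 3-byte copy scores 20 - 100 + 1 - 3 = -82, which is above the
			// -m.s == -100 cutoff applied in matchAt below, so it is kept.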
			matchAt := func(offset, s int, first uint32, rep bool) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
				s += 4
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}
			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}
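			// Note: comparing a.score+b.s with b.score+a.s is equivalent to
			// comparing a.score-a.s with b.score-b.s, so a match that starts
			// further forward must also score better to win the comparison.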
			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))

			{
				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
				if best.length > 0 {
					// s+1
					nextShort := sTable[hash4(cv>>8, sTableBits)]
					s := s + 1
					cv := load64(src, s)
					nextLong := lTable[hash8(cv, lTableBits)]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
					// Repeat at + 2
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
					// s+2
					if true {
						nextShort = sTable[hash4(cv>>8, sTableBits)]
						s++
						cv = load64(src, s)
						nextLong = lTable[hash8(cv, lTableBits)]
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
					}
					// Search for a match at best match end, see if that is better.
					if sAt := best.s + best.length; sAt < sLimit {
						sBack := best.s
						backL := best.length
						// Load initial values
						cv = load64(src, sBack)
						// Search for mismatch
						next := lTable[hash8(load64(src, sAt), lTableBits)]
						//next := sTable[hash4(load64(src, sAt), sTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
					}
				}
			}

			// Update table
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if !best.rep {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset

		s += best.length

		if offset > 65535 && s-base <= 5 && !best.rep {
			// Bail if the match is equal or worse to the encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if best.rep {
			if nextEmit > 0 {
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], offset, best.length)
			} else {
				// First match, cannot be repeat.
				d += emitCopy(dst[d:], offset, best.length)
			}
		} else {
			d += emitCopy(dst[d:], offset, best.length)
		}
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Fill tables...
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
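
// Hedged usage sketch (not part of the original file): encodeBlockBest is an
// internal building block. A caller following the contract above would reserve
// MaxEncodedLen(len(src)) bytes, write the uvarint length of src first, and
// treat a 0 return as "emit the block uncompressed instead". The helper name
// encodeBestExample below is hypothetical.
//
//	func encodeBestExample(src []byte) []byte {
//		dst := make([]byte, MaxEncodedLen(len(src)))
//		n := binary.PutUvarint(dst, uint64(len(src)))
//		if d := encodeBlockBest(dst[n:], src); d > 0 {
//			return dst[:n+d]
//		}
//		return nil // caller falls back to an uncompressed encoding
//	}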

// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset int
			s      int
			length int
			score  int
		}
		var best match
		for {
			// Next src position to check
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			score := func(m match) int {
				// Matches that are longer forward are penalized since we must emit it as a literal.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset

				return score - emitCopyNoRepeatSize(offset, m.length)
			}
			matchAt := func(offset, s int, first uint32) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				m := match{offset: offset, s: s, length: 4 + offset}
				s += 4
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}
			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))

			{
				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
				if best.length > 0 {
					// s+1
					nextShort := sTable[hash4(cv>>8, sTableBits)]
					s := s + 1
					cv := load64(src, s)
					nextLong := lTable[hash8(cv, lTableBits)]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					// Repeat at + 2
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
					// s+2
					if true {
						nextShort = sTable[hash4(cv>>8, sTableBits)]
						s++
						cv = load64(src, s)
						nextLong = lTable[hash8(cv, lTableBits)]
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					}
					// Search for a match at best match end, see if that is better.
					if sAt := best.s + best.length; sAt < sLimit {
						sBack := best.s
						backL := best.length
						// Load initial values
						cv = load64(src, sBack)
						// Search for mismatch
						next := lTable[hash8(load64(src, sAt), lTableBits)]
						//next := sTable[hash4(load64(src, sAt), sTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
					}
				}
			}

			// Update table
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if true {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset

		s += best.length

		if offset > 65535 && s-base <= 5 {
			// Bail if the match is equal or worse to the encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, best.length)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Fill tables...
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}

// emitCopySize returns the size to encode the offset+length
//
// It assumes that:
//	1 <= offset && offset <= math.MaxUint32
//	4 <= length && length <= 1 << 24
func emitCopySize(offset, length int) int {
	if offset >= 65536 {
		i := 0
		if length > 64 {
			length -= 64
			if length >= 4 {
				// Emit remaining as repeats
				return 5 + emitRepeatSize(offset, length)
			}
			i = 5
		}
		if length == 0 {
			return i
		}
		return i + 5
	}

	// Offset no more than 2 bytes.
	if length > 64 {
		if offset < 2048 {
			// Emit 8 bytes, then rest as repeats...
			return 2 + emitRepeatSize(offset, length-8)
		}
		// Emit remaining as repeats, at least 4 bytes remain.
		return 3 + emitRepeatSize(offset, length-60)
	}
	if length >= 12 || offset >= 2048 {
		return 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
	return 2
}
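
// For example (illustrative, not from the original source): offset 1000 with
// length 20 takes the length >= 12 branch above and costs 3 bytes, while
// offset 100 with length 10 fits the short form and costs only 2 bytes.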

// emitCopyNoRepeatSize returns the size to encode the offset+length
//
// It assumes that:
//	1 <= offset && offset <= math.MaxUint32
//	4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int {
	if offset >= 65536 {
		return 5 + 5*(length/64)
	}

	// Offset no more than 2 bytes.
	if length > 64 {
		// Emit remaining as repeats, at least 4 bytes remain.
		return 3 + 3*(length/60)
	}
	if length >= 12 || offset >= 2048 {
		return 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
	return 2
}

// emitRepeatSize returns the number of bytes required to encode a repeat.
// Length must be at least 4 and < 1<<24
func emitRepeatSize(offset, length int) int {
	// Repeat offset, make length cheaper
	if length <= 4+4 || (length < 8+4 && offset < 2048) {
		return 2
	}
	if length < (1<<8)+4+4 {
		return 3
	}
	if length < (1<<16)+(1<<8)+4 {
		return 4
	}
	const maxRepeat = (1 << 24) - 1
	length -= (1 << 16) - 4
	left := 0
	if length > maxRepeat {
		left = length - maxRepeat + 4
		length = maxRepeat - 4
	}
	if left > 0 {
		return 5 + emitRepeatSize(offset, left)
	}
	return 5
}
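
// Sanity-check sketch (not part of the original file): the *Size helpers above
// are intended to predict exactly how many bytes the corresponding emitters in
// this package (emitCopy, emitCopyNoRepeat, emitRepeat) write. A table-driven
// check along these lines could verify that, e.g. for emitRepeat:
//
//	func repeatSizeMatches(offset, length int) bool {
//		dst := make([]byte, 32)
//		return emitRepeat(dst, offset, length) == emitRepeatSize(offset, length)
//	}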