Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

492 рядки
13 KiB

  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "errors"
  7. "fmt"
  8. "io"
  9. )
  10. type seq struct {
  11. litLen uint32
  12. matchLen uint32
  13. offset uint32
  14. // Codes are stored here for the encoder
  15. // so they only have to be looked up once.
  16. llCode, mlCode, ofCode uint8
  17. }
  18. type seqVals struct {
  19. ll, ml, mo int
  20. }
  21. func (s seq) String() string {
  22. if s.offset <= 3 {
  23. if s.offset == 0 {
  24. return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset: INVALID (0)")
  25. }
  26. return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset, " (repeat)")
  27. }
  28. return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)")
  29. }
  30. type seqCompMode uint8
  31. const (
  32. compModePredefined seqCompMode = iota
  33. compModeRLE
  34. compModeFSE
  35. compModeRepeat
  36. )
// sequenceDec is the decoder for a single sequence stream together with
// its current decoding state.
type sequenceDec struct {
	// decoder keeps track of the current state and updates it from the bitstream.
	// init reports an error while fse is nil.
	fse *fseDecoder
	// state is (re)initialised by init from the bit reader and the table in fse.
	state fseState
	// NOTE(review): repeat is not read anywhere in this part of the file;
	// presumably it marks that the table is reused from a previous block - confirm.
	repeat bool
}
  43. // init the state of the decoder with input from stream.
  44. func (s *sequenceDec) init(br *bitReader) error {
  45. if s.fse == nil {
  46. return errors.New("sequence decoder not defined")
  47. }
  48. s.state.init(br, s.fse.actualTableLog, s.fse.dt[:1<<s.fse.actualTableLog])
  49. return nil
  50. }
  51. // sequenceDecs contains all 3 sequence decoders and their state.
  52. type sequenceDecs struct {
  53. litLengths sequenceDec
  54. offsets sequenceDec
  55. matchLengths sequenceDec
  56. prevOffset [3]int
  57. dict []byte
  58. literals []byte
  59. out []byte
  60. nSeqs int
  61. br *bitReader
  62. seqSize int
  63. windowSize int
  64. maxBits uint8
  65. maxSyncLen uint64
  66. }
  67. // initialize all 3 decoders from the stream input.
  68. func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error {
  69. if err := s.litLengths.init(br); err != nil {
  70. return errors.New("litLengths:" + err.Error())
  71. }
  72. if err := s.offsets.init(br); err != nil {
  73. return errors.New("offsets:" + err.Error())
  74. }
  75. if err := s.matchLengths.init(br); err != nil {
  76. return errors.New("matchLengths:" + err.Error())
  77. }
  78. s.br = br
  79. s.prevOffset = hist.recentOffsets
  80. s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
  81. s.windowSize = hist.windowSize
  82. s.out = out
  83. s.dict = nil
  84. if hist.dict != nil {
  85. s.dict = hist.dict.content
  86. }
  87. return nil
  88. }
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
//
// Without a dictionary the work is handed to executeSimple. Otherwise each
// sequence is applied in order: its literals are copied from s.literals,
// then the match is copied from the dictionary, from hist and/or from the
// output written so far, depending on how far back the offset reaches.
// After the last sequence the remaining literals are copied.
// s.out is grown by s.seqSize bytes on success.
//
// An error is returned when a match offset points before the start of the
// dictionary.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
	if len(s.dict) == 0 {
		return s.executeSimple(seqs, hist)
	}

	// Ensure we have enough output size...
	// The appended bytes are cut off again right away; only the capacity is kept.
	if len(s.out)+s.seqSize > cap(s.out) {
		addBytes := s.seqSize + len(s.out)
		s.out = append(s.out, make([]byte, addBytes)...)
		s.out = s.out[:len(s.out)-addBytes]
	}

	if debugDecoder {
		printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize)
	}

	// t is the write position in out. out already has its final length,
	// so all writes below use copy instead of append.
	var t = len(s.out)
	out := s.out[:t+s.seqSize]

	// seq is a copy of the slice element, so reducing seq.ml below
	// does not modify seqs.
	for _, seq := range seqs {
		// Add literals
		copy(out[t:], s.literals[:seq.ll])
		t += seq.ll
		s.literals = s.literals[seq.ll:]

		// Copy from dictionary...
		if seq.mo > t+len(hist) || seq.mo > s.windowSize {
			// Cannot be true here, since an empty dictionary returned above.
			if len(s.dict) == 0 {
				return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
			}

			// we may be in dictionary.
			// dictO is the index in the dictionary where the match starts.
			dictO := len(s.dict) - (seq.mo - (t + len(hist)))
			if dictO < 0 || dictO >= len(s.dict) {
				return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict))
			}
			end := dictO + seq.ml
			if end > len(s.dict) {
				// Match runs past the end of the dictionary:
				// copy the tail and leave the rest for the steps below.
				n := len(s.dict) - dictO
				copy(out[t:], s.dict[dictO:])
				t += n
				seq.ml -= n
			} else {
				// Match is fully inside the dictionary.
				copy(out[t:], s.dict[dictO:end])
				t += end - dictO
				continue
			}
		}

		// Copy from history.
		if v := seq.mo - t; v > 0 {
			// v is the start position in history from end.
			start := len(hist) - v
			if seq.ml > v {
				// Some goes into current block.
				// Copy remainder of history
				copy(out[t:], hist[start:])
				t += v
				seq.ml -= v
			} else {
				copy(out[t:], hist[start:start+seq.ml])
				t += seq.ml
				continue
			}
		}
		// We must be in current buffer now
		if seq.ml > 0 {
			start := t - seq.mo
			// t-start equals seq.mo, so this tests whether source and
			// destination ranges are disjoint.
			if seq.ml <= t-start {
				// No overlap
				copy(out[t:], out[start:start+seq.ml])
				t += seq.ml
				continue
			} else {
				// Overlapping copy
				// Extend destination slice and copy one byte at the time.
				// The forward byte-wise loop lets bytes written earlier in
				// the loop be read again as source, repeating the pattern.
				src := out[start : start+seq.ml]
				dst := out[t:]
				dst = dst[:len(src)]
				t += len(src)
				// Destination is the space we just added.
				for i := range src {
					dst[i] = src[i]
				}
			}
		}
	}

	// Add final literals
	copy(out[t:], s.literals)
	if debugDecoder {
		// Sanity check: every byte of out must have been written.
		t += len(s.literals)
		if t != len(out) {
			panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
		}
	}
	s.out = out

	return nil
}
  182. // decode sequences from the stream with the provided history.
  183. func (s *sequenceDecs) decodeSync(hist []byte) error {
  184. supported, err := s.decodeSyncSimple(hist)
  185. if supported {
  186. return err
  187. }
  188. br := s.br
  189. seqs := s.nSeqs
  190. startSize := len(s.out)
  191. // Grab full sizes tables, to avoid bounds checks.
  192. llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
  193. llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
  194. out := s.out
  195. maxBlockSize := maxCompressedBlockSize
  196. if s.windowSize < maxBlockSize {
  197. maxBlockSize = s.windowSize
  198. }
  199. for i := seqs - 1; i >= 0; i-- {
  200. if br.overread() {
  201. printf("reading sequence %d, exceeded available data\n", seqs-i)
  202. return io.ErrUnexpectedEOF
  203. }
  204. var ll, mo, ml int
  205. if br.off > 4+((maxOffsetBits+16+16)>>3) {
  206. // inlined function:
  207. // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
  208. // Final will not read from stream.
  209. var llB, mlB, moB uint8
  210. ll, llB = llState.final()
  211. ml, mlB = mlState.final()
  212. mo, moB = ofState.final()
  213. // extra bits are stored in reverse order.
  214. br.fillFast()
  215. mo += br.getBits(moB)
  216. if s.maxBits > 32 {
  217. br.fillFast()
  218. }
  219. ml += br.getBits(mlB)
  220. ll += br.getBits(llB)
  221. if moB > 1 {
  222. s.prevOffset[2] = s.prevOffset[1]
  223. s.prevOffset[1] = s.prevOffset[0]
  224. s.prevOffset[0] = mo
  225. } else {
  226. // mo = s.adjustOffset(mo, ll, moB)
  227. // Inlined for rather big speedup
  228. if ll == 0 {
  229. // There is an exception though, when current sequence's literals_length = 0.
  230. // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
  231. // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
  232. mo++
  233. }
  234. if mo == 0 {
  235. mo = s.prevOffset[0]
  236. } else {
  237. var temp int
  238. if mo == 3 {
  239. temp = s.prevOffset[0] - 1
  240. } else {
  241. temp = s.prevOffset[mo]
  242. }
  243. if temp == 0 {
  244. // 0 is not valid; input is corrupted; force offset to 1
  245. println("WARNING: temp was 0")
  246. temp = 1
  247. }
  248. if mo != 1 {
  249. s.prevOffset[2] = s.prevOffset[1]
  250. }
  251. s.prevOffset[1] = s.prevOffset[0]
  252. s.prevOffset[0] = temp
  253. mo = temp
  254. }
  255. }
  256. br.fillFast()
  257. } else {
  258. ll, mo, ml = s.next(br, llState, mlState, ofState)
  259. br.fill()
  260. }
  261. if debugSequences {
  262. println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
  263. }
  264. if ll > len(s.literals) {
  265. return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
  266. }
  267. size := ll + ml + len(out)
  268. if size-startSize > maxBlockSize {
  269. return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
  270. }
  271. if size > cap(out) {
  272. // Not enough size, which can happen under high volume block streaming conditions
  273. // but could be if destination slice is too small for sync operations.
  274. // over-allocating here can create a large amount of GC pressure so we try to keep
  275. // it as contained as possible
  276. used := len(out) - startSize
  277. addBytes := 256 + ll + ml + used>>2
  278. // Clamp to max block size.
  279. if used+addBytes > maxBlockSize {
  280. addBytes = maxBlockSize - used
  281. }
  282. out = append(out, make([]byte, addBytes)...)
  283. out = out[:len(out)-addBytes]
  284. }
  285. if ml > maxMatchLen {
  286. return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
  287. }
  288. // Add literals
  289. out = append(out, s.literals[:ll]...)
  290. s.literals = s.literals[ll:]
  291. if mo == 0 && ml > 0 {
  292. return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
  293. }
  294. if mo > len(out)+len(hist) || mo > s.windowSize {
  295. if len(s.dict) == 0 {
  296. return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
  297. }
  298. // we may be in dictionary.
  299. dictO := len(s.dict) - (mo - (len(out) + len(hist)))
  300. if dictO < 0 || dictO >= len(s.dict) {
  301. return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
  302. }
  303. end := dictO + ml
  304. if end > len(s.dict) {
  305. out = append(out, s.dict[dictO:]...)
  306. ml -= len(s.dict) - dictO
  307. } else {
  308. out = append(out, s.dict[dictO:end]...)
  309. mo = 0
  310. ml = 0
  311. }
  312. }
  313. // Copy from history.
  314. // TODO: Blocks without history could be made to ignore this completely.
  315. if v := mo - len(out); v > 0 {
  316. // v is the start position in history from end.
  317. start := len(hist) - v
  318. if ml > v {
  319. // Some goes into current block.
  320. // Copy remainder of history
  321. out = append(out, hist[start:]...)
  322. ml -= v
  323. } else {
  324. out = append(out, hist[start:start+ml]...)
  325. ml = 0
  326. }
  327. }
  328. // We must be in current buffer now
  329. if ml > 0 {
  330. start := len(out) - mo
  331. if ml <= len(out)-start {
  332. // No overlap
  333. out = append(out, out[start:start+ml]...)
  334. } else {
  335. // Overlapping copy
  336. // Extend destination slice and copy one byte at the time.
  337. out = out[:len(out)+ml]
  338. src := out[start : start+ml]
  339. // Destination is the space we just added.
  340. dst := out[len(out)-ml:]
  341. dst = dst[:len(src)]
  342. for i := range src {
  343. dst[i] = src[i]
  344. }
  345. }
  346. }
  347. if i == 0 {
  348. // This is the last sequence, so we shouldn't update state.
  349. break
  350. }
  351. // Manually inlined, ~ 5-20% faster
  352. // Update all 3 states at once. Approx 20% faster.
  353. nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
  354. if nBits == 0 {
  355. llState = llTable[llState.newState()&maxTableMask]
  356. mlState = mlTable[mlState.newState()&maxTableMask]
  357. ofState = ofTable[ofState.newState()&maxTableMask]
  358. } else {
  359. bits := br.get32BitsFast(nBits)
  360. lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
  361. llState = llTable[(llState.newState()+lowBits)&maxTableMask]
  362. lowBits = uint16(bits >> (ofState.nbBits() & 31))
  363. lowBits &= bitMask[mlState.nbBits()&15]
  364. mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
  365. lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
  366. ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
  367. }
  368. }
  369. // Check if space for literals
  370. if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
  371. return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
  372. }
  373. // Add final literals
  374. s.out = append(out, s.literals...)
  375. return br.close()
  376. }
  377. var bitMask [16]uint16
  378. func init() {
  379. for i := range bitMask[:] {
  380. bitMask[i] = uint16((1 << uint(i)) - 1)
  381. }
  382. }
  383. func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
  384. // Final will not read from stream.
  385. ll, llB := llState.final()
  386. ml, mlB := mlState.final()
  387. mo, moB := ofState.final()
  388. // extra bits are stored in reverse order.
  389. br.fill()
  390. if s.maxBits <= 32 {
  391. mo += br.getBits(moB)
  392. ml += br.getBits(mlB)
  393. ll += br.getBits(llB)
  394. } else {
  395. mo += br.getBits(moB)
  396. br.fill()
  397. // matchlength+literal length, max 32 bits
  398. ml += br.getBits(mlB)
  399. ll += br.getBits(llB)
  400. }
  401. mo = s.adjustOffset(mo, ll, moB)
  402. return
  403. }
  404. func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
  405. if offsetB > 1 {
  406. s.prevOffset[2] = s.prevOffset[1]
  407. s.prevOffset[1] = s.prevOffset[0]
  408. s.prevOffset[0] = offset
  409. return offset
  410. }
  411. if litLen == 0 {
  412. // There is an exception though, when current sequence's literals_length = 0.
  413. // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
  414. // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
  415. offset++
  416. }
  417. if offset == 0 {
  418. return s.prevOffset[0]
  419. }
  420. var temp int
  421. if offset == 3 {
  422. temp = s.prevOffset[0] - 1
  423. } else {
  424. temp = s.prevOffset[offset]
  425. }
  426. if temp == 0 {
  427. // 0 is not valid; input is corrupted; force offset to 1
  428. println("temp was 0")
  429. temp = 1
  430. }
  431. if offset != 1 {
  432. s.prevOffset[2] = s.prevOffset[1]
  433. }
  434. s.prevOffset[1] = s.prevOffset[0]
  435. s.prevOffset[0] = temp
  436. return temp
  437. }