25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

1342 satır
36 KiB

  1. // Copyright 2011 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "crypto/rand"
  8. "encoding/binary"
  9. "errors"
  10. "fmt"
  11. "io"
  12. "math"
  13. "math/bits"
  14. "runtime"
  15. "sync"
  16. )
  17. // Encode returns the encoded form of src. The returned slice may be a sub-
  18. // slice of dst if dst was large enough to hold the entire encoded block.
  19. // Otherwise, a newly allocated slice will be returned.
  20. //
  21. // The dst and src must not overlap. It is valid to pass a nil dst.
  22. //
  23. // The blocks will require the same amount of memory to decode as encoding,
  24. // and does not make for concurrent decoding.
  25. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  26. //
  27. // If you need to encode larger amounts of data, consider using
  28. // the streaming interface which gives all of these features.
  29. func Encode(dst, src []byte) []byte {
  30. if n := MaxEncodedLen(len(src)); n < 0 {
  31. panic(ErrTooLarge)
  32. } else if cap(dst) < n {
  33. dst = make([]byte, n)
  34. } else {
  35. dst = dst[:n]
  36. }
  37. // The block starts with the varint-encoded length of the decompressed bytes.
  38. d := binary.PutUvarint(dst, uint64(len(src)))
  39. if len(src) == 0 {
  40. return dst[:d]
  41. }
  42. if len(src) < minNonLiteralBlockSize {
  43. d += emitLiteral(dst[d:], src)
  44. return dst[:d]
  45. }
  46. n := encodeBlock(dst[d:], src)
  47. if n > 0 {
  48. d += n
  49. return dst[:d]
  50. }
  51. // Not compressible
  52. d += emitLiteral(dst[d:], src)
  53. return dst[:d]
  54. }
  55. // EncodeBetter returns the encoded form of src. The returned slice may be a sub-
  56. // slice of dst if dst was large enough to hold the entire encoded block.
  57. // Otherwise, a newly allocated slice will be returned.
  58. //
  59. // EncodeBetter compresses better than Encode but typically with a
  60. // 10-40% speed decrease on both compression and decompression.
  61. //
  62. // The dst and src must not overlap. It is valid to pass a nil dst.
  63. //
  64. // The blocks will require the same amount of memory to decode as encoding,
  65. // and does not make for concurrent decoding.
  66. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  67. //
  68. // If you need to encode larger amounts of data, consider using
  69. // the streaming interface which gives all of these features.
  70. func EncodeBetter(dst, src []byte) []byte {
  71. if n := MaxEncodedLen(len(src)); n < 0 {
  72. panic(ErrTooLarge)
  73. } else if len(dst) < n {
  74. dst = make([]byte, n)
  75. }
  76. // The block starts with the varint-encoded length of the decompressed bytes.
  77. d := binary.PutUvarint(dst, uint64(len(src)))
  78. if len(src) == 0 {
  79. return dst[:d]
  80. }
  81. if len(src) < minNonLiteralBlockSize {
  82. d += emitLiteral(dst[d:], src)
  83. return dst[:d]
  84. }
  85. n := encodeBlockBetter(dst[d:], src)
  86. if n > 0 {
  87. d += n
  88. return dst[:d]
  89. }
  90. // Not compressible
  91. d += emitLiteral(dst[d:], src)
  92. return dst[:d]
  93. }
  94. // EncodeBest returns the encoded form of src. The returned slice may be a sub-
  95. // slice of dst if dst was large enough to hold the entire encoded block.
  96. // Otherwise, a newly allocated slice will be returned.
  97. //
  98. // EncodeBest compresses as good as reasonably possible but with a
  99. // big speed decrease.
  100. //
  101. // The dst and src must not overlap. It is valid to pass a nil dst.
  102. //
  103. // The blocks will require the same amount of memory to decode as encoding,
  104. // and does not make for concurrent decoding.
  105. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  106. //
  107. // If you need to encode larger amounts of data, consider using
  108. // the streaming interface which gives all of these features.
  109. func EncodeBest(dst, src []byte) []byte {
  110. if n := MaxEncodedLen(len(src)); n < 0 {
  111. panic(ErrTooLarge)
  112. } else if len(dst) < n {
  113. dst = make([]byte, n)
  114. }
  115. // The block starts with the varint-encoded length of the decompressed bytes.
  116. d := binary.PutUvarint(dst, uint64(len(src)))
  117. if len(src) == 0 {
  118. return dst[:d]
  119. }
  120. if len(src) < minNonLiteralBlockSize {
  121. d += emitLiteral(dst[d:], src)
  122. return dst[:d]
  123. }
  124. n := encodeBlockBest(dst[d:], src)
  125. if n > 0 {
  126. d += n
  127. return dst[:d]
  128. }
  129. // Not compressible
  130. d += emitLiteral(dst[d:], src)
  131. return dst[:d]
  132. }
  133. // EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
  134. // slice of dst if dst was large enough to hold the entire encoded block.
  135. // Otherwise, a newly allocated slice will be returned.
  136. //
  137. // The output is Snappy compatible and will likely decompress faster.
  138. //
  139. // The dst and src must not overlap. It is valid to pass a nil dst.
  140. //
  141. // The blocks will require the same amount of memory to decode as encoding,
  142. // and does not make for concurrent decoding.
  143. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  144. //
  145. // If you need to encode larger amounts of data, consider using
  146. // the streaming interface which gives all of these features.
  147. func EncodeSnappy(dst, src []byte) []byte {
  148. if n := MaxEncodedLen(len(src)); n < 0 {
  149. panic(ErrTooLarge)
  150. } else if cap(dst) < n {
  151. dst = make([]byte, n)
  152. } else {
  153. dst = dst[:n]
  154. }
  155. // The block starts with the varint-encoded length of the decompressed bytes.
  156. d := binary.PutUvarint(dst, uint64(len(src)))
  157. if len(src) == 0 {
  158. return dst[:d]
  159. }
  160. if len(src) < minNonLiteralBlockSize {
  161. d += emitLiteral(dst[d:], src)
  162. return dst[:d]
  163. }
  164. n := encodeBlockSnappy(dst[d:], src)
  165. if n > 0 {
  166. d += n
  167. return dst[:d]
  168. }
  169. // Not compressible
  170. d += emitLiteral(dst[d:], src)
  171. return dst[:d]
  172. }
  173. // EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
  174. // slice of dst if dst was large enough to hold the entire encoded block.
  175. // Otherwise, a newly allocated slice will be returned.
  176. //
  177. // The output is Snappy compatible and will likely decompress faster.
  178. //
  179. // The dst and src must not overlap. It is valid to pass a nil dst.
  180. //
  181. // The blocks will require the same amount of memory to decode as encoding,
  182. // and does not make for concurrent decoding.
  183. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  184. //
  185. // If you need to encode larger amounts of data, consider using
  186. // the streaming interface which gives all of these features.
  187. func EncodeSnappyBetter(dst, src []byte) []byte {
  188. if n := MaxEncodedLen(len(src)); n < 0 {
  189. panic(ErrTooLarge)
  190. } else if cap(dst) < n {
  191. dst = make([]byte, n)
  192. } else {
  193. dst = dst[:n]
  194. }
  195. // The block starts with the varint-encoded length of the decompressed bytes.
  196. d := binary.PutUvarint(dst, uint64(len(src)))
  197. if len(src) == 0 {
  198. return dst[:d]
  199. }
  200. if len(src) < minNonLiteralBlockSize {
  201. d += emitLiteral(dst[d:], src)
  202. return dst[:d]
  203. }
  204. n := encodeBlockBetterSnappy(dst[d:], src)
  205. if n > 0 {
  206. d += n
  207. return dst[:d]
  208. }
  209. // Not compressible
  210. d += emitLiteral(dst[d:], src)
  211. return dst[:d]
  212. }
  213. // EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
  214. // slice of dst if dst was large enough to hold the entire encoded block.
  215. // Otherwise, a newly allocated slice will be returned.
  216. //
  217. // The output is Snappy compatible and will likely decompress faster.
  218. //
  219. // The dst and src must not overlap. It is valid to pass a nil dst.
  220. //
  221. // The blocks will require the same amount of memory to decode as encoding,
  222. // and does not make for concurrent decoding.
  223. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  224. //
  225. // If you need to encode larger amounts of data, consider using
  226. // the streaming interface which gives all of these features.
  227. func EncodeSnappyBest(dst, src []byte) []byte {
  228. if n := MaxEncodedLen(len(src)); n < 0 {
  229. panic(ErrTooLarge)
  230. } else if cap(dst) < n {
  231. dst = make([]byte, n)
  232. } else {
  233. dst = dst[:n]
  234. }
  235. // The block starts with the varint-encoded length of the decompressed bytes.
  236. d := binary.PutUvarint(dst, uint64(len(src)))
  237. if len(src) == 0 {
  238. return dst[:d]
  239. }
  240. if len(src) < minNonLiteralBlockSize {
  241. d += emitLiteral(dst[d:], src)
  242. return dst[:d]
  243. }
  244. n := encodeBlockBestSnappy(dst[d:], src)
  245. if n > 0 {
  246. d += n
  247. return dst[:d]
  248. }
  249. // Not compressible
  250. d += emitLiteral(dst[d:], src)
  251. return dst[:d]
  252. }
  253. // ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
  254. // If the destination is nil or too small, a new will be allocated.
  255. // The blocks are not validated, so garbage in = garbage out.
  256. // dst may not overlap block data.
  257. // Any data in dst is preserved as is, so it will not be considered a block.
  258. func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
  259. totalSize := uint64(0)
  260. compSize := 0
  261. for _, b := range blocks {
  262. l, hdr, err := decodedLen(b)
  263. if err != nil {
  264. return nil, err
  265. }
  266. totalSize += uint64(l)
  267. compSize += len(b) - hdr
  268. }
  269. if totalSize == 0 {
  270. dst = append(dst, 0)
  271. return dst, nil
  272. }
  273. if totalSize > math.MaxUint32 {
  274. return nil, ErrTooLarge
  275. }
  276. var tmp [binary.MaxVarintLen32]byte
  277. hdrSize := binary.PutUvarint(tmp[:], totalSize)
  278. wantSize := hdrSize + compSize
  279. if cap(dst)-len(dst) < wantSize {
  280. dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
  281. }
  282. dst = append(dst, tmp[:hdrSize]...)
  283. for _, b := range blocks {
  284. _, hdr, err := decodedLen(b)
  285. if err != nil {
  286. return nil, err
  287. }
  288. dst = append(dst, b[hdr:]...)
  289. }
  290. return dst, nil
  291. }
  292. // inputMargin is the minimum number of extra input bytes to keep, inside
  293. // encodeBlock's inner loop. On some architectures, this margin lets us
  294. // implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
  295. // literals can be implemented as a single load to and store from a 16-byte
  296. // register. That literal's actual length can be as short as 1 byte, so this
  297. // can copy up to 15 bytes too much, but that's OK as subsequent iterations of
  298. // the encoding loop will fix up the copy overrun, and this inputMargin ensures
  299. // that we don't overrun the dst and src buffers.
  300. const inputMargin = 8
  301. // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
  302. // will be accepted by the encoder.
  303. const minNonLiteralBlockSize = 32
  304. // MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
  305. // Blocks this big are highly discouraged, though.
  306. const MaxBlockSize = math.MaxUint32 - binary.MaxVarintLen32 - 5
  307. // MaxEncodedLen returns the maximum length of a snappy block, given its
  308. // uncompressed length.
  309. //
  310. // It will return a negative value if srcLen is too large to encode.
  311. // 32 bit platforms will have lower thresholds for rejecting big content.
  312. func MaxEncodedLen(srcLen int) int {
  313. n := uint64(srcLen)
  314. if n > 0xffffffff {
  315. // Also includes negative.
  316. return -1
  317. }
  318. // Size of the varint encoded block size.
  319. n = n + uint64((bits.Len64(n)+7)/7)
  320. // Add maximum size of encoding block as literals.
  321. n += uint64(literalExtraSize(int64(srcLen)))
  322. if n > 0xffffffff {
  323. return -1
  324. }
  325. return int(n)
  326. }
// errClosed is the sentinel error reporting that a Writer has been closed.
// NOTE(review): its use sites are outside this part of the file; presumably
// it is returned by operations attempted after Close — confirm.
var errClosed = errors.New("s2: Writer is closed")
  328. // NewWriter returns a new Writer that compresses to w, using the
  329. // framing format described at
  330. // https://github.com/google/snappy/blob/master/framing_format.txt
  331. //
  332. // Users must call Close to guarantee all data has been forwarded to
  333. // the underlying io.Writer and that resources are released.
  334. // They may also call Flush zero or more times before calling Close.
  335. func NewWriter(w io.Writer, opts ...WriterOption) *Writer {
  336. w2 := Writer{
  337. blockSize: defaultBlockSize,
  338. concurrency: runtime.GOMAXPROCS(0),
  339. randSrc: rand.Reader,
  340. level: levelFast,
  341. }
  342. for _, opt := range opts {
  343. if err := opt(&w2); err != nil {
  344. w2.errState = err
  345. return &w2
  346. }
  347. }
  348. w2.obufLen = obufHeaderLen + MaxEncodedLen(w2.blockSize)
  349. w2.paramsOK = true
  350. w2.ibuf = make([]byte, 0, w2.blockSize)
  351. w2.buffers.New = func() interface{} {
  352. return make([]byte, w2.obufLen)
  353. }
  354. w2.Reset(w)
  355. return &w2
  356. }
// Writer is an io.Writer that can write Snappy-compressed bytes.
//
// A Writer is created with NewWriter and can be re-targeted with Reset.
type Writer struct {
	// errMu guards errState; see the err method.
	errMu sync.Mutex
	// errState is the first error recorded for this Writer. Once set, the
	// public methods return it instead of doing work. Reset clears it.
	errState error

	// ibuf is a buffer for the incoming (uncompressed) bytes.
	// NewWriter allocates it with a capacity of blockSize.
	ibuf []byte

	// blockSize is the maximum number of uncompressed bytes put in one block.
	blockSize int
	// obufLen is obufHeaderLen + MaxEncodedLen(blockSize); pooled buffers are
	// created with this length.
	obufLen int
	// concurrency of 1 makes all writes synchronous; otherwise it is the
	// capacity of the output queue created by Reset.
	concurrency int
	// written counts bytes handed to the underlying writer.
	written       int64
	uncompWritten int64 // Bytes sent to compression

	// output queues one result channel per block, in stream order, for the
	// writer goroutine started by Reset. It is nil when no goroutine runs.
	output chan chan result
	// buffers pools []byte scratch buffers of length obufLen.
	buffers sync.Pool
	// pad is not used in this part of the file.
	// NOTE(review): presumably output padding applied on Close — confirm.
	pad int

	// writer is the destination of the compressed stream.
	writer io.Writer
	// randSrc is set to crypto/rand.Reader by NewWriter; its use is outside
	// this part of the file.
	randSrc io.Reader
	// writerWg lets Reset wait for the writer goroutine to finish.
	writerWg sync.WaitGroup
	// index receives the compressed and uncompressed offset of written blocks.
	index Index

	// wroteStreamHeader is whether we have written the stream header.
	wroteStreamHeader bool
	// paramsOK is set by NewWriter once all options were applied without
	// error; Reset does nothing while it is false.
	paramsOK bool
	// snappy selects the Snappy stream header and the Snappy-compatible
	// block encoders.
	snappy bool
	// flushOnWrite makes Write and EncodeBuffer hand their input directly to
	// write instead of buffering it.
	flushOnWrite bool
	// appendIndex is not used in this part of the file.
	appendIndex bool
	// level is one of the level* constants and selects the block encoder.
	level uint8
}
  383. const (
  384. levelUncompressed = iota + 1
  385. levelFast
  386. levelBetter
  387. levelBest
  388. )
// result is one unit of output handed to the writer goroutine through a
// per-block channel.
type result struct {
	// b holds the bytes to write to the underlying writer. An empty b
	// causes nothing to be written; Flush sends such a result.
	b []byte
	// Uncompressed start offset
	startOffset int64
}
  394. // err returns the previously set error.
  395. // If no error has been set it is set to err if not nil.
  396. func (w *Writer) err(err error) error {
  397. w.errMu.Lock()
  398. errSet := w.errState
  399. if errSet == nil && err != nil {
  400. w.errState = err
  401. errSet = err
  402. }
  403. w.errMu.Unlock()
  404. return errSet
  405. }
  406. // Reset discards the writer's state and switches the Snappy writer to write to w.
  407. // This permits reusing a Writer rather than allocating a new one.
  408. func (w *Writer) Reset(writer io.Writer) {
  409. if !w.paramsOK {
  410. return
  411. }
  412. // Close previous writer, if any.
  413. if w.output != nil {
  414. close(w.output)
  415. w.writerWg.Wait()
  416. w.output = nil
  417. }
  418. w.errState = nil
  419. w.ibuf = w.ibuf[:0]
  420. w.wroteStreamHeader = false
  421. w.written = 0
  422. w.writer = writer
  423. w.uncompWritten = 0
  424. w.index.reset(w.blockSize)
  425. // If we didn't get a writer, stop here.
  426. if writer == nil {
  427. return
  428. }
  429. // If no concurrency requested, don't spin up writer goroutine.
  430. if w.concurrency == 1 {
  431. return
  432. }
  433. toWrite := make(chan chan result, w.concurrency)
  434. w.output = toWrite
  435. w.writerWg.Add(1)
  436. // Start a writer goroutine that will write all output in order.
  437. go func() {
  438. defer w.writerWg.Done()
  439. // Get a queued write.
  440. for write := range toWrite {
  441. // Wait for the data to be available.
  442. input := <-write
  443. in := input.b
  444. if len(in) > 0 {
  445. if w.err(nil) == nil {
  446. // Don't expose data from previous buffers.
  447. toWrite := in[:len(in):len(in)]
  448. // Write to output.
  449. n, err := writer.Write(toWrite)
  450. if err == nil && n != len(toWrite) {
  451. err = io.ErrShortBuffer
  452. }
  453. _ = w.err(err)
  454. w.err(w.index.add(w.written, input.startOffset))
  455. w.written += int64(n)
  456. }
  457. }
  458. if cap(in) >= w.obufLen {
  459. w.buffers.Put(in)
  460. }
  461. // close the incoming write request.
  462. // This can be used for synchronizing flushes.
  463. close(write)
  464. }
  465. }()
  466. }
  467. // Write satisfies the io.Writer interface.
  468. func (w *Writer) Write(p []byte) (nRet int, errRet error) {
  469. if err := w.err(nil); err != nil {
  470. return 0, err
  471. }
  472. if w.flushOnWrite {
  473. return w.write(p)
  474. }
  475. // If we exceed the input buffer size, start writing
  476. for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err(nil) == nil {
  477. var n int
  478. if len(w.ibuf) == 0 {
  479. // Large write, empty buffer.
  480. // Write directly from p to avoid copy.
  481. n, _ = w.write(p)
  482. } else {
  483. n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
  484. w.ibuf = w.ibuf[:len(w.ibuf)+n]
  485. w.write(w.ibuf)
  486. w.ibuf = w.ibuf[:0]
  487. }
  488. nRet += n
  489. p = p[n:]
  490. }
  491. if err := w.err(nil); err != nil {
  492. return nRet, err
  493. }
  494. // p should always be able to fit into w.ibuf now.
  495. n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
  496. w.ibuf = w.ibuf[:len(w.ibuf)+n]
  497. nRet += n
  498. return nRet, nil
  499. }
  500. // ReadFrom implements the io.ReaderFrom interface.
  501. // Using this is typically more efficient since it avoids a memory copy.
  502. // ReadFrom reads data from r until EOF or error.
  503. // The return value n is the number of bytes read.
  504. // Any error except io.EOF encountered during the read is also returned.
  505. func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
  506. if err := w.err(nil); err != nil {
  507. return 0, err
  508. }
  509. if len(w.ibuf) > 0 {
  510. err := w.Flush()
  511. if err != nil {
  512. return 0, err
  513. }
  514. }
  515. if br, ok := r.(byter); ok {
  516. buf := br.Bytes()
  517. if err := w.EncodeBuffer(buf); err != nil {
  518. return 0, err
  519. }
  520. return int64(len(buf)), w.Flush()
  521. }
  522. for {
  523. inbuf := w.buffers.Get().([]byte)[:w.blockSize+obufHeaderLen]
  524. n2, err := io.ReadFull(r, inbuf[obufHeaderLen:])
  525. if err != nil {
  526. if err == io.ErrUnexpectedEOF {
  527. err = io.EOF
  528. }
  529. if err != io.EOF {
  530. return n, w.err(err)
  531. }
  532. }
  533. if n2 == 0 {
  534. break
  535. }
  536. n += int64(n2)
  537. err2 := w.writeFull(inbuf[:n2+obufHeaderLen])
  538. if w.err(err2) != nil {
  539. break
  540. }
  541. if err != nil {
  542. // We got EOF and wrote everything
  543. break
  544. }
  545. }
  546. return n, w.err(nil)
  547. }
  548. // AddSkippableBlock will add a skippable block to the stream.
  549. // The ID must be 0x80-0xfe (inclusive).
  550. // Length of the skippable block must be <= 16777215 bytes.
  551. func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) {
  552. if err := w.err(nil); err != nil {
  553. return err
  554. }
  555. if len(data) == 0 {
  556. return nil
  557. }
  558. if id < 0x80 || id > chunkTypePadding {
  559. return fmt.Errorf("invalid skippable block id %x", id)
  560. }
  561. if len(data) > maxChunkSize {
  562. return fmt.Errorf("skippable block excessed maximum size")
  563. }
  564. var header [4]byte
  565. chunkLen := 4 + len(data)
  566. header[0] = id
  567. header[1] = uint8(chunkLen >> 0)
  568. header[2] = uint8(chunkLen >> 8)
  569. header[3] = uint8(chunkLen >> 16)
  570. if w.concurrency == 1 {
  571. write := func(b []byte) error {
  572. n, err := w.writer.Write(b)
  573. if err = w.err(err); err != nil {
  574. return err
  575. }
  576. if n != len(data) {
  577. return w.err(io.ErrShortWrite)
  578. }
  579. w.written += int64(n)
  580. return w.err(nil)
  581. }
  582. if !w.wroteStreamHeader {
  583. w.wroteStreamHeader = true
  584. if w.snappy {
  585. if err := write([]byte(magicChunkSnappy)); err != nil {
  586. return err
  587. }
  588. } else {
  589. if err := write([]byte(magicChunk)); err != nil {
  590. return err
  591. }
  592. }
  593. }
  594. if err := write(header[:]); err != nil {
  595. return err
  596. }
  597. if err := write(data); err != nil {
  598. return err
  599. }
  600. }
  601. // Create output...
  602. if !w.wroteStreamHeader {
  603. w.wroteStreamHeader = true
  604. hWriter := make(chan result)
  605. w.output <- hWriter
  606. if w.snappy {
  607. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
  608. } else {
  609. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
  610. }
  611. }
  612. // Copy input.
  613. inbuf := w.buffers.Get().([]byte)[:4]
  614. copy(inbuf, header[:])
  615. inbuf = append(inbuf, data...)
  616. output := make(chan result, 1)
  617. // Queue output.
  618. w.output <- output
  619. output <- result{startOffset: w.uncompWritten, b: inbuf}
  620. return nil
  621. }
  622. // EncodeBuffer will add a buffer to the stream.
  623. // This is the fastest way to encode a stream,
  624. // but the input buffer cannot be written to by the caller
  625. // until Flush or Close has been called when concurrency != 1.
  626. //
  627. // If you cannot control that, use the regular Write function.
  628. //
  629. // Note that input is not buffered.
  630. // This means that each write will result in discrete blocks being created.
  631. // For buffered writes, use the regular Write function.
  632. func (w *Writer) EncodeBuffer(buf []byte) (err error) {
  633. if err := w.err(nil); err != nil {
  634. return err
  635. }
  636. if w.flushOnWrite {
  637. _, err := w.write(buf)
  638. return err
  639. }
  640. // Flush queued data first.
  641. if len(w.ibuf) > 0 {
  642. err := w.Flush()
  643. if err != nil {
  644. return err
  645. }
  646. }
  647. if w.concurrency == 1 {
  648. _, err := w.writeSync(buf)
  649. return err
  650. }
  651. // Spawn goroutine and write block to output channel.
  652. if !w.wroteStreamHeader {
  653. w.wroteStreamHeader = true
  654. hWriter := make(chan result)
  655. w.output <- hWriter
  656. if w.snappy {
  657. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
  658. } else {
  659. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
  660. }
  661. }
  662. for len(buf) > 0 {
  663. // Cut input.
  664. uncompressed := buf
  665. if len(uncompressed) > w.blockSize {
  666. uncompressed = uncompressed[:w.blockSize]
  667. }
  668. buf = buf[len(uncompressed):]
  669. // Get an output buffer.
  670. obuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen]
  671. output := make(chan result)
  672. // Queue output now, so we keep order.
  673. w.output <- output
  674. res := result{
  675. startOffset: w.uncompWritten,
  676. }
  677. w.uncompWritten += int64(len(uncompressed))
  678. go func() {
  679. checksum := crc(uncompressed)
  680. // Set to uncompressed.
  681. chunkType := uint8(chunkTypeUncompressedData)
  682. chunkLen := 4 + len(uncompressed)
  683. // Attempt compressing.
  684. n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
  685. n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
  686. // Check if we should use this, or store as uncompressed instead.
  687. if n2 > 0 {
  688. chunkType = uint8(chunkTypeCompressedData)
  689. chunkLen = 4 + n + n2
  690. obuf = obuf[:obufHeaderLen+n+n2]
  691. } else {
  692. // copy uncompressed
  693. copy(obuf[obufHeaderLen:], uncompressed)
  694. }
  695. // Fill in the per-chunk header that comes before the body.
  696. obuf[0] = chunkType
  697. obuf[1] = uint8(chunkLen >> 0)
  698. obuf[2] = uint8(chunkLen >> 8)
  699. obuf[3] = uint8(chunkLen >> 16)
  700. obuf[4] = uint8(checksum >> 0)
  701. obuf[5] = uint8(checksum >> 8)
  702. obuf[6] = uint8(checksum >> 16)
  703. obuf[7] = uint8(checksum >> 24)
  704. // Queue final output.
  705. res.b = obuf
  706. output <- res
  707. }()
  708. }
  709. return nil
  710. }
  711. func (w *Writer) encodeBlock(obuf, uncompressed []byte) int {
  712. if w.snappy {
  713. switch w.level {
  714. case levelFast:
  715. return encodeBlockSnappy(obuf, uncompressed)
  716. case levelBetter:
  717. return encodeBlockBetterSnappy(obuf, uncompressed)
  718. case levelBest:
  719. return encodeBlockBestSnappy(obuf, uncompressed)
  720. }
  721. return 0
  722. }
  723. switch w.level {
  724. case levelFast:
  725. return encodeBlock(obuf, uncompressed)
  726. case levelBetter:
  727. return encodeBlockBetter(obuf, uncompressed)
  728. case levelBest:
  729. return encodeBlockBest(obuf, uncompressed)
  730. }
  731. return 0
  732. }
  733. func (w *Writer) write(p []byte) (nRet int, errRet error) {
  734. if err := w.err(nil); err != nil {
  735. return 0, err
  736. }
  737. if w.concurrency == 1 {
  738. return w.writeSync(p)
  739. }
  740. // Spawn goroutine and write block to output channel.
  741. for len(p) > 0 {
  742. if !w.wroteStreamHeader {
  743. w.wroteStreamHeader = true
  744. hWriter := make(chan result)
  745. w.output <- hWriter
  746. if w.snappy {
  747. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
  748. } else {
  749. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
  750. }
  751. }
  752. var uncompressed []byte
  753. if len(p) > w.blockSize {
  754. uncompressed, p = p[:w.blockSize], p[w.blockSize:]
  755. } else {
  756. uncompressed, p = p, nil
  757. }
  758. // Copy input.
  759. // If the block is incompressible, this is used for the result.
  760. inbuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen]
  761. obuf := w.buffers.Get().([]byte)[:w.obufLen]
  762. copy(inbuf[obufHeaderLen:], uncompressed)
  763. uncompressed = inbuf[obufHeaderLen:]
  764. output := make(chan result)
  765. // Queue output now, so we keep order.
  766. w.output <- output
  767. res := result{
  768. startOffset: w.uncompWritten,
  769. }
  770. w.uncompWritten += int64(len(uncompressed))
  771. go func() {
  772. checksum := crc(uncompressed)
  773. // Set to uncompressed.
  774. chunkType := uint8(chunkTypeUncompressedData)
  775. chunkLen := 4 + len(uncompressed)
  776. // Attempt compressing.
  777. n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
  778. n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
  779. // Check if we should use this, or store as uncompressed instead.
  780. if n2 > 0 {
  781. chunkType = uint8(chunkTypeCompressedData)
  782. chunkLen = 4 + n + n2
  783. obuf = obuf[:obufHeaderLen+n+n2]
  784. } else {
  785. // Use input as output.
  786. obuf, inbuf = inbuf, obuf
  787. }
  788. // Fill in the per-chunk header that comes before the body.
  789. obuf[0] = chunkType
  790. obuf[1] = uint8(chunkLen >> 0)
  791. obuf[2] = uint8(chunkLen >> 8)
  792. obuf[3] = uint8(chunkLen >> 16)
  793. obuf[4] = uint8(checksum >> 0)
  794. obuf[5] = uint8(checksum >> 8)
  795. obuf[6] = uint8(checksum >> 16)
  796. obuf[7] = uint8(checksum >> 24)
  797. // Queue final output.
  798. res.b = obuf
  799. output <- res
  800. // Put unused buffer back in pool.
  801. w.buffers.Put(inbuf)
  802. }()
  803. nRet += len(uncompressed)
  804. }
  805. return nRet, nil
  806. }
  807. // writeFull is a special version of write that will always write the full buffer.
  808. // Data to be compressed should start at offset obufHeaderLen and fill the remainder of the buffer.
  809. // The data will be written as a single block.
  810. // The caller is not allowed to use inbuf after this function has been called.
  811. func (w *Writer) writeFull(inbuf []byte) (errRet error) {
  812. if err := w.err(nil); err != nil {
  813. return err
  814. }
  815. if w.concurrency == 1 {
  816. _, err := w.writeSync(inbuf[obufHeaderLen:])
  817. return err
  818. }
  819. // Spawn goroutine and write block to output channel.
  820. if !w.wroteStreamHeader {
  821. w.wroteStreamHeader = true
  822. hWriter := make(chan result)
  823. w.output <- hWriter
  824. if w.snappy {
  825. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
  826. } else {
  827. hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
  828. }
  829. }
  830. // Get an output buffer.
  831. obuf := w.buffers.Get().([]byte)[:w.obufLen]
  832. uncompressed := inbuf[obufHeaderLen:]
  833. output := make(chan result)
  834. // Queue output now, so we keep order.
  835. w.output <- output
  836. res := result{
  837. startOffset: w.uncompWritten,
  838. }
  839. w.uncompWritten += int64(len(uncompressed))
  840. go func() {
  841. checksum := crc(uncompressed)
  842. // Set to uncompressed.
  843. chunkType := uint8(chunkTypeUncompressedData)
  844. chunkLen := 4 + len(uncompressed)
  845. // Attempt compressing.
  846. n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
  847. n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
  848. // Check if we should use this, or store as uncompressed instead.
  849. if n2 > 0 {
  850. chunkType = uint8(chunkTypeCompressedData)
  851. chunkLen = 4 + n + n2
  852. obuf = obuf[:obufHeaderLen+n+n2]
  853. } else {
  854. // Use input as output.
  855. obuf, inbuf = inbuf, obuf
  856. }
  857. // Fill in the per-chunk header that comes before the body.
  858. obuf[0] = chunkType
  859. obuf[1] = uint8(chunkLen >> 0)
  860. obuf[2] = uint8(chunkLen >> 8)
  861. obuf[3] = uint8(chunkLen >> 16)
  862. obuf[4] = uint8(checksum >> 0)
  863. obuf[5] = uint8(checksum >> 8)
  864. obuf[6] = uint8(checksum >> 16)
  865. obuf[7] = uint8(checksum >> 24)
  866. // Queue final output.
  867. res.b = obuf
  868. output <- res
  869. // Put unused buffer back in pool.
  870. w.buffers.Put(inbuf)
  871. }()
  872. return nil
  873. }
  874. func (w *Writer) writeSync(p []byte) (nRet int, errRet error) {
  875. if err := w.err(nil); err != nil {
  876. return 0, err
  877. }
  878. if !w.wroteStreamHeader {
  879. w.wroteStreamHeader = true
  880. var n int
  881. var err error
  882. if w.snappy {
  883. n, err = w.writer.Write([]byte(magicChunkSnappy))
  884. } else {
  885. n, err = w.writer.Write([]byte(magicChunk))
  886. }
  887. if err != nil {
  888. return 0, w.err(err)
  889. }
  890. if n != len(magicChunk) {
  891. return 0, w.err(io.ErrShortWrite)
  892. }
  893. w.written += int64(n)
  894. }
  895. for len(p) > 0 {
  896. var uncompressed []byte
  897. if len(p) > w.blockSize {
  898. uncompressed, p = p[:w.blockSize], p[w.blockSize:]
  899. } else {
  900. uncompressed, p = p, nil
  901. }
  902. obuf := w.buffers.Get().([]byte)[:w.obufLen]
  903. checksum := crc(uncompressed)
  904. // Set to uncompressed.
  905. chunkType := uint8(chunkTypeUncompressedData)
  906. chunkLen := 4 + len(uncompressed)
  907. // Attempt compressing.
  908. n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
  909. n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
  910. if n2 > 0 {
  911. chunkType = uint8(chunkTypeCompressedData)
  912. chunkLen = 4 + n + n2
  913. obuf = obuf[:obufHeaderLen+n+n2]
  914. } else {
  915. obuf = obuf[:8]
  916. }
  917. // Fill in the per-chunk header that comes before the body.
  918. obuf[0] = chunkType
  919. obuf[1] = uint8(chunkLen >> 0)
  920. obuf[2] = uint8(chunkLen >> 8)
  921. obuf[3] = uint8(chunkLen >> 16)
  922. obuf[4] = uint8(checksum >> 0)
  923. obuf[5] = uint8(checksum >> 8)
  924. obuf[6] = uint8(checksum >> 16)
  925. obuf[7] = uint8(checksum >> 24)
  926. n, err := w.writer.Write(obuf)
  927. if err != nil {
  928. return 0, w.err(err)
  929. }
  930. if n != len(obuf) {
  931. return 0, w.err(io.ErrShortWrite)
  932. }
  933. w.err(w.index.add(w.written, w.uncompWritten))
  934. w.written += int64(n)
  935. w.uncompWritten += int64(len(uncompressed))
  936. if chunkType == chunkTypeUncompressedData {
  937. // Write uncompressed data.
  938. n, err := w.writer.Write(uncompressed)
  939. if err != nil {
  940. return 0, w.err(err)
  941. }
  942. if n != len(uncompressed) {
  943. return 0, w.err(io.ErrShortWrite)
  944. }
  945. w.written += int64(n)
  946. }
  947. w.buffers.Put(obuf)
  948. // Queue final output.
  949. nRet += len(uncompressed)
  950. }
  951. return nRet, nil
  952. }
  953. // Flush flushes the Writer to its underlying io.Writer.
  954. // This does not apply padding.
  955. func (w *Writer) Flush() error {
  956. if err := w.err(nil); err != nil {
  957. return err
  958. }
  959. // Queue any data still in input buffer.
  960. if len(w.ibuf) != 0 {
  961. if !w.wroteStreamHeader {
  962. _, err := w.writeSync(w.ibuf)
  963. w.ibuf = w.ibuf[:0]
  964. return w.err(err)
  965. } else {
  966. _, err := w.write(w.ibuf)
  967. w.ibuf = w.ibuf[:0]
  968. err = w.err(err)
  969. if err != nil {
  970. return err
  971. }
  972. }
  973. }
  974. if w.output == nil {
  975. return w.err(nil)
  976. }
  977. // Send empty buffer
  978. res := make(chan result)
  979. w.output <- res
  980. // Block until this has been picked up.
  981. res <- result{b: nil, startOffset: w.uncompWritten}
  982. // When it is closed, we have flushed.
  983. <-res
  984. return w.err(nil)
  985. }
  986. // Close calls Flush and then closes the Writer.
  987. // Calling Close multiple times is ok,
  988. // but calling CloseIndex after this will make it not return the index.
  989. func (w *Writer) Close() error {
  990. _, err := w.closeIndex(w.appendIndex)
  991. return err
  992. }
  993. // CloseIndex calls Close and returns an index on first call.
  994. // This is not required if you are only adding index to a stream.
  995. func (w *Writer) CloseIndex() ([]byte, error) {
  996. return w.closeIndex(true)
  997. }
  998. func (w *Writer) closeIndex(idx bool) ([]byte, error) {
  999. err := w.Flush()
  1000. if w.output != nil {
  1001. close(w.output)
  1002. w.writerWg.Wait()
  1003. w.output = nil
  1004. }
  1005. var index []byte
  1006. if w.err(nil) == nil && w.writer != nil {
  1007. // Create index.
  1008. if idx {
  1009. compSize := int64(-1)
  1010. if w.pad <= 1 {
  1011. compSize = w.written
  1012. }
  1013. index = w.index.appendTo(w.ibuf[:0], w.uncompWritten, compSize)
  1014. // Count as written for padding.
  1015. if w.appendIndex {
  1016. w.written += int64(len(index))
  1017. }
  1018. }
  1019. if w.pad > 1 {
  1020. tmp := w.ibuf[:0]
  1021. if len(index) > 0 {
  1022. // Allocate another buffer.
  1023. tmp = w.buffers.Get().([]byte)[:0]
  1024. defer w.buffers.Put(tmp)
  1025. }
  1026. add := calcSkippableFrame(w.written, int64(w.pad))
  1027. frame, err := skippableFrame(tmp, add, w.randSrc)
  1028. if err = w.err(err); err != nil {
  1029. return nil, err
  1030. }
  1031. n, err2 := w.writer.Write(frame)
  1032. if err2 == nil && n != len(frame) {
  1033. err2 = io.ErrShortWrite
  1034. }
  1035. _ = w.err(err2)
  1036. }
  1037. if len(index) > 0 && w.appendIndex {
  1038. n, err2 := w.writer.Write(index)
  1039. if err2 == nil && n != len(index) {
  1040. err2 = io.ErrShortWrite
  1041. }
  1042. _ = w.err(err2)
  1043. }
  1044. }
  1045. err = w.err(errClosed)
  1046. if err == errClosed {
  1047. return index, nil
  1048. }
  1049. return nil, err
  1050. }
  1051. // calcSkippableFrame will return a total size to be added for written
  1052. // to be divisible by multiple.
  1053. // The value will always be > skippableFrameHeader.
  1054. // The function will panic if written < 0 or wantMultiple <= 0.
  1055. func calcSkippableFrame(written, wantMultiple int64) int {
  1056. if wantMultiple <= 0 {
  1057. panic("wantMultiple <= 0")
  1058. }
  1059. if written < 0 {
  1060. panic("written < 0")
  1061. }
  1062. leftOver := written % wantMultiple
  1063. if leftOver == 0 {
  1064. return 0
  1065. }
  1066. toAdd := wantMultiple - leftOver
  1067. for toAdd < skippableFrameHeader {
  1068. toAdd += wantMultiple
  1069. }
  1070. return int(toAdd)
  1071. }
  1072. // skippableFrame will add a skippable frame with a total size of bytes.
  1073. // total should be >= skippableFrameHeader and < maxBlockSize + skippableFrameHeader
  1074. func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) {
  1075. if total == 0 {
  1076. return dst, nil
  1077. }
  1078. if total < skippableFrameHeader {
  1079. return dst, fmt.Errorf("s2: requested skippable frame (%d) < 4", total)
  1080. }
  1081. if int64(total) >= maxBlockSize+skippableFrameHeader {
  1082. return dst, fmt.Errorf("s2: requested skippable frame (%d) >= max 1<<24", total)
  1083. }
  1084. // Chunk type 0xfe "Section 4.4 Padding (chunk type 0xfe)"
  1085. dst = append(dst, chunkTypePadding)
  1086. f := uint32(total - skippableFrameHeader)
  1087. // Add chunk length.
  1088. dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16))
  1089. // Add data
  1090. start := len(dst)
  1091. dst = append(dst, make([]byte, f)...)
  1092. _, err := io.ReadFull(r, dst[start:])
  1093. return dst, err
  1094. }
  // WriterOption is an option for creating an encoder.
  // It is applied to the Writer being configured and returns a non-nil
  // error when the requested setting is rejected.
  type WriterOption func(*Writer) error
  1097. // WriterConcurrency will set the concurrency,
  1098. // meaning the maximum number of decoders to run concurrently.
  1099. // The value supplied must be at least 1.
  1100. // By default this will be set to GOMAXPROCS.
  1101. func WriterConcurrency(n int) WriterOption {
  1102. return func(w *Writer) error {
  1103. if n <= 0 {
  1104. return errors.New("concurrency must be at least 1")
  1105. }
  1106. w.concurrency = n
  1107. return nil
  1108. }
  1109. }
  1110. // WriterAddIndex will append an index to the end of a stream
  1111. // when it is closed.
  1112. func WriterAddIndex() WriterOption {
  1113. return func(w *Writer) error {
  1114. w.appendIndex = true
  1115. return nil
  1116. }
  1117. }
  1118. // WriterBetterCompression will enable better compression.
  1119. // EncodeBetter compresses better than Encode but typically with a
  1120. // 10-40% speed decrease on both compression and decompression.
  1121. func WriterBetterCompression() WriterOption {
  1122. return func(w *Writer) error {
  1123. w.level = levelBetter
  1124. return nil
  1125. }
  1126. }
  1127. // WriterBestCompression will enable better compression.
  1128. // EncodeBetter compresses better than Encode but typically with a
  1129. // big speed decrease on compression.
  1130. func WriterBestCompression() WriterOption {
  1131. return func(w *Writer) error {
  1132. w.level = levelBest
  1133. return nil
  1134. }
  1135. }
  1136. // WriterUncompressed will bypass compression.
  1137. // The stream will be written as uncompressed blocks only.
  1138. // If concurrency is > 1 CRC and output will still be done async.
  1139. func WriterUncompressed() WriterOption {
  1140. return func(w *Writer) error {
  1141. w.level = levelUncompressed
  1142. return nil
  1143. }
  1144. }
  1145. // WriterBlockSize allows to override the default block size.
  1146. // Blocks will be this size or smaller.
  1147. // Minimum size is 4KB and and maximum size is 4MB.
  1148. //
  1149. // Bigger blocks may give bigger throughput on systems with many cores,
  1150. // and will increase compression slightly, but it will limit the possible
  1151. // concurrency for smaller payloads for both encoding and decoding.
  1152. // Default block size is 1MB.
  1153. //
  1154. // When writing Snappy compatible output using WriterSnappyCompat,
  1155. // the maximum block size is 64KB.
  1156. func WriterBlockSize(n int) WriterOption {
  1157. return func(w *Writer) error {
  1158. if w.snappy && n > maxSnappyBlockSize || n < minBlockSize {
  1159. return errors.New("s2: block size too large. Must be <= 64K and >=4KB on for snappy compatible output")
  1160. }
  1161. if n > maxBlockSize || n < minBlockSize {
  1162. return errors.New("s2: block size too large. Must be <= 4MB and >=4KB")
  1163. }
  1164. w.blockSize = n
  1165. return nil
  1166. }
  1167. }
  1168. // WriterPadding will add padding to all output so the size will be a multiple of n.
  1169. // This can be used to obfuscate the exact output size or make blocks of a certain size.
  1170. // The contents will be a skippable frame, so it will be invisible by the decoder.
  1171. // n must be > 0 and <= 4MB.
  1172. // The padded area will be filled with data from crypto/rand.Reader.
  1173. // The padding will be applied whenever Close is called on the writer.
  1174. func WriterPadding(n int) WriterOption {
  1175. return func(w *Writer) error {
  1176. if n <= 0 {
  1177. return fmt.Errorf("s2: padding must be at least 1")
  1178. }
  1179. // No need to waste our time.
  1180. if n == 1 {
  1181. w.pad = 0
  1182. }
  1183. if n > maxBlockSize {
  1184. return fmt.Errorf("s2: padding must less than 4MB")
  1185. }
  1186. w.pad = n
  1187. return nil
  1188. }
  1189. }
  1190. // WriterPaddingSrc will get random data for padding from the supplied source.
  1191. // By default crypto/rand is used.
  1192. func WriterPaddingSrc(reader io.Reader) WriterOption {
  1193. return func(w *Writer) error {
  1194. w.randSrc = reader
  1195. return nil
  1196. }
  1197. }
  1198. // WriterSnappyCompat will write snappy compatible output.
  1199. // The output can be decompressed using either snappy or s2.
  1200. // If block size is more than 64KB it is set to that.
  1201. func WriterSnappyCompat() WriterOption {
  1202. return func(w *Writer) error {
  1203. w.snappy = true
  1204. if w.blockSize > 64<<10 {
  1205. // We choose 8 bytes less than 64K, since that will make literal emits slightly more effective.
  1206. // And allows us to skip some size checks.
  1207. w.blockSize = (64 << 10) - 8
  1208. }
  1209. return nil
  1210. }
  1211. }
  1212. // WriterFlushOnWrite will compress blocks on each call to the Write function.
  1213. //
  1214. // This is quite inefficient as blocks size will depend on the write size.
  1215. //
  1216. // Use WriterConcurrency(1) to also make sure that output is flushed.
  1217. // When Write calls return, otherwise they will be written when compression is done.
  1218. func WriterFlushOnWrite() WriterOption {
  1219. return func(w *Writer) error {
  1220. w.flushOnWrite = true
  1221. return nil
  1222. }
  1223. }