25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

308 satır
7.6 KiB

  1. //go:build !amd64 || appengine || !gc || noasm
  2. // +build !amd64 appengine !gc noasm
  3. package s2
  4. import (
  5. "math/bits"
  6. )
  7. // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
  8. // assumes that the varint-encoded length of the decompressed bytes has already
  9. // been written.
  10. //
  11. // It also assumes that:
  12. // len(dst) >= MaxEncodedLen(len(src))
  13. func encodeBlock(dst, src []byte) (d int) {
  14. if len(src) < minNonLiteralBlockSize {
  15. return 0
  16. }
  17. return encodeBlockGo(dst, src)
  18. }
  19. // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
  20. // assumes that the varint-encoded length of the decompressed bytes has already
  21. // been written.
  22. //
  23. // It also assumes that:
  24. // len(dst) >= MaxEncodedLen(len(src))
  25. func encodeBlockBetter(dst, src []byte) (d int) {
  26. return encodeBlockBetterGo(dst, src)
  27. }
  28. // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
  29. // assumes that the varint-encoded length of the decompressed bytes has already
  30. // been written.
  31. //
  32. // It also assumes that:
  33. // len(dst) >= MaxEncodedLen(len(src))
  34. func encodeBlockBetterSnappy(dst, src []byte) (d int) {
  35. return encodeBlockBetterSnappyGo(dst, src)
  36. }
  37. // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
  38. // assumes that the varint-encoded length of the decompressed bytes has already
  39. // been written.
  40. //
  41. // It also assumes that:
  42. // len(dst) >= MaxEncodedLen(len(src))
  43. func encodeBlockSnappy(dst, src []byte) (d int) {
  44. if len(src) < minNonLiteralBlockSize {
  45. return 0
  46. }
  47. return encodeBlockSnappyGo(dst, src)
  48. }
  49. // emitLiteral writes a literal chunk and returns the number of bytes written.
  50. //
  51. // It assumes that:
  52. // dst is long enough to hold the encoded bytes
  53. // 0 <= len(lit) && len(lit) <= math.MaxUint32
  54. func emitLiteral(dst, lit []byte) int {
  55. if len(lit) == 0 {
  56. return 0
  57. }
  58. const num = 63<<2 | tagLiteral
  59. i, n := 0, uint(len(lit)-1)
  60. switch {
  61. case n < 60:
  62. dst[0] = uint8(n)<<2 | tagLiteral
  63. i = 1
  64. case n < 1<<8:
  65. dst[1] = uint8(n)
  66. dst[0] = 60<<2 | tagLiteral
  67. i = 2
  68. case n < 1<<16:
  69. dst[2] = uint8(n >> 8)
  70. dst[1] = uint8(n)
  71. dst[0] = 61<<2 | tagLiteral
  72. i = 3
  73. case n < 1<<24:
  74. dst[3] = uint8(n >> 16)
  75. dst[2] = uint8(n >> 8)
  76. dst[1] = uint8(n)
  77. dst[0] = 62<<2 | tagLiteral
  78. i = 4
  79. default:
  80. dst[4] = uint8(n >> 24)
  81. dst[3] = uint8(n >> 16)
  82. dst[2] = uint8(n >> 8)
  83. dst[1] = uint8(n)
  84. dst[0] = 63<<2 | tagLiteral
  85. i = 5
  86. }
  87. return i + copy(dst[i:], lit)
  88. }
  89. // emitRepeat writes a repeat chunk and returns the number of bytes written.
  90. // Length must be at least 4 and < 1<<24
  91. func emitRepeat(dst []byte, offset, length int) int {
  92. // Repeat offset, make length cheaper
  93. length -= 4
  94. if length <= 4 {
  95. dst[0] = uint8(length)<<2 | tagCopy1
  96. dst[1] = 0
  97. return 2
  98. }
  99. if length < 8 && offset < 2048 {
  100. // Encode WITH offset
  101. dst[1] = uint8(offset)
  102. dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
  103. return 2
  104. }
  105. if length < (1<<8)+4 {
  106. length -= 4
  107. dst[2] = uint8(length)
  108. dst[1] = 0
  109. dst[0] = 5<<2 | tagCopy1
  110. return 3
  111. }
  112. if length < (1<<16)+(1<<8) {
  113. length -= 1 << 8
  114. dst[3] = uint8(length >> 8)
  115. dst[2] = uint8(length >> 0)
  116. dst[1] = 0
  117. dst[0] = 6<<2 | tagCopy1
  118. return 4
  119. }
  120. const maxRepeat = (1 << 24) - 1
  121. length -= 1 << 16
  122. left := 0
  123. if length > maxRepeat {
  124. left = length - maxRepeat + 4
  125. length = maxRepeat - 4
  126. }
  127. dst[4] = uint8(length >> 16)
  128. dst[3] = uint8(length >> 8)
  129. dst[2] = uint8(length >> 0)
  130. dst[1] = 0
  131. dst[0] = 7<<2 | tagCopy1
  132. if left > 0 {
  133. return 5 + emitRepeat(dst[5:], offset, left)
  134. }
  135. return 5
  136. }
  137. // emitCopy writes a copy chunk and returns the number of bytes written.
  138. //
  139. // It assumes that:
  140. // dst is long enough to hold the encoded bytes
  141. // 1 <= offset && offset <= math.MaxUint32
  142. // 4 <= length && length <= 1 << 24
  143. func emitCopy(dst []byte, offset, length int) int {
  144. if offset >= 65536 {
  145. i := 0
  146. if length > 64 {
  147. // Emit a length 64 copy, encoded as 5 bytes.
  148. dst[4] = uint8(offset >> 24)
  149. dst[3] = uint8(offset >> 16)
  150. dst[2] = uint8(offset >> 8)
  151. dst[1] = uint8(offset)
  152. dst[0] = 63<<2 | tagCopy4
  153. length -= 64
  154. if length >= 4 {
  155. // Emit remaining as repeats
  156. return 5 + emitRepeat(dst[5:], offset, length)
  157. }
  158. i = 5
  159. }
  160. if length == 0 {
  161. return i
  162. }
  163. // Emit a copy, offset encoded as 4 bytes.
  164. dst[i+0] = uint8(length-1)<<2 | tagCopy4
  165. dst[i+1] = uint8(offset)
  166. dst[i+2] = uint8(offset >> 8)
  167. dst[i+3] = uint8(offset >> 16)
  168. dst[i+4] = uint8(offset >> 24)
  169. return i + 5
  170. }
  171. // Offset no more than 2 bytes.
  172. if length > 64 {
  173. off := 3
  174. if offset < 2048 {
  175. // emit 8 bytes as tagCopy1, rest as repeats.
  176. dst[1] = uint8(offset)
  177. dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
  178. length -= 8
  179. off = 2
  180. } else {
  181. // Emit a length 60 copy, encoded as 3 bytes.
  182. // Emit remaining as repeat value (minimum 4 bytes).
  183. dst[2] = uint8(offset >> 8)
  184. dst[1] = uint8(offset)
  185. dst[0] = 59<<2 | tagCopy2
  186. length -= 60
  187. }
  188. // Emit remaining as repeats, at least 4 bytes remain.
  189. return off + emitRepeat(dst[off:], offset, length)
  190. }
  191. if length >= 12 || offset >= 2048 {
  192. // Emit the remaining copy, encoded as 3 bytes.
  193. dst[2] = uint8(offset >> 8)
  194. dst[1] = uint8(offset)
  195. dst[0] = uint8(length-1)<<2 | tagCopy2
  196. return 3
  197. }
  198. // Emit the remaining copy, encoded as 2 bytes.
  199. dst[1] = uint8(offset)
  200. dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
  201. return 2
  202. }
  203. // emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
  204. //
  205. // It assumes that:
  206. // dst is long enough to hold the encoded bytes
  207. // 1 <= offset && offset <= math.MaxUint32
  208. // 4 <= length && length <= 1 << 24
  209. func emitCopyNoRepeat(dst []byte, offset, length int) int {
  210. if offset >= 65536 {
  211. i := 0
  212. if length > 64 {
  213. // Emit a length 64 copy, encoded as 5 bytes.
  214. dst[4] = uint8(offset >> 24)
  215. dst[3] = uint8(offset >> 16)
  216. dst[2] = uint8(offset >> 8)
  217. dst[1] = uint8(offset)
  218. dst[0] = 63<<2 | tagCopy4
  219. length -= 64
  220. if length >= 4 {
  221. // Emit remaining as repeats
  222. return 5 + emitCopyNoRepeat(dst[5:], offset, length)
  223. }
  224. i = 5
  225. }
  226. if length == 0 {
  227. return i
  228. }
  229. // Emit a copy, offset encoded as 4 bytes.
  230. dst[i+0] = uint8(length-1)<<2 | tagCopy4
  231. dst[i+1] = uint8(offset)
  232. dst[i+2] = uint8(offset >> 8)
  233. dst[i+3] = uint8(offset >> 16)
  234. dst[i+4] = uint8(offset >> 24)
  235. return i + 5
  236. }
  237. // Offset no more than 2 bytes.
  238. if length > 64 {
  239. // Emit a length 60 copy, encoded as 3 bytes.
  240. // Emit remaining as repeat value (minimum 4 bytes).
  241. dst[2] = uint8(offset >> 8)
  242. dst[1] = uint8(offset)
  243. dst[0] = 59<<2 | tagCopy2
  244. length -= 60
  245. // Emit remaining as repeats, at least 4 bytes remain.
  246. return 3 + emitCopyNoRepeat(dst[3:], offset, length)
  247. }
  248. if length >= 12 || offset >= 2048 {
  249. // Emit the remaining copy, encoded as 3 bytes.
  250. dst[2] = uint8(offset >> 8)
  251. dst[1] = uint8(offset)
  252. dst[0] = uint8(length-1)<<2 | tagCopy2
  253. return 3
  254. }
  255. // Emit the remaining copy, encoded as 2 bytes.
  256. dst[1] = uint8(offset)
  257. dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
  258. return 2
  259. }
  260. // matchLen returns how many bytes match in a and b
  261. //
  262. // It assumes that:
  263. // len(a) <= len(b)
  264. //
  265. func matchLen(a []byte, b []byte) int {
  266. b = b[:len(a)]
  267. var checked int
  268. if len(a) > 4 {
  269. // Try 4 bytes first
  270. if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
  271. return bits.TrailingZeros32(diff) >> 3
  272. }
  273. // Switch to 8 byte matching.
  274. checked = 4
  275. a = a[4:]
  276. b = b[4:]
  277. for len(a) >= 8 {
  278. b = b[:len(a)]
  279. if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
  280. return checked + (bits.TrailingZeros64(diff) >> 3)
  281. }
  282. checked += 8
  283. a = a[8:]
  284. b = b[8:]
  285. }
  286. }
  287. b = b[:len(a)]
  288. for i := range a {
  289. if a[i] != b[i] {
  290. return int(i) + checked
  291. }
  292. }
  293. return len(a) + checked
  294. }