Vous ne pouvez pas sélectionner plus de 25 sujets. Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 
 

584 lignes
20 KiB

  1. // VariantKey
  2. //
  3. // variantkey.h
  4. //
  5. // @category Libraries
  6. // @author Nicola Asuni <nicola.asuni@genomicsplc.com>
  7. // @copyright 2017-2018 GENOMICS plc
  8. // @license MIT (see LICENSE)
  9. // @link https://github.com/genomicsplc/variantkey
  10. //
  11. // LICENSE
  12. //
  13. // Copyright (c) 2017-2018 GENOMICS plc
  14. //
  15. // Permission is hereby granted, free of charge, to any person obtaining a copy
  16. // of this software and associated documentation files (the "Software"), to deal
  17. // in the Software without restriction, including without limitation the rights
  18. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  19. // copies of the Software, and to permit persons to whom the Software is
  20. // furnished to do so, subject to the following conditions:
  21. //
  22. // The above copyright notice and this permission notice shall be included in
  23. // all copies or substantial portions of the Software.
  24. //
  25. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  26. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  27. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  28. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  29. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  30. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  31. // THE SOFTWARE.
  32. /**
  33. * @file variantkey.h
  34. * @brief VariantKey main functions.
  35. *
  36. * The functions provided here allow generating and processing 64 bit unsigned integer keys for human genetic variants.
  37. * The VariantKey is sortable for chromosome and position,
  38. * and it is also fully reversible for variants with up to 11 bases between Reference and Alternate alleles.
  39. * It can be used to sort, search and match variant-based data easily and very quickly.
  40. */
  41. #ifndef VARIANTKEY_H
  42. #define VARIANTKEY_H
  43. #include <inttypes.h>
  44. #include <stddef.h>
  45. #include <stdio.h>
  46. #include "hex.h"
  /*
   * Bit layout of a 64 bit VariantKey, from MSB to LSB:
   *   5 bit CHROM | 28 bit POS | 31 bit REF+ALT
   *
   * Every mask is an explicit 64 bit unsigned constant (UINT64_C), so that
   * shifting or complementing a mask is never evaluated in a narrower and
   * possibly signed type (a bare 0x000000007FFFFFFF literal has type int).
   */
  #define VKMASK_CHROM    UINT64_C(0xF800000000000000) //!< VariantKey binary mask for CHROM     [ 11111000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ]
  #define VKMASK_POS      UINT64_C(0x07FFFFFF80000000) //!< VariantKey binary mask for POS       [ 00000111 11111111 11111111 11111111 10000000 00000000 00000000 00000000 ]
  #define VKMASK_CHROMPOS UINT64_C(0xFFFFFFFF80000000) //!< VariantKey binary mask for CHROM+POS [ 11111111 11111111 11111111 11111111 10000000 00000000 00000000 00000000 ]
  #define VKMASK_REFALT   UINT64_C(0x000000007FFFFFFF) //!< VariantKey binary mask for REF+ALT   [ 00000000 00000000 00000000 00000000 01111111 11111111 11111111 11111111 ]
  #define VKSHIFT_CHROM   59 //!< CHROM LSB position from the VariantKey LSB
  #define VKSHIFT_POS     31 //!< POS LSB position from the VariantKey LSB
  /**
   * Decoded VariantKey.
   * Holds the three numerically encoded components of a VariantKey:
   * CHROM, POS and REF+ALT.
   */
  struct variantkey_t
  {
      uint8_t  chrom;  //!< Encoded chromosome number (only the 5 least significant bits are used)
      uint32_t pos;    //!< 0-based reference position (only the 28 least significant bits are used)
      uint32_t refalt; //!< Encoded Reference and Alternate alleles (only the 31 least significant bits are used)
  };
  typedef struct variantkey_t variantkey_t;
  /**
   * Inclusive VariantKey interval used for range searches.
   */
  struct vkrange_t
  {
      uint64_t min; //!< Lowest VariantKey in the range, whatever the REF+ALT encoding
      uint64_t max; //!< Highest VariantKey in the range, whatever the REF+ALT encoding
  };
  typedef struct vkrange_t vkrange_t;
  /** @brief Returns chromosome numerical encoding.
   *
   * An optional, case-insensitive "chr" prefix is skipped first.
   * What remains must be one of:
   * - a string made only of decimal digits, returned as its numeric value;
   * - "X" (23), "Y" (24), "M" or "MT" (25), all case-insensitive.
   *
   * @param chrom Chromosome. An identifier from the reference genome, no white-space permitted.
   * @param size  Length of the chrom string, excluding the terminating null byte.
   *
   * @return CHROM code, or 0 (NA) if the string is empty, is not recognised,
   *         or is a number that cannot be stored in the 5 bit CHROM field (> 31).
   */
  static inline uint8_t encode_chrom(const char *chrom, size_t size)
  {
      // Largest value that fits in the 5 bit CHROM field of a VariantKey.
      enum { CHROM_CODE_MAX = 31 };

      // remove "chr" prefix
      if ((size > 3)
          && ((chrom[0] == 'c') || (chrom[0] == 'C'))
          && ((chrom[1] == 'h') || (chrom[1] == 'H'))
          && ((chrom[2] == 'r') || (chrom[2] == 'R')))
      {
          chrom += 3;
          size -= 3;
      }
      if (size == 0)
      {
          return 0; // NA
      }

      if ((chrom[0] >= '0') && (chrom[0] <= '9')) // Number
      {
          unsigned int v = 0;
          for (size_t i = 0; i < size; i++)
          {
              if ((chrom[i] < '0') || (chrom[i] > '9'))
              {
                  return 0; // NA
              }
              v = (v * 10) + (unsigned int)(chrom[i] - '0');
              // Reject as soon as the value leaves the valid range: this stops
              // the silent 8 bit wrap-around (e.g. "266" -> 10) and keeps the
              // accumulator from overflowing on arbitrarily long inputs.
              if (v > CHROM_CODE_MAX)
              {
                  return 0; // NA
              }
          }
          return (uint8_t)v;
      }

      // X > 23 ; Y > 24 ; M > 25
      switch (chrom[0])
      {
      case 'X':
      case 'x':
          return (size == 1) ? 23 : 0;
      case 'Y':
      case 'y':
          return (size == 1) ? 24 : 0;
      case 'M':
      case 'm':
          // Only the mitochondrial chromosome has the two-letter form "MT".
          if ((size == 1) || ((size == 2) && ((chrom[1] == 'T') || (chrom[1] == 't'))))
          {
              return 25;
          }
          return 0; // NA
      default:
          return 0; // NA
      }
  }
  /** @brief Decode the chromosome numerical code.
   *
   * Codes 1-22 are written as decimal numbers, 23 as "X", 24 as "Y" and
   * 25 as "MT". Any other code is written as "NA".
   *
   * The characters are stored directly, without going through a formatted
   * output function, so the function cannot fail.
   *
   * @param code  CHROM code.
   * @param chrom CHROM string buffer to be returned. Its size should be enough to contain the results (max 4 bytes).
   *
   * @return The number of characters written, excluding the null-character
   *         appended at the end of the string (always 1 or 2).
   */
  static inline size_t decode_chrom(uint8_t code, char *chrom)
  {
      size_t len = 0;
      if ((code < 1) || (code > 25))
      {
          chrom[len++] = 'N';
          chrom[len++] = 'A';
      }
      else if (code < 23)
      {
          if (code >= 10)
          {
              chrom[len++] = (char)('0' + (code / 10));
          }
          chrom[len++] = (char)('0' + (code % 10));
      }
      else if (code == 23)
      {
          chrom[len++] = 'X';
      }
      else if (code == 24)
      {
          chrom[len++] = 'Y';
      }
      else // 25
      {
          chrom[len++] = 'M';
          chrom[len++] = 'T';
      }
      chrom[len] = '\0';
      return len;
  }
  /** @brief Map a nucleotide letter to its 2 bit code.
   *
   * A > 0, C > 1, G > 2, T > 3 (case-insensitive).
   *
   * @param c Character to encode.
   *
   * @return The base code (0-3), or 4 for any other character.
   */
  static inline uint32_t encode_base(const uint8_t c)
  {
      switch (c)
      {
      case 'A':
      case 'a':
          return 0;
      case 'C':
      case 'c':
          return 1;
      case 'G':
      case 'g':
          return 2;
      case 'T':
      case 't':
          return 3;
      default:
          return 4; // not a reversible base
      }
  }
  /** @brief Append the 2 bit codes of an allele to a reversible REF+ALT code.
   *
   * For every character the bit cursor is moved down by 2 and the base code
   * is OR-ed into the accumulator at that position.
   *
   * @param h      Code accumulator, updated in place.
   * @param bitpos Bit cursor, updated in place (decremented by 2 per base).
   * @param str    Allele string.
   * @param size   Number of characters to read from str.
   *
   * @return 0 on success, -1 as soon as a character that encode_base() does not
   *         map to 0-3 is found (h and bitpos keep the changes made so far).
   */
  static inline int encode_allele(uint32_t *h, uint8_t *bitpos, const char *str, size_t size)
  {
      for (size_t i = 0; i < size; i++)
      {
          const uint32_t base = encode_base(str[i]);
          if (base > 3)
          {
              return -1;
          }
          *bitpos -= 2;
          *h |= (base << *bitpos);
      }
      return 0;
  }
  /** @brief Build the reversible REF+ALT code.
   *
   * Layout: [******** ******** ******** ******** *RRRRAAA A1122334 45566778 8990011*]
   * - RRRR: number of bases in REF (stored as is, at bit 27)
   * - AAAA: number of bases in ALT (stored as is, at bit 23)
   * - then 2 bits per base, REF first and ALT right after, from bit 22 downwards.
   *
   * The function does not check the lengths itself: encode_refalt() only calls
   * it when sizeref + sizealt <= 11.
   *
   * @return The reversible code, or 0 if any base is rejected by encode_allele().
   */
  static inline uint32_t encode_refalt_rev(const char *ref, size_t sizeref, const char *alt, size_t sizealt)
  {
      uint32_t code = ((uint32_t)(sizeref) << 27) | ((uint32_t)(sizealt) << 23);
      uint8_t cursor = 23; // the first base lands on bits 22-21
      if (encode_allele(&code, &cursor, ref, sizeref) < 0)
      {
          return 0; // error code
      }
      if (encode_allele(&code, &cursor, alt, sizealt) < 0)
      {
          return 0; // error code
      }
      return code;
  }
  /** @brief Mix two 32 bit hash numbers using a MurmurHash3-like algorithm.
   *
   * @param k Block to mix in.
   * @param h Current hash state.
   *
   * @return The new hash state.
   */
  static inline uint32_t muxhash(uint32_t k, uint32_t h)
  {
      const uint32_t scrambled = k * 0xcc9e2d51;
      const uint32_t rotated = (scrambled << 15) | (scrambled >> 17); // rotate left by 15
      h ^= rotated * 0x1b873593;
      h = (h << 13) | (h >> 19); // rotate left by 13
      return ((h * 5) + 0xe6546b64);
  }
  /** @brief Map a character to the value packed into a hash block.
   *
   * Letters are folded to their 1-based alphabet index ('A'/'a' > 1 ... 'Z'/'z' > 26);
   * any character below 'A' (such as '*') maps to 27.
   *
   * @param c Character to encode.
   *
   * @return The packed value of the character.
   */
  static inline uint32_t encode_packchar(int c)
  {
      if (c < 'A')
      {
          return 27;
      }
      const int origin = (c >= 'a') ? 'a' : 'A';
      return (uint32_t)(c - origin + 1);
  }
  /** @brief Pack a trailing block of 1 to 5 characters in 32 bit.
   *
   * Uses the same layout as a full block of 6 characters
   * (6 x 5 bit + 2 spare bit) [ 01111122 22233333 44444555 55666660 ]:
   * character i is placed at bit (1 + 5 * (5 - i)), and the slots of the
   * missing characters are left at zero.
   *
   * @param str  Characters to pack.
   * @param size Number of characters (1 to 5); any other value yields 0.
   *
   * @return The packed block.
   */
  static inline uint32_t pack_chars_tail(const char *str, size_t size)
  {
      uint32_t packed = 0;
      if ((size == 0) || (size > 5))
      {
          return packed;
      }
      for (unsigned int i = 0; i < size; i++)
      {
          packed ^= encode_packchar(str[i]) << (1u + (5u * (5u - i)));
      }
      return packed;
  }
  /** @brief Pack a full block of 6 characters in 32 bit.
   *
   * Layout (6 x 5 bit + 2 spare bit): [ 01111122 22233333 44444555 55666660 ]
   * Character i is placed at bit (1 + 5 * (5 - i)).
   *
   * @param str Pointer to at least 6 readable characters.
   *
   * @return The packed block.
   */
  static inline uint32_t pack_chars(const char *str)
  {
      uint32_t packed = 0;
      for (unsigned int i = 0; i < 6; i++)
      {
          packed ^= encode_packchar(str[i]) << (1u + (5u * (5u - i)));
      }
      return packed;
  }
  /** @brief Return a 32 bit hash of a nucleotide string.
   *
   * The string is consumed in blocks of 6 characters; each packed block is
   * mixed into the running hash, followed by the shorter final block, if any.
   *
   * @param str  String to hash.
   * @param size Length of the string.
   *
   * @return The hash (0 for an empty string).
   */
  static inline uint32_t hash32(const char *str, size_t size)
  {
      const size_t block = 6;
      uint32_t digest = 0;
      const char *cursor = str;
      size_t remaining = size;
      for (; remaining >= block; remaining -= block, cursor += block)
      {
          digest = muxhash(pack_chars(cursor), digest);
      }
      if (remaining != 0)
      {
          digest = muxhash(pack_chars_tail(cursor, remaining), digest);
      }
      return digest;
  }
  /** @brief Build the non-reversible (hashed) REF+ALT code.
   *
   * The REF hash, a separator and the ALT hash are mixed together, then the
   * MurmurHash3 finalization mix is applied. The result is shifted right by
   * one and its least significant bit is set to mark the code as a hash.
   *
   * @return A 31 bit code whose least significant bit is always 1.
   */
  static inline uint32_t encode_refalt_hash(const char *ref, size_t sizeref, const char *alt, size_t sizealt)
  {
      const uint32_t ref_hash = hash32(ref, sizeref);
      const uint32_t alt_hash = hash32(alt, sizealt);
      // 0x3 is the separator character between REF and ALT [00000000 00000000 00000000 00000011]
      uint32_t mixed = muxhash(alt_hash, muxhash(0x3, ref_hash));
      // MurmurHash3 finalization mix - force all bits of a hash block to avalanche
      mixed ^= mixed >> 16;
      mixed *= 0x85ebca6b;
      mixed ^= mixed >> 13;
      mixed *= 0xc2b2ae35;
      mixed ^= mixed >> 16;
      // 0x1 is the set bit to indicate HASH mode [00000000 00000000 00000000 00000001]
      return ((mixed >> 1) | 0x1);
  }
  /** @brief Returns reference+alternate numerical encoding.
   *
   * The reversible encoding is tried first when REF and ALT hold 11 bases or
   * fewer in total. If it is not applicable, or it reports an error (code 0),
   * the hashed encoding is used instead.
   *
   * @param ref     Reference allele. String containing a sequence of nucleotide letters.
   *                The value in the pos field refers to the position of the first nucleotide in the String.
   *                Characters must be A-Z, a-z or *
   * @param sizeref Length of the ref string, excluding the terminating null byte.
   * @param alt     Alternate non-reference allele string.
   *                Characters must be A-Z, a-z or *
   * @param sizealt Length of the alt string, excluding the terminating null byte.
   *
   * @return REF+ALT code
   */
  static inline uint32_t encode_refalt(const char *ref, size_t sizeref, const char *alt, size_t sizealt)
  {
      const uint32_t reversible = ((sizeref + sizealt) <= 11)
          ? encode_refalt_rev(ref, sizeref, alt, sizealt)
          : 0;
      if (reversible == 0)
      {
          return encode_refalt_hash(ref, sizeref, alt, sizealt);
      }
      return reversible;
  }
  /** @brief Decode the 2 bit base stored at the given bit position.
   *
   * @param code   REF+ALT code.
   * @param bitpos Position of the least significant bit of the base.
   *
   * @return 'A', 'C', 'G' or 'T'.
   */
  static inline char decode_base(uint32_t code, int bitpos)
  {
      static const char base[4] = {'A', 'C', 'G', 'T'};
      return base[((code >> bitpos) & 0x3)]; // 0x3 is the 2 bit mask [00000011]
  }
  /** @brief Decode a reversible REF+ALT code.
   *
   * Layout: [******** ******** ******** ******** *RRRRAAA A1122334 45566778 8990011*]
   * The REF and ALT lengths are read from bits 27-30 and 23-26; the bases
   * follow as 2 bit codes, REF first and ALT right after, from bit 22 downwards.
   *
   * A reversible code holds at most 11 bases in total. A code whose lengths
   * add up to more than that is malformed: it is decoded as two empty strings
   * and 0 is returned, so a buffer is never written past 11 characters plus
   * the terminating null byte and no out-of-range shift is performed.
   *
   * @param code    Reversible REF+ALT code.
   * @param ref     REF string buffer to be returned (at least 12 bytes).
   * @param sizeref Receives the length of the decoded REF string.
   * @param alt     ALT string buffer to be returned (at least 12 bytes).
   * @param sizealt Receives the length of the decoded ALT string.
   *
   * @return The total number of characters of REF+ALT.
   */
  static inline size_t decode_refalt_rev(uint32_t code, char *ref, size_t *sizeref, char *alt, size_t *sizealt)
  {
      size_t nref = (size_t)((code & 0x78000000) >> 27); // [01111000 00000000 00000000 00000000]
      size_t nalt = (size_t)((code & 0x07800000) >> 23); // [00000111 10000000 00000000 00000000]
      if ((nref + nalt) > 11)
      {
          // Malformed code: there is no room for that many bases.
          nref = 0;
          nalt = 0;
      }
      int bitpos = 23; // the first base occupies bits 22-21
      for (size_t i = 0; i < nref; i++)
      {
          bitpos -= 2;
          ref[i] = decode_base(code, bitpos);
      }
      ref[nref] = 0;
      for (size_t i = 0; i < nalt; i++)
      {
          bitpos -= 2;
          alt[i] = decode_base(code, bitpos);
      }
      alt[nalt] = 0;
      *sizeref = nref;
      *sizealt = nalt;
      return (nref + nalt);
  }
  /** @brief Decode the 32 bit REF+ALT code if reversible (if it has 11 or less bases in total and only contains ACGT letters).
   *
   * A code with the least significant bit set is a hash: it cannot be decoded,
   * and in that case none of the output arguments is touched.
   *
   * @param code    REF+ALT code
   * @param ref     REF string buffer to be returned.
   * @param sizeref Pointer that receives the final ref size, excluding the terminating null byte.
   * @param alt     ALT string buffer to be returned.
   * @param sizealt Pointer that receives the final alt size, excluding the terminating null byte.
   *
   * @return If the code is reversible, then the total number of characters of REF+ALT is returned.
   *         Otherwise 0 is returned.
   */
  static inline size_t decode_refalt(uint32_t code, char *ref, size_t *sizeref, char *alt, size_t *sizealt)
  {
      const int hashed = ((code & 0x1) != 0); // check last bit
      if (!hashed)
      {
          return decode_refalt_rev(code, ref, sizeref, alt, sizealt);
      }
      return 0; // non-reversible encoding
  }
  409. /** @brief Returns a 64 bit variant key based on the pre-encoded CHROM, POS (0-based) and REF+ALT.
  410. *
  411. * @param chrom Encoded Chromosome (see encode_chrom).
  412. * @param pos Position. The reference position, with the first base having position 0.
  413. * @param refalt Encoded Reference + Alternate (see encode_refalt).
  414. *
  415. * @return VariantKey 64 bit code.
  416. */
  417. static inline uint64_t encode_variantkey(uint8_t chrom, uint32_t pos, uint32_t refalt)
  418. {
  419. return (((uint64_t)chrom << VKSHIFT_CHROM) | ((uint64_t)pos << VKSHIFT_POS) | (uint64_t)refalt);
  420. }
  421. /** @brief Extract the CHROM code from VariantKey.
  422. *
  423. * @param vk VariantKey code.
  424. *
  425. * @return CHROM code.
  426. */
  427. static inline uint8_t extract_variantkey_chrom(uint64_t vk)
  428. {
  429. return (uint8_t)((vk & VKMASK_CHROM) >> VKSHIFT_CHROM);
  430. }
  431. /** @brief Extract the POS code from VariantKey.
  432. *
  433. * @param vk VariantKey code.
  434. *
  435. * @return POS.
  436. */
  437. static inline uint32_t extract_variantkey_pos(uint64_t vk)
  438. {
  439. return (uint32_t)((vk & VKMASK_POS) >> VKSHIFT_POS);
  440. }
  441. /** @brief Extract the REF+ALT code from VariantKey.
  442. *
  443. * @param vk VariantKey code.
  444. *
  445. * @return REF+ALT code.
  446. */
  447. static inline uint32_t extract_variantkey_refalt(uint64_t vk)
  448. {
  449. return (uint32_t)(vk & VKMASK_REFALT);
  450. }
  451. /** @brief Decode a VariantKey code and returns the components as variantkey_t structure.
  452. *
  453. * @param code VariantKey code.
  454. * @param vk Decoded variantkey structure.
  455. */
  456. static inline void decode_variantkey(uint64_t code, variantkey_t *vk)
  457. {
  458. vk->chrom = extract_variantkey_chrom(code);
  459. vk->pos = extract_variantkey_pos(code);
  460. vk->refalt = extract_variantkey_refalt(code);
  461. }
  /** @brief Returns a 64 bit variant key based on CHROM, POS (0-based), REF, ALT.
   *
   * @param chrom     Chromosome. An identifier from the reference genome, no white-space or leading zeros permitted.
   * @param sizechrom Length of the chrom string, excluding the terminating null byte.
   * @param pos       Position. The reference position, with the first base having position 0.
   * @param ref       Reference allele. String containing a sequence of nucleotide letters.
   *                  The value in the pos field refers to the position of the first nucleotide in the String.
   *                  Characters must be A-Z, a-z or *
   * @param sizeref   Length of the ref string, excluding the terminating null byte.
   * @param alt       Alternate non-reference allele string.
   *                  Characters must be A-Z, a-z or *
   * @param sizealt   Length of the alt string, excluding the terminating null byte.
   *
   * @return VariantKey 64 bit code.
   */
  static inline uint64_t variantkey(const char *chrom, size_t sizechrom, uint32_t pos, const char *ref, size_t sizeref, const char *alt, size_t sizealt)
  {
      const uint8_t chrom_code = encode_chrom(chrom, sizechrom);
      const uint32_t refalt_code = encode_refalt(ref, sizeref, alt, sizealt);
      return encode_variantkey(chrom_code, pos, refalt_code);
  }
  481. /** @brief Returns minimum and maximum VariantKeys for range searches.
  482. *
  483. * @param chrom Chromosome encoded number.
  484. * @param pos_min Start reference position, with the first base having position 0.
  485. * @param pos_max End reference position, with the first base having position 0.
  486. * @param range VariantKey range values.
  487. */
  488. static inline void variantkey_range(uint8_t chrom, uint32_t pos_min, uint32_t pos_max, vkrange_t *range)
  489. {
  490. uint64_t c = ((uint64_t)chrom << VKSHIFT_CHROM);
  491. range->min = (c | ((uint64_t)pos_min << VKSHIFT_POS));
  492. range->max = (c | ((uint64_t)pos_max << VKSHIFT_POS) | VKMASK_REFALT);
  493. }
  /** @brief Three-way comparison of two unsigned 64 bit numbers.
   *
   * @param a First value.
   * @param b Second value.
   *
   * @return -1 if a < b, 1 if a > b, 0 if they are equal.
   */
  static inline int8_t compare_uint64_t(uint64_t a, uint64_t b)
  {
      if (a < b)
      {
          return -1;
      }
      if (a > b)
      {
          return 1;
      }
      return 0;
  }
  498. /** @brief Compares two VariantKeys by chromosome only.
  499. *
  500. * @param vka The first VariantKey to be compared.
  501. * @param vkb The second VariantKey to be compared.
  502. *
  503. * @return -1 if the first chromosome is smaller than the second, 0 if they are equal and 1 if the first is greater than the second.
  504. */
  505. static inline int8_t compare_variantkey_chrom(uint64_t vka, uint64_t vkb)
  506. {
  507. return compare_uint64_t((vka >> VKSHIFT_CHROM), (vkb >> VKSHIFT_CHROM));
  508. }
  509. /** @brief Compares two VariantKeys by chromosome and position.
  510. *
  511. * @param vka The first VariantKey to be compared.
  512. * @param vkb The second VariantKey to be compared.
  513. *
  514. * @return -1 if the first CHROM+POS is smaller than the second, 0 if they are equal and 1 if the first is greater than the second.
  515. */
  516. static inline int8_t compare_variantkey_chrom_pos(uint64_t vka, uint64_t vkb)
  517. {
  518. return compare_uint64_t((vka >> VKSHIFT_POS), (vkb >> VKSHIFT_POS));
  519. }
  /** @brief Returns VariantKey hexadecimal string (16 characters).
   *
   * The string represent a 64 bit number or:
   *   - 5 bit for CHROM
   *   - 28 bit for POS
   *   - 31 bit for REF+ALT
   *
   * The conversion is delegated to hex_uint64_t(), declared in hex.h.
   *
   * @param vk  VariantKey code.
   * @param str String buffer to be returned (it must be sized 17 bytes at least).
   *
   * @return The value returned by hex_uint64_t(), presumably the number of
   *         characters written excluding the terminating null byte.
   *         NOTE(review): no buffer size is passed, so a short buffer cannot
   *         be detected here - confirm the exact contract against hex.h.
   */
  static inline size_t variantkey_hex(uint64_t vk, char *str)
  {
      const size_t written = hex_uint64_t(vk, str);
      return written;
  }
  /** @brief Parses a VariantKey hexadecimal string and returns the code.
   *
   * The conversion is delegated to parse_hex_uint64_t(), declared in hex.h.
   *
   * @param vs VariantKey hexadecimal string (it must contain 16 hexadecimal characters).
   *
   * @return A VariantKey code.
   */
  static inline uint64_t parse_variantkey_hex(const char *vs)
  {
      const uint64_t code = parse_hex_uint64_t(vs);
      return code;
  }
  549. #endif // VARIANTKEY_H