您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

188 行
6.4 KiB

  1. # -*- coding: utf-8 -*-
  2. """Writing of VCF files to ``file``-like objects
  3. Currently, only writing to plain-text files is supported
  4. """
  5. from . import parser
  6. from . import record
  7. from . import bgzf
  8. __author__ = "Manuel Holtgrewe <manuel.holtgrewe@bihealth.de>"
  9. def format_atomic(value, section):
  10. """Format atomic value
  11. This function also takes care of escaping the value in case one of the
  12. reserved characters occurs in the value.
  13. """
  14. # Perform escaping
  15. if isinstance(value, str):
  16. if any(r in value for r in record.RESERVED_CHARS[section]):
  17. for k, v in record.ESCAPE_MAPPING:
  18. value = value.replace(k, v)
  19. # String-format the given value
  20. if value is None:
  21. return "."
  22. else:
  23. return str(value)
  24. def format_value(field_info, value, section):
  25. """Format possibly compound value given the FieldInfo"""
  26. if section == "FORMAT" and field_info.id == "FT":
  27. if not value:
  28. return "."
  29. elif isinstance(value, list):
  30. return ";".join(map(lambda x: format_atomic(x, section), value))
  31. elif field_info.number == 1:
  32. if value is None:
  33. return "."
  34. else:
  35. return format_atomic(value, section)
  36. else:
  37. if not value:
  38. return "."
  39. else:
  40. return ",".join(map(lambda x: format_atomic(x, section), value))
  41. class Writer:
  42. """Class for writing VCF files to ``file``-like objects
  43. Instead of using the constructor, use the class methods
  44. :py:meth:`~Writer.from_stream` and
  45. :py:meth:`~Writer.from_path`.
  46. The writer has to be constructed with a :py:class:`~vcfpy.header.Header`
  47. object and the full VCF header will be written immediately on construction.
  48. This, of course, implies that modifying the header after construction is
  49. illegal.
  50. """
  51. @classmethod
  52. def from_stream(klass, stream, header, path=None, use_bgzf=None):
  53. """Create new :py:class:`Writer` from file
  54. Note that for getting bgzf support, you have to pass in a stream
  55. opened in binary mode. Further, you either have to provide a ``path``
  56. ending in ``".gz"`` or set ``use_bgzf=True``. Otherwise, you will
  57. get the notorious "TypeError: 'str' does not support the buffer
  58. interface".
  59. :param stream: ``file``-like object to write to
  60. :param header: VCF header to use, lines and samples are deep-copied
  61. :param path: optional string with path to store (for display only)
  62. :param use_bgzf: indicator whether to write bgzf to ``stream``
  63. if ``True``, prevent if ``False``, interpret ``path`` if ``None``
  64. """
  65. if use_bgzf or (use_bgzf is None and path and path.endswith(".gz")):
  66. stream = bgzf.BgzfWriter(fileobj=stream)
  67. return Writer(stream, header, path)
  68. @classmethod
  69. def from_path(klass, path, header):
  70. """Create new :py:class:`Writer` from path
  71. :param path: the path to load from (converted to ``str`` for
  72. compatibility with ``path.py``)
  73. :param header: VCF header to use, lines and samples are deep-copied
  74. """
  75. path = str(path)
  76. use_bgzf = False # we already interpret path
  77. if path.endswith(".gz"):
  78. f = bgzf.BgzfWriter(filename=path)
  79. else:
  80. f = open(path, "wt")
  81. return klass.from_stream(f, header, path, use_bgzf=use_bgzf)
  82. def __init__(self, stream, header, path=None):
  83. #: stream (``file``-like object) to read from
  84. self.stream = stream
  85. #: the :py:class:~vcfpy.header.Header` to write out, will be
  86. #: deep-copied into the ``Writer`` on initialization
  87. self.header = header.copy()
  88. #: optional ``str`` with the path to the stream
  89. self.path = path
  90. # write out headers
  91. self._write_header()
  92. def _write_header(self):
  93. """Write out the header"""
  94. for line in self.header.lines:
  95. print(line.serialize(), file=self.stream)
  96. if self.header.samples.names:
  97. print(
  98. "\t".join(list(parser.REQUIRE_SAMPLE_HEADER) + self.header.samples.names),
  99. file=self.stream,
  100. )
  101. else:
  102. print("\t".join(parser.REQUIRE_NO_SAMPLE_HEADER), file=self.stream)
  103. def close(self):
  104. """Close underlying stream"""
  105. self.stream.close()
  106. def write_record(self, record):
  107. """Write out the given :py:class:`vcfpy.record.Record` to this
  108. Writer"""
  109. self._serialize_record(record)
  110. def _serialize_record(self, record):
  111. """Serialize whole Record"""
  112. f = self._empty_to_dot
  113. row = [record.CHROM, record.POS]
  114. row.append(f(";".join(record.ID)))
  115. row.append(f(record.REF))
  116. if not record.ALT:
  117. row.append(".")
  118. else:
  119. row.append(",".join([f(a.serialize()) for a in record.ALT]))
  120. row.append(f(record.QUAL))
  121. row.append(f(";".join(record.FILTER)))
  122. row.append(f(self._serialize_info(record)))
  123. if record.FORMAT:
  124. row.append(":".join(record.FORMAT))
  125. row += [
  126. self._serialize_call(record.FORMAT, record.call_for_sample[s])
  127. for s in self.header.samples.names
  128. ]
  129. print(*row, sep="\t", file=self.stream)
  130. def _serialize_info(self, record):
  131. """Return serialized version of record.INFO"""
  132. result = []
  133. for key, value in record.INFO.items():
  134. info = self.header.get_info_field_info(key)
  135. if info.type == "Flag":
  136. result.append(key)
  137. else:
  138. result.append("{}={}".format(key, format_value(info, value, "INFO")))
  139. return ";".join(result)
  140. def _serialize_call(self, format_, call):
  141. """Return serialized version of the Call using the record's FORMAT'"""
  142. if isinstance(call, record.UnparsedCall):
  143. return call.unparsed_data
  144. else:
  145. result = [
  146. format_value(self.header.get_format_field_info(key), call.data.get(key), "FORMAT")
  147. for key in format_
  148. ]
  149. return ":".join(result)
  150. @classmethod
  151. def _empty_to_dot(klass, val):
  152. """Return val or '.' if empty value"""
  153. if val == "" or val is None or val == []:
  154. return "."
  155. else:
  156. return val
  157. def __enter__(self):
  158. return self
  159. def __exit__(self, type_, value, traceback):
  160. self.close()