25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

220 lines
7.2 KiB

  1. """Utilities for extracting common archive formats"""
  2. import contextlib
  3. import os
  4. import posixpath
  5. import shutil
  6. import tarfile
  7. import zipfile
  8. from ._path import ensure_directory
  9. from distutils.errors import DistutilsError
# Names exported by ``from <this module> import *``.
__all__ = [
    "unpack_archive",
    "unpack_zipfile",
    "unpack_tarfile",
    "default_filter",
    "UnrecognizedFormat",
    "extraction_drivers",
    "unpack_directory",
]
  19. class UnrecognizedFormat(DistutilsError):
  20. """Couldn't recognize the archive type"""
  21. def default_filter(src, dst):
  22. """The default progress/filter callback; returns True for all files"""
  23. return dst
  24. def unpack_archive(
  25. filename, extract_dir, progress_filter=default_filter, drivers=None
  26. ) -> None:
  27. """Unpack `filename` to `extract_dir`, or raise ``UnrecognizedFormat``
  28. `progress_filter` is a function taking two arguments: a source path
  29. internal to the archive ('/'-separated), and a filesystem path where it
  30. will be extracted. The callback must return the desired extract path
  31. (which may be the same as the one passed in), or else ``None`` to skip
  32. that file or directory. The callback can thus be used to report on the
  33. progress of the extraction, as well as to filter the items extracted or
  34. alter their extraction paths.
  35. `drivers`, if supplied, must be a non-empty sequence of functions with the
  36. same signature as this function (minus the `drivers` argument), that raise
  37. ``UnrecognizedFormat`` if they do not support extracting the designated
  38. archive type. The `drivers` are tried in sequence until one is found that
  39. does not raise an error, or until all are exhausted (in which case
  40. ``UnrecognizedFormat`` is raised). If you do not supply a sequence of
  41. drivers, the module's ``extraction_drivers`` constant will be used, which
  42. means that ``unpack_zipfile`` and ``unpack_tarfile`` will be tried, in that
  43. order.
  44. """
  45. for driver in drivers or extraction_drivers:
  46. try:
  47. driver(filename, extract_dir, progress_filter)
  48. except UnrecognizedFormat:
  49. continue
  50. else:
  51. return
  52. else:
  53. raise UnrecognizedFormat(f"Not a recognized archive type: {filename}")
  54. def unpack_directory(filename, extract_dir, progress_filter=default_filter) -> None:
  55. """ "Unpack" a directory, using the same interface as for archives
  56. Raises ``UnrecognizedFormat`` if `filename` is not a directory
  57. """
  58. if not os.path.isdir(filename):
  59. raise UnrecognizedFormat(f"{filename} is not a directory")
  60. paths = {
  61. filename: ('', extract_dir),
  62. }
  63. for base, dirs, files in os.walk(filename):
  64. src, dst = paths[base]
  65. for d in dirs:
  66. paths[os.path.join(base, d)] = src + d + '/', os.path.join(dst, d)
  67. for f in files:
  68. target = os.path.join(dst, f)
  69. target = progress_filter(src + f, target)
  70. if not target:
  71. # skip non-files
  72. continue
  73. ensure_directory(target)
  74. f = os.path.join(base, f)
  75. shutil.copyfile(f, target)
  76. shutil.copystat(f, target)
  77. def unpack_zipfile(filename, extract_dir, progress_filter=default_filter) -> None:
  78. """Unpack zip `filename` to `extract_dir`
  79. Raises ``UnrecognizedFormat`` if `filename` is not a zipfile (as determined
  80. by ``zipfile.is_zipfile()``). See ``unpack_archive()`` for an explanation
  81. of the `progress_filter` argument.
  82. """
  83. if not zipfile.is_zipfile(filename):
  84. raise UnrecognizedFormat(f"{filename} is not a zip file")
  85. with zipfile.ZipFile(filename) as z:
  86. _unpack_zipfile_obj(z, extract_dir, progress_filter)
  87. def _unpack_zipfile_obj(zipfile_obj, extract_dir, progress_filter=default_filter):
  88. """Internal/private API used by other parts of setuptools.
  89. Similar to ``unpack_zipfile``, but receives an already opened :obj:`zipfile.ZipFile`
  90. object instead of a filename.
  91. """
  92. for info in zipfile_obj.infolist():
  93. name = info.filename
  94. # don't extract absolute paths or ones with .. in them
  95. if name.startswith('/') or '..' in name.split('/'):
  96. continue
  97. target = os.path.join(extract_dir, *name.split('/'))
  98. target = progress_filter(name, target)
  99. if not target:
  100. continue
  101. if name.endswith('/'):
  102. # directory
  103. ensure_directory(target)
  104. else:
  105. # file
  106. ensure_directory(target)
  107. data = zipfile_obj.read(info.filename)
  108. with open(target, 'wb') as f:
  109. f.write(data)
  110. unix_attributes = info.external_attr >> 16
  111. if unix_attributes:
  112. os.chmod(target, unix_attributes)
  113. def _resolve_tar_file_or_dir(tar_obj, tar_member_obj):
  114. """Resolve any links and extract link targets as normal files."""
  115. while tar_member_obj is not None and (
  116. tar_member_obj.islnk() or tar_member_obj.issym()
  117. ):
  118. linkpath = tar_member_obj.linkname
  119. if tar_member_obj.issym():
  120. base = posixpath.dirname(tar_member_obj.name)
  121. linkpath = posixpath.join(base, linkpath)
  122. linkpath = posixpath.normpath(linkpath)
  123. tar_member_obj = tar_obj._getmember(linkpath)
  124. is_file_or_dir = tar_member_obj is not None and (
  125. tar_member_obj.isfile() or tar_member_obj.isdir()
  126. )
  127. if is_file_or_dir:
  128. return tar_member_obj
  129. raise LookupError('Got unknown file type')
  130. def _iter_open_tar(tar_obj, extract_dir, progress_filter):
  131. """Emit member-destination pairs from a tar archive."""
  132. # don't do any chowning!
  133. tar_obj.chown = lambda *args: None
  134. with contextlib.closing(tar_obj):
  135. for member in tar_obj:
  136. name = member.name
  137. # don't extract absolute paths or ones with .. in them
  138. if name.startswith('/') or '..' in name.split('/'):
  139. continue
  140. prelim_dst = os.path.join(extract_dir, *name.split('/'))
  141. try:
  142. member = _resolve_tar_file_or_dir(tar_obj, member)
  143. except LookupError:
  144. continue
  145. final_dst = progress_filter(name, prelim_dst)
  146. if not final_dst:
  147. continue
  148. if final_dst.endswith(os.sep):
  149. final_dst = final_dst[:-1]
  150. yield member, final_dst
  151. def unpack_tarfile(filename, extract_dir, progress_filter=default_filter) -> bool:
  152. """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
  153. Raises ``UnrecognizedFormat`` if `filename` is not a tarfile (as determined
  154. by ``tarfile.open()``). See ``unpack_archive()`` for an explanation
  155. of the `progress_filter` argument.
  156. """
  157. try:
  158. tarobj = tarfile.open(filename)
  159. except tarfile.TarError as e:
  160. raise UnrecognizedFormat(
  161. f"{filename} is not a compressed or uncompressed tar file"
  162. ) from e
  163. for member, final_dst in _iter_open_tar(
  164. tarobj,
  165. extract_dir,
  166. progress_filter,
  167. ):
  168. try:
  169. # XXX Ugh
  170. tarobj._extract_member(member, final_dst)
  171. except tarfile.ExtractError:
  172. # chown/chmod/mkfifo/mknode/makedev failed
  173. pass
  174. return True
# Default drivers, in the order ``unpack_archive`` tries them when the caller
# supplies none: plain directory first, then zip, then tar.
extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile