Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 

330 rader
8.2 KiB

  1. import io
  2. import posixpath
  3. import zipfile
  4. import itertools
  5. import contextlib
  6. import sys
  7. import pathlib
  8. if sys.version_info < (3, 7):
  9. from collections import OrderedDict
  10. else:
  11. OrderedDict = dict
  12. __all__ = ['Path']
  13. def _parents(path):
  14. """
  15. Given a path with elements separated by
  16. posixpath.sep, generate all parents of that path.
  17. >>> list(_parents('b/d'))
  18. ['b']
  19. >>> list(_parents('/b/d/'))
  20. ['/b']
  21. >>> list(_parents('b/d/f/'))
  22. ['b/d', 'b']
  23. >>> list(_parents('b'))
  24. []
  25. >>> list(_parents(''))
  26. []
  27. """
  28. return itertools.islice(_ancestry(path), 1, None)
  29. def _ancestry(path):
  30. """
  31. Given a path with elements separated by
  32. posixpath.sep, generate all elements of that path
  33. >>> list(_ancestry('b/d'))
  34. ['b/d', 'b']
  35. >>> list(_ancestry('/b/d/'))
  36. ['/b/d', '/b']
  37. >>> list(_ancestry('b/d/f/'))
  38. ['b/d/f', 'b/d', 'b']
  39. >>> list(_ancestry('b'))
  40. ['b']
  41. >>> list(_ancestry(''))
  42. []
  43. """
  44. path = path.rstrip(posixpath.sep)
  45. while path and path != posixpath.sep:
  46. yield path
  47. path, tail = posixpath.split(path)
  48. _dedupe = OrderedDict.fromkeys
  49. """Deduplicate an iterable in original order"""
  50. def _difference(minuend, subtrahend):
  51. """
  52. Return items in minuend not in subtrahend, retaining order
  53. with O(1) lookup.
  54. """
  55. return itertools.filterfalse(set(subtrahend).__contains__, minuend)
  56. class CompleteDirs(zipfile.ZipFile):
  57. """
  58. A ZipFile subclass that ensures that implied directories
  59. are always included in the namelist.
  60. """
  61. @staticmethod
  62. def _implied_dirs(names):
  63. parents = itertools.chain.from_iterable(map(_parents, names))
  64. as_dirs = (p + posixpath.sep for p in parents)
  65. return _dedupe(_difference(as_dirs, names))
  66. def namelist(self):
  67. names = super(CompleteDirs, self).namelist()
  68. return names + list(self._implied_dirs(names))
  69. def _name_set(self):
  70. return set(self.namelist())
  71. def resolve_dir(self, name):
  72. """
  73. If the name represents a directory, return that name
  74. as a directory (with the trailing slash).
  75. """
  76. names = self._name_set()
  77. dirname = name + '/'
  78. dir_match = name not in names and dirname in names
  79. return dirname if dir_match else name
  80. @classmethod
  81. def make(cls, source):
  82. """
  83. Given a source (filename or zipfile), return an
  84. appropriate CompleteDirs subclass.
  85. """
  86. if isinstance(source, CompleteDirs):
  87. return source
  88. if not isinstance(source, zipfile.ZipFile):
  89. return cls(_pathlib_compat(source))
  90. # Only allow for FastLookup when supplied zipfile is read-only
  91. if 'r' not in source.mode:
  92. cls = CompleteDirs
  93. source.__class__ = cls
  94. return source
  95. class FastLookup(CompleteDirs):
  96. """
  97. ZipFile subclass to ensure implicit
  98. dirs exist and are resolved rapidly.
  99. """
  100. def namelist(self):
  101. with contextlib.suppress(AttributeError):
  102. return self.__names
  103. self.__names = super(FastLookup, self).namelist()
  104. return self.__names
  105. def _name_set(self):
  106. with contextlib.suppress(AttributeError):
  107. return self.__lookup
  108. self.__lookup = super(FastLookup, self)._name_set()
  109. return self.__lookup
  110. def _pathlib_compat(path):
  111. """
  112. For path-like objects, convert to a filename for compatibility
  113. on Python 3.6.1 and earlier.
  114. """
  115. try:
  116. return path.__fspath__()
  117. except AttributeError:
  118. return str(path)
  119. class Path:
  120. """
  121. A pathlib-compatible interface for zip files.
  122. Consider a zip file with this structure::
  123. .
  124. ├── a.txt
  125. └── b
  126. ├── c.txt
  127. └── d
  128. └── e.txt
  129. >>> data = io.BytesIO()
  130. >>> zf = zipfile.ZipFile(data, 'w')
  131. >>> zf.writestr('a.txt', 'content of a')
  132. >>> zf.writestr('b/c.txt', 'content of c')
  133. >>> zf.writestr('b/d/e.txt', 'content of e')
  134. >>> zf.filename = 'mem/abcde.zip'
  135. Path accepts the zipfile object itself or a filename
  136. >>> root = Path(zf)
  137. From there, several path operations are available.
  138. Directory iteration (including the zip file itself):
  139. >>> a, b = root.iterdir()
  140. >>> a
  141. Path('mem/abcde.zip', 'a.txt')
  142. >>> b
  143. Path('mem/abcde.zip', 'b/')
  144. name property:
  145. >>> b.name
  146. 'b'
  147. join with divide operator:
  148. >>> c = b / 'c.txt'
  149. >>> c
  150. Path('mem/abcde.zip', 'b/c.txt')
  151. >>> c.name
  152. 'c.txt'
  153. Read text:
  154. >>> c.read_text()
  155. 'content of c'
  156. existence:
  157. >>> c.exists()
  158. True
  159. >>> (b / 'missing.txt').exists()
  160. False
  161. Coercion to string:
  162. >>> import os
  163. >>> str(c).replace(os.sep, posixpath.sep)
  164. 'mem/abcde.zip/b/c.txt'
  165. At the root, ``name``, ``filename``, and ``parent``
  166. resolve to the zipfile. Note these attributes are not
  167. valid and will raise a ``ValueError`` if the zipfile
  168. has no filename.
  169. >>> root.name
  170. 'abcde.zip'
  171. >>> str(root.filename).replace(os.sep, posixpath.sep)
  172. 'mem/abcde.zip'
  173. >>> str(root.parent)
  174. 'mem'
  175. """
  176. __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
  177. def __init__(self, root, at=""):
  178. """
  179. Construct a Path from a ZipFile or filename.
  180. Note: When the source is an existing ZipFile object,
  181. its type (__class__) will be mutated to a
  182. specialized type. If the caller wishes to retain the
  183. original type, the caller should either create a
  184. separate ZipFile object or pass a filename.
  185. """
  186. self.root = FastLookup.make(root)
  187. self.at = at
  188. def open(self, mode='r', *args, pwd=None, **kwargs):
  189. """
  190. Open this entry as text or binary following the semantics
  191. of ``pathlib.Path.open()`` by passing arguments through
  192. to io.TextIOWrapper().
  193. """
  194. if self.is_dir():
  195. raise IsADirectoryError(self)
  196. zip_mode = mode[0]
  197. if not self.exists() and zip_mode == 'r':
  198. raise FileNotFoundError(self)
  199. stream = self.root.open(self.at, zip_mode, pwd=pwd)
  200. if 'b' in mode:
  201. if args or kwargs:
  202. raise ValueError("encoding args invalid for binary operation")
  203. return stream
  204. return io.TextIOWrapper(stream, *args, **kwargs)
  205. @property
  206. def name(self):
  207. return pathlib.Path(self.at).name or self.filename.name
  208. @property
  209. def suffix(self):
  210. return pathlib.Path(self.at).suffix or self.filename.suffix
  211. @property
  212. def suffixes(self):
  213. return pathlib.Path(self.at).suffixes or self.filename.suffixes
  214. @property
  215. def stem(self):
  216. return pathlib.Path(self.at).stem or self.filename.stem
  217. @property
  218. def filename(self):
  219. return pathlib.Path(self.root.filename).joinpath(self.at)
  220. def read_text(self, *args, **kwargs):
  221. with self.open('r', *args, **kwargs) as strm:
  222. return strm.read()
  223. def read_bytes(self):
  224. with self.open('rb') as strm:
  225. return strm.read()
  226. def _is_child(self, path):
  227. return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
  228. def _next(self, at):
  229. return self.__class__(self.root, at)
  230. def is_dir(self):
  231. return not self.at or self.at.endswith("/")
  232. def is_file(self):
  233. return self.exists() and not self.is_dir()
  234. def exists(self):
  235. return self.at in self.root._name_set()
  236. def iterdir(self):
  237. if not self.is_dir():
  238. raise ValueError("Can't listdir a file")
  239. subs = map(self._next, self.root.namelist())
  240. return filter(self._is_child, subs)
  241. def __str__(self):
  242. return posixpath.join(self.root.filename, self.at)
  243. def __repr__(self):
  244. return self.__repr.format(self=self)
  245. def joinpath(self, *other):
  246. next = posixpath.join(self.at, *map(_pathlib_compat, other))
  247. return self._next(self.root.resolve_dir(next))
  248. __truediv__ = joinpath
  249. @property
  250. def parent(self):
  251. if not self.at:
  252. return self.filename.parent
  253. parent_at = posixpath.dirname(self.at.rstrip('/'))
  254. if parent_at:
  255. parent_at += '/'
  256. return self._next(parent_at)