You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

186 rivejä
5.9 KiB

  1. """
  2. Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
  3. Changes include:
  4. * PEP3102 `*` removed (note: `yield from` is in use in this version).
  5. * Hidden files are not ignored.
  6. """
  7. from __future__ import annotations
  8. import fnmatch
  9. import os
  10. import re
  11. from collections.abc import Iterable, Iterator
  12. from typing import TYPE_CHECKING, AnyStr, overload
  13. if TYPE_CHECKING:
  14. from _typeshed import BytesPath, StrOrBytesPath, StrPath
  15. __all__ = ["glob", "iglob", "escape"]
  16. def glob(pathname: AnyStr, recursive: bool = False) -> list[AnyStr]:
  17. """Return a list of paths matching a pathname pattern.
  18. The pattern may contain simple shell-style wildcards a la
  19. fnmatch. However, unlike fnmatch, filenames starting with a
  20. dot are special cases that are not matched by '*' and '?'
  21. patterns.
  22. If recursive is true, the pattern '**' will match any files and
  23. zero or more directories and subdirectories.
  24. """
  25. return list(iglob(pathname, recursive=recursive))
  26. def iglob(pathname: AnyStr, recursive: bool = False) -> Iterator[AnyStr]:
  27. """Return an iterator which yields the paths matching a pathname pattern.
  28. The pattern may contain simple shell-style wildcards a la
  29. fnmatch. However, unlike fnmatch, filenames starting with a
  30. dot are special cases that are not matched by '*' and '?'
  31. patterns.
  32. If recursive is true, the pattern '**' will match any files and
  33. zero or more directories and subdirectories.
  34. """
  35. it = _iglob(pathname, recursive)
  36. if recursive and _isrecursive(pathname):
  37. s = next(it) # skip empty string
  38. assert not s
  39. return it
  40. def _iglob(pathname: AnyStr, recursive: bool) -> Iterator[AnyStr]:
  41. dirname, basename = os.path.split(pathname)
  42. glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1
  43. if not has_magic(pathname):
  44. if basename:
  45. if os.path.lexists(pathname):
  46. yield pathname
  47. else:
  48. # Patterns ending with a slash should match only directories
  49. if os.path.isdir(dirname):
  50. yield pathname
  51. return
  52. if not dirname:
  53. yield from glob_in_dir(dirname, basename)
  54. return
  55. # `os.path.split()` returns the argument itself as a dirname if it is a
  56. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  57. # contains magic characters (i.e. r'\\?\C:').
  58. if dirname != pathname and has_magic(dirname):
  59. dirs: Iterable[AnyStr] = _iglob(dirname, recursive)
  60. else:
  61. dirs = [dirname]
  62. if not has_magic(basename):
  63. glob_in_dir = glob0
  64. for dirname in dirs:
  65. for name in glob_in_dir(dirname, basename):
  66. yield os.path.join(dirname, name)
  67. # These 2 helper functions non-recursively glob inside a literal directory.
  68. # They return a list of basenames. `glob1` accepts a pattern while `glob0`
  69. # takes a literal basename (so it only has to check for its existence).
  70. @overload
  71. def glob1(dirname: StrPath, pattern: str) -> list[str]: ...
  72. @overload
  73. def glob1(dirname: BytesPath, pattern: bytes) -> list[bytes]: ...
  74. def glob1(dirname: StrOrBytesPath, pattern: str | bytes) -> list[str] | list[bytes]:
  75. if not dirname:
  76. if isinstance(pattern, bytes):
  77. dirname = os.curdir.encode('ASCII')
  78. else:
  79. dirname = os.curdir
  80. try:
  81. names = os.listdir(dirname)
  82. except OSError:
  83. return []
  84. # mypy false-positives: str or bytes type possibility is always kept in sync
  85. return fnmatch.filter(names, pattern) # type: ignore[type-var, return-value]
  86. def glob0(dirname, basename):
  87. if not basename:
  88. # `os.path.split()` returns an empty basename for paths ending with a
  89. # directory separator. 'q*x/' should match only directories.
  90. if os.path.isdir(dirname):
  91. return [basename]
  92. else:
  93. if os.path.lexists(os.path.join(dirname, basename)):
  94. return [basename]
  95. return []
  96. # This helper function recursively yields relative pathnames inside a literal
  97. # directory.
  98. @overload
  99. def glob2(dirname: StrPath, pattern: str) -> Iterator[str]: ...
  100. @overload
  101. def glob2(dirname: BytesPath, pattern: bytes) -> Iterator[bytes]: ...
  102. def glob2(dirname: StrOrBytesPath, pattern: str | bytes) -> Iterator[str | bytes]:
  103. assert _isrecursive(pattern)
  104. yield pattern[:0]
  105. yield from _rlistdir(dirname)
  106. # Recursively yields relative pathnames inside a literal directory.
  107. @overload
  108. def _rlistdir(dirname: StrPath) -> Iterator[str]: ...
  109. @overload
  110. def _rlistdir(dirname: BytesPath) -> Iterator[bytes]: ...
  111. def _rlistdir(dirname: StrOrBytesPath) -> Iterator[str | bytes]:
  112. if not dirname:
  113. if isinstance(dirname, bytes):
  114. dirname = os.curdir.encode('ASCII')
  115. else:
  116. dirname = os.curdir
  117. try:
  118. names = os.listdir(dirname)
  119. except OSError:
  120. return
  121. for x in names:
  122. yield x
  123. # mypy false-positives: str or bytes type possibility is always kept in sync
  124. path = os.path.join(dirname, x) if dirname else x # type: ignore[arg-type]
  125. for y in _rlistdir(path):
  126. yield os.path.join(x, y) # type: ignore[arg-type]
  127. magic_check = re.compile('([*?[])')
  128. magic_check_bytes = re.compile(b'([*?[])')
  129. def has_magic(s: str | bytes) -> bool:
  130. if isinstance(s, bytes):
  131. return magic_check_bytes.search(s) is not None
  132. else:
  133. return magic_check.search(s) is not None
  134. def _isrecursive(pattern: str | bytes) -> bool:
  135. if isinstance(pattern, bytes):
  136. return pattern == b'**'
  137. else:
  138. return pattern == '**'
  139. def escape(pathname):
  140. """Escape all special characters."""
  141. # Escaping is done by wrapping any of "*?[" between square brackets.
  142. # Metacharacters do not work in the drive part and shouldn't be escaped.
  143. drive, pathname = os.path.splitdrive(pathname)
  144. if isinstance(pathname, bytes):
  145. pathname = magic_check_bytes.sub(rb'[\1]', pathname)
  146. else:
  147. pathname = magic_check.sub(r'[\1]', pathname)
  148. return drive + pathname