您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

1138 行
38 KiB

  1. """PyPI and direct package downloading."""
  2. from __future__ import annotations
  3. import base64
  4. import configparser
  5. import hashlib
  6. import html
  7. import http.client
  8. import io
  9. import itertools
  10. import os
  11. import re
  12. import shutil
  13. import socket
  14. import subprocess
  15. import sys
  16. import urllib.error
  17. import urllib.parse
  18. import urllib.request
  19. from fnmatch import translate
  20. from functools import wraps
  21. from typing import NamedTuple
  22. from more_itertools import unique_everseen
  23. import setuptools
  24. from pkg_resources import (
  25. BINARY_DIST,
  26. CHECKOUT_DIST,
  27. DEVELOP_DIST,
  28. EGG_DIST,
  29. SOURCE_DIST,
  30. Distribution,
  31. Environment,
  32. Requirement,
  33. find_distributions,
  34. normalize_path,
  35. parse_version,
  36. safe_name,
  37. safe_version,
  38. to_filename,
  39. )
  40. from setuptools.wheel import Wheel
  41. from .unicode_utils import _cfg_read_utf8_with_fallback, _read_utf8_with_fallback
  42. from distutils import log
  43. from distutils.errors import DistutilsError
# Matches a complete ``egg=<value>`` URL fragment; group 1 is the value.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# Captures the target of an ``href`` attribute, with or without quotes.
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Matches a link followed by an "md5" link; groups are
# (href, link text, 32-digit hex digest).
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
# Bound ``match`` method: matches a leading ``scheme:`` of at least two
# characters; group 1 is the scheme.
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Filename suffixes that are tried when looking for source archives.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex',
    'distros_for_url',
    'parse_bdist_wininst',
    'interpret_distro_name',
]

# NOTE(review): not referenced in the visible part of this file; presumably
# the network timeout in seconds -- confirm against the rest of the module.
_SOCKET_TIMEOUT = 15

user_agent = f"setuptools/{setuptools.__version__} Python-urllib/{sys.version_info.major}.{sys.version_info.minor}"
  60. def parse_requirement_arg(spec):
  61. try:
  62. return Requirement.parse(spec)
  63. except ValueError as e:
  64. raise DistutilsError(
  65. f"Not a URL, existing file, or requirement spec: {spec!r}"
  66. ) from e
  67. def parse_bdist_wininst(name):
  68. """Return (base,pyversion) or (None,None) for possible .exe name"""
  69. lower = name.lower()
  70. base, py_ver, plat = None, None, None
  71. if lower.endswith('.exe'):
  72. if lower.endswith('.win32.exe'):
  73. base = name[:-10]
  74. plat = 'win32'
  75. elif lower.startswith('.win32-py', -16):
  76. py_ver = name[-7:-4]
  77. base = name[:-16]
  78. plat = 'win32'
  79. elif lower.endswith('.win-amd64.exe'):
  80. base = name[:-14]
  81. plat = 'win-amd64'
  82. elif lower.startswith('.win-amd64-py', -20):
  83. py_ver = name[-7:-4]
  84. base = name[:-20]
  85. plat = 'win-amd64'
  86. return base, py_ver, plat
  87. def egg_info_for_url(url):
  88. parts = urllib.parse.urlparse(url)
  89. _scheme, server, path, _parameters, _query, fragment = parts
  90. base = urllib.parse.unquote(path.split('/')[-1])
  91. if server == 'sourceforge.net' and base == 'download': # XXX Yuck
  92. base = urllib.parse.unquote(path.split('/')[-2])
  93. if '#' in base:
  94. base, fragment = base.split('#', 1)
  95. return base, fragment
  96. def distros_for_url(url, metadata=None):
  97. """Yield egg or source distribution objects that might be found at a URL"""
  98. base, fragment = egg_info_for_url(url)
  99. yield from distros_for_location(url, base, metadata)
  100. if fragment:
  101. match = EGG_FRAGMENT.match(fragment)
  102. if match:
  103. yield from interpret_distro_name(
  104. url, match.group(1), metadata, precedence=CHECKOUT_DIST
  105. )
  106. def distros_for_location(location, basename, metadata=None):
  107. """Yield egg or source distribution objects based on basename"""
  108. if basename.endswith('.egg.zip'):
  109. basename = basename[:-4] # strip the .zip
  110. if basename.endswith('.egg') and '-' in basename:
  111. # only one, unambiguous interpretation
  112. return [Distribution.from_location(location, basename, metadata)]
  113. if basename.endswith('.whl') and '-' in basename:
  114. wheel = Wheel(basename)
  115. if not wheel.is_compatible():
  116. return []
  117. return [
  118. Distribution(
  119. location=location,
  120. project_name=wheel.project_name,
  121. version=wheel.version,
  122. # Increase priority over eggs.
  123. precedence=EGG_DIST + 1,
  124. )
  125. ]
  126. if basename.endswith('.exe'):
  127. win_base, py_ver, platform = parse_bdist_wininst(basename)
  128. if win_base is not None:
  129. return interpret_distro_name(
  130. location, win_base, metadata, py_ver, BINARY_DIST, platform
  131. )
  132. # Try source distro extensions (.zip, .tgz, etc.)
  133. #
  134. for ext in EXTENSIONS:
  135. if basename.endswith(ext):
  136. basename = basename[: -len(ext)]
  137. return interpret_distro_name(location, basename, metadata)
  138. return [] # no extension matched
  139. def distros_for_filename(filename, metadata=None):
  140. """Yield possible egg or source distribution objects based on a filename"""
  141. return distros_for_location(
  142. normalize_path(filename), os.path.basename(filename), metadata
  143. )
  144. def interpret_distro_name(
  145. location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
  146. ):
  147. """Generate the interpretation of a source distro name
  148. Note: if `location` is a filesystem filename, you should call
  149. ``pkg_resources.normalize_path()`` on it before passing it to this
  150. routine!
  151. """
  152. parts = basename.split('-')
  153. if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
  154. # it is a bdist_dumb, not an sdist -- bail out
  155. return
  156. # find the pivot (p) that splits the name from the version.
  157. # infer the version as the first item that has a digit.
  158. for p in range(len(parts)):
  159. if parts[p][:1].isdigit():
  160. break
  161. else:
  162. p = len(parts)
  163. yield Distribution(
  164. location,
  165. metadata,
  166. '-'.join(parts[:p]),
  167. '-'.join(parts[p:]),
  168. py_version=py_version,
  169. precedence=precedence,
  170. platform=platform,
  171. )
  172. def unique_values(func):
  173. """
  174. Wrap a function returning an iterable such that the resulting iterable
  175. only ever yields unique items.
  176. """
  177. @wraps(func)
  178. def wrapper(*args, **kwargs):
  179. return unique_everseen(func(*args, **kwargs))
  180. return wrapper
# Group 1 is the interior of the tag, group 2 the ``rel`` value. At most ten
# whitespace characters are accepted on either side of the ``=``.
REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I)
"""
Regex for an HTML tag with 'rel="val"' attributes.
"""
  185. @unique_values
  186. def find_external_links(url, page):
  187. """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
  188. for match in REL.finditer(page):
  189. tag, rel = match.groups()
  190. rels = set(map(str.strip, rel.lower().split(',')))
  191. if 'homepage' in rels or 'download' in rels:
  192. for match in HREF.finditer(tag):
  193. yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
  194. for tag in ("<th>Home Page", "<th>Download URL"):
  195. pos = page.find(tag)
  196. if pos != -1:
  197. match = HREF.search(page, pos)
  198. if match:
  199. yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
  200. class ContentChecker:
  201. """
  202. A null content checker that defines the interface for checking content
  203. """
  204. def feed(self, block):
  205. """
  206. Feed a block of data to the hash.
  207. """
  208. return
  209. def is_valid(self):
  210. """
  211. Check the hash. Return False if validation fails.
  212. """
  213. return True
  214. def report(self, reporter, template):
  215. """
  216. Call reporter with information about the checker (hash name)
  217. substituted into the template.
  218. """
  219. return
  220. class HashChecker(ContentChecker):
  221. pattern = re.compile(
  222. r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
  223. r'(?P<expected>[a-f0-9]+)'
  224. )
  225. def __init__(self, hash_name, expected) -> None:
  226. self.hash_name = hash_name
  227. self.hash = hashlib.new(hash_name)
  228. self.expected = expected
  229. @classmethod
  230. def from_url(cls, url):
  231. "Construct a (possibly null) ContentChecker from a URL"
  232. fragment = urllib.parse.urlparse(url)[-1]
  233. if not fragment:
  234. return ContentChecker()
  235. match = cls.pattern.search(fragment)
  236. if not match:
  237. return ContentChecker()
  238. return cls(**match.groupdict())
  239. def feed(self, block):
  240. self.hash.update(block)
  241. def is_valid(self):
  242. return self.hash.hexdigest() == self.expected
  243. def report(self, reporter, template):
  244. msg = template % self.hash_name
  245. return reporter(msg)
  246. class PackageIndex(Environment):
  247. """A distribution index that scans web pages for download URLs"""
  248. def __init__(
  249. self,
  250. index_url: str = "https://pypi.org/simple/",
  251. hosts=('*',),
  252. ca_bundle=None,
  253. verify_ssl: bool = True,
  254. *args,
  255. **kw,
  256. ) -> None:
  257. super().__init__(*args, **kw)
  258. self.index_url = index_url + "/"[: not index_url.endswith('/')]
  259. self.scanned_urls: dict = {}
  260. self.fetched_urls: dict = {}
  261. self.package_pages: dict = {}
  262. self.allows = re.compile('|'.join(map(translate, hosts))).match
  263. self.to_scan: list = []
  264. self.opener = urllib.request.urlopen
  265. def add(self, dist):
  266. # ignore invalid versions
  267. try:
  268. parse_version(dist.version)
  269. except Exception:
  270. return None
  271. return super().add(dist)
  272. # FIXME: 'PackageIndex.process_url' is too complex (14)
  273. def process_url(self, url, retrieve: bool = False) -> None: # noqa: C901
  274. """Evaluate a URL as a possible download, and maybe retrieve it"""
  275. if url in self.scanned_urls and not retrieve:
  276. return
  277. self.scanned_urls[url] = True
  278. if not URL_SCHEME(url):
  279. self.process_filename(url)
  280. return
  281. else:
  282. dists = list(distros_for_url(url))
  283. if dists:
  284. if not self.url_ok(url):
  285. return
  286. self.debug("Found link: %s", url)
  287. if dists or not retrieve or url in self.fetched_urls:
  288. list(map(self.add, dists))
  289. return # don't need the actual page
  290. if not self.url_ok(url):
  291. self.fetched_urls[url] = True
  292. return
  293. self.info("Reading %s", url)
  294. self.fetched_urls[url] = True # prevent multiple fetch attempts
  295. tmpl = "Download error on %s: %%s -- Some packages may not be found!"
  296. f = self.open_url(url, tmpl % url)
  297. if f is None:
  298. return
  299. if isinstance(f, urllib.error.HTTPError) and f.code == 401:
  300. self.info(f"Authentication error: {f.msg}")
  301. self.fetched_urls[f.url] = True
  302. if 'html' not in f.headers.get('content-type', '').lower():
  303. f.close() # not html, we can't process it
  304. return
  305. base = f.url # handle redirects
  306. page = f.read()
  307. if not isinstance(page, str):
  308. # In Python 3 and got bytes but want str.
  309. if isinstance(f, urllib.error.HTTPError):
  310. # Errors have no charset, assume latin1:
  311. charset = 'latin-1'
  312. else:
  313. charset = f.headers.get_param('charset') or 'latin-1'
  314. page = page.decode(charset, "ignore")
  315. f.close()
  316. for match in HREF.finditer(page):
  317. link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
  318. self.process_url(link)
  319. if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
  320. page = self.process_index(url, page)
  321. def process_filename(self, fn, nested: bool = False) -> None:
  322. # process filenames or directories
  323. if not os.path.exists(fn):
  324. self.warn("Not found: %s", fn)
  325. return
  326. if os.path.isdir(fn) and not nested:
  327. path = os.path.realpath(fn)
  328. for item in os.listdir(path):
  329. self.process_filename(os.path.join(path, item), True)
  330. dists = distros_for_filename(fn)
  331. if dists:
  332. self.debug("Found: %s", fn)
  333. list(map(self.add, dists))
  334. def url_ok(self, url, fatal: bool = False) -> bool:
  335. s = URL_SCHEME(url)
  336. is_file = s and s.group(1).lower() == 'file'
  337. if is_file or self.allows(urllib.parse.urlparse(url)[1]):
  338. return True
  339. msg = (
  340. "\nNote: Bypassing %s (disallowed host; see "
  341. "https://setuptools.pypa.io/en/latest/deprecated/"
  342. "easy_install.html#restricting-downloads-with-allow-hosts for details).\n"
  343. )
  344. if fatal:
  345. raise DistutilsError(msg % url)
  346. else:
  347. self.warn(msg, url)
  348. return False
  349. def scan_egg_links(self, search_path) -> None:
  350. dirs = filter(os.path.isdir, search_path)
  351. egg_links = (
  352. (path, entry)
  353. for path in dirs
  354. for entry in os.listdir(path)
  355. if entry.endswith('.egg-link')
  356. )
  357. list(itertools.starmap(self.scan_egg_link, egg_links))
  358. def scan_egg_link(self, path, entry) -> None:
  359. content = _read_utf8_with_fallback(os.path.join(path, entry))
  360. # filter non-empty lines
  361. lines = list(filter(None, map(str.strip, content.splitlines())))
  362. if len(lines) != 2:
  363. # format is not recognized; punt
  364. return
  365. egg_path, _setup_path = lines
  366. for dist in find_distributions(os.path.join(path, egg_path)):
  367. dist.location = os.path.join(path, *lines)
  368. dist.precedence = SOURCE_DIST
  369. self.add(dist)
  370. def _scan(self, link):
  371. # Process a URL to see if it's for a package page
  372. NO_MATCH_SENTINEL = None, None
  373. if not link.startswith(self.index_url):
  374. return NO_MATCH_SENTINEL
  375. parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/')))
  376. if len(parts) != 2 or '#' in parts[1]:
  377. return NO_MATCH_SENTINEL
  378. # it's a package page, sanitize and index it
  379. pkg = safe_name(parts[0])
  380. ver = safe_version(parts[1])
  381. self.package_pages.setdefault(pkg.lower(), {})[link] = True
  382. return to_filename(pkg), to_filename(ver)
  383. def process_index(self, url, page):
  384. """Process the contents of a PyPI page"""
  385. # process an index page into the package-page index
  386. for match in HREF.finditer(page):
  387. try:
  388. self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
  389. except ValueError:
  390. pass
  391. pkg, ver = self._scan(url) # ensure this page is in the page index
  392. if not pkg:
  393. return "" # no sense double-scanning non-package pages
  394. # process individual package page
  395. for new_url in find_external_links(url, page):
  396. # Process the found URL
  397. base, frag = egg_info_for_url(new_url)
  398. if base.endswith('.py') and not frag:
  399. if ver:
  400. new_url += f'#egg={pkg}-{ver}'
  401. else:
  402. self.need_version_info(url)
  403. self.scan_url(new_url)
  404. return PYPI_MD5.sub(
  405. lambda m: '<a href="{}#md5={}">{}</a>'.format(*m.group(1, 3, 2)), page
  406. )
  407. def need_version_info(self, url) -> None:
  408. self.scan_all(
  409. "Page at %s links to .py file(s) without version info; an index "
  410. "scan is required.",
  411. url,
  412. )
  413. def scan_all(self, msg=None, *args) -> None:
  414. if self.index_url not in self.fetched_urls:
  415. if msg:
  416. self.warn(msg, *args)
  417. self.info("Scanning index of all packages (this may take a while)")
  418. self.scan_url(self.index_url)
  419. def find_packages(self, requirement) -> None:
  420. self.scan_url(self.index_url + requirement.unsafe_name + '/')
  421. if not self.package_pages.get(requirement.key):
  422. # Fall back to safe version of the name
  423. self.scan_url(self.index_url + requirement.project_name + '/')
  424. if not self.package_pages.get(requirement.key):
  425. # We couldn't find the target package, so search the index page too
  426. self.not_found_in_index(requirement)
  427. for url in list(self.package_pages.get(requirement.key, ())):
  428. # scan each page that might be related to the desired package
  429. self.scan_url(url)
  430. def obtain(self, requirement, installer=None):
  431. self.prescan()
  432. self.find_packages(requirement)
  433. for dist in self[requirement.key]:
  434. if dist in requirement:
  435. return dist
  436. self.debug("%s does not match %s", requirement, dist)
  437. return super().obtain(requirement, installer)
  438. def check_hash(self, checker, filename, tfp) -> None:
  439. """
  440. checker is a ContentChecker
  441. """
  442. checker.report(self.debug, f"Validating %s checksum for {filename}")
  443. if not checker.is_valid():
  444. tfp.close()
  445. os.unlink(filename)
  446. raise DistutilsError(
  447. f"{checker.hash.name} validation failed for {os.path.basename(filename)}; "
  448. "possible download problem?"
  449. )
  450. def add_find_links(self, urls) -> None:
  451. """Add `urls` to the list that will be prescanned for searches"""
  452. for url in urls:
  453. if (
  454. self.to_scan is None # if we have already "gone online"
  455. or not URL_SCHEME(url) # or it's a local file/directory
  456. or url.startswith('file:')
  457. or list(distros_for_url(url)) # or a direct package link
  458. ):
  459. # then go ahead and process it now
  460. self.scan_url(url)
  461. else:
  462. # otherwise, defer retrieval till later
  463. self.to_scan.append(url)
  464. def prescan(self):
  465. """Scan urls scheduled for prescanning (e.g. --find-links)"""
  466. if self.to_scan:
  467. list(map(self.scan_url, self.to_scan))
  468. self.to_scan = None # from now on, go ahead and process immediately
  469. def not_found_in_index(self, requirement) -> None:
  470. if self[requirement.key]: # we've seen at least one distro
  471. meth, msg = self.info, "Couldn't retrieve index page for %r"
  472. else: # no distros seen for this name, might be misspelled
  473. meth, msg = self.warn, "Couldn't find index page for %r (maybe misspelled?)"
  474. meth(msg, requirement.unsafe_name)
  475. self.scan_all()
  476. def download(self, spec, tmpdir):
  477. """Locate and/or download `spec` to `tmpdir`, returning a local path
  478. `spec` may be a ``Requirement`` object, or a string containing a URL,
  479. an existing local filename, or a project/version requirement spec
  480. (i.e. the string form of a ``Requirement`` object). If it is the URL
  481. of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
  482. that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
  483. automatically created alongside the downloaded file.
  484. If `spec` is a ``Requirement`` object or a string containing a
  485. project/version requirement spec, this method returns the location of
  486. a matching distribution (possibly after downloading it to `tmpdir`).
  487. If `spec` is a locally existing file or directory name, it is simply
  488. returned unchanged. If `spec` is a URL, it is downloaded to a subpath
  489. of `tmpdir`, and the local filename is returned. Various errors may be
  490. raised if a problem occurs during downloading.
  491. """
  492. if not isinstance(spec, Requirement):
  493. scheme = URL_SCHEME(spec)
  494. if scheme:
  495. # It's a url, download it to tmpdir
  496. found = self._download_url(spec, tmpdir)
  497. base, fragment = egg_info_for_url(spec)
  498. if base.endswith('.py'):
  499. found = self.gen_setup(found, fragment, tmpdir)
  500. return found
  501. elif os.path.exists(spec):
  502. # Existing file or directory, just return it
  503. return spec
  504. else:
  505. spec = parse_requirement_arg(spec)
  506. return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
    def fetch_distribution(  # noqa: C901  # is too complex (14)  # FIXME
        self,
        requirement,
        tmpdir,
        force_scan: bool = False,
        source: bool = False,
        develop_ok: bool = False,
        local_index=None,
    ) -> Distribution | None:
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages.  If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename.  If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered.  Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        # Development eggs already warned about, so each is reported once.
        skipped = set()
        dist = None

        def find(req, env: Environment | None = None):
            # Search ``env`` (this index by default) for a usable match.
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:
                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn(
                            "Skipping development or system egg: %s",
                            dist,
                        )
                        skipped.add(dist)
                    continue

                # With ``source`` set, only precedences up to SOURCE_DIST qualify.
                test = dist in req and (dist.precedence <= SOURCE_DIST or not source)
                if test:
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    # Accept the candidate only if the download exists on disk.
                    if os.path.exists(dist.download_location):
                        return dist

            return None

        # Stage 1: forced online scan.
        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        # Stage 2: the caller-supplied local index.
        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        # Stage 3: whatever is already known, after any pending prescan.
        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        # Stage 4: search the package index, unless stage 1 already did.
        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
            return None
        else:
            self.info("Best match: %s", dist)
            # Return a copy whose location is the downloaded path.
            return dist.clone(location=dist.download_location)
  577. def fetch(
  578. self, requirement, tmpdir, force_scan: bool = False, source: bool = False
  579. ) -> str | None:
  580. """Obtain a file suitable for fulfilling `requirement`
  581. DEPRECATED; use the ``fetch_distribution()`` method now instead. For
  582. backward compatibility, this routine is identical but returns the
  583. ``location`` of the downloaded distribution instead of a distribution
  584. object.
  585. """
  586. dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
  587. if dist is not None:
  588. return dist.location
  589. return None
  590. def gen_setup(self, filename, fragment, tmpdir):
  591. match = EGG_FRAGMENT.match(fragment)
  592. dists = (
  593. match
  594. and [
  595. d
  596. for d in interpret_distro_name(filename, match.group(1), None)
  597. if d.version
  598. ]
  599. or []
  600. )
  601. if len(dists) == 1: # unambiguous ``#egg`` fragment
  602. basename = os.path.basename(filename)
  603. # Make sure the file has been downloaded to the temp dir.
  604. if os.path.dirname(filename) != tmpdir:
  605. dst = os.path.join(tmpdir, basename)
  606. if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
  607. shutil.copy2(filename, dst)
  608. filename = dst
  609. with open(os.path.join(tmpdir, 'setup.py'), 'w', encoding="utf-8") as file:
  610. file.write(
  611. "from setuptools import setup\n"
  612. f"setup(name={dists[0].project_name!r}, version={dists[0].version!r}, py_modules=[{os.path.splitext(basename)[0]!r}])\n"
  613. )
  614. return filename
  615. elif match:
  616. raise DistutilsError(
  617. f"Can't unambiguously interpret project/version identifier {fragment!r}; "
  618. "any dashes in the name or version should be escaped using "
  619. f"underscores. {dists!r}"
  620. )
  621. else:
  622. raise DistutilsError(
  623. "Can't process plain .py files without an '#egg=name-version'"
  624. " suffix to enable automatic setup script generation."
  625. )
    # Number of bytes requested per read in ``_download_to``.
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Stream ``url`` into ``filename`` and return the response headers.

        Blocks are fed to a checker built from the URL, and progress is
        reported through ``reporthook``. Raises ``DistutilsError`` when
        ``open_url`` hands back an ``HTTPError``; ``check_hash`` may raise as
        well. The response object is always closed.
        """
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(f"Can't download {url}: {fp.code} {fp.msg}")
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1  # stays -1 when there is no Content-Length header
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                # Called while ``tfp`` is still open; ``check_hash`` closes
                # it itself before deleting a file that fails validation.
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp:
                fp.close()
    def reporthook(self, url, filename, blocknum, blksize, size) -> None:
        """Progress callback invoked by ``_download_to``; does nothing here."""
        pass  # no-op
    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
        """Open ``url`` and return the response object.

        ``file:`` URLs go through ``local_open``; everything else through
        ``open_with_auth``. An ``HTTPError`` is returned rather than raised.
        For the other handled failures: if ``warning`` is given it is used as
        a %-format template for ``self.warn`` and ``None`` is returned
        implicitly; otherwise a ``DistutilsError`` is raised.
        """
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError(f'{url} {msg}') from v
        except urllib.error.HTTPError as v:
            # Handed back to the caller, which inspects the status itself.
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError(f"Download error for {url}: {v.reason}") from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    f'{url} returned a bad status line. The server might be '
                    f'down, {v.line}'
                ) from v
        except (http.client.HTTPException, OSError) as v:
            # Catch-all for remaining HTTP and OS-level errors.
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError(f"Download error for {url}: {v}") from v
  694. def _download_url(self, url, tmpdir):
  695. # Determine download filename
  696. #
  697. name, _fragment = egg_info_for_url(url)
  698. if name:
  699. while '..' in name:
  700. name = name.replace('..', '.').replace('\\', '_')
  701. else:
  702. name = "__downloaded__" # default if URL has no path contents
  703. if name.endswith('.egg.zip'):
  704. name = name[:-4] # strip the extra .zip before download
  705. filename = os.path.join(tmpdir, name)
  706. return self._download_vcs(url, filename) or self._download_other(url, filename)
  707. @staticmethod
  708. def _resolve_vcs(url):
  709. """
  710. >>> rvcs = PackageIndex._resolve_vcs
  711. >>> rvcs('git+http://foo/bar')
  712. 'git'
  713. >>> rvcs('hg+https://foo/bar')
  714. 'hg'
  715. >>> rvcs('git:myhost')
  716. 'git'
  717. >>> rvcs('hg:myhost')
  718. >>> rvcs('http://foo/bar')
  719. """
  720. scheme = urllib.parse.urlsplit(url).scheme
  721. pre, sep, _post = scheme.partition('+')
  722. # svn and git have their own protocol; hg does not
  723. allowed = set(['svn', 'git'] + ['hg'] * bool(sep))
  724. return next(iter({pre} & allowed), None)
  725. def _download_vcs(self, url, spec_filename):
  726. vcs = self._resolve_vcs(url)
  727. if not vcs:
  728. return None
  729. if vcs == 'svn':
  730. raise DistutilsError(
  731. f"Invalid config, SVN download is not supported: {url}"
  732. )
  733. filename, _, _ = spec_filename.partition('#')
  734. url, rev = self._vcs_split_rev_from_url(url)
  735. self.info(f"Doing {vcs} clone from {url} to {filename}")
  736. subprocess.check_call([vcs, 'clone', '--quiet', url, filename])
  737. co_commands = dict(
  738. git=[vcs, '-C', filename, 'checkout', '--quiet', rev],
  739. hg=[vcs, '--cwd', filename, 'up', '-C', '-r', rev, '-q'],
  740. )
  741. if rev is not None:
  742. self.info(f"Checking out {rev}")
  743. subprocess.check_call(co_commands[vcs])
  744. return filename
  745. def _download_other(self, url, filename):
  746. scheme = urllib.parse.urlsplit(url).scheme
  747. if scheme == 'file': # pragma: no cover
  748. return urllib.request.url2pathname(urllib.parse.urlparse(url).path)
  749. # raise error if not allowed
  750. self.url_ok(url, True)
  751. return self._attempt_download(url, filename)
    def scan_url(self, url) -> None:
        """Process ``url`` with retrieval enabled (``process_url(url, True)``)."""
        self.process_url(url, True)
  754. def _attempt_download(self, url, filename):
  755. headers = self._download_to(url, filename)
  756. if 'html' in headers.get('content-type', '').lower():
  757. return self._invalid_download_html(url, headers, filename)
  758. else:
  759. return filename
    def _invalid_download_html(self, url, headers, filename):
        """Delete the downloaded ``filename`` and raise ``DistutilsError``.

        ``headers`` is accepted but not used.
        """
        os.unlink(filename)
        raise DistutilsError(f"Unexpected HTML page found at {url}")
  763. @staticmethod
  764. def _vcs_split_rev_from_url(url):
  765. """
  766. Given a possible VCS URL, return a clean URL and resolved revision if any.
  767. >>> vsrfu = PackageIndex._vcs_split_rev_from_url
  768. >>> vsrfu('git+https://github.com/pypa/setuptools@v69.0.0#egg-info=setuptools')
  769. ('https://github.com/pypa/setuptools', 'v69.0.0')
  770. >>> vsrfu('git+https://github.com/pypa/setuptools#egg-info=setuptools')
  771. ('https://github.com/pypa/setuptools', None)
  772. >>> vsrfu('http://foo/bar')
  773. ('http://foo/bar', None)
  774. """
  775. parts = urllib.parse.urlsplit(url)
  776. clean_scheme = parts.scheme.split('+', 1)[-1]
  777. # Some fragment identification fails
  778. no_fragment_path, _, _ = parts.path.partition('#')
  779. pre, sep, post = no_fragment_path.rpartition('@')
  780. clean_path, rev = (pre, post) if sep else (post, None)
  781. resolved = parts._replace(
  782. scheme=clean_scheme,
  783. path=clean_path,
  784. # discard the fragment
  785. fragment='',
  786. ).geturl()
  787. return resolved, rev
    def debug(self, msg, *args) -> None:
        """Forward ``msg`` and ``args`` to ``log.debug``."""
        log.debug(msg, *args)
    def info(self, msg, *args) -> None:
        """Forward ``msg`` and ``args`` to ``log.info``."""
        log.info(msg, *args)
    def warn(self, msg, *args) -> None:
        """Forward ``msg`` and ``args`` to ``log.warn``."""
        log.warn(msg, *args)
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference); the
# trailing semicolon is optional. ``entity_sub`` is the pattern's bound
# ``sub`` method.
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
  797. def decode_entity(match):
  798. what = match.group(0)
  799. return html.unescape(what)
  800. def htmldecode(text):
  801. """
  802. Decode HTML entities in the given text.
  803. >>> htmldecode(
  804. ... 'https://../package_name-0.1.2.tar.gz'
  805. ... '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
  806. 'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
  807. """
  808. return entity_sub(decode_entity, text)
  809. def socket_timeout(timeout=15):
  810. def _socket_timeout(func):
  811. def _socket_timeout(*args, **kwargs):
  812. old_timeout = socket.getdefaulttimeout()
  813. socket.setdefaulttimeout(timeout)
  814. try:
  815. return func(*args, **kwargs)
  816. finally:
  817. socket.setdefaulttimeout(old_timeout)
  818. return _socket_timeout
  819. return _socket_timeout
  820. def _encode_auth(auth):
  821. """
  822. Encode auth from a URL suitable for an HTTP header.
  823. >>> str(_encode_auth('username%3Apassword'))
  824. 'dXNlcm5hbWU6cGFzc3dvcmQ='
  825. Long auth strings should not cause a newline to be inserted.
  826. >>> long_auth = 'username:' + 'password'*10
  827. >>> chr(10) in str(_encode_auth(long_auth))
  828. False
  829. """
  830. auth_s = urllib.parse.unquote(auth)
  831. # convert to bytes
  832. auth_bytes = auth_s.encode()
  833. encoded_bytes = base64.b64encode(auth_bytes)
  834. # convert back to a string
  835. encoded = encoded_bytes.decode()
  836. # strip the trailing carriage return
  837. return encoded.replace('\n', '')
  838. class Credential(NamedTuple):
  839. """
  840. A username/password pair.
  841. Displayed separated by `:`.
  842. >>> str(Credential('username', 'password'))
  843. 'username:password'
  844. """
  845. username: str
  846. password: str
  847. def __str__(self) -> str:
  848. return f'{self.username}:{self.password}'
  849. class PyPIConfig(configparser.RawConfigParser):
  850. def __init__(self):
  851. """
  852. Load from ~/.pypirc
  853. """
  854. defaults = dict.fromkeys(['username', 'password', 'repository'], '')
  855. super().__init__(defaults)
  856. rc = os.path.join(os.path.expanduser('~'), '.pypirc')
  857. if os.path.exists(rc):
  858. _cfg_read_utf8_with_fallback(self, rc)
  859. @property
  860. def creds_by_repository(self):
  861. sections_with_repositories = [
  862. section
  863. for section in self.sections()
  864. if self.get(section, 'repository').strip()
  865. ]
  866. return dict(map(self._get_repo_cred, sections_with_repositories))
  867. def _get_repo_cred(self, section):
  868. repo = self.get(section, 'repository').strip()
  869. return repo, Credential(
  870. self.get(section, 'username').strip(),
  871. self.get(section, 'password').strip(),
  872. )
  873. def find_credential(self, url):
  874. """
  875. If the URL indicated appears to be a repository defined in this
  876. config, return the credential for that repository.
  877. """
  878. for repository, cred in self.creds_by_repository.items():
  879. if url.startswith(repository):
  880. return cred
  881. return None
  882. def open_with_auth(url, opener=urllib.request.urlopen):
  883. """Open a urllib2 request, handling HTTP authentication"""
  884. parsed = urllib.parse.urlparse(url)
  885. scheme, netloc, path, params, query, frag = parsed
  886. # Double scheme does not raise on macOS as revealed by a
  887. # failing test. We would expect "nonnumeric port". Refs #20.
  888. if netloc.endswith(':'):
  889. raise http.client.InvalidURL("nonnumeric port: ''")
  890. if scheme in ('http', 'https'):
  891. auth, address = _splituser(netloc)
  892. else:
  893. auth, address = (None, None)
  894. if not auth:
  895. cred = PyPIConfig().find_credential(url)
  896. if cred:
  897. auth = str(cred)
  898. info = cred.username, url
  899. log.info('Authenticating as %s for %s (from .pypirc)', *info)
  900. if auth:
  901. auth = "Basic " + _encode_auth(auth)
  902. parts = scheme, address, path, params, query, frag
  903. new_url = urllib.parse.urlunparse(parts)
  904. request = urllib.request.Request(new_url)
  905. request.add_header("Authorization", auth)
  906. else:
  907. request = urllib.request.Request(url)
  908. request.add_header('User-Agent', user_agent)
  909. fp = opener(request)
  910. if auth:
  911. # Put authentication info back into request URL if same host,
  912. # so that links found on the page will work
  913. s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
  914. if s2 == scheme and h2 == address:
  915. parts = s2, netloc, path2, param2, query2, frag2
  916. fp.url = urllib.parse.urlunparse(parts)
  917. return fp
  918. # copy of urllib.parse._splituser from Python 3.8
  919. # See https://github.com/python/cpython/issues/80072.
  920. def _splituser(host):
  921. """splituser('user[:passwd]@host[:port]')
  922. --> 'user[:passwd]', 'host[:port]'."""
  923. user, delim, host = host.rpartition('@')
  924. return (user if delim else None), host
  925. # adding a timeout to avoid freezing package_index
  926. open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
  927. def fix_sf_url(url):
  928. return url # backward compatibility
  929. def local_open(url):
  930. """Read a local path, with special support for directories"""
  931. _scheme, _server, path, _param, _query, _frag = urllib.parse.urlparse(url)
  932. filename = urllib.request.url2pathname(path)
  933. if os.path.isfile(filename):
  934. return urllib.request.urlopen(url)
  935. elif path.endswith('/') and os.path.isdir(filename):
  936. files = []
  937. for f in os.listdir(filename):
  938. filepath = os.path.join(filename, f)
  939. if f == 'index.html':
  940. body = _read_utf8_with_fallback(filepath)
  941. break
  942. elif os.path.isdir(filepath):
  943. f += '/'
  944. files.append(f'<a href="{f}">{f}</a>')
  945. else:
  946. tmpl = "<html><head><title>{url}</title></head><body>{files}</body></html>"
  947. body = tmpl.format(url=url, files='\n'.join(files))
  948. status, message = 200, "OK"
  949. else:
  950. status, message, body = 404, "Path not found", "Not found"
  951. headers = {'content-type': 'text/html'}
  952. body_stream = io.StringIO(body)
  953. return urllib.error.HTTPError(url, status, message, headers, body_stream)