Typing for spack checksum code paths (#42183)

This commit is contained in:
Harmen Stoppels 2024-01-19 14:56:20 +01:00 committed by GitHub
parent 75e96b856e
commit edc8a5f249
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 79 additions and 85 deletions

View File

@ -5,6 +5,7 @@
import re import re
import sys import sys
from typing import Dict, Optional
import llnl.string import llnl.string
import llnl.util.lang import llnl.util.lang
@ -25,7 +26,7 @@
) )
from spack.util.editor import editor from spack.util.editor import editor
from spack.util.format import get_version_lines from spack.util.format import get_version_lines
from spack.version import Version from spack.version import StandardVersion, Version
description = "checksum available versions of a package" description = "checksum available versions of a package"
section = "packaging" section = "packaging"
@ -89,32 +90,30 @@ def checksum(parser, args):
spec = spack.spec.Spec(args.package) spec = spack.spec.Spec(args.package)
# Get the package we're going to generate checksums for # Get the package we're going to generate checksums for
pkg = spack.repo.PATH.get_pkg_class(spec.name)(spec) pkg: PackageBase = spack.repo.PATH.get_pkg_class(spec.name)(spec)
# Skip manually downloaded packages # Skip manually downloaded packages
if pkg.manual_download: if pkg.manual_download:
raise ManualDownloadRequiredError(pkg.download_instr) raise ManualDownloadRequiredError(pkg.download_instr)
versions = [Version(v) for v in args.versions] versions = [StandardVersion.from_string(v) for v in args.versions]
# Define placeholder for remote versions. # Define placeholder for remote versions. This'll help reduce redundant work if we need to
# This'll help reduce redundant work if we need to check for the existance # check for the existence of remote versions more than once.
# of remote versions more than once. remote_versions: Optional[Dict[StandardVersion, str]] = None
remote_versions = None
# Add latest version if requested # Add latest version if requested
if args.latest: if args.latest:
remote_versions = pkg.fetch_remote_versions(args.jobs) remote_versions = pkg.fetch_remote_versions(concurrency=args.jobs)
if len(remote_versions) > 0: if len(remote_versions) > 0:
latest_version = sorted(remote_versions.keys(), reverse=True)[0] versions.append(max(remote_versions.keys()))
versions.append(latest_version)
# Add preferred version if requested # Add preferred version if requested (todo: exclude git versions)
if args.preferred: if args.preferred:
versions.append(preferred_version(pkg)) versions.append(preferred_version(pkg))
# Store a dict of the form version -> URL # Store a dict of the form version -> URL
url_dict = {} url_dict: Dict[StandardVersion, str] = {}
for version in versions: for version in versions:
if deprecated_version(pkg, version): if deprecated_version(pkg, version):
@ -124,16 +123,16 @@ def checksum(parser, args):
if url is not None: if url is not None:
url_dict[version] = url url_dict[version] = url
continue continue
# if we get here, it's because no valid url was provided by the package # If we get here, it's because no valid url was provided by the package. Do expensive
# do expensive fallback to try to recover # fallback to try to recover
if remote_versions is None: if remote_versions is None:
remote_versions = pkg.fetch_remote_versions(args.jobs) remote_versions = pkg.fetch_remote_versions(concurrency=args.jobs)
if version in remote_versions: if version in remote_versions:
url_dict[version] = remote_versions[version] url_dict[version] = remote_versions[version]
if len(versions) <= 0: if len(versions) <= 0:
if remote_versions is None: if remote_versions is None:
remote_versions = pkg.fetch_remote_versions(args.jobs) remote_versions = pkg.fetch_remote_versions(concurrency=args.jobs)
url_dict = remote_versions url_dict = remote_versions
# A spidered URL can differ from the package.py *computed* URL, pointing to different tarballs. # A spidered URL can differ from the package.py *computed* URL, pointing to different tarballs.

View File

@ -67,7 +67,7 @@
from spack.stage import DIYStage, ResourceStage, Stage, StageComposite, compute_stage_name from spack.stage import DIYStage, ResourceStage, Stage, StageComposite, compute_stage_name
from spack.util.executable import ProcessError, which from spack.util.executable import ProcessError, which
from spack.util.package_hash import package_hash from spack.util.package_hash import package_hash
from spack.version import GitVersion, StandardVersion, Version from spack.version import GitVersion, StandardVersion
FLAG_HANDLER_RETURN_TYPE = Tuple[ FLAG_HANDLER_RETURN_TYPE = Tuple[
Optional[Iterable[str]], Optional[Iterable[str]], Optional[Iterable[str]] Optional[Iterable[str]], Optional[Iterable[str]], Optional[Iterable[str]]
@ -94,29 +94,26 @@
spack_times_log = "install_times.json" spack_times_log = "install_times.json"
def deprecated_version(pkg, version): def deprecated_version(pkg: "PackageBase", version: Union[str, StandardVersion]) -> bool:
"""Return True if the version is deprecated, False otherwise. """Return True iff the version is deprecated.
Arguments: Arguments:
pkg (PackageBase): The package whose version is to be checked. pkg: The package whose version is to be checked.
version (str or spack.version.StandardVersion): The version being checked version: The version being checked
""" """
if not isinstance(version, StandardVersion): if not isinstance(version, StandardVersion):
version = Version(version) version = StandardVersion.from_string(version)
for k, v in pkg.versions.items(): details = pkg.versions.get(version)
if version == k and v.get("deprecated", False): return details is not None and details.get("deprecated", False)
return True
return False
def preferred_version(pkg): def preferred_version(pkg: "PackageBase"):
""" """
Returns a sorted list of the preferred versions of the package. Returns a sorted list of the preferred versions of the package.
Arguments: Arguments:
pkg (PackageBase): The package whose versions are to be assessed. pkg: The package whose versions are to be assessed.
""" """
# Here we sort first on the fact that a version is marked # Here we sort first on the fact that a version is marked
# as preferred in the package, then on the fact that the # as preferred in the package, then on the fact that the
@ -903,22 +900,16 @@ def version(self):
@classmethod @classmethod
@memoized @memoized
def version_urls(cls): def version_urls(cls) -> Dict[StandardVersion, str]:
"""OrderedDict of explicitly defined URLs for versions of this package. """Dict of explicitly defined URLs for versions of this package.
Return: Return:
An OrderedDict (version -> URL) different versions of this A dict mapping version to url, ordered by version.
package, sorted by version.
A version's URL only appears in the result if it has an A version's URL only appears in the result if it has an explicitly defined ``url``
explicitly defined ``url`` argument. So, this list may be empty argument. So, this list may be empty if a package only defines ``url`` at the top level.
if a package only defines ``url`` at the top level.
""" """
version_urls = collections.OrderedDict() return {v: args["url"] for v, args in sorted(cls.versions.items()) if "url" in args}
for v, args in sorted(cls.versions.items()):
if "url" in args:
version_urls[v] = args["url"]
return version_urls
def nearest_url(self, version): def nearest_url(self, version):
"""Finds the URL with the "closest" version to ``version``. """Finds the URL with the "closest" version to ``version``.
@ -961,36 +952,39 @@ def update_external_dependencies(self, extendee_spec=None):
""" """
pass pass
def all_urls_for_version(self, version): def all_urls_for_version(self, version: StandardVersion) -> List[str]:
"""Return all URLs derived from version_urls(), url, urls, and """Return all URLs derived from version_urls(), url, urls, and
list_url (if it contains a version) in a package in that order. list_url (if it contains a version) in a package in that order.
Args: Args:
version (spack.version.Version): the version for which a URL is sought version: the version for which a URL is sought
""" """
uf = None uf = None
if type(self).url_for_version != PackageBase.url_for_version: if type(self).url_for_version != PackageBase.url_for_version:
uf = self.url_for_version uf = self.url_for_version
return self._implement_all_urls_for_version(version, uf) return self._implement_all_urls_for_version(version, uf)
def _implement_all_urls_for_version(self, version, custom_url_for_version=None): def _implement_all_urls_for_version(
if not isinstance(version, StandardVersion): self,
version = Version(version) version: Union[str, StandardVersion],
custom_url_for_version: Optional[Callable[[StandardVersion], Optional[str]]] = None,
) -> List[str]:
version = StandardVersion.from_string(version) if isinstance(version, str) else version
urls = [] urls: List[str] = []
# If we have a specific URL for this version, don't extrapolate. # If we have a specific URL for this version, don't extrapolate.
version_urls = self.version_urls() url = self.version_urls().get(version)
if version in version_urls: if url:
urls.append(version_urls[version]) urls.append(url)
# if there is a custom url_for_version, use it # if there is a custom url_for_version, use it
if custom_url_for_version is not None: if custom_url_for_version is not None:
u = custom_url_for_version(version) u = custom_url_for_version(version)
if u not in urls and u is not None: if u is not None and u not in urls:
urls.append(u) urls.append(u)
def sub_and_add(u): def sub_and_add(u: Optional[str]) -> None:
if u is None: if u is None:
return return
# skip the url if there is no version to replace # skip the url if there is no version to replace
@ -998,9 +992,7 @@ def sub_and_add(u):
spack.url.parse_version(u) spack.url.parse_version(u)
except spack.url.UndetectableVersionError: except spack.url.UndetectableVersionError:
return return
nu = spack.url.substitute_version(u, self.url_version(version)) urls.append(spack.url.substitute_version(u, self.url_version(version)))
urls.append(nu)
# If no specific URL, use the default, class-level URL # If no specific URL, use the default, class-level URL
sub_and_add(getattr(self, "url", None)) sub_and_add(getattr(self, "url", None))
@ -2358,15 +2350,14 @@ def format_doc(cls, **kwargs):
return results.getvalue() return results.getvalue()
@property @property
def all_urls(self): def all_urls(self) -> List[str]:
"""A list of all URLs in a package. """A list of all URLs in a package.
Check both class-level and version-specific URLs. Check both class-level and version-specific URLs.
Returns: Returns a list of URLs
list: a list of URLs
""" """
urls = [] urls: List[str] = []
if hasattr(self, "url") and self.url: if hasattr(self, "url") and self.url:
urls.append(self.url) urls.append(self.url)
@ -2379,7 +2370,9 @@ def all_urls(self):
urls.append(args["url"]) urls.append(args["url"])
return urls return urls
def fetch_remote_versions(self, concurrency=None): def fetch_remote_versions(
self, concurrency: Optional[int] = None
) -> Dict[StandardVersion, str]:
"""Find remote versions of this package. """Find remote versions of this package.
Uses ``list_url`` and any other URLs listed in the package file. Uses ``list_url`` and any other URLs listed in the package file.

View File

@ -29,6 +29,7 @@
import os import os
import pathlib import pathlib
import re import re
from typing import Any, Dict, Optional, Sequence, Union
import llnl.url import llnl.url
from llnl.path import convert_to_posix_path from llnl.path import convert_to_posix_path
@ -284,20 +285,19 @@ def parse_version_offset(path):
raise UndetectableVersionError(original_path) raise UndetectableVersionError(original_path)
def parse_version(path): def parse_version(path: str) -> spack.version.StandardVersion:
"""Try to extract a version string from a filename or URL. """Try to extract a version string from a filename or URL.
Args: Args:
path (str): The filename or URL for the package path: The filename or URL for the package
Returns: Returns: The version of the package
spack.version.Version: The version of the package
Raises: Raises:
UndetectableVersionError: If the URL does not match any regexes UndetectableVersionError: If the URL does not match any regexes
""" """
version, start, length, i, regex = parse_version_offset(path) version, start, length, i, regex = parse_version_offset(path)
return spack.version.Version(version) return spack.version.StandardVersion.from_string(version)
def parse_name_offset(path, v=None): def parse_name_offset(path, v=None):
@ -604,8 +604,12 @@ def color_url(path, **kwargs):
def find_versions_of_archive( def find_versions_of_archive(
archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None archive_urls: Union[str, Sequence[str]],
): list_url: Optional[str] = None,
list_depth: int = 0,
concurrency: Optional[int] = 32,
reference_package: Optional[Any] = None,
) -> Dict[spack.version.StandardVersion, str]:
"""Scrape web pages for new versions of a tarball. This function prefers URLs in the """Scrape web pages for new versions of a tarball. This function prefers URLs in the
following order: links found on the scraped page that match a url generated by the following order: links found on the scraped page that match a url generated by the
reference package, found and in the archive_urls list, found and derived from those reference package, found and in the archive_urls list, found and derived from those
@ -613,22 +617,18 @@ def find_versions_of_archive(
archive_urls list is included for the version. archive_urls list is included for the version.
Args: Args:
archive_urls (str or list or tuple): URL or sequence of URLs for archive_urls: URL or sequence of URLs for different versions of a package. Typically these
different versions of a package. Typically these are just the are just the tarballs from the package file itself. By default, this searches the
tarballs from the package file itself. By default, this searches parent directories of archives.
the parent directories of archives. list_url: URL for a listing of archives. Spack will scrape these pages for download links
list_url (str or None): URL for a listing of archives. that look like the archive URL.
Spack will scrape these pages for download links that look list_depth: max depth to follow links on list_url pages. Defaults to 0.
like the archive URL. concurrency: maximum number of concurrent requests
list_depth (int): max depth to follow links on list_url pages. reference_package: a spack package used as a reference for url detection. Uses the
Defaults to 0. url_for_version method on the package to produce reference urls which, if found, are
concurrency (int): maximum number of concurrent requests preferred.
reference_package (spack.package_base.PackageBase or None): a spack package
used as a reference for url detection. Uses the url_for_version
method on the package to produce reference urls which, if found,
are preferred.
""" """
if not isinstance(archive_urls, (list, tuple)): if isinstance(archive_urls, str):
archive_urls = [archive_urls] archive_urls = [archive_urls]
# Generate a list of list_urls based on archive urls and any # Generate a list of list_urls based on archive urls and any
@ -689,7 +689,7 @@ def find_versions_of_archive(
# Build a dict version -> URL from any links that match the wildcards. # Build a dict version -> URL from any links that match the wildcards.
# Walk through archive_url links first. # Walk through archive_url links first.
# Any conflicting versions will be overwritten by the list_url links. # Any conflicting versions will be overwritten by the list_url links.
versions = {} versions: Dict[spack.version.StandardVersion, str] = {}
matched = set() matched = set()
for url in sorted(links): for url in sorted(links):
url = convert_to_posix_path(url) url = convert_to_posix_path(url)

View File

@ -17,7 +17,7 @@
import urllib.parse import urllib.parse
from html.parser import HTMLParser from html.parser import HTMLParser
from pathlib import Path, PurePosixPath from pathlib import Path, PurePosixPath
from typing import IO, Dict, List, Optional, Set, Union from typing import IO, Dict, Iterable, List, Optional, Set, Union
from urllib.error import HTTPError, URLError from urllib.error import HTTPError, URLError
from urllib.request import HTTPSHandler, Request, build_opener from urllib.request import HTTPSHandler, Request, build_opener
@ -554,7 +554,9 @@ def list_url(url, recursive=False):
return gcs.get_all_blobs(recursive=recursive) return gcs.get_all_blobs(recursive=recursive)
def spider(root_urls: Union[str, List[str]], depth: int = 0, concurrency: Optional[int] = None): def spider(
root_urls: Union[str, Iterable[str]], depth: int = 0, concurrency: Optional[int] = None
):
"""Get web pages from root URLs. """Get web pages from root URLs.
If depth is specified (e.g., depth=2), then this will also follow up to <depth> levels If depth is specified (e.g., depth=2), then this will also follow up to <depth> levels