R versions (#29258)

* lower priority of package-provided urls

This change favors URLs found in a scraped page over those provided by
the package from `url_for_version`.  In most cases this doesn't matter,
but R specifically returns known-bad URLs in some cases, and the
fallback path for a failed fetch uses `fetch_remote_versions` to find a
substitute.  This fixes that problem.

fixes #29204

* consider what links actually exist in all cases

The `spack checksum` command only actually scraped when called with no
versions.  It now always scrapes and then selects URLs from the set of
URLs known to exist whenever possible.

fixes #25831

* bow to the wrath of flake8

* test-fetch urls from package, prefer if successful

* Update lib/spack/spack/package.py

Co-authored-by: Seth R. Johnson <johnsonsr@ornl.gov>

* reword as suggested

* re-enable mypy specific ignore and ignore pyflakes

* remove flake8 ignore from .flake8

* address review comments

* address comments

* add sneaky missing substitute

I missed this one because we call substitute on a URL that doesn't
contain a version component.  I'm not sure how that's supposed to work,
but apparently it's required by at least one mock package, so back in it
goes.

Co-authored-by: Seth R. Johnson <johnsonsr@ornl.gov>
This commit is contained in:
Tom Scogland 2022-03-18 17:42:17 -07:00 committed by GitHub
parent 531b1c5c3d
commit 9e01e17dc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 128 additions and 42 deletions

View File

@ -57,32 +57,32 @@ def checksum(parser, args):
pkg = spack.repo.get(args.package)
url_dict = {}
if args.versions:
# If the user asked for specific versions, use those
for version in args.versions:
versions = args.versions
if (not versions) and args.preferred:
versions = [preferred_version(pkg)]
if versions:
remote_versions = None
for version in versions:
version = ver(version)
if not isinstance(version, Version):
tty.die("Cannot generate checksums for version lists or "
"version ranges. Use unambiguous versions.")
url_dict[version] = pkg.url_for_version(version)
elif args.preferred:
version = preferred_version(pkg)
url_dict = dict([(version, pkg.url_for_version(version))])
url = pkg.find_valid_url_for_version(version)
if url is not None:
url_dict[version] = url
continue
# if we get here, it's because no valid url was provided by the package
# do expensive fallback to try to recover
if remote_versions is None:
remote_versions = pkg.fetch_remote_versions()
if version in remote_versions:
url_dict[version] = remote_versions[version]
else:
# Otherwise, see what versions we can find online
url_dict = pkg.fetch_remote_versions()
if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name))
# And ensure the specified version URLs take precedence, if available
try:
explicit_dict = {}
for v in pkg.versions:
if not v.isdevelop():
explicit_dict[v] = pkg.url_for_version(v)
url_dict.update(explicit_dict)
except spack.package.NoURLError:
pass
if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name))
version_lines = spack.stage.get_checksums_for_versions(
url_dict, pkg.name, keep_stage=args.keep_stage,

View File

@ -1560,11 +1560,9 @@ def _extrapolate(pkg, version):
def _from_merged_attrs(fetcher, pkg, version):
"""Create a fetcher from merged package and version attributes."""
if fetcher.url_attr == 'url':
url = pkg.url_for_version(version)
# TODO: refactor this logic into its own method or function
# TODO: to avoid duplication
mirrors = [spack.url.substitute_version(u, version)
for u in getattr(pkg, 'urls', [])[1:]]
mirrors = pkg.all_urls_for_version(version)
url = mirrors[0]
mirrors = mirrors[1:]
attrs = {fetcher.url_attr: url, 'mirrors': mirrors}
else:
url = getattr(pkg, fetcher.url_attr)

View File

@ -977,29 +977,93 @@ def url_for_version(self, version):
See Class Version (version.py)
"""
return self._implement_all_urls_for_version(version)[0]
def all_urls_for_version(self, version, custom_url_for_version=None):
    """Return all URLs derived from version_urls(), url, urls, and
    list_url (if it contains a version) in a package, in that order.

    Args:
        version: class Version
            The version for which URLs are sought.
            See Class Version (version.py)
        custom_url_for_version: currently unused; the package's own
            ``url_for_version`` override (if any) is used instead.
    """
    # Treat url_for_version as a custom URL generator only when the
    # package actually overrides the default implementation.
    has_override = type(self).url_for_version != Package.url_for_version
    override_fn = self.url_for_version if has_override else None
    return self._implement_all_urls_for_version(version, override_fn)
def _implement_all_urls_for_version(self, version, custom_url_for_version=None):
    """Collect candidate download URLs for ``version``, most specific first.

    Order of preference: an explicit per-version URL, a custom
    ``url_for_version`` result, then version-substituted forms of
    ``url``, ``urls``, and ``list_url``.  If none of those yield a
    version-bearing URL, fall back to substituting into the raw
    class-level URL (or the nearest known one).

    Raises:
        NoURLError: if the package provides no URL to work from at all.
    """
    if not isinstance(version, Version):
        version = Version(version)

    candidates = []

    # An exact per-version URL is the most trustworthy candidate.
    version_urls = self.version_urls()
    if version in version_urls:
        candidates.append(version_urls[version])

    # A package-supplied generator comes next, deduplicated.
    if custom_url_for_version is not None:
        custom = custom_url_for_version(version)
        if custom is not None and custom not in candidates:
            candidates.append(custom)

    def substitute_and_collect(candidate):
        """Append candidate with its version swapped in, when it has one."""
        if candidate is None:
            return
        # A URL with no detectable version has nothing to substitute; skip it.
        try:
            spack.url.parse_version(candidate)
        except spack.url.UndetectableVersionError:
            return
        candidates.append(
            spack.url.substitute_version(candidate, self.url_version(version))
        )

    substitute_and_collect(getattr(self, 'url', None))
    for candidate in getattr(self, 'urls', []):
        substitute_and_collect(candidate)
    substitute_and_collect(getattr(self, 'list_url', None))

    # No version-bearing URLs found anywhere: try the raw defaults.
    if not candidates:
        fallback = getattr(self, "url", getattr(self, "urls", [None])[0])

        # No exact match and no class-level default: use the nearest URL.
        if not fallback:
            fallback = self.nearest_url(version)

        # With no URLs at all to go by, we can't do anything.
        if not fallback:
            raise NoURLError(self.__class__)
        candidates.append(
            spack.url.substitute_version(fallback, self.url_version(version))
        )

    return candidates
def find_valid_url_for_version(self, version):
    """Return a URL for this version that is confirmed to exist, or None.

    Candidates come from all_urls_for_version() (so url, urls, and
    list_url are all tried); each is probed with
    spack.util.web.url_exists and the first reachable one wins.

    Args:
        version: class Version
            The version for which a URL is sought.
            See Class Version (version.py)
    """
    candidates = self.all_urls_for_version(version)
    # First reachable candidate, or None when every probe fails.
    return next(
        (candidate for candidate in candidates
         if spack.util.web.url_exists(candidate)),
        None,
    )
def _make_resource_stage(self, root_stage, fetcher, resource):
resource_stage_folder = self._resource_stage(resource)

View File

@ -31,7 +31,15 @@ def checksum_type(request):
@pytest.fixture
def pkg_factory():
Pkg = collections.namedtuple(
'Pkg', ['url_for_version', 'urls', 'url', 'versions', 'fetch_options']
"Pkg", [
"url_for_version",
"all_urls_for_version",
"find_valid_url_for_version",
"urls",
"url",
"versions",
"fetch_options",
]
)
def factory(url, urls, fetch_options={}):
@ -40,8 +48,16 @@ def fn(v):
main_url = url or urls[0]
return spack.url.substitute_version(main_url, v)
def fn_urls(v):
urls_loc = urls or [url]
return [spack.url.substitute_version(u, v) for u in urls_loc]
return Pkg(
url_for_version=fn, url=url, urls=urls,
find_valid_url_for_version=fn,
url_for_version=fn,
all_urls_for_version=fn_urls,
url=url,
urls=(urls,),
versions=collections.defaultdict(dict),
fetch_options=fetch_options
)

View File

@ -573,7 +573,11 @@ def _urlopen(req, *args, **kwargs):
def find_versions_of_archive(
archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
):
"""Scrape web pages for new versions of a tarball.
"""Scrape web pages for new versions of a tarball. This function prefers URLs in the
following order: links found on the scraped page that match a url generated by the
reference package, found and in the archive_urls list, found and derived from those
in the archive_urls list, and if none are found for a version then the item in the
archive_urls list is included for the version.
Args:
archive_urls (str or list or tuple): URL or sequence of URLs for
@ -652,7 +656,7 @@ def find_versions_of_archive(
# Any conflicting versions will be overwritten by the list_url links.
versions = {}
matched = set()
for url in archive_urls + sorted(links):
for url in sorted(links):
url = convert_to_posix_path(url)
if any(re.search(r, url) for r in regexes):
try:
@ -661,9 +665,7 @@ def find_versions_of_archive(
continue
versions[ver] = url
# prevent this version from getting overwritten
if url in archive_urls:
matched.add(ver)
elif reference_package is not None:
if reference_package is not None:
if url == reference_package.url_for_version(ver):
matched.add(ver)
else:
@ -675,6 +677,12 @@ def find_versions_of_archive(
except spack.url.UndetectableVersionError:
continue
for url in archive_urls:
url = convert_to_posix_path(url)
ver = spack.url.parse_version(url)
if ver not in versions:
versions[ver] = url
return versions