url stats: add --show-issues option (#25792)

* tests: make `spack url [stats|summary]` work on mock packages

Mock packages have historically had mock hashes, but this means they're also invalid
as far as Spack's hash detection is concerned.

- [x] convert all hashes in mock package to md5 or sha256
- [x] ensure that all mock packages have a URL
- [x] ignore some special cases with multiple VCS fetchers

* url stats: add `--show-issues` option

`spack url stats` tells us how many URLs are using what protocol, type of checksum,
etc., but it previously did not tell us which packages and URLs had the issues. This
adds a `--show-issues` option to show URLs with insecure (`http`) URLs or `md5` hashes
(which are now deprecated by NIST).
This commit is contained in:
Todd Gamblin
2021-09-08 07:59:06 -07:00
committed by GitHub
parent c2a6ccbea8
commit c309adb4b3
70 changed files with 266 additions and 169 deletions

View File

@@ -9,6 +9,7 @@
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty.color as color
from llnl.util import tty
import spack.fetch_strategy as fs
@@ -80,9 +81,13 @@ def setup_parser(subparser):
help='print a summary of how well we are parsing package urls')
# Stats
sp.add_parser(
stats_parser = sp.add_parser(
'stats',
help='print statistics on versions and checksums for all packages')
stats_parser.add_argument(
"--show-issues", action="store_true",
help="show packages with issues (md5 hashes, http urls)"
)
def url(parser, args):
@@ -262,6 +267,9 @@ def url_summary(args):
def url_stats(args):
# dictionary of issue type -> package -> descriptions
issues = defaultdict(lambda: defaultdict(lambda: []))
class UrlStats(object):
def __init__(self):
self.total = 0
@@ -270,7 +278,7 @@ def __init__(self):
self.url_type = defaultdict(lambda: 0)
self.git_type = defaultdict(lambda: 0)
def add(self, fetcher):
def add(self, pkg_name, fetcher):
self.total += 1
url_type = fetcher.url_attr
@@ -284,10 +292,18 @@ def add(self, fetcher):
algo = 'no checksum'
self.checksums[algo] += 1
if algo == "md5":
md5_hashes = issues["md5 hashes"]
md5_hashes[pkg_name].append(fetcher.url)
# parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1
if urlinfo.scheme == "http":
http_urls = issues["http urls"]
http_urls[pkg_name].append(fetcher.url)
elif url_type == 'git':
if getattr(fetcher, 'commit', None):
self.git_type['commit'] += 1
@@ -305,13 +321,16 @@ def add(self, fetcher):
for pkg in spack.repo.path.all_packages():
npkgs += 1
for v, args in pkg.versions.items():
fetcher = fs.for_package_version(pkg, v)
version_stats.add(fetcher)
for v in pkg.versions:
try:
fetcher = fs.for_package_version(pkg, v)
except (fs.InvalidArgsError, fs.FetcherConflict):
continue
version_stats.add(pkg.name, fetcher)
for _, resources in pkg.resources.items():
for resource in resources:
resource_stats.add(resource.fetcher)
resource_stats.add(pkg.name, resource.fetcher)
# print a nice summary table
tty.msg("URL stats for %d packages:" % npkgs)
@@ -361,6 +380,21 @@ def print_stat(indent, name, stat_name=None):
print_stat(4, git_type, "git_type")
print_line()
if args.show_issues:
total_issues = sum(
len(issues)
for _, pkg_issues in issues.items()
for _, issues in pkg_issues.items()
)
print()
tty.msg("Found %d issues." % total_issues)
for issue_type, pkgs in issues.items():
tty.msg("Package URLs with %s" % issue_type)
for pkg, pkg_issues in pkgs.items():
color.cprint(" @*C{%s}" % pkg)
for issue in pkg_issues:
print(" %s" % issue)
def print_name_and_version(url):
"""Prints a URL. Underlines the detected name with dashes and

View File

@@ -69,13 +69,15 @@ def test_url_with_no_version_fails():
url('parse', 'http://www.netlib.org/voronoi/triangle.zip')
@pytest.mark.maybeslow
@pytest.mark.skipif(
skip_python_26 = pytest.mark.skipif(
sys.version_info < (2, 7),
reason="Python 2.6 tests are run in a container, where "
"networking is super slow"
)
def test_url_list():
@skip_python_26
def test_url_list(mock_packages):
out = url('list')
total_urls = len(out.split('\n'))
@@ -104,13 +106,8 @@ def test_url_list():
assert 0 < correct_version_urls < total_urls
@pytest.mark.maybeslow
@pytest.mark.skipif(
sys.version_info < (2, 7),
reason="Python 2.6 tests are run in a container, where "
"networking is super slow"
)
def test_url_summary():
@skip_python_26
def test_url_summary(mock_packages):
"""Test the URL summary command."""
# test url_summary, the internal function that does the work
(total_urls, correct_names, correct_versions,
@@ -136,12 +133,8 @@ def test_url_summary():
assert out_correct_versions == correct_versions
@pytest.mark.skipif(
sys.version_info < (2, 7),
reason="Python 2.6 tests are run in a container, where "
"networking is super slow"
)
def test_url_stats(capfd):
@skip_python_26
def test_url_stats(capfd, mock_packages):
with capfd.disabled():
output = url('stats')
npkgs = '%d packages' % len(spack.repo.all_package_names())
@@ -151,3 +144,20 @@ def test_url_stats(capfd):
assert 'schemes' in output
assert 'versions' in output
assert 'resources' in output
output = url('stats', '--show-issues')
npkgs = '%d packages' % len(spack.repo.all_package_names())
assert npkgs in output
assert 'url' in output
assert 'git' in output
assert 'schemes' in output
assert 'versions' in output
assert 'resources' in output
assert 'Package URLs with md5 hashes' in output
assert 'needs-relocation' in output
assert 'https://cmake.org/files/v3.4/cmake-0.0.0.tar.gz' in output
assert 'Package URLs with http urls' in output
assert 'zmpi' in output
assert 'http://www.spack-fake-zmpi.org/downloads/zmpi-1.0.tar.gz' in output

View File

@@ -281,25 +281,28 @@ def test_git_url_top_level_url_versions(mock_packages, config):
pkg = spack.repo.get('git-url-top-level')
# leading 62 zeros of sha256 hash
leading_zeros = '0' * 62
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.0')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://example.com/some/tarball-2.0.tar.gz'
assert fetcher.digest == 'abc20'
assert fetcher.digest == leading_zeros + '20'
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.1')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://example.com/some/tarball-2.1.tar.gz'
assert fetcher.digest == 'abc21'
assert fetcher.digest == leading_zeros + '21'
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.2')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://www.example.com/foo2.2.tar.gz'
assert fetcher.digest == 'abc22'
assert fetcher.digest == leading_zeros + '22'
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.3')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.url == 'https://www.example.com/foo2.3.tar.gz'
assert fetcher.digest == 'abc23'
assert fetcher.digest == leading_zeros + '23'
def test_git_url_top_level_git_versions(mock_packages, config):
@@ -409,15 +412,15 @@ def test_fetch_options(mock_packages, config):
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.0')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.digest == 'abc10'
assert fetcher.digest == '00000000000000000000000000000010'
assert fetcher.extra_options == {'timeout': 42, 'cookie': 'foobar'}
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.1')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.digest == 'abc11'
assert fetcher.digest == '00000000000000000000000000000011'
assert fetcher.extra_options == {'timeout': 65}
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.2')
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
assert fetcher.digest == 'abc12'
assert fetcher.digest == '00000000000000000000000000000012'
assert fetcher.extra_options == {'cookie': 'baz'}

View File

@@ -160,13 +160,21 @@ def test_fetch(
@pytest.mark.parametrize('spec,url,digest', [
('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', 'abc000'),
('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', 'abc100'),
('url-list-test @3.0', 'foo-3.0.tar.gz', 'abc30'),
('url-list-test @4.5', 'foo-4.5.tar.gz', 'abc45'),
('url-list-test @2.0.0b2', 'foo-2.0.0b2.tar.gz', 'abc200b2'),
('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'),
('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'),
('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', '00000000000000000000000000000000'),
('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', '00000000000000000000000000000100'),
('url-list-test @3.0', 'foo-3.0.tar.gz', '00000000000000000000000000000030'),
('url-list-test @4.5', 'foo-4.5.tar.gz', '00000000000000000000000000000450'),
(
'url-list-test @2.0.0b2',
'foo-2.0.0b2.tar.gz',
'000000000000000000000000000200b2'
),
('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', '000000000000000000000000000030a1'),
(
'url-list-test @4.5-rc5',
'foo-4.5-rc5.tar.gz',
'000000000000000000000000000045c5'
),
])
@pytest.mark.parametrize('_fetch_method', ['curl', 'urllib'])
def test_from_list_url(mock_packages, config, spec, url, digest, _fetch_method):