url stats: add --show-issues option (#25792)
* tests: make `spack url [stats|summary]` work on mock packages Mock packages have historically had mock hashes, but this means they're also invalid as far as Spack's hash detection is concerned. - [x] convert all hashes in mock package to md5 or sha256 - [x] ensure that all mock packages have a URL - [x] ignore some special cases with multiple VCS fetchers * url stats: add `--show-issues` option `spack url stats` tells us how many URLs are using what protocol, type of checksum, etc., but it previously did not tell us which packages and URLs had the issues. This adds a `--show-issues` option to show URLs with insecure (`http`) URLs or `md5` hashes (which are now deprecated by NIST).
This commit is contained in:
@@ -9,6 +9,7 @@
|
||||
|
||||
import six.moves.urllib.parse as urllib_parse
|
||||
|
||||
import llnl.util.tty.color as color
|
||||
from llnl.util import tty
|
||||
|
||||
import spack.fetch_strategy as fs
|
||||
@@ -80,9 +81,13 @@ def setup_parser(subparser):
|
||||
help='print a summary of how well we are parsing package urls')
|
||||
|
||||
# Stats
|
||||
sp.add_parser(
|
||||
stats_parser = sp.add_parser(
|
||||
'stats',
|
||||
help='print statistics on versions and checksums for all packages')
|
||||
stats_parser.add_argument(
|
||||
"--show-issues", action="store_true",
|
||||
help="show packages with issues (md5 hashes, http urls)"
|
||||
)
|
||||
|
||||
|
||||
def url(parser, args):
|
||||
@@ -262,6 +267,9 @@ def url_summary(args):
|
||||
|
||||
|
||||
def url_stats(args):
|
||||
# dictionary of issue type -> package -> descriptions
|
||||
issues = defaultdict(lambda: defaultdict(lambda: []))
|
||||
|
||||
class UrlStats(object):
|
||||
def __init__(self):
|
||||
self.total = 0
|
||||
@@ -270,7 +278,7 @@ def __init__(self):
|
||||
self.url_type = defaultdict(lambda: 0)
|
||||
self.git_type = defaultdict(lambda: 0)
|
||||
|
||||
def add(self, fetcher):
|
||||
def add(self, pkg_name, fetcher):
|
||||
self.total += 1
|
||||
|
||||
url_type = fetcher.url_attr
|
||||
@@ -284,10 +292,18 @@ def add(self, fetcher):
|
||||
algo = 'no checksum'
|
||||
self.checksums[algo] += 1
|
||||
|
||||
if algo == "md5":
|
||||
md5_hashes = issues["md5 hashes"]
|
||||
md5_hashes[pkg_name].append(fetcher.url)
|
||||
|
||||
# parse out the URL scheme (https/http/ftp/etc.)
|
||||
urlinfo = urllib_parse.urlparse(fetcher.url)
|
||||
self.schemes[urlinfo.scheme] += 1
|
||||
|
||||
if urlinfo.scheme == "http":
|
||||
http_urls = issues["http urls"]
|
||||
http_urls[pkg_name].append(fetcher.url)
|
||||
|
||||
elif url_type == 'git':
|
||||
if getattr(fetcher, 'commit', None):
|
||||
self.git_type['commit'] += 1
|
||||
@@ -305,13 +321,16 @@ def add(self, fetcher):
|
||||
for pkg in spack.repo.path.all_packages():
|
||||
npkgs += 1
|
||||
|
||||
for v, args in pkg.versions.items():
|
||||
fetcher = fs.for_package_version(pkg, v)
|
||||
version_stats.add(fetcher)
|
||||
for v in pkg.versions:
|
||||
try:
|
||||
fetcher = fs.for_package_version(pkg, v)
|
||||
except (fs.InvalidArgsError, fs.FetcherConflict):
|
||||
continue
|
||||
version_stats.add(pkg.name, fetcher)
|
||||
|
||||
for _, resources in pkg.resources.items():
|
||||
for resource in resources:
|
||||
resource_stats.add(resource.fetcher)
|
||||
resource_stats.add(pkg.name, resource.fetcher)
|
||||
|
||||
# print a nice summary table
|
||||
tty.msg("URL stats for %d packages:" % npkgs)
|
||||
@@ -361,6 +380,21 @@ def print_stat(indent, name, stat_name=None):
|
||||
print_stat(4, git_type, "git_type")
|
||||
print_line()
|
||||
|
||||
if args.show_issues:
|
||||
total_issues = sum(
|
||||
len(issues)
|
||||
for _, pkg_issues in issues.items()
|
||||
for _, issues in pkg_issues.items()
|
||||
)
|
||||
print()
|
||||
tty.msg("Found %d issues." % total_issues)
|
||||
for issue_type, pkgs in issues.items():
|
||||
tty.msg("Package URLs with %s" % issue_type)
|
||||
for pkg, pkg_issues in pkgs.items():
|
||||
color.cprint(" @*C{%s}" % pkg)
|
||||
for issue in pkg_issues:
|
||||
print(" %s" % issue)
|
||||
|
||||
|
||||
def print_name_and_version(url):
|
||||
"""Prints a URL. Underlines the detected name with dashes and
|
||||
|
||||
@@ -69,13 +69,15 @@ def test_url_with_no_version_fails():
|
||||
url('parse', 'http://www.netlib.org/voronoi/triangle.zip')
|
||||
|
||||
|
||||
@pytest.mark.maybeslow
|
||||
@pytest.mark.skipif(
|
||||
skip_python_26 = pytest.mark.skipif(
|
||||
sys.version_info < (2, 7),
|
||||
reason="Python 2.6 tests are run in a container, where "
|
||||
"networking is super slow"
|
||||
)
|
||||
def test_url_list():
|
||||
|
||||
|
||||
@skip_python_26
|
||||
def test_url_list(mock_packages):
|
||||
out = url('list')
|
||||
total_urls = len(out.split('\n'))
|
||||
|
||||
@@ -104,13 +106,8 @@ def test_url_list():
|
||||
assert 0 < correct_version_urls < total_urls
|
||||
|
||||
|
||||
@pytest.mark.maybeslow
|
||||
@pytest.mark.skipif(
|
||||
sys.version_info < (2, 7),
|
||||
reason="Python 2.6 tests are run in a container, where "
|
||||
"networking is super slow"
|
||||
)
|
||||
def test_url_summary():
|
||||
@skip_python_26
|
||||
def test_url_summary(mock_packages):
|
||||
"""Test the URL summary command."""
|
||||
# test url_summary, the internal function that does the work
|
||||
(total_urls, correct_names, correct_versions,
|
||||
@@ -136,12 +133,8 @@ def test_url_summary():
|
||||
assert out_correct_versions == correct_versions
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.version_info < (2, 7),
|
||||
reason="Python 2.6 tests are run in a container, where "
|
||||
"networking is super slow"
|
||||
)
|
||||
def test_url_stats(capfd):
|
||||
@skip_python_26
|
||||
def test_url_stats(capfd, mock_packages):
|
||||
with capfd.disabled():
|
||||
output = url('stats')
|
||||
npkgs = '%d packages' % len(spack.repo.all_package_names())
|
||||
@@ -151,3 +144,20 @@ def test_url_stats(capfd):
|
||||
assert 'schemes' in output
|
||||
assert 'versions' in output
|
||||
assert 'resources' in output
|
||||
|
||||
output = url('stats', '--show-issues')
|
||||
npkgs = '%d packages' % len(spack.repo.all_package_names())
|
||||
assert npkgs in output
|
||||
assert 'url' in output
|
||||
assert 'git' in output
|
||||
assert 'schemes' in output
|
||||
assert 'versions' in output
|
||||
assert 'resources' in output
|
||||
|
||||
assert 'Package URLs with md5 hashes' in output
|
||||
assert 'needs-relocation' in output
|
||||
assert 'https://cmake.org/files/v3.4/cmake-0.0.0.tar.gz' in output
|
||||
|
||||
assert 'Package URLs with http urls' in output
|
||||
assert 'zmpi' in output
|
||||
assert 'http://www.spack-fake-zmpi.org/downloads/zmpi-1.0.tar.gz' in output
|
||||
|
||||
@@ -281,25 +281,28 @@ def test_git_url_top_level_url_versions(mock_packages, config):
|
||||
|
||||
pkg = spack.repo.get('git-url-top-level')
|
||||
|
||||
# leading 62 zeros of sha256 hash
|
||||
leading_zeros = '0' * 62
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.0')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.url == 'https://example.com/some/tarball-2.0.tar.gz'
|
||||
assert fetcher.digest == 'abc20'
|
||||
assert fetcher.digest == leading_zeros + '20'
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.1')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.url == 'https://example.com/some/tarball-2.1.tar.gz'
|
||||
assert fetcher.digest == 'abc21'
|
||||
assert fetcher.digest == leading_zeros + '21'
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.2')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.url == 'https://www.example.com/foo2.2.tar.gz'
|
||||
assert fetcher.digest == 'abc22'
|
||||
assert fetcher.digest == leading_zeros + '22'
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '2.3')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.url == 'https://www.example.com/foo2.3.tar.gz'
|
||||
assert fetcher.digest == 'abc23'
|
||||
assert fetcher.digest == leading_zeros + '23'
|
||||
|
||||
|
||||
def test_git_url_top_level_git_versions(mock_packages, config):
|
||||
@@ -409,15 +412,15 @@ def test_fetch_options(mock_packages, config):
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.0')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.digest == 'abc10'
|
||||
assert fetcher.digest == '00000000000000000000000000000010'
|
||||
assert fetcher.extra_options == {'timeout': 42, 'cookie': 'foobar'}
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.1')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.digest == 'abc11'
|
||||
assert fetcher.digest == '00000000000000000000000000000011'
|
||||
assert fetcher.extra_options == {'timeout': 65}
|
||||
|
||||
fetcher = spack.fetch_strategy.for_package_version(pkg, '1.2')
|
||||
assert isinstance(fetcher, spack.fetch_strategy.URLFetchStrategy)
|
||||
assert fetcher.digest == 'abc12'
|
||||
assert fetcher.digest == '00000000000000000000000000000012'
|
||||
assert fetcher.extra_options == {'cookie': 'baz'}
|
||||
|
||||
@@ -160,13 +160,21 @@ def test_fetch(
|
||||
|
||||
|
||||
@pytest.mark.parametrize('spec,url,digest', [
|
||||
('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', 'abc000'),
|
||||
('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', 'abc100'),
|
||||
('url-list-test @3.0', 'foo-3.0.tar.gz', 'abc30'),
|
||||
('url-list-test @4.5', 'foo-4.5.tar.gz', 'abc45'),
|
||||
('url-list-test @2.0.0b2', 'foo-2.0.0b2.tar.gz', 'abc200b2'),
|
||||
('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'),
|
||||
('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'),
|
||||
('url-list-test @0.0.0', 'foo-0.0.0.tar.gz', '00000000000000000000000000000000'),
|
||||
('url-list-test @1.0.0', 'foo-1.0.0.tar.gz', '00000000000000000000000000000100'),
|
||||
('url-list-test @3.0', 'foo-3.0.tar.gz', '00000000000000000000000000000030'),
|
||||
('url-list-test @4.5', 'foo-4.5.tar.gz', '00000000000000000000000000000450'),
|
||||
(
|
||||
'url-list-test @2.0.0b2',
|
||||
'foo-2.0.0b2.tar.gz',
|
||||
'000000000000000000000000000200b2'
|
||||
),
|
||||
('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', '000000000000000000000000000030a1'),
|
||||
(
|
||||
'url-list-test @4.5-rc5',
|
||||
'foo-4.5-rc5.tar.gz',
|
||||
'000000000000000000000000000045c5'
|
||||
),
|
||||
])
|
||||
@pytest.mark.parametrize('_fetch_method', ['curl', 'urllib'])
|
||||
def test_from_list_url(mock_packages, config, spec, url, digest, _fetch_method):
|
||||
|
||||
Reference in New Issue
Block a user