commands: add resource stats to spack url stats (#13205)
				
					
				
`spack url stats` now also reports statistics for packages' resources,
alongside the per-version statistics it already printed.

Example output:
```
$ spack url stats
==> URL stats for 3531 packages:
--------------------------------------------------------------
stat                    versions        %   resources        %
--------------------------------------------------------------
url                         8335    89.3%         339    89.0%
    schemes
        https               6489    69.5%          93    24.4%
        ftp                   32     0.3%           8     2.1%
        http                1763    18.9%         237    62.2%
        file                  51     0.5%           1     0.3%
    checksums
        md5                   26     0.3%           0     0.0%
        sha256              8306    89.0%         336    88.2%
        no checksum            3     0.0%           3     0.8%
--------------------------------------------------------------
go                             1     0.0%           0     0.0%
--------------------------------------------------------------
hg                             7     0.1%           0     0.0%
--------------------------------------------------------------
no code                        4     0.0%           0     0.0%
--------------------------------------------------------------
svn                            4     0.0%          16     4.2%
--------------------------------------------------------------
git                          981    10.5%          26     6.8%
    branch                   442     4.7%           4     1.0%
    commit                   362     3.9%          14     3.7%
    no ref                    36     0.4%           2     0.5%
    tag                      141     1.5%           6     1.6%
--------------------------------------------------------------
```
			
			
This commit is contained in:
		| @@ -239,80 +239,104 @@ def url_summary(args): | |||||||
|  |  | ||||||
|  |  | ||||||
| def url_stats(args): | def url_stats(args): | ||||||
|     stats = {}  # stats about fetchers in packages. |     class UrlStats(object): | ||||||
|     nvers = 0   # total number of versions |         def __init__(self): | ||||||
|     npkgs = 0   # total number of packages |             self.total = 0 | ||||||
|  |             self.schemes = defaultdict(lambda: 0) | ||||||
|  |             self.checksums = defaultdict(lambda: 0) | ||||||
|  |             self.url_type = defaultdict(lambda: 0) | ||||||
|  |             self.git_type = defaultdict(lambda: 0) | ||||||
|  |  | ||||||
|     def inc(fstype, category, attr=None): |         def add(self, fetcher): | ||||||
|         """Increment statistics in the stats dict.""" |             self.total += 1 | ||||||
|         categories = stats.setdefault(fstype, {}) |  | ||||||
|         if attr: |  | ||||||
|             cat_stats = categories.setdefault(category, {}) |  | ||||||
|             val = cat_stats.setdefault(attr, 0) |  | ||||||
|             stats[fstype][category][attr] = val + 1 |  | ||||||
|         else: |  | ||||||
|             val = categories.setdefault(category, 0) |  | ||||||
|             stats[fstype][category] = val + 1 |  | ||||||
|  |  | ||||||
|     # over all packages |             url_type = fetcher.url_attr | ||||||
|     for pkg in spack.repo.path.all_packages(): |             self.url_type[url_type or 'no code'] += 1 | ||||||
|         npkgs += 1 |  | ||||||
|  |  | ||||||
|         if not pkg.has_code: |             if url_type == 'url': | ||||||
|             for _ in pkg.versions: |                 digest = getattr(fetcher, 'digest', None) | ||||||
|                 inc('No code', 'total') |                 if digest: | ||||||
|                 nvers += 1 |                     algo = crypto.hash_algo_for_digest(digest) | ||||||
|             continue |  | ||||||
|  |  | ||||||
|         # look at each version |  | ||||||
|         for v, args in pkg.versions.items(): |  | ||||||
|             # figure out what type of fetcher it is |  | ||||||
|             fetcher = fs.for_package_version(pkg, v) |  | ||||||
|             nvers += 1 |  | ||||||
|  |  | ||||||
|             fstype = fetcher.url_attr |  | ||||||
|             inc(fstype, 'total') |  | ||||||
|  |  | ||||||
|             # put some special stats in for particular types of fetchers. |  | ||||||
|             if fstype == 'git': |  | ||||||
|                 if 'commit' in args: |  | ||||||
|                     inc('git', 'security', 'commit') |  | ||||||
|                 else: |                 else: | ||||||
|                     inc('git', 'security', 'no commit') |                     algo = 'no checksum' | ||||||
|             elif fstype == 'url': |                 self.checksums[algo] += 1 | ||||||
|                 for h in crypto.hashes: |  | ||||||
|                     if h in args: |  | ||||||
|                         inc('url', 'checksums', h) |  | ||||||
|                         break |  | ||||||
|                 else: |  | ||||||
|                     if 'checksum' in args: |  | ||||||
|                         h = crypto.hash_algo_for_digest(args['checksum']) |  | ||||||
|                         inc('url', 'checksums', h) |  | ||||||
|                     else: |  | ||||||
|                         inc('url', 'checksums', 'no checksum') |  | ||||||
|  |  | ||||||
|                 # parse out the URL scheme (https/http/ftp/etc.) |                 # parse out the URL scheme (https/http/ftp/etc.) | ||||||
|                 urlinfo = urlparse(fetcher.url) |                 urlinfo = urlparse(fetcher.url) | ||||||
|                 inc('url', 'schemes', urlinfo.scheme) |                 self.schemes[urlinfo.scheme] += 1 | ||||||
|  |  | ||||||
|  |             elif url_type == 'git': | ||||||
|  |                 if getattr(fetcher, 'commit', None): | ||||||
|  |                     self.git_type['commit'] += 1 | ||||||
|  |                 elif getattr(fetcher, 'branch', None): | ||||||
|  |                     self.git_type['branch'] += 1 | ||||||
|  |                 elif getattr(fetcher, 'tag', None): | ||||||
|  |                     self.git_type['tag'] += 1 | ||||||
|  |                 else: | ||||||
|  |                     self.git_type['no ref'] += 1 | ||||||
|  |  | ||||||
|  |     npkgs = 0 | ||||||
|  |     version_stats = UrlStats() | ||||||
|  |     resource_stats = UrlStats() | ||||||
|  |  | ||||||
|  |     for pkg in spack.repo.path.all_packages(): | ||||||
|  |         npkgs += 1 | ||||||
|  |  | ||||||
|  |         for v, args in pkg.versions.items(): | ||||||
|  |             fetcher = fs.for_package_version(pkg, v) | ||||||
|  |             version_stats.add(fetcher) | ||||||
|  |  | ||||||
|  |         for _, resources in pkg.resources.items(): | ||||||
|  |             for resource in resources: | ||||||
|  |                 resource_stats.add(resource.fetcher) | ||||||
|  |  | ||||||
|     # print a nice summary table |     # print a nice summary table | ||||||
|     tty.msg("%d total versions for %d packages:" % (nvers, npkgs)) |     tty.msg("URL stats for %d packages:" % npkgs) | ||||||
|     line_width = 36 |  | ||||||
|     print("-" * line_width) |  | ||||||
|     for fetcher, fetcher_stats in sorted(stats.items(), reverse=True): |  | ||||||
|         fs_total = fetcher_stats['total'] |  | ||||||
|         fs_pct = float(fs_total) / nvers * 100 |  | ||||||
|         print("%-22s%5d%8.1f%%" % (fetcher, fs_total, fs_pct)) |  | ||||||
|  |  | ||||||
|         for category, cat_stats in sorted(fetcher_stats.items(), reverse=True): |     def print_line(): | ||||||
|             if category == 'total': |         print("-" * 62) | ||||||
|                 continue |  | ||||||
|             print("  %s" % category) |  | ||||||
|  |  | ||||||
|             for name, number in sorted(cat_stats.items(), reverse=True): |     def print_stat(indent, name, stat_name=None): | ||||||
|                 pct = float(number) / fs_total * 100 |         width = 20 - indent | ||||||
|                 print("    %-18s%5d%8.1f%%" % (name, number, pct)) |         fmt = " " * indent | ||||||
|         print("-" * line_width) |         fmt += "%%-%ds" % width | ||||||
|  |         if stat_name is None: | ||||||
|  |             print(fmt % name) | ||||||
|  |         else: | ||||||
|  |             fmt += "%12d%8.1f%%%12d%8.1f%%" | ||||||
|  |             v = getattr(version_stats, stat_name).get(name, 0) | ||||||
|  |             r = getattr(resource_stats, stat_name).get(name, 0) | ||||||
|  |             print(fmt % (name, | ||||||
|  |                          v, v / version_stats.total * 100, | ||||||
|  |                          r, r / resource_stats.total * 100)) | ||||||
|  |  | ||||||
|  |     print_line() | ||||||
|  |     print("%-20s%12s%9s%12s%9s" % ("stat", "versions", "%", "resources", "%")) | ||||||
|  |     print_line() | ||||||
|  |     print_stat(0, "url", "url_type") | ||||||
|  |  | ||||||
|  |     print_stat(4, "schemes") | ||||||
|  |     schemes = set(version_stats.schemes) | set(resource_stats.schemes) | ||||||
|  |     for scheme in schemes: | ||||||
|  |         print_stat(8, scheme, "schemes") | ||||||
|  |  | ||||||
|  |     print_stat(4, "checksums") | ||||||
|  |     checksums = set(version_stats.checksums) | set(resource_stats.checksums) | ||||||
|  |     for checksum in checksums: | ||||||
|  |         print_stat(8, checksum, "checksums") | ||||||
|  |     print_line() | ||||||
|  |  | ||||||
|  |     types = set(version_stats.url_type) | set(resource_stats.url_type) | ||||||
|  |     types -= set(["url", "git"]) | ||||||
|  |     for url_type in sorted(types): | ||||||
|  |         print_stat(0, url_type, "url_type") | ||||||
|  |         print_line() | ||||||
|  |  | ||||||
|  |     print_stat(0, "git", "url_type") | ||||||
|  |     git_types = set(version_stats.git_type) | set(resource_stats.git_type) | ||||||
|  |     for git_type in sorted(git_types): | ||||||
|  |         print_stat(4, git_type, "git_type") | ||||||
|  |     print_line() | ||||||
|  |  | ||||||
|  |  | ||||||
| def print_name_and_version(url): | def print_name_and_version(url): | ||||||
|   | |||||||
| @@ -131,4 +131,8 @@ def test_url_stats(capfd): | |||||||
|         output = url('stats') |         output = url('stats') | ||||||
|         npkgs = '%d packages' % len(spack.repo.all_package_names()) |         npkgs = '%d packages' % len(spack.repo.all_package_names()) | ||||||
|         assert npkgs in output |         assert npkgs in output | ||||||
|         assert 'total versions' in output |         assert 'url' in output | ||||||
|  |         assert 'git' in output | ||||||
|  |         assert 'schemes' in output | ||||||
|  |         assert 'versions' in output | ||||||
|  |         assert 'resources' in output | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Todd Gamblin
					Todd Gamblin