url summary: show right and wrong parse counts for each regex

Previously this command only showed the total match count for each
regular expression, which gives no sense of which regexes are working
well and which ones are not.  We now display the number of right, wrong,
and total URL parses per regex.

With this change, it's easier to see where we might improve the URL
parsing.
This commit is contained in:
Todd Gamblin 2019-10-23 00:46:35 -07:00
parent 33499681a4
commit 26ec644fc8

View File

@ -161,9 +161,12 @@ def url_summary(args):
# Collect statistics on which regexes were matched and how often # Collect statistics on which regexes were matched and how often
name_regex_dict = dict() name_regex_dict = dict()
name_count_dict = defaultdict(int) right_name_count = defaultdict(int)
wrong_name_count = defaultdict(int)
version_regex_dict = dict() version_regex_dict = dict()
version_count_dict = defaultdict(int) right_version_count = defaultdict(int)
wrong_version_count = defaultdict(int)
tty.msg('Generating a summary of URL parsing in Spack...') tty.msg('Generating a summary of URL parsing in Spack...')
@ -189,9 +192,11 @@ def url_summary(args):
try: try:
version, vs, vl, vi, vregex = parse_version_offset(url) version, vs, vl, vi, vregex = parse_version_offset(url)
version_regex_dict[vi] = vregex version_regex_dict[vi] = vregex
version_count_dict[vi] += 1
if version_parsed_correctly(pkg, version): if version_parsed_correctly(pkg, version):
correct_versions += 1 correct_versions += 1
right_version_count[vi] += 1
else:
wrong_version_count[vi] += 1
except UndetectableVersionError: except UndetectableVersionError:
pass pass
@ -199,9 +204,11 @@ def url_summary(args):
try: try:
name, ns, nl, ni, nregex = parse_name_offset(url, version) name, ns, nl, ni, nregex = parse_name_offset(url, version)
name_regex_dict[ni] = nregex name_regex_dict[ni] = nregex
name_count_dict[ni] += 1
if name_parsed_correctly(pkg, name): if name_parsed_correctly(pkg, name):
correct_names += 1 correct_names += 1
right_name_count[ni] += 1
else:
wrong_name_count[ni] += 1
except UndetectableNameError: except UndetectableNameError:
pass pass
@ -216,24 +223,34 @@ def url_summary(args):
tty.msg('Statistics on name regular expressions:') tty.msg('Statistics on name regular expressions:')
print() print()
print(' Index Count Regular Expression') print(' Index Right Wrong Total Regular Expression')
for ni in sorted(name_regex_dict.keys()): for ni in sorted(name_regex_dict.keys()):
print(' {0:>3}: {1:>6} r{2!r}'.format( print(' {0:>5} {1:>5} {2:>5} {3:>5} r{4!r}'.format(
ni, name_count_dict[ni], name_regex_dict[ni])) ni,
right_name_count[ni],
wrong_name_count[ni],
right_name_count[ni] + wrong_name_count[ni],
name_regex_dict[ni])
)
print() print()
tty.msg('Statistics on version regular expressions:') tty.msg('Statistics on version regular expressions:')
print() print()
print(' Index Count Regular Expression') print(' Index Right Wrong Total Regular Expression')
for vi in sorted(version_regex_dict.keys()): for vi in sorted(version_regex_dict.keys()):
print(' {0:>3}: {1:>6} r{2!r}'.format( print(' {0:>5} {1:>5} {2:>5} {3:>5} r{4!r}'.format(
vi, version_count_dict[vi], version_regex_dict[vi])) vi,
right_version_count[vi],
wrong_version_count[vi],
right_version_count[vi] + wrong_version_count[vi],
version_regex_dict[vi])
)
print() print()
# Return statistics, only for testing purposes # Return statistics, only for testing purposes
return (total_urls, correct_names, correct_versions, return (total_urls, correct_names, correct_versions,
name_count_dict, version_count_dict) right_name_count, right_version_count)
def url_stats(args): def url_stats(args):