Fix out of date remote spec.yamls while updating the index (#24811)

This commit is contained in:
Scott Wittenburg 2021-07-10 00:49:09 -06:00 committed by GitHub
parent 084bafe18c
commit e0017a6666
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 130 additions and 10 deletions

View File

@ -13,6 +13,7 @@
import sys import sys
import tarfile import tarfile
import tempfile import tempfile
import traceback
from contextlib import closing from contextlib import closing
import ruamel.yaml as yaml import ruamel.yaml as yaml
@@ -27,6 +28,7 @@
import spack.config as config import spack.config as config
import spack.database as spack_db import spack.database as spack_db
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
import spack.hash_types as ht
import spack.mirror import spack.mirror
import spack.relocate as relocate import spack.relocate as relocate
import spack.util.file_cache as file_cache import spack.util.file_cache as file_cache
@@ -709,12 +711,6 @@ def generate_package_index(cache_prefix):
cache_prefix. This page contains a link for each binary package (.yaml) cache_prefix. This page contains a link for each binary package (.yaml)
under cache_prefix. under cache_prefix.
""" """
tmpdir = tempfile.mkdtemp()
db_root_dir = os.path.join(tmpdir, 'db_root')
db = spack_db.Database(None, db_dir=db_root_dir,
enable_transaction_locking=False,
record_fields=['spec', 'ref_count', 'in_buildcache'])
try: try:
file_list = ( file_list = (
entry entry
@@ -735,22 +731,90 @@ def generate_package_index(cache_prefix):
tty.debug('Retrieving spec.yaml files from {0} to build index'.format( tty.debug('Retrieving spec.yaml files from {0} to build index'.format(
cache_prefix)) cache_prefix))
all_mirror_specs = {}
for file_path in file_list: for file_path in file_list:
try: try:
yaml_url = url_util.join(cache_prefix, file_path) yaml_url = url_util.join(cache_prefix, file_path)
tty.debug('fetching {0}'.format(yaml_url)) tty.debug('fetching {0}'.format(yaml_url))
_, _, yaml_file = web_util.read_from_url(yaml_url) _, _, yaml_file = web_util.read_from_url(yaml_url)
yaml_contents = codecs.getreader('utf-8')(yaml_file).read() yaml_contents = codecs.getreader('utf-8')(yaml_file).read()
# yaml_obj = syaml.load(yaml_contents) spec_dict = syaml.load(yaml_contents)
# s = Spec.from_yaml(yaml_obj)
s = Spec.from_yaml(yaml_contents) s = Spec.from_yaml(yaml_contents)
db.add(s, None) all_mirror_specs[s.dag_hash()] = {
db.mark(s, 'in_buildcache', True) 'yaml_url': yaml_url,
'spec': s,
'num_deps': len(list(s.traverse(root=False))),
'binary_cache_checksum': spec_dict['binary_cache_checksum'],
'buildinfo': spec_dict['buildinfo'],
}
except (URLError, web_util.SpackWebError) as url_err: except (URLError, web_util.SpackWebError) as url_err:
tty.error('Error reading spec.yaml: {0}'.format(file_path)) tty.error('Error reading spec.yaml: {0}'.format(file_path))
tty.error(url_err) tty.error(url_err)
sorted_specs = sorted(all_mirror_specs.keys(),
key=lambda k: all_mirror_specs[k]['num_deps'])
tmpdir = tempfile.mkdtemp()
db_root_dir = os.path.join(tmpdir, 'db_root')
db = spack_db.Database(None, db_dir=db_root_dir,
enable_transaction_locking=False,
record_fields=['spec', 'ref_count', 'in_buildcache'])
try: try:
tty.debug('Specs sorted by number of dependencies:')
for dag_hash in sorted_specs:
spec_record = all_mirror_specs[dag_hash]
s = spec_record['spec']
num_deps = spec_record['num_deps']
tty.debug(' {0}/{1} -> {2}'.format(
s.name, dag_hash[:7], num_deps))
if num_deps > 0:
# Check each of this spec's dependencies (which we have already
# processed), as they are the source of truth for their own
# full hash. If the full hash we have for any deps does not
# match what those deps have themselves, then we need to splice
# this spec with those deps, and push this spliced spec
# (spec.yaml file) back to the mirror, as well as update the
# all_mirror_specs dictionary with this spliced spec.
to_splice = []
for dep in s.dependencies():
dep_dag_hash = dep.dag_hash()
if dep_dag_hash in all_mirror_specs:
true_dep = all_mirror_specs[dep_dag_hash]['spec']
if true_dep.full_hash() != dep.full_hash():
to_splice.append(true_dep)
if to_splice:
tty.debug(' needs the following deps spliced:')
for true_dep in to_splice:
tty.debug(' {0}/{1}'.format(
true_dep.name, true_dep.dag_hash()[:7]))
s = s.splice(true_dep, True)
# Push this spliced spec back to the mirror
spliced_yaml = s.to_dict(hash=ht.full_hash)
for key in ['binary_cache_checksum', 'buildinfo']:
spliced_yaml[key] = spec_record[key]
temp_yaml_path = os.path.join(tmpdir, 'spliced.spec.yaml')
with open(temp_yaml_path, 'w') as fd:
fd.write(syaml.dump(spliced_yaml))
spliced_yaml_url = spec_record['yaml_url']
web_util.push_to_url(
temp_yaml_path, spliced_yaml_url, keep_original=False)
tty.debug(' spliced and wrote {0}'.format(
spliced_yaml_url))
spec_record['spec'] = s
db.add(s, None)
db.mark(s, 'in_buildcache', True)
# Now that we have fixed any old spec yamls that might have had the wrong
# full hash for their dependencies, we can generate the index, compute
# the hash, and push those files to the mirror.
index_json_path = os.path.join(db_root_dir, 'index.json') index_json_path = os.path.join(db_root_dir, 'index.json')
with open(index_json_path, 'w') as f: with open(index_json_path, 'w') as f:
db._write_to_file(f) db._write_to_file(f)
@@ -782,6 +846,7 @@ def generate_package_index(cache_prefix):
msg = 'Encountered problem pushing package index to {0}: {1}'.format( msg = 'Encountered problem pushing package index to {0}: {1}'.format(
cache_prefix, err) cache_prefix, err)
tty.warn(msg) tty.warn(msg)
tty.debug('\n' + traceback.format_exc())
finally: finally:
shutil.rmtree(tmpdir) shutil.rmtree(tmpdir)

View File

@@ -10,12 +10,15 @@
import py import py
import pytest import pytest
import llnl.util.filesystem as fs
import spack.binary_distribution as bindist import spack.binary_distribution as bindist
import spack.config import spack.config
import spack.hooks.sbang as sbang import spack.hooks.sbang as sbang
import spack.main import spack.main
import spack.mirror import spack.mirror
import spack.repo import spack.repo
import spack.spec as spec
import spack.store import spack.store
import spack.util.gpg import spack.util.gpg
import spack.util.web as web_util import spack.util.web as web_util
@@ -589,3 +592,55 @@ def test_update_sbang(tmpdir, test_mirror):
open(str(installed_script_style_2_path)).read() open(str(installed_script_style_2_path)).read()
uninstall_cmd('-y', '/%s' % new_spec.dag_hash()) uninstall_cmd('-y', '/%s' % new_spec.dag_hash())
@pytest.mark.usefixtures(
'install_mockery_mutable_config', 'mock_packages', 'mock_fetch',
)
def test_update_index_fix_deps(monkeypatch, tmpdir, mutable_config):
"""Ensure spack buildcache update-index properly fixes up spec.yaml
files on the mirror when updating the buildcache index."""
# Create a temp mirror directory for buildcache usage
mirror_dir = tmpdir.join('mirror_dir')
mirror_url = 'file://{0}'.format(mirror_dir.strpath)
spack.config.set('mirrors', {'test': mirror_url})
a = Spec('a').concretized()
b = Spec('b').concretized()
new_b_full_hash = 'abcdef'
# Install package a with dep b
install_cmd('--no-cache', a.name)
# Create a buildcache for a and its dep b, and update index
buildcache_cmd('create', '-uad', mirror_dir.strpath, a.name)
buildcache_cmd('update-index', '-d', mirror_dir.strpath)
# Simulate an update to b that only affects full hash by simply overwriting
# the full hash in the spec.yaml file on the mirror
b_spec_yaml_name = bindist.tarball_name(b, '.spec.yaml')
b_spec_yaml_path = os.path.join(mirror_dir.strpath,
bindist.build_cache_relative_path(),
b_spec_yaml_name)
fs.filter_file(r"full_hash:\s[^\s]+$",
"full_hash: {0}".format(new_b_full_hash),
b_spec_yaml_path)
# When we update the index, spack should notice that a's notion of the
# full hash of b doesn't match b's notion of its own full hash, and as
# a result, spack should fix the spec.yaml for a
buildcache_cmd('update-index', '-d', mirror_dir.strpath)
# Read in the concrete spec yaml of a
a_spec_yaml_name = bindist.tarball_name(a, '.spec.yaml')
a_spec_yaml_path = os.path.join(mirror_dir.strpath,
bindist.build_cache_relative_path(),
a_spec_yaml_name)
# Turn concrete spec yaml into a concrete spec (a)
with open(a_spec_yaml_path) as fd:
a_prime = spec.Spec.from_yaml(fd.read())
# Make sure the full hash of b in a's spec yaml matches the new value
assert(a_prime[b.name].full_hash() == new_b_full_hash)