overwrite install refactoring and tests

This commit is contained in:
kshea21 2025-02-19 14:40:47 -08:00 committed by Gregory Becker
parent 0cfd514c0c
commit 56df316cc2
No known key found for this signature in database
GPG Key ID: 2362541F6D14ED84
3 changed files with 142 additions and 75 deletions

View File

@ -1048,6 +1048,7 @@ def replace_directory_transaction(directory_name):
try: try:
yield backup_dir yield backup_dir
except (Exception, KeyboardInterrupt, SystemExit) as inner_exception: except (Exception, KeyboardInterrupt, SystemExit) as inner_exception:
print("hitting the proper exception block")
# Try to recover the original directory, if this fails, raise a # Try to recover the original directory, if this fails, raise a
# composite exception. # composite exception.
try: try:
@ -1056,8 +1057,11 @@ def replace_directory_transaction(directory_name):
shutil.rmtree(directory_name) shutil.rmtree(directory_name)
os.rename(backup_dir, directory_name) os.rename(backup_dir, directory_name)
except Exception as outer_exception: except Exception as outer_exception:
print("CouldNOtRestoreDirectBackup")
raise CouldNotRestoreDirectoryBackup(inner_exception, outer_exception) raise CouldNotRestoreDirectoryBackup(inner_exception, outer_exception)
for a,b,c in os.walk(directory_name):
print(a,b,c)
tty.debug("Directory recovered [{0}]".format(directory_name)) tty.debug("Directory recovered [{0}]".format(directory_name))
raise raise
else: else:

View File

@ -37,7 +37,7 @@
import time import time
from collections import defaultdict from collections import defaultdict
from gzip import GzipFile from gzip import GzipFile
from typing import Dict, Iterator, List, Optional, Set, Tuple, Union from typing import Dict, Iterator, List, Optional, Set, Tuple, Union, Callable
import llnl.util.filesystem as fs import llnl.util.filesystem as fs
import llnl.util.lock as lk import llnl.util.lock as lk
@ -905,7 +905,7 @@ def __init__(
``TypeError`` if provided an argument of the wrong type ``TypeError`` if provided an argument of the wrong type
``ValueError`` if provided an argument with the wrong value or state ``ValueError`` if provided an argument with the wrong value or state
""" """
# Ensure dealing with a package that has a concrete spec # Ensure dealing with a package that has a concrete spec
if not isinstance(pkg, spack.package_base.PackageBase): if not isinstance(pkg, spack.package_base.PackageBase):
raise TypeError(f"{str(pkg)} must be a package") raise TypeError(f"{str(pkg)} must be a package")
@ -976,6 +976,9 @@ def __init__(
self.attempts = attempts self.attempts = attempts
self._update() self._update()
# initialize cache variables
self._install_action = None
def start(self): def start(self):
"""Start the work of this task.""" """Start the work of this task."""
raise NotImplementedError raise NotImplementedError
@ -1104,7 +1107,12 @@ def _setup_install_dir(self, pkg: "spack.package_base.PackageBase") -> None:
spack.store.STORE.layout.write_host_environment(pkg.spec) spack.store.STORE.layout.write_host_environment(pkg.spec)
@property @property
def install_action(self: "Task") -> InstallAction: def install_action(self):
if not self._install_action:
self._install_action = self.get_install_action()
return self._install_action
def get_install_action(self: "Task") -> InstallAction:
""" """
Determine whether the installation should be overwritten (if it already Determine whether the installation should be overwritten (if it already
exists) or skipped (if has been handled by another process). exists) or skipped (if has been handled by another process).
@ -1113,6 +1121,7 @@ def install_action(self: "Task") -> InstallAction:
installation should proceed as normal (i.e. no need to transactionally installation should proceed as normal (i.e. no need to transactionally
preserve the old prefix). preserve the old prefix).
""" """
print(self.request.overwrite)
# If we don't have to overwrite, do a normal install # If we don't have to overwrite, do a normal install
if self.pkg.spec.dag_hash() not in self.request.overwrite: if self.pkg.spec.dag_hash() not in self.request.overwrite:
return InstallAction.INSTALL return InstallAction.INSTALL
@ -1225,6 +1234,9 @@ def start(self):
Otherwise, start a process for of the requested spec and/or Otherwise, start a process for of the requested spec and/or
dependency represented by the BuildTask.""" dependency represented by the BuildTask."""
for a,b,c in os.walk(self.pkg.prefix):
print("start",a,b,c)
assert not self.started, "Cannot start a task that has already been started." assert not self.started, "Cannot start a task that has already been started."
self.started = True self.started = True
@ -1258,10 +1270,13 @@ def start(self):
self._setup_install_dir(pkg) self._setup_install_dir(pkg)
# Create a child process to do the actual installation. # Create a child process to do the actual installation.
child_process = overwrite_process if action == InstallAction.OVERWRITE else build_process process_start_method = spack.build_environment.start_build_process
self.process_handle = spack.build_environment.start_build_process( if action == InstallAction.OVERWRITE:
self.pkg, child_process, self.request.install_args process_start_method = overwrite_start_build_process
self.process_handle = process_start_method(
self.pkg, build_process, self.request.install_args
) )
# Identify the child process # Identify the child process
@ -1287,6 +1302,7 @@ def complete(self):
pkg = self.pkg pkg = self.pkg
tests = install_args.get("tests") tests = install_args.get("tests")
print("error result in complete",self.error_result)
self.status = BuildStatus.INSTALLING self.status = BuildStatus.INSTALLING
pkg.run_tests = tests is True or tests and pkg.name in tests pkg.run_tests = tests is True or tests and pkg.name in tests
@ -1304,6 +1320,8 @@ def complete(self):
# If an error arises from installing a package, # If an error arises from installing a package,
# raise spack.error.InstallError # raise spack.error.InstallError
if self.error_result is not None: if self.error_result is not None:
print("error result in completei 2",self.error_result)
raise self.error_result raise self.error_result
# hook that allows tests to inspect the Package before installation # hook that allows tests to inspect the Package before installation
@ -1313,6 +1331,7 @@ def complete(self):
try: try:
# Check if the task's child process has completed # Check if the task's child process has completed
print("error result in completei 3",self.error_result)
spack.package_base.PackageBase._verbose = self.process_handle.complete() spack.package_base.PackageBase._verbose = self.process_handle.complete()
# Note: PARENT of the build process adds the new package to # Note: PARENT of the build process adds the new package to
# the database, so that we don't need to re-read from file. # the database, so that we don't need to re-read from file.
@ -1323,6 +1342,8 @@ def complete(self):
pid = f"{self.pid}: " if tty.show_pid() else "" pid = f"{self.pid}: " if tty.show_pid() else ""
tty.debug(f"{pid}{str(e)}") tty.debug(f"{pid}{str(e)}")
tty.debug(f"Package stage directory: {pkg.stage.source_path}") tty.debug(f"Package stage directory: {pkg.stage.source_path}")
finally:
print("error result in completei 4",self.error_result)
return ExecuteResult.SUCCESS return ExecuteResult.SUCCESS
@ -1607,17 +1628,18 @@ def _prepare_for_install(self, task: Task) -> None:
if not installed_in_db: if not installed_in_db:
# Ensure there is no other installed spec with the same prefix dir # Ensure there is no other installed spec with the same prefix dir
if spack.store.STORE.db.is_occupied_install_prefix(task.pkg.spec.prefix): if spack.store.STORE.db.is_occupied_install_prefix(task.pkg.spec.prefix):
raise spack.error.InstallError( task.error_result = spack.error.InstallError(
f"Install prefix collision for {task.pkg_id}", f"Install prefix collision for {task.pkg_id}",
long_msg=f"Prefix directory {task.pkg.spec.prefix} already " long_msg=f"Prefix directory {task.pkg.spec.prefix} already "
"used by another installed spec.", "used by another installed spec.",
pkg=task.pkg, pkg=task.pkg,
) )
return
# Make sure the installation directory is in the desired state # Make sure the installation directory is in the desired state
# for uninstalled specs. # for uninstalled specs.
if os.path.isdir(task.pkg.spec.prefix): if os.path.isdir(task.pkg.spec.prefix):
if not keep_prefix: if not keep_prefix and task.install_action != InstallAction.OVERWRITE:
task.pkg.remove_prefix() task.pkg.remove_prefix()
else: else:
tty.debug(f"{task.pkg_id} is partially installed") tty.debug(f"{task.pkg_id} is partially installed")
@ -2133,10 +2155,16 @@ def start_task(
self, task: Task, install_status: InstallStatus, term_status: TermStatusLine self, task: Task, install_status: InstallStatus, term_status: TermStatusLine
) -> None: ) -> None:
"""Attempts to start a package installation.""" """Attempts to start a package installation."""
for a,b,c in os.walk(task.pkg.prefix):
print("start_task", a,b,c)
pkg, pkg_id, spec = task.pkg, task.pkg_id, task.pkg.spec pkg, pkg_id, spec = task.pkg, task.pkg_id, task.pkg.spec
install_status.next_pkg(pkg) install_status.next_pkg(pkg)
# install_status.set_term_title(f"Processing {task.pkg.name}") # install_status.set_term_title(f"Processing {task.pkg.name}")
tty.debug(f"Processing {pkg_id}: task={task}") tty.debug(f"Processing {pkg_id}: task={task}")
for a,b,c in os.walk(task.pkg.prefix):
print("2", a,b,c)
# Skip the installation if the spec is not being installed locally # Skip the installation if the spec is not being installed locally
# (i.e., if external or upstream) BUT flag it as installed since # (i.e., if external or upstream) BUT flag it as installed since
@ -2146,6 +2174,8 @@ def start_task(
self._flag_installed(pkg, task.dependents) self._flag_installed(pkg, task.dependents)
task.no_op = True task.no_op = True
return return
for a,b,c in os.walk(task.pkg.prefix):
print("3", a,b,c)
# Flag a failed spec. Do not need an (install) prefix lock since # Flag a failed spec. Do not need an (install) prefix lock since
# assume using a separate (failed) prefix lock file. # assume using a separate (failed) prefix lock file.
@ -2156,6 +2186,9 @@ def start_task(
if self.fail_fast: if self.fail_fast:
task.error_result = spack.error.InstallError(_fail_fast_err, pkg=pkg) task.error_result = spack.error.InstallError(_fail_fast_err, pkg=pkg)
for a,b,c in os.walk(task.pkg.prefix):
print("4", a,b,c)
# Attempt to get a write lock. If we can't get the lock then # Attempt to get a write lock. If we can't get the lock then
# another process is likely (un)installing the spec or has # another process is likely (un)installing the spec or has
@ -2176,6 +2209,8 @@ def start_task(
self._requeue_task(task, install_status) self._requeue_task(task, install_status)
task.no_op = True task.no_op = True
return return
for a,b,c in os.walk(task.pkg.prefix):
print("5", a,b,c)
term_status.clear() term_status.clear()
@ -2184,9 +2219,13 @@ def start_task(
if task.request.overwrite and task.explicit: if task.request.overwrite and task.explicit:
task.request.overwrite_time = time.time() task.request.overwrite_time = time.time()
for a,b,c in os.walk(task.pkg.prefix):
print("6", a,b,c)
# Determine state of installation artifacts and adjust accordingly. # Determine state of installation artifacts and adjust accordingly.
# install_status.set_term_title(f"Preparing {task.pkg.name}") # install_status.set_term_title(f"Preparing {task.pkg.name}")
self._prepare_for_install(task) self._prepare_for_install(task)
for a,b,c in os.walk(task.pkg.prefix):
print("7", a,b,c)
# Flag an already installed package # Flag an already installed package
if pkg_id in self.installed: if pkg_id in self.installed:
@ -2272,7 +2311,7 @@ def complete_task(self, task: Task, install_status: InstallStatus) -> Optional[T
self._requeue_task(task, install_status) self._requeue_task(task, install_status)
return None return None
# Overwrite process exception handling # Overwrite install exception handling
except fs.CouldNotRestoreDirectoryBackup as e: except fs.CouldNotRestoreDirectoryBackup as e:
spack.store.STORE.db.remove(task.pkg.spec) spack.store.STORE.db.remove(task.pkg.spec)
tty.error( tty.error(
@ -2285,6 +2324,19 @@ def complete_task(self, task: Task, install_status: InstallStatus) -> Optional[T
raise e.inner_exception raise e.inner_exception
except (Exception, SystemExit) as exc: except (Exception, SystemExit) as exc:
# Overwrite process exception handling
if isinstance(exc, spack.build_environment.ChildError) and exc.name == "CouldNotRestoreDirectoryBackup":
print("could not restore exception")
spack.store.STORE.db.remove(task.pkg.spec)
tty.error(
f"Recovery of install dir of {task.pkg.name} failed due to "
f"{exc.outer_exception.__class__.__name__}: {str(exc.outer_exception)}. "
"The spec is now uninstalled."
)
# Unwrap the actual installation exception.
raise exc.inner_exception
self._update_failed(task, True, exc) self._update_failed(task, True, exc)
# Best effort installs suppress the exception and mark the # Best effort installs suppress the exception and mark the
@ -2310,7 +2362,7 @@ def complete_task(self, task: Task, install_status: InstallStatus) -> Optional[T
finally: finally:
# Remove the install prefix if anything went wrong during # Remove the install prefix if anything went wrong during
# install. # install.
if not keep_prefix and not action == InstallAction.OVERWRITE: if not keep_prefix and action != InstallAction.OVERWRITE:
pkg.remove_prefix() pkg.remove_prefix()
# Perform basic task cleanup for the installed spec to # Perform basic task cleanup for the installed spec to
@ -2358,6 +2410,8 @@ def install(self) -> None:
# Iterate through the done tasks and complete them # Iterate through the done tasks and complete them
for task in done: for task in done:
try: try:
# If complete_task does not return None, the build request failed
print("install_status when passed to complete_task", install_status, type(install_status))
failure = self.complete_task(task, install_status) failure = self.complete_task(task, install_status)
if failure: if failure:
failed_build_requests.append(failure) failed_build_requests.append(failure)
@ -2652,16 +2706,13 @@ def build_process(pkg: "spack.package_base.PackageBase", install_args: dict) ->
return installer.run() return installer.run()
def overwrite_process(pkg: "spack.package_base.PackageBase", install_args: dict) -> bool: def overwrite_start_build_process(pkg: "spack.package_base.PackageBase", method: Callable, install_args: dict) -> bool:
# TODO:I don't know if this comment accurately reflects what's going on anymore
# TODO: think it should move to the error handling
# Try to run the install task overwriting the package prefix. # Try to run the install task overwriting the package prefix.
# If this fails, try to recover the original install prefix. If that fails # If this fails, try to recover the original install prefix. If that fails
# too, mark the spec as uninstalled. This function always the original # too, mark the spec as uninstalled. This function always the original
# install error if installation fails. # install error if installation fails. See ''complete_task()''
with fs.replace_directory_transaction(pkg.prefix): with fs.replace_directory_transaction(pkg.prefix):
return build_process(pkg, install_args) return spack.build_environment.start_build_process(pkg, method, install_args)
def deprecate(spec: "spack.spec.Spec", deprecator: "spack.spec.Spec", link_fn) -> None: def deprecate(spec: "spack.spec.Spec", deprecator: "spack.spec.Spec", link_fn) -> None:
"""Deprecate this package in favor of deprecator spec""" """Deprecate this package in favor of deprecator spec"""

View File

@ -721,13 +721,35 @@ def test_install_splice_root_from_binary(
assert len(spack.store.STORE.db.query()) == len(list(out.traverse())) assert len(spack.store.STORE.db.query()) == len(list(out.traverse()))
class MockInstallStatus:
def next_pkg(self, *args, **kwargs):
pass
def set_term_title(self, *args, **kwargs):
pass
def get_progress(self):
pass
class MockTermStatusLine:
def add(self, *args, **kwargs):
pass
def clear(self):
pass
def test_installing_task_use_cache(install_mockery, monkeypatch): def test_installing_task_use_cache(install_mockery, monkeypatch):
installer = create_installer(["trivial-install-test-package"], {}) installer = create_installer(["trivial-install-test-package"], {})
request = installer.build_requests[0] request = installer.build_requests[0]
# task = create_build_task(request.pkg) task = create_build_task(request.pkg)
install_status = MockInstallStatus()
term_status = MockTermStatusLine()
monkeypatch.setattr(inst, "_install_from_cache", _true) monkeypatch.setattr(inst, "_install_from_cache", _true)
# installer.start_task(task, , None) installer.start_task(task, install_status, term_status)
installer.complete_task(task, install_status)
assert request.pkg_id in installer.installed assert request.pkg_id in installer.installed
@ -746,7 +768,7 @@ def _missing(*args, **kwargs):
request = installer.build_requests[0] request = installer.build_requests[0]
task = create_build_task(request.pkg) task = create_build_task(request.pkg)
# Drop one of the specs so its task is missing before _install_task # Drop one of the specs so its task is missing before _complete_task
popped_task = installer._pop_ready_task() popped_task = installer._pop_ready_task()
assert inst.package_id(popped_task.pkg.spec) not in installer.build_tasks assert inst.package_id(popped_task.pkg.spec) not in installer.build_tasks
@ -1198,107 +1220,97 @@ def test_install_implicit(install_mockery, mock_fetch):
assert not create_build_task(pkg).explicit assert not create_build_task(pkg).explicit
# WIP def test_overwrite_install_backup_success(
def test_overwrite_install_backup_success(temporary_store, config, mock_packages, tmpdir): monkeypatch, temporary_store, config, mock_packages, tmpdir
):
""" """
When doing an overwrite install that fails, Spack should restore the backup When doing an overwrite install that fails, Spack should restore the backup
of the original prefix, and leave the original spec marked installed. of the original prefix, and leave the original spec marked installed.
""" """
# call overwrite_install and have it fail
# active the error handling
# ensure that the backup is restored
# ensure that the original spec is still installed
# Get a build task. TODO: Refactor this to avoid calling internal methods. # Get a build task. TODO: Refactor this to avoid calling internal methods.
installer = create_installer(["pkg-b"]) installer = create_installer(["pkg-b"])
installer._init_queue() installer._init_queue()
task = installer._pop_task() task = installer._pop_task()
install_status = MockInstallStatus()
term_status = MockTermStatusLine()
# Make sure the install prefix exists with some trivial file # Make sure the install prefix exists with some trivial file
installed_file = os.path.join(task.pkg.prefix, "some_file") installed_file = os.path.join(task.pkg.prefix, "some_file")
fs.touchp(installed_file) fs.touchp(installed_file)
# TODO: remove this, as it uses the old install_task method
class InstallerThatWipesThePrefixDir:
def _install_task(self, task, install_status):
shutil.rmtree(task.pkg.prefix, ignore_errors=True)
fs.mkdirp(task.pkg.prefix)
raise Exception("Some fatal install error")
# Install that wipes the prefix directory # Install that wipes the prefix directory
def wiped_installer(): def wipe_prefix(pkg, install_args):
shutil.rmtree(task.pkg.prefix) shutil.rmtree(pkg.prefix, ignore_errors=True)
fs.mkdirp(pkg.prefix)
raise Exception("Some fatal install error")
class FakeDatabase: monkeypatch.setattr(inst, "build_process", wipe_prefix)
called = False
def remove(self, spec): def fail(*args, **kwargs):
self.called = True assert False
fake_installer = InstallerThatWipesThePrefixDir() # Make sure the package is not marked uninstalled
fake_db = FakeDatabase() monkeypatch.setattr(spack.store.STORE.db, "remove", fail)
overwrite_install = inst.OverwriteInstall(fake_installer, fake_db, task, None) # Make sure that the installer does an overwrite install
monkeypatch.setattr(task, "_install_action", inst.InstallAction.OVERWRITE)
# Installation should throw the installation exception, not the backup # Installation should throw the installation exception, not the backup
# failure. # failure.
installer.start_task(task, install_status, term_status)
with pytest.raises(Exception, match="Some fatal install error"): with pytest.raises(Exception, match="Some fatal install error"):
overwrite_install.install() installer.complete_task(task, install_status)
# Make sure the package is not marked uninstalled and the original dir # Check that the original file is back.
# is back.
assert not fake_db.called
assert os.path.exists(installed_file) assert os.path.exists(installed_file)
def test_overwrite_install_backup_failure(temporary_store, config, mock_packages, tmpdir): def test_overwrite_install_backup_failure(
monkeypatch, temporary_store, config, mock_packages, tmpdir
):
""" """
When doing an overwrite install that fails, Spack should try to recover the When doing an overwrite install that fails, Spack should try to recover the
original prefix. If that fails, the spec is lost, and it should be removed original prefix. If that fails, the spec is lost, and it should be removed
from the database. from the database.
""" """
# Note: this test relies on installing a package with no dependencies
class InstallerThatAccidentallyDeletesTheBackupDir:
def install(self):
# Remove the backup directory, which is at the same level as the prefix,
# starting with .backup
backup_glob = os.path.join(
os.path.dirname(os.path.normpath(task.pkg.prefix)), ".backup*"
)
for backup in glob.iglob(backup_glob):
shutil.rmtree(backup)
raise Exception("Some fatal install error")
class FakeDatabase:
called = False
def remove(self, spec):
self.called = True
# Get a build task. TODO: refactor this to avoid calling internal methods # Get a build task. TODO: refactor this to avoid calling internal methods
installer = create_installer(["pkg-c"]) installer = create_installer(["pkg-c"])
installer._init_queue() installer._init_queue()
task = installer._pop_task() task = installer._pop_task()
install_status = MockInstallStatus()
term_status = MockTermStatusLine()
# Make sure the install prefix exists # Make sure the install prefix exists
installed_file = os.path.join(task.pkg.prefix, "some_file") installed_file = os.path.join(task.pkg.prefix, "some_file")
fs.touchp(installed_file) fs.touchp(installed_file)
fake_installer = InstallerThatAccidentallyDeletesTheBackupDir() # Install that removes the backup directory, which is at the same level as
fake_db = FakeDatabase() # the prefix, starting with .backup
overwrite_install = inst.OverwriteInstall(fake_installer, fake_db, task, None) def remove_backup(*args, **kwargs):
backup_glob = os.path.join(os.path.dirname(os.path.normpath(task.pkg.prefix)), ".backup*")
for backup in glob.iglob(backup_glob):
shutil.rmtree(backup)
raise Exception("Some fatal install error")
monkeypatch.setattr(inst, "build_process", remove_backup)
called = False
def remove(*args, **kwargs):
called = True
monkeypatch.setattr(spack.store.STORE.db, "remove", remove)
# Make sure that the installer does an overwrite install
monkeypatch.setattr(task, "_install_action", inst.InstallAction.OVERWRITE)
# Installation should throw the installation exception, not the backup # Installation should throw the installation exception, not the backup
# failure. # failure.
installer.start_task(task, install_status, term_status)
with pytest.raises(Exception, match="Some fatal install error"): with pytest.raises(Exception, match="Some fatal install error"):
overwrite_install.install() installer.complete_task(task, install_status)
# Make sure that `remove` was called on the database after an unsuccessful # Make sure that `remove` was called on the database after an unsuccessful
# attempt to restore the backup. # attempt to restore the backup.
assert fake_db.called assert called
def test_term_status_line(): def test_term_status_line():