Reduce the size on disk for logs (#42122)

* Reduce the size on disk for logs

This PR does two things:

1. Store a compressed `spack-build-out.txt.gz`
2. Get rid of phase logs, as they are duplicates of
   `spack-build-out.txt`

The logs are not compressed in the stage dir, so on build failure the
workflow for users is no different.

It's just that on install the logs are rarely used, and if needed, users
can easily `gzip -d` or `zgrep` them.

In the case of GCC installs, the compressed logs are <5% of the original
size, which is typically dozens of MBs.

* get rid of "backwards compat" of file names in stage dirs
This commit is contained in:
Harmen Stoppels 2024-01-19 01:02:11 +01:00 committed by GitHub
parent 2d37d5474d
commit 4d54688782
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 39 additions and 83 deletions

View File

@ -290,11 +290,11 @@ def require_user_confirmation_for_overwrite(concrete_specs, args):
def _dump_log_on_error(e: spack.build_environment.InstallError): def _dump_log_on_error(e: spack.build_environment.InstallError):
e.print_context() e.print_context()
assert e.pkg, "Expected InstallError to include the associated package" assert e.pkg, "Expected InstallError to include the associated package"
if not os.path.exists(e.pkg.build_log_path): if not os.path.exists(e.pkg.log_path):
tty.error("'spack install' created no log.") tty.error("'spack install' created no log.")
else: else:
sys.stderr.write("Full build log:\n") sys.stderr.write("Full build log:\n")
with open(e.pkg.build_log_path, errors="replace") as log: with open(e.pkg.log_path, errors="replace") as log:
shutil.copyfileobj(log, sys.stderr) shutil.copyfileobj(log, sys.stderr)

View File

@ -1834,19 +1834,18 @@ def _get_overwrite_specs(self):
] ]
def _install_log_links(self, spec): def _install_log_links(self, spec):
if not spec.external: if spec.external:
# Make sure log directory exists return
log_path = self.log_path # Make sure log directory exists
fs.mkdirp(log_path) log_path = self.log_path
fs.mkdirp(log_path)
with fs.working_dir(self.path): with fs.working_dir(self.path):
# Link the resulting log file into logs dir # Link the resulting log file into logs dir
build_log_link = os.path.join( build_log_link = os.path.join(log_path, f"{spec.name}-{spec.dag_hash(7)}.log")
log_path, "%s-%s.log" % (spec.name, spec.dag_hash(7)) if os.path.lexists(build_log_link):
) os.remove(build_log_link)
if os.path.lexists(build_log_link): symlink(spec.package.install_log_path, build_log_link)
os.remove(build_log_link)
symlink(spec.package.build_log_path, build_log_link)
def _partition_roots_by_install_status(self): def _partition_roots_by_install_status(self):
"""Partition root specs into those that do not have to be passed to the """Partition root specs into those that do not have to be passed to the

View File

@ -36,6 +36,7 @@
import sys import sys
import time import time
from collections import defaultdict from collections import defaultdict
from gzip import GzipFile
from typing import Dict, Iterator, List, Optional, Set, Tuple from typing import Dict, Iterator, List, Optional, Set, Tuple
import llnl.util.filesystem as fs import llnl.util.filesystem as fs
@ -638,13 +639,12 @@ def archive_install_logs(pkg: "spack.package_base.PackageBase", phase_log_dir: s
pkg: the package that was built and installed pkg: the package that was built and installed
phase_log_dir: path to the archive directory phase_log_dir: path to the archive directory
""" """
# Archive the whole stdout + stderr for the package # Copy a compressed version of the install log
fs.install(pkg.log_path, pkg.install_log_path) with open(pkg.log_path, "rb") as f, open(pkg.install_log_path, "wb") as g:
# Use GzipFile directly so we can omit filename / mtime in header
# Archive all phase log paths gzip_file = GzipFile(filename="", mode="wb", compresslevel=6, mtime=0, fileobj=g)
for phase_log in pkg.phase_log_files: shutil.copyfileobj(f, gzip_file)
log_file = os.path.basename(phase_log) gzip_file.close()
fs.install(phase_log, os.path.join(phase_log_dir, log_file))
# Archive the install-phase test log, if present # Archive the install-phase test log, if present
pkg.archive_install_test_log() pkg.archive_install_test_log()

View File

@ -1131,13 +1131,7 @@ def stage(self, stage):
@property @property
def env_path(self): def env_path(self):
"""Return the build environment file path associated with staging.""" """Return the build environment file path associated with staging."""
# Backward compatibility: Return the name of an existing log path; return os.path.join(self.stage.path, _spack_build_envfile)
# otherwise, return the current install env path name.
old_filename = os.path.join(self.stage.path, "spack-build.env")
if os.path.exists(old_filename):
return old_filename
else:
return os.path.join(self.stage.path, _spack_build_envfile)
@property @property
def env_mods_path(self): def env_mods_path(self):
@ -1168,13 +1162,6 @@ def install_env_path(self):
@property @property
def log_path(self): def log_path(self):
"""Return the build log file path associated with staging.""" """Return the build log file path associated with staging."""
# Backward compatibility: Return the name of an existing log path.
for filename in ["spack-build.out", "spack-build.txt"]:
old_log = os.path.join(self.stage.path, filename)
if os.path.exists(old_log):
return old_log
# Otherwise, return the current log path name.
return os.path.join(self.stage.path, _spack_build_logfile) return os.path.join(self.stage.path, _spack_build_logfile)
@property @property
@ -1187,15 +1174,15 @@ def phase_log_files(self):
@property @property
def install_log_path(self): def install_log_path(self):
"""Return the build log file path on successful installation.""" """Return the (compressed) build log file path on successful installation"""
# Backward compatibility: Return the name of an existing install log. # Backward compatibility: Return the name of an existing install log.
for filename in ["build.out", "build.txt"]: for filename in [_spack_build_logfile, "build.out", "build.txt"]:
old_log = os.path.join(self.metadata_dir, filename) old_log = os.path.join(self.metadata_dir, filename)
if os.path.exists(old_log): if os.path.exists(old_log):
return old_log return old_log
# Otherwise, return the current install log path name. # Otherwise, return the current install log path name.
return os.path.join(self.metadata_dir, _spack_build_logfile) return os.path.join(self.metadata_dir, _spack_build_logfile + ".gz")
@property @property
def configure_args_path(self): def configure_args_path(self):
@ -2088,15 +2075,6 @@ def unit_test_check(self):
""" """
return True return True
@property
def build_log_path(self):
"""
Return the expected (or current) build log file path. The path points
to the staging build file until the software is successfully installed,
when it points to the file in the installation directory.
"""
return self.install_log_path if self.spec.installed else self.log_path
@classmethod @classmethod
def inject_flags(cls: Type[Pb], name: str, flags: Iterable[str]) -> FLAG_HANDLER_RETURN_TYPE: def inject_flags(cls: Type[Pb], name: str, flags: Iterable[str]) -> FLAG_HANDLER_RETURN_TYPE:
""" """

View File

@ -6,6 +6,7 @@
import collections import collections
import contextlib import contextlib
import functools import functools
import gzip
import os import os
import time import time
import traceback import traceback
@ -190,9 +191,13 @@ def on_success(self, pkg, kwargs, package_record):
def fetch_log(self, pkg): def fetch_log(self, pkg):
try: try:
with open(pkg.build_log_path, "r", encoding="utf-8") as stream: if os.path.exists(pkg.install_log_path):
return "".join(stream.readlines()) stream = gzip.open(pkg.install_log_path, "rt")
except Exception: else:
stream = open(pkg.log_path)
with stream as f:
return f.read()
except OSError:
return f"Cannot open log for {pkg.spec.cshort_spec}" return f"Cannot open log for {pkg.spec.cshort_spec}"
def extract_package_from_signature(self, instance, *args, **kwargs): def extract_package_from_signature(self, instance, *args, **kwargs):

View File

@ -5,6 +5,7 @@
import argparse import argparse
import builtins import builtins
import filecmp import filecmp
import gzip
import itertools import itertools
import os import os
import pathlib import pathlib
@ -137,8 +138,7 @@ def test_package_output(tmpdir, capsys, install_mockery, mock_fetch):
pkg = spec.package pkg = spec.package
pkg.do_install(verbose=True) pkg.do_install(verbose=True)
log_file = pkg.build_log_path with gzip.open(pkg.install_log_path, "rt") as f:
with open(log_file) as f:
out = f.read() out = f.read()
# make sure that output from the actual package file appears in the # make sure that output from the actual package file appears in the

View File

@ -568,8 +568,8 @@ def mock_repo_path():
def _pkg_install_fn(pkg, spec, prefix): def _pkg_install_fn(pkg, spec, prefix):
# sanity_check_prefix requires something in the install directory # sanity_check_prefix requires something in the install directory
mkdirp(prefix.bin) mkdirp(prefix.bin)
if not os.path.exists(spec.package.build_log_path): if not os.path.exists(spec.package.install_log_path):
touchp(spec.package.build_log_path) touchp(spec.package.install_log_path)
@pytest.fixture @pytest.fixture

View File

@ -444,41 +444,15 @@ def test_nosource_pkg_install_post_install(install_mockery, mock_fetch, mock_pac
def test_pkg_build_paths(install_mockery): def test_pkg_build_paths(install_mockery):
# Get a basic concrete spec for the trivial install package. # Get a basic concrete spec for the trivial install package.
spec = Spec("trivial-install-test-package").concretized() spec = Spec("trivial-install-test-package").concretized()
assert spec.package.log_path.endswith(_spack_build_logfile)
log_path = spec.package.log_path assert spec.package.env_path.endswith(_spack_build_envfile)
assert log_path.endswith(_spack_build_logfile)
env_path = spec.package.env_path
assert env_path.endswith(_spack_build_envfile)
# Backward compatibility checks
log_dir = os.path.dirname(log_path)
fs.mkdirp(log_dir)
with fs.working_dir(log_dir):
# Start with the older of the previous log filenames
older_log = "spack-build.out"
fs.touch(older_log)
assert spec.package.log_path.endswith(older_log)
# Now check the newer log filename
last_log = "spack-build.txt"
fs.rename(older_log, last_log)
assert spec.package.log_path.endswith(last_log)
# Check the old environment file
last_env = "spack-build.env"
fs.rename(last_log, last_env)
assert spec.package.env_path.endswith(last_env)
# Cleanup
shutil.rmtree(log_dir)
def test_pkg_install_paths(install_mockery): def test_pkg_install_paths(install_mockery):
# Get a basic concrete spec for the trivial install package. # Get a basic concrete spec for the trivial install package.
spec = Spec("trivial-install-test-package").concretized() spec = Spec("trivial-install-test-package").concretized()
log_path = os.path.join(spec.prefix, ".spack", _spack_build_logfile) log_path = os.path.join(spec.prefix, ".spack", _spack_build_logfile + ".gz")
assert spec.package.install_log_path == log_path assert spec.package.install_log_path == log_path
env_path = os.path.join(spec.prefix, ".spack", _spack_build_envfile) env_path = os.path.join(spec.prefix, ".spack", _spack_build_envfile)