Make Spack tests record their errors and continue

previously, tests would fail on the first error now, we wrap them in a TestFailure object that records all failures
2020-04-15 18:49:18 -07:00
parent 1b3e1897ca
commit 749ab2e79d
5 changed files with 163 additions and 86 deletions
--- a/lib/spack/spack/build_environment.py
+++ b/lib/spack/spack/build_environment.py
@@ -33,7 +33,6 @@
 calls you can make from within the install() function.
 """
 import re
-import inspect
 import multiprocessing
 import os
 import shutil
@@ -52,6 +51,7 @@
 import spack.config
 import spack.main
 import spack.paths
+import spack.package
 import spack.schema.environment
 import spack.store
 import spack.architecture as arch
@@ -879,7 +879,10 @@ def child_process(child_pipe, input_stream):

            # build up some context from the offending package so we can
            # show that, too.
-            package_context = get_package_context(tb)
+            if exc_type is not spack.package.TestFailure:
+                package_context = get_package_context(traceback.extract_tb(tb))
+            else:
+                package_context = []

            build_log = None
            if context == 'build' and hasattr(pkg, 'log_path'):
@@ -953,8 +956,8 @@ def get_package_context(traceback, context=3):
    """Return some context for an error message when the build fails.

    Args:
-        traceback (traceback): A traceback from some exception raised during
-            install
+        traceback (list of tuples): output from traceback.extract_tb() or
+            traceback.extract_stack()
        context (int): Lines of context to show before and after the line
            where the error happened

@@ -963,65 +966,44 @@ def get_package_context(traceback, context=3):
    from there.

    """
-    def make_stack(tb, stack=None):
-        """Tracebacks come out of the system in caller -> callee order.  Return
-        an array in callee -> caller order so we can traverse it."""
-        if stack is None:
-            stack = []
-        if tb is not None:
-            make_stack(tb.tb_next, stack)
-            stack.append(tb)
-        return stack
-
-    stack = make_stack(traceback)
-
-    for tb in stack:
-        frame = tb.tb_frame
-        if 'self' in frame.f_locals:
-            # Find the first proper subclass of PackageBase.
-            obj = frame.f_locals['self']
-            if isinstance(obj, spack.package.PackageBase):
+    for filename, lineno, function, text in reversed(traceback):
+        if 'package.py' in filename or 'spack/build_systems' in filename:
+            if function not in ('run_test', '_run_test_helper'):
+                # We are in a package and not one of the listed methods
+                # We exclude these methods because we expect errors in them to
+                # be the result of user tests failing, and we show the tests
+                # instead.
                break

-    # Determine whether we are in a package file
-    # Package files are named `package.py` and are not in the lib/spack/spack
    # Package files have a line added at import time, so we adjust the lineno
    # when we are getting context from a package file instead of a base class
-    # We have to remove the file extension because it can be .py and can be
-    # .pyc depending on context, and can differ between the files
-    filename = inspect.getfile(frame.f_code)
-    filename_noext = os.path.splitext(filename)[0]
-    packagebase_filename_noext = os.path.splitext(
-        inspect.getfile(spack.package.PackageBase))[0]
-    in_package = (filename_noext != packagebase_filename_noext and
-                  os.path.basename(filename_noext) == 'package')
-    adjust = 1 if in_package else 0
+    adjust = 1 if spack.paths.is_package_file(filename) else 0
+    lineno = lineno - adjust

    # We found obj, the Package implementation we care about.
    # Point out the location in the install method where we failed.
    lines = [
        '{0}:{1:d}, in {2}:'.format(
            filename,
-            tb.tb_lineno - adjust,  # adjust for import mangling
-            frame.f_code.co_name
+            lineno,
+            function
        )
    ]

    # Build a message showing context in the install method.
-    sourcelines, start = inspect.getsourcelines(frame)
-
-    # Calculate lineno of the error relative to the start of the function.
    # Adjust for import mangling of package files.
-    fun_lineno = tb.tb_lineno - start - adjust
-    start_ctx = max(0, fun_lineno - context)
-    sourcelines = sourcelines[start_ctx:fun_lineno + context + 1]
+    with open(filename, 'r') as f:
+        sourcelines = f.readlines()
+    start = max(0, lineno - context - 1)
+    sourcelines = sourcelines[start:lineno + context + 1]

    for i, line in enumerate(sourcelines):
-        is_error = start_ctx + i == fun_lineno
+        i = i + adjust  # adjusting for import munging again
+        is_error = start + i == lineno
        mark = '>> ' if is_error else '   '
        # Add start to get lineno relative to start of file, not function.
        marked = '  {0}{1:-6d}{2}'.format(
-            mark, start + start_ctx + i, line.rstrip())
+            mark, start + i, line.rstrip())
        if is_error:
            marked = colorize('@R{%s}' % cescape(marked))
        lines.append(marked)
@@ -1093,28 +1075,11 @@ def long_message(self):
        if (self.module, self.name) in ChildError.build_errors:
            # The error happened in some external executed process. Show
            # the log with errors or warnings highlighted.
-            def write_log_summary(log_type, log):
-                errors, warnings = parse_log_events(log)
-                nerr = len(errors)
-                nwar = len(warnings)
-                if nerr > 0:
-                    # If errors are found, only display errors
-                    out.write(
-                        "\n%s found in %s log:\n" %
-                        (plural(nerr, 'error'), log_type))
-                    out.write(make_log_context(errors))
-                elif nwar > 0:
-                    # If no errors are found but warnings are, display warnings
-                    out.write(
-                        "\n%s found in %s log:\n" %
-                        (plural(nwar, 'warning'), log_type))
-                    out.write(make_log_context(warnings))
-
            if self.build_log and os.path.exists(self.build_log):
-                write_log_summary('build', self.build_log)
+                write_log_summary(out, 'build', self.build_log)

            if self.test_log and os.path.exists(self.test_log):
-                write_log_summary('test', self.test_log)
+                write_log_summary(out, 'test', self.test_log)

        else:
            # The error happened in in the Python code, so try to show
@@ -1171,3 +1136,30 @@ def __reduce__(self):

 def _make_stop_phase(msg, long_msg):
    return StopPhase(msg, long_msg)
+
+
+def write_log_summary(out, log_type, log, last=None):
+    errors, warnings = parse_log_events(log)
+    nerr = len(errors)
+    nwar = len(warnings)
+
+    if nerr > 0:
+        if last and nerr > last:
+            errors = errors[-last:]
+            nerr = last
+
+        # If errors are found, only display errors
+        out.write(
+            "\n%s found in %s log:\n" %
+            (plural(nerr, 'error'), log_type))
+        out.write(make_log_context(errors))
+    elif nwar > 0:
+        if last and nwar > last:
+            warnings = warnings[-last:]
+            nwar = last
+
+        # If no errors are found but warnings are, display warnings
+        out.write(
+            "\n%s found in %s log:\n" %
+            (plural(nwar, 'warning'), log_type))
+        out.write(make_log_context(warnings))
--- a/lib/spack/spack/package.py
+++ b/lib/spack/spack/package.py
@@ -1600,6 +1600,9 @@ def test_log_name(self):
        return 'test-%s-out.txt' % self.spec.format('{name}-{hash:7}')

    test_requires_compiler = False
+    test_failures = None
+    test_log_file = None
+    test_stage = None

    def do_test(self, name, remove_directory=False, dirty=False):
        if self.test_requires_compiler:
@@ -1612,16 +1615,19 @@ def do_test(self, name, remove_directory=False, dirty=False):
                          self.spec.compiler)
                return

-        test_stage = Prefix(os.path.join(
+        # Clear test failures
+        self.test_failures = []
+
+        self.test_stage = Prefix(os.path.join(
            sup.canonicalize_path(
                spack.config.get('config:test_stage', os.getcwd())),
            name))
-        if not os.path.exists(test_stage):
-            mkdirp(test_stage)
-        test_log_file = os.path.join(test_stage, self.test_log_name)
+        if not os.path.exists(self.test_stage):
+            mkdirp(self.test_stage)
+        self.test_log_file = os.path.join(self.test_stage, self.test_log_name)

        def test_process():
-            with tty.log.log_output(test_log_file) as logger:
+            with tty.log.log_output(self.test_log_file) as logger:
                with logger.force_echo():
                    tty.msg('Testing package %s' %
                            self.spec.format('{name}-{hash:7}'))
@@ -1631,7 +1637,8 @@ def test_process():
                tty.set_debug(True)

                # setup test directory
-                testdir = test_stage.join(self.spec.format('{name}-{hash}'))
+                testdir = self.test_stage.join(
+                    self.spec.format('{name}-{hash}'))
                if os.path.exists(testdir):
                    shutil.rmtree(testdir)
                mkdirp(testdir)
@@ -1644,20 +1651,14 @@ def test_process():
                try:
                    os.chdir(testdir)
                    self.test()
-                except Exception as e:
-                    # Catch the error and print a summary to the log file
-                    # so that cdash and junit reporters know about it
-                    exc_info = sys.exc_info()
-                    print('Error: %s' % e)
-                    import traceback
-                    traceback.print_tb(exc_info[2])
-                    raise  # re-raise the same error/traceback
-                else:
+                    if self.test_failures:
+                        raise TestFailure(self.test_failures)
+
                    # cleanup test directory on success
                    if remove_directory:
                        shutil.rmtree(testdir)
-                        if not os.listdir(test_stage):
-                            shutil.rmtree(test_stage)
+                        if not os.listdir(self.test_stage):
+                            shutil.rmtree(self.test_stage)
                finally:
                    # reset debug level
                    tty.set_debug(old_debug)
@@ -1673,15 +1674,66 @@ def run_test(self, exe, options=[], expected=[], status=None,
                 installed=False, purpose=''):
        """Run the test and confirm the expected results are obtained

+        Log any failures and continue, they will be re-raised later
+
        Args:
            exe (str): the name of the executable
            options (list of str): list of options to pass to the runner
            expected (list of str): list of expected output strings
            status (int, list of int, or None): possible passing status values
                with 0 and None meaning the test is expected to succeed
-            installed (bool): the executable should be in the install prefix
+            installed (bool): the executable must be in the install prefix
            purpose (str): message to display before running test
        """
+        try:
+            self._run_test_helper(
+                exe, options, expected, status, installed, purpose)
+            tty.msg("PASSED")
+        except BaseException as e:
+            # print a summary of the error to the log file
+            # so that cdash and junit reporters know about it
+            exc_type, _, tb = sys.exc_info()
+            print('Error: %s' % e)
+            import traceback
+            # remove the current call frame to get back to
+            stack = traceback.extract_stack()[:-1]
+
+            # Package files have a line added at import time, so we re-read
+            # the file to make line numbers match. We have to subtract two from
+            # the line number because the original line number is inflated once
+            # by the import statement and the lines are displaced one by the
+            # import statement.
+            for i, entry in enumerate(stack):
+                filename, lineno, function, text = entry
+                if spack.paths.is_package_file(filename):
+                    with open(filename, 'r') as f:
+                        lines = f.readlines()
+                    text = lines[lineno - 2]
+                    stack[i] = (filename, lineno, function, text)
+
+            # Format the stack to print and print it
+            out = traceback.format_list(stack)
+            for line in out:
+                print(line.rstrip('\n'))
+
+            if exc_type is spack.util.executable.ProcessError:
+                out = StringIO()
+                spack.build_environment.write_log_summary(
+                    out, 'test', self.test_log_file, last=1)
+                m = out.getvalue()
+            else:
+                # We're below the package context, so get context from stack
+                # instead of from traceback.
+                # The traceback is truncated here, so we can't use it to
+                # traverse the stack.
+                m = '\n'.join(spack.build_environment.get_package_context(
+                        traceback.extract_stack()))
+
+            exc = e  # e is deleted after this block
+            self.test_failures.append((exc, m))
+
+    def _run_test_helper(self, exe, options, expected, status, installed,
+                         purpose):
        status = [status] if not isinstance(status, list) else status
        if purpose:
            tty.msg(purpose)
@@ -2528,3 +2580,15 @@ def __init__(self, conflict):
        super(DependencyConflictError, self).__init__(
            "%s conflicts with another file in the flattened directory." % (
                conflict))
+
+
+class TestFailure(spack.error.SpackError):
+    """Raised when package tests have failed for an installation."""
+    def __init__(self, failures):
+        # Failures are all exceptions
+        msg = "%d tests failed.\n" % len(failures)
+        for failure, message in failures:
+            msg += '\n\n%s\n' % str(failure)
+            msg += '\n%s\n' % message
+
+        super(TestFailure, self).__init__(msg)
--- a/lib/spack/spack/paths.py
+++ b/lib/spack/spack/paths.py
@@ -10,9 +10,9 @@
 dependencies.
 """
 import os
+import inspect
 from llnl.util.filesystem import ancestor

-
 #: This file lives in $prefix/lib/spack/spack/__file__
 prefix = ancestor(__file__, 4)

@@ -58,3 +58,16 @@
 mock_gpg_data_path = os.path.join(var_path, "gpg.mock", "data")
 mock_gpg_keys_path = os.path.join(var_path, "gpg.mock", "keys")
 gpg_path           = os.path.join(opt_path, "spack", "gpg")
+
+
+def is_package_file(filename):
+    """Determine whether we are in a package file from a repo."""
+    # Package files are named `package.py` and are not in lib/spack/spack
+    # We have to remove the file extension because it can be .py and can be
+    # .pyc depending on context, and can differ between the files
+    import spack.package  # break cycle
+    filename_noext = os.path.splitext(filename)[0]
+    packagebase_filename_noext = os.path.splitext(
+        inspect.getfile(spack.package.PackageBase))[0]
+    return (filename_noext != packagebase_filename_noext and
+            os.path.basename(filename_noext) == 'package')
--- a/lib/spack/spack/test/cmd/test.py
+++ b/lib/spack/spack/test/cmd/test.py
@@ -66,7 +66,7 @@ def test_test_output(install_mockery, mock_archive, mock_fetch,
    with open(outfile, 'r') as f:
        output = f.read()
    assert "BEFORE TEST" in output
-    assert "true: expect to succeed" in output
+    assert "true: expect command status in [" in output
    assert "AFTER TEST" in output
    assert "rror" not in output  # no error

@@ -78,7 +78,8 @@ def test_test_output_on_error(mock_packages, mock_archive, mock_fetch,
    with capfd.disabled():
        out = spack_test('run', 'test-error', fail_on_error=False)

-    assert "ProcessError: Command exited with status 1" in out
+    assert "TestFailure" in out
+    assert "Error: Command exited with status 1" in out


 def test_test_output_on_failure(mock_packages, mock_archive, mock_fetch,
@@ -88,7 +89,8 @@ def test_test_output_on_failure(mock_packages, mock_archive, mock_fetch,
        out = spack_test('run', 'test-fail', fail_on_error=False)

    assert "Expected 'not in the output' in output of `true`" in out
-    assert "AssertionError:" in out
+    assert "Error:" in out
+    assert "TestFailure" in out


 def test_show_log_on_error(mock_packages, mock_archive, mock_fetch,
@@ -106,8 +108,8 @@ def test_show_log_on_error(mock_packages, mock_archive, mock_fetch,
    'mock_packages', 'mock_archive', 'mock_fetch', 'install_mockery'
 )
@pytest.mark.parametrize('pkg_name,msgs', [
-    ('test-error', ['Error: Command exited', 'ProcessError']),
-    ('test-fail', ['Error: Expected', 'AssertionError'])
+    ('test-error', ['Error: Command exited', 'TestFailure']),
+    ('test-fail', ['Error: Expected', 'TestFailure'])
 ])
 def test_junit_output_with_failures(tmpdir, mock_test_stage, pkg_name, msgs):
    install(pkg_name)
--- a/var/spack/repos/builtin/packages/python/package.py
+++ b/var/spack/repos/builtin/packages/python/package.py
@@ -1114,6 +1114,12 @@ def test(self):
        # do not use self.command because we are also testing the run env
        exe = self.command.name

+        self.run_test('/bin/false')
+
+        self.run_test('/usr/bin/false')
+
+        self.run_test('/usr/bin/true', status=1)
+
        # test hello world
        self.run_test(exe, options=['-c', 'print("hello world!")'],
                      expected=['hello world!'])