Ignore Modules v4 environment variables in from_sourcing_file (#10753)

* from_sourcing_file: fixed a bug + added a few ignored variables closes #7536 Credits for this change goes to mgsternberg (original author of #7536) The new variables being ignored are specific to Modules v4. * Use Spack Executable in 'EnvironmentModifications.from_sourcing_file' Using this class avoids duplicating lower level logic to decode stdout and handle non-zero return codes * Extracted a function that returns the environment after sourcing files The logic in `EnvironmentModifications.from_sourcing_file` has been simplified by extracting a function that returns a dictionary with the environment one would have after sourcing the files passed as argument. * Further refactoring of EnvironmentModifications.from_sourcing_file Extracted a function that sanitizes a dictionary removing keys that are blacklisted, but keeping those that are whitelisted. Blacklisting and whitelisting can be done on literals or regex. Extracted a new factory that creates an instance of EnvironmentModifications from a diff of two environments. * Added unit tests * PS1 is blacklisted + more readable names for some variables
2019-07-16 17:54:33 +02:00
parent 5ed68d31a2
commit 2fe1ecbaa2
3 changed files with 307 additions and 113 deletions
--- a/lib/spack/spack/test/data/sourceme_unset.sh
+++ b/lib/spack/spack/test/data/sourceme_unset.sh
@@ -0,0 +1,8 @@
 #!/usr/bin/env bash
 #
 # Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
 # Spack Project Developers. See the top-level COPYRIGHT file for details.
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 unset UNSET_ME
--- a/lib/spack/spack/test/environment_modifications.py
+++ b/lib/spack/spack/test/environment_modifications.py
@@ -14,6 +14,9 @@
 from spack.util.environment import filter_system_paths, is_system_path
 datadir = os.path.join(spack_root, 'lib', 'spack', 'spack', 'test', 'data')
 def test_inspect_path(tmpdir):
    inspections = {
        'bin': ['PATH'],
@@ -55,7 +58,7 @@ def test_exclude_paths_from_inspection():
@pytest.fixture()
-def prepare_environment_for_tests():
+def prepare_environment_for_tests(working_env):
    """Sets a few dummy variables in the current environment, that will be
    useful for the tests below.
    """
@@ -67,11 +70,6 @@ def prepare_environment_for_tests():
    os.environ['PATH_LIST_WITH_SYSTEM_PATHS'] \
        = '/usr/include:' + os.environ['REMOVE_PATH_LIST']
    os.environ['PATH_LIST_WITH_DUPLICATES'] = os.environ['REMOVE_PATH_LIST']
    yield
    for x in ('UNSET_ME', 'EMPTY_PATH_LIST', 'PATH_LIST', 'REMOVE_PATH_LIST',
              'PATH_LIST_WITH_SYSTEM_PATHS', 'PATH_LIST_WITH_DUPLICATES'):
        if x in os.environ:
            del os.environ[x]
@pytest.fixture
@@ -109,19 +107,13 @@ def miscellaneous_paths():
@pytest.fixture
 def files_to_be_sourced():
    """Returns a list of files to be sourced"""
-    datadir = os.path.join(
+    return [
        spack_root, 'lib', 'spack', 'spack', 'test', 'data'
    )
    files = [
        os.path.join(datadir, 'sourceme_first.sh'),
        os.path.join(datadir, 'sourceme_second.sh'),
        os.path.join(datadir, 'sourceme_parameters.sh'),
        os.path.join(datadir, 'sourceme_unicode.sh')
    ]
    return files
 def test_set(env):
    """Tests setting values in the environment."""
@@ -322,8 +314,119 @@ def test_preserve_environment(prepare_environment_for_tests):
    assert os.environ['PATH_LIST'] == '/path/second:/path/third'
@pytest.mark.parametrize('files,expected,deleted', [
    # Sets two variables
    ((os.path.join(datadir, 'sourceme_first.sh'),),
     {'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
    # Check if we can set a variable to different values depending
    # on command line parameters
    ((os.path.join(datadir, 'sourceme_parameters.sh'),),
     {'FOO': 'default'}, []),
    (([os.path.join(datadir, 'sourceme_parameters.sh'), 'intel64'],),
     {'FOO': 'intel64'}, []),
    # Check unsetting variables
    ((os.path.join(datadir, 'sourceme_second.sh'),),
     {'PATH_LIST': '/path/first:/path/second:/path/fourth'},
     ['EMPTY_PATH_LIST']),
    # Check that order of sourcing matters
    ((os.path.join(datadir, 'sourceme_unset.sh'),
      os.path.join(datadir, 'sourceme_first.sh')),
     {'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
    ((os.path.join(datadir, 'sourceme_first.sh'),
      os.path.join(datadir, 'sourceme_unset.sh')),
     {'NEW_VAR': 'new'}, ['UNSET_ME']),
 ])
@pytest.mark.usefixtures('prepare_environment_for_tests')
 def test_environment_from_sourcing_files(files, expected, deleted):
    env = environment.environment_after_sourcing_files(*files)
    # Test that variables that have been modified are still there and contain
    # the expected output
    for name, value in expected.items():
        assert name in env
        assert value in env[name]
    # Test that variables that have been unset are not there
    for name in deleted:
        assert name not in env
 def test_clear(env):
    env.set('A', 'dummy value')
    assert len(env) > 0
    env.clear()
    assert len(env) == 0
@pytest.mark.parametrize('env,blacklist,whitelist', [
    # Check we can blacklist a literal
    ({'SHLVL': '1'}, ['SHLVL'], []),
    # Check whitelist takes precedence
    ({'SHLVL': '1'}, ['SHLVL'], ['SHLVL']),
 ])
 def test_sanitize_literals(env, blacklist, whitelist):
    after = environment.sanitize(env, blacklist, whitelist)
    # Check that all the whitelisted variables are there
    assert all(x in after for x in whitelist)
    # Check that the blacklisted variables that are not
    # whitelisted are there
    blacklist = list(set(blacklist) - set(whitelist))
    assert all(x not in after for x in blacklist)
@pytest.mark.parametrize('env,blacklist,whitelist,expected,deleted', [
    # Check we can blacklist using a regex
    ({'SHLVL': '1'}, ['SH.*'], [], [], ['SHLVL']),
    # Check we can whitelist using a regex
    ({'SHLVL': '1'}, ['SH.*'], ['SH.*'], ['SHLVL'], []),
    # Check regex to blacklist Modules v4 related vars
    ({'MODULES_LMALTNAME': '1', 'MODULES_LMCONFLICT': '2'},
     ['MODULES_(.*)'], [], [], ['MODULES_LMALTNAME', 'MODULES_LMCONFLICT']),
    ({'A_modquar': '1', 'b_modquar': '2', 'C_modshare': '3'},
     [r'(\w*)_mod(quar|share)'], [], [],
     ['A_modquar', 'b_modquar', 'C_modshare']),
 ])
 def test_sanitize_regex(env, blacklist, whitelist, expected, deleted):
    after = environment.sanitize(env, blacklist, whitelist)
    assert all(x in after for x in expected)
    assert all(x not in after for x in deleted)
@pytest.mark.parametrize('before,after,search_list', [
    # Set environment variables
    ({}, {'FOO': 'foo'}, [environment.SetEnv('FOO', 'foo')]),
    # Unset environment variables
    ({'FOO': 'foo'}, {}, [environment.UnsetEnv('FOO')]),
    # Append paths to an environment variable
    ({'FOO_PATH': '/a/path'}, {'FOO_PATH': '/a/path:/b/path'},
     [environment.AppendPath('FOO_PATH', '/b/path')]),
    ({}, {'FOO_PATH': '/a/path:/b/path'}, [
        environment.AppendPath('FOO_PATH', '/a/path:/b/path')
    ]),
    ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/b/path'}, [
        environment.RemovePath('FOO_PATH', '/a/path')
    ]),
    ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/a/path:/c/path'}, [
        environment.RemovePath('FOO_PATH', '/b/path'),
        environment.AppendPath('FOO_PATH', '/c/path')
    ]),
    ({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/c/path:/a/path'}, [
        environment.RemovePath('FOO_PATH', '/b/path'),
        environment.PrependPath('FOO_PATH', '/c/path')
    ])
 ])
 def test_from_environment_diff(before, after, search_list):
    mod = environment.EnvironmentModifications.from_environment_diff(
        before, after
    )
    for item in search_list:
        assert item in mod
--- a/lib/spack/spack/util/environment.py
+++ b/lib/spack/spack/util/environment.py
@@ -12,9 +12,11 @@
 import re
 import sys
 import os.path
-import subprocess
+
 import six
 import llnl.util.tty as tty
 import spack.util.executable as executable
 from llnl.util.lang import dedupe
@@ -171,6 +173,11 @@ def __init__(self, name, **kwargs):
        self.args.update(kwargs)
    def __eq__(self, other):
        if not isinstance(other, NameModifier):
            return False
        return self.name == other.name
    def update_args(self, **kwargs):
        self.__dict__.update(kwargs)
        self.args.update(kwargs)
@@ -185,6 +192,13 @@ def __init__(self, name, value, **kwargs):
        self.args = {'name': name, 'value': value, 'separator': self.separator}
        self.args.update(kwargs)
    def __eq__(self, other):
        if not isinstance(other, NameValueModifier):
            return False
        return self.name == other.name and \
            self.value == other.value and \
            self.separator == other.separator
    def update_args(self, **kwargs):
        self.__dict__.update(kwargs)
        self.args.update(kwargs)
@@ -483,122 +497,86 @@ def shell_modifications(self, shell='sh'):
        return cmds
    @staticmethod
-    def from_sourcing_file(filename, *args, **kwargs):
+    def from_sourcing_file(filename, *arguments, **kwargs):
-        """Returns modifications that would be made by sourcing a file.
+        """Constructs an instance of a
        :py:class:`spack.util.environment.EnvironmentModifications` object
        that has the same effect as sourcing a file.
-        Parameters:
+        Args:
-            filename (str): The file to source
+            filename (str): the file to be sourced
-            *args (list of str): Arguments to pass on the command line
+            *arguments (list of str): arguments to pass on the command line
-        Keyword Arguments:
+        Keyword Args:
-            shell (str): The shell to use (default: ``bash``)
+            shell (str): the shell to use (default: ``bash``)
-            shell_options (str): Options passed to the shell (default: ``-c``)
+            shell_options (str): options passed to the shell (default: ``-c``)
-            source_command (str): The command to run (default: ``source``)
+            source_command (str): the command to run (default: ``source``)
-            suppress_output (str): Redirect used to suppress output of command
+            suppress_output (str): redirect used to suppress output of command
                (default: ``&> /dev/null``)
-            concatenate_on_success (str): Operator used to execute a command
+            concatenate_on_success (str): operator used to execute a command
                only when the previous command succeeds (default: ``&&``)
-            blacklist ([str or re]): Ignore any modifications of these
+            blacklist ([str or re]): ignore any modifications of these
                variables (default: [])
-            whitelist ([str or re]): Always respect modifications of these
+            whitelist ([str or re]): always respect modifications of these
-                variables (default: []). Has precedence over blacklist.
+                variables (default: []). has precedence over blacklist.
-            clean (bool): In addition to removing empty entries,
+            clean (bool): in addition to removing empty entries,
                also remove duplicate entries (default: False).
        Returns:
            EnvironmentModifications: an object that, if executed, has
                the same effect on the environment as sourcing the file
        """
        # Check if the file actually exists
        if not os.path.isfile(filename):
            msg = 'Trying to source non-existing file: {0}'.format(filename)
            raise RuntimeError(msg)
-        # Kwargs parsing and default values
+        # Prepare a whitelist and a blacklist of environment variable names
-        shell                  = kwargs.get('shell', '/bin/bash')
+        blacklist = kwargs.get('blacklist', [])
-        shell_options          = kwargs.get('shell_options', '-c')
+        whitelist = kwargs.get('whitelist', [])
-        source_command         = kwargs.get('source_command', 'source')
+        clean = kwargs.get('clean', False)
        suppress_output        = kwargs.get('suppress_output', '&> /dev/null')
        concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
        blacklist              = kwargs.get('blacklist', [])
        whitelist              = kwargs.get('whitelist', [])
        clean                  = kwargs.get('clean', False)
        source_file = [source_command, filename]
        source_file.extend(args)
        source_file = ' '.join(source_file)
        dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
        dump_environment = 'python -c "{0}"'.format(dump_cmd)
        # Construct the command that will be executed
        command = [
            shell,
            shell_options,
            ' '.join([
                source_file, suppress_output,
                concatenate_on_success, dump_environment,
            ]),
        ]
        # Try to source the file
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, env=os.environ)
        proc.wait()
        if proc.returncode != 0:
            msg = 'Sourcing file {0} returned a non-zero exit code'.format(
                filename)
            raise RuntimeError(msg)
        output = ''.join([line.decode('utf-8') for line in proc.stdout])
        # Construct dictionaries of the environment before and after
        # sourcing the file, so that we can diff them.
        env_before = dict(os.environ)
        env_after = json.loads(output)
        # If we're in python2, convert to str objects instead of unicode
        # like json gives us.  We can't put unicode in os.environ anyway.
        if sys.version_info[0] < 3:
            env_after = dict((k.encode('utf-8'), v.encode('utf-8'))
                             for k, v in env_after.items())
        # Other variables unrelated to sourcing a file
-        blacklist.extend(['SHLVL', '_', 'PWD', 'OLDPWD', 'PS2'])
+        blacklist.extend([
            # Bash internals
            'SHLVL', '_', 'PWD', 'OLDPWD', 'PS1', 'PS2', 'ENV',
            # Environment modules v4
            'LOADEDMODULES', '_LMFILES_', 'BASH_FUNC_module()', 'MODULEPATH',
            'MODULES_(.*)', r'(\w*)_mod(quar|share)'
        ])
-        def set_intersection(fullset, *args):
+        # Compute the environments before and after sourcing
-            # A set intersection using string literals and regexs
+        before = sanitize(
-            meta = '[' + re.escape('[$()*?[]^{|}') + ']'
+            dict(os.environ), blacklist=blacklist, whitelist=whitelist
-            subset = fullset & set(args)   # As literal
+        )
-            for name in args:
+        file_and_args = (filename,) + arguments
-                if re.search(meta, name):
+        after = sanitize(
-                    pattern = re.compile(name)
+            environment_after_sourcing_files(file_and_args, **kwargs),
-                    for k in fullset:
+            blacklist=blacklist, whitelist=whitelist
-                        if re.match(pattern, k):
+        )
                            subset.add(k)
            return subset
-        for d in env_after, env_before:
+        # Delegate to the other factory
-            # Retain (whitelist) has priority over prune (blacklist)
+        return EnvironmentModifications.from_environment_diff(
-            prune = set_intersection(set(d), *blacklist)
+            before, after, clean
-            prune -= set_intersection(prune, *whitelist)
+        )
            for k in prune:
                d.pop(k, None)
    @staticmethod
    def from_environment_diff(before, after, clean=False):
        """Constructs an instance of a
        :py:class:`spack.util.environment.EnvironmentModifications` object
        from the diff of two dictionaries.
        Args:
            before (dict): environment before the modifications are applied
            after (dict): environment after the modifications are applied
            clean (bool): in addition to removing empty entries, also remove
                duplicate entries
        """
        # Fill the EnvironmentModifications instance
        env = EnvironmentModifications()
        # New variables
-        new_variables = list(set(env_after) - set(env_before))
+        new_variables = list(set(after) - set(before))
        # Variables that have been unset
-        unset_variables = list(set(env_before) - set(env_after))
+        unset_variables = list(set(before) - set(after))
        # Variables that have been modified
-        common_variables = set(env_before).intersection(set(env_after))
+        common_variables = set(before).intersection(set(after))
        modified_variables = [x for x in common_variables
-                              if env_before[x] != env_after[x]]
+                              if before[x] != after[x]]
        # Consistent output order - looks nicer, easier comparison...
        new_variables.sort()
        unset_variables.sort()
@@ -616,21 +594,21 @@ def return_separator_if_any(*args):
        # Assume that variables with 'PATH' in the name or that contain
        # separators like ':' or ';' are more likely to be paths
        for x in new_variables:
-            sep = return_separator_if_any(env_after[x])
+            sep = return_separator_if_any(after[x])
            if sep:
-                env.prepend_path(x, env_after[x], separator=sep)
+                env.prepend_path(x, after[x], separator=sep)
            elif 'PATH' in x:
-                env.prepend_path(x, env_after[x])
+                env.prepend_path(x, after[x])
            else:
                # We just need to set the variable to the new value
-                env.set(x, env_after[x])
+                env.set(x, after[x])
        for x in unset_variables:
            env.unset(x)
        for x in modified_variables:
-            before = env_before[x]
+            before = before[x]
-            after = env_after[x]
+            after = after[x]
            sep = return_separator_if_any(before, after)
            if sep:
                before_list = before.split(sep)
@@ -844,3 +822,108 @@ def preserve_environment(*variables):
            msg += ' {0} was set to "{1}", will be unset'
            tty.debug(msg.format(var, os.environ[var]))
            del os.environ[var]
 def environment_after_sourcing_files(*files, **kwargs):
    """Returns a dictionary with the environment that one would have
    after sourcing the files passed as argument.
    Args:
        *files: each item can either be a string containing the path
            of the file to be sourced or a sequence, where the first element
            is the file to be sourced and the remaining are arguments to be
            passed to the command line
    Keyword Args:
        env (dict): the initial environment (default: current environment)
        shell (str): the shell to use (default: ``/bin/bash``)
        shell_options (str): options passed to the shell (default: ``-c``)
        source_command (str): the command to run (default: ``source``)
        suppress_output (str): redirect used to suppress output of command
            (default: ``&> /dev/null``)
        concatenate_on_success (str): operator used to execute a command
            only when the previous command succeeds (default: ``&&``)
    """
    # Set the shell executable that will be used to source files
    shell_cmd = kwargs.get('shell', '/bin/bash')
    shell_options = kwargs.get('shell_options', '-c')
    source_command = kwargs.get('source_command', 'source')
    suppress_output = kwargs.get('suppress_output', '&> /dev/null')
    concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
    shell = executable.Executable(' '.join([shell_cmd, shell_options]))
    def _source_single_file(file_and_args, environment):
        source_file = [source_command]
        source_file.extend(x for x in file_and_args)
        source_file = ' '.join(source_file)
        dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
        dump_environment = 'python -c "{0}"'.format(dump_cmd)
        # Try to source the file
        source_file_arguments = ' '.join([
            source_file, suppress_output,
            concatenate_on_success, dump_environment,
        ])
        output = shell(source_file_arguments, output=str, env=environment)
        environment = json.loads(output)
        # If we're in python2, convert to str objects instead of unicode
        # like json gives us.  We can't put unicode in os.environ anyway.
        if sys.version_info[0] < 3:
            environment = dict(
                (k.encode('utf-8'), v.encode('utf-8'))
                for k, v in environment.items()
            )
        return environment
    current_environment = kwargs.get('env', dict(os.environ))
    for f in files:
        # Normalize the input to the helper function
        if isinstance(f, six.string_types):
            f = [f]
        current_environment = _source_single_file(
            f, environment=current_environment
        )
    return current_environment
 def sanitize(environment, blacklist, whitelist):
    """Returns a copy of the input dictionary where all the keys that
    match a blacklist pattern and don't match a whitelist pattern are
    removed.
    Args:
        environment (dict): input dictionary
        blacklist (list of str): literals or regex patterns to be
            blacklisted
        whitelist (list of str): literals or regex patterns to be
            whitelisted
    """
    def set_intersection(fullset, *args):
        # A set intersection using string literals and regexs
        meta = '[' + re.escape('[$()*?[]^{|}') + ']'
        subset = fullset & set(args)  # As literal
        for name in args:
            if re.search(meta, name):
                pattern = re.compile(name)
                for k in fullset:
                    if re.match(pattern, k):
                        subset.add(k)
        return subset
    # Don't modify input, make a copy instead
    environment = dict(environment)
    # Retain (whitelist) has priority over prune (blacklist)
    prune = set_intersection(set(environment), *blacklist)
    prune -= set_intersection(prune, *whitelist)
    for k in prune:
        environment.pop(k, None)
    return environment