Ignore Modules v4 environment variables in from_sourcing_file (#10753)

* from_sourcing_file: fixed a bug + added a few ignored variables

closes #7536

Credits for this change goes to mgsternberg (original author of #7536)

The new variables being ignored are specific to Modules v4.

* Use Spack Executable in 'EnvironmentModifications.from_sourcing_file'

Using this class avoids duplicating lower level logic to decode
stdout and handle non-zero return codes

* Extracted a function that returns the environment after sourcing files

The logic in `EnvironmentModifications.from_sourcing_file` has been
simplified by extracting a function that returns a dictionary with the
environment one would have after sourcing the files passed as argument.

* Further refactoring of EnvironmentModifications.from_sourcing_file

Extracted a function that sanitizes a dictionary removing keys that are
blacklisted, but keeping those that are whitelisted. Blacklisting and
whitelisting can be done on literals or regex.

Extracted a new factory that creates an instance of
EnvironmentModifications from a diff of two environments.

* Added unit tests

* PS1 is blacklisted + more readable names for some variables
This commit is contained in:
Massimiliano Culpo 2019-07-16 17:54:33 +02:00 committed by Greg Becker
parent 5ed68d31a2
commit 2fe1ecbaa2
3 changed files with 307 additions and 113 deletions

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
#
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
unset UNSET_ME

View File

@ -14,6 +14,9 @@
from spack.util.environment import filter_system_paths, is_system_path from spack.util.environment import filter_system_paths, is_system_path
datadir = os.path.join(spack_root, 'lib', 'spack', 'spack', 'test', 'data')
def test_inspect_path(tmpdir): def test_inspect_path(tmpdir):
inspections = { inspections = {
'bin': ['PATH'], 'bin': ['PATH'],
@ -55,7 +58,7 @@ def test_exclude_paths_from_inspection():
@pytest.fixture() @pytest.fixture()
def prepare_environment_for_tests(): def prepare_environment_for_tests(working_env):
"""Sets a few dummy variables in the current environment, that will be """Sets a few dummy variables in the current environment, that will be
useful for the tests below. useful for the tests below.
""" """
@ -67,11 +70,6 @@ def prepare_environment_for_tests():
os.environ['PATH_LIST_WITH_SYSTEM_PATHS'] \ os.environ['PATH_LIST_WITH_SYSTEM_PATHS'] \
= '/usr/include:' + os.environ['REMOVE_PATH_LIST'] = '/usr/include:' + os.environ['REMOVE_PATH_LIST']
os.environ['PATH_LIST_WITH_DUPLICATES'] = os.environ['REMOVE_PATH_LIST'] os.environ['PATH_LIST_WITH_DUPLICATES'] = os.environ['REMOVE_PATH_LIST']
yield
for x in ('UNSET_ME', 'EMPTY_PATH_LIST', 'PATH_LIST', 'REMOVE_PATH_LIST',
'PATH_LIST_WITH_SYSTEM_PATHS', 'PATH_LIST_WITH_DUPLICATES'):
if x in os.environ:
del os.environ[x]
@pytest.fixture @pytest.fixture
@ -109,19 +107,13 @@ def miscellaneous_paths():
@pytest.fixture @pytest.fixture
def files_to_be_sourced(): def files_to_be_sourced():
"""Returns a list of files to be sourced""" """Returns a list of files to be sourced"""
datadir = os.path.join( return [
spack_root, 'lib', 'spack', 'spack', 'test', 'data'
)
files = [
os.path.join(datadir, 'sourceme_first.sh'), os.path.join(datadir, 'sourceme_first.sh'),
os.path.join(datadir, 'sourceme_second.sh'), os.path.join(datadir, 'sourceme_second.sh'),
os.path.join(datadir, 'sourceme_parameters.sh'), os.path.join(datadir, 'sourceme_parameters.sh'),
os.path.join(datadir, 'sourceme_unicode.sh') os.path.join(datadir, 'sourceme_unicode.sh')
] ]
return files
def test_set(env): def test_set(env):
"""Tests setting values in the environment.""" """Tests setting values in the environment."""
@ -322,8 +314,119 @@ def test_preserve_environment(prepare_environment_for_tests):
assert os.environ['PATH_LIST'] == '/path/second:/path/third' assert os.environ['PATH_LIST'] == '/path/second:/path/third'
@pytest.mark.parametrize('files,expected,deleted', [
# Sets two variables
((os.path.join(datadir, 'sourceme_first.sh'),),
{'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
# Check if we can set a variable to different values depending
# on command line parameters
((os.path.join(datadir, 'sourceme_parameters.sh'),),
{'FOO': 'default'}, []),
(([os.path.join(datadir, 'sourceme_parameters.sh'), 'intel64'],),
{'FOO': 'intel64'}, []),
# Check unsetting variables
((os.path.join(datadir, 'sourceme_second.sh'),),
{'PATH_LIST': '/path/first:/path/second:/path/fourth'},
['EMPTY_PATH_LIST']),
# Check that order of sourcing matters
((os.path.join(datadir, 'sourceme_unset.sh'),
os.path.join(datadir, 'sourceme_first.sh')),
{'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
((os.path.join(datadir, 'sourceme_first.sh'),
os.path.join(datadir, 'sourceme_unset.sh')),
{'NEW_VAR': 'new'}, ['UNSET_ME']),
])
@pytest.mark.usefixtures('prepare_environment_for_tests')
def test_environment_from_sourcing_files(files, expected, deleted):
env = environment.environment_after_sourcing_files(*files)
# Test that variables that have been modified are still there and contain
# the expected output
for name, value in expected.items():
assert name in env
assert value in env[name]
# Test that variables that have been unset are not there
for name in deleted:
assert name not in env
def test_clear(env): def test_clear(env):
env.set('A', 'dummy value') env.set('A', 'dummy value')
assert len(env) > 0 assert len(env) > 0
env.clear() env.clear()
assert len(env) == 0 assert len(env) == 0
@pytest.mark.parametrize('env,blacklist,whitelist', [
# Check we can blacklist a literal
({'SHLVL': '1'}, ['SHLVL'], []),
# Check whitelist takes precedence
({'SHLVL': '1'}, ['SHLVL'], ['SHLVL']),
])
def test_sanitize_literals(env, blacklist, whitelist):
after = environment.sanitize(env, blacklist, whitelist)
# Check that all the whitelisted variables are there
assert all(x in after for x in whitelist)
# Check that the blacklisted variables that are not
# whitelisted are there
blacklist = list(set(blacklist) - set(whitelist))
assert all(x not in after for x in blacklist)
@pytest.mark.parametrize('env,blacklist,whitelist,expected,deleted', [
# Check we can blacklist using a regex
({'SHLVL': '1'}, ['SH.*'], [], [], ['SHLVL']),
# Check we can whitelist using a regex
({'SHLVL': '1'}, ['SH.*'], ['SH.*'], ['SHLVL'], []),
# Check regex to blacklist Modules v4 related vars
({'MODULES_LMALTNAME': '1', 'MODULES_LMCONFLICT': '2'},
['MODULES_(.*)'], [], [], ['MODULES_LMALTNAME', 'MODULES_LMCONFLICT']),
({'A_modquar': '1', 'b_modquar': '2', 'C_modshare': '3'},
[r'(\w*)_mod(quar|share)'], [], [],
['A_modquar', 'b_modquar', 'C_modshare']),
])
def test_sanitize_regex(env, blacklist, whitelist, expected, deleted):
after = environment.sanitize(env, blacklist, whitelist)
assert all(x in after for x in expected)
assert all(x not in after for x in deleted)
@pytest.mark.parametrize('before,after,search_list', [
# Set environment variables
({}, {'FOO': 'foo'}, [environment.SetEnv('FOO', 'foo')]),
# Unset environment variables
({'FOO': 'foo'}, {}, [environment.UnsetEnv('FOO')]),
# Append paths to an environment variable
({'FOO_PATH': '/a/path'}, {'FOO_PATH': '/a/path:/b/path'},
[environment.AppendPath('FOO_PATH', '/b/path')]),
({}, {'FOO_PATH': '/a/path:/b/path'}, [
environment.AppendPath('FOO_PATH', '/a/path:/b/path')
]),
({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/b/path'}, [
environment.RemovePath('FOO_PATH', '/a/path')
]),
({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/a/path:/c/path'}, [
environment.RemovePath('FOO_PATH', '/b/path'),
environment.AppendPath('FOO_PATH', '/c/path')
]),
({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/c/path:/a/path'}, [
environment.RemovePath('FOO_PATH', '/b/path'),
environment.PrependPath('FOO_PATH', '/c/path')
])
])
def test_from_environment_diff(before, after, search_list):
mod = environment.EnvironmentModifications.from_environment_diff(
before, after
)
for item in search_list:
assert item in mod

View File

@ -12,9 +12,11 @@
import re import re
import sys import sys
import os.path import os.path
import subprocess
import six
import llnl.util.tty as tty import llnl.util.tty as tty
import spack.util.executable as executable
from llnl.util.lang import dedupe from llnl.util.lang import dedupe
@ -171,6 +173,11 @@ def __init__(self, name, **kwargs):
self.args.update(kwargs) self.args.update(kwargs)
def __eq__(self, other):
if not isinstance(other, NameModifier):
return False
return self.name == other.name
def update_args(self, **kwargs): def update_args(self, **kwargs):
self.__dict__.update(kwargs) self.__dict__.update(kwargs)
self.args.update(kwargs) self.args.update(kwargs)
@ -185,6 +192,13 @@ def __init__(self, name, value, **kwargs):
self.args = {'name': name, 'value': value, 'separator': self.separator} self.args = {'name': name, 'value': value, 'separator': self.separator}
self.args.update(kwargs) self.args.update(kwargs)
def __eq__(self, other):
if not isinstance(other, NameValueModifier):
return False
return self.name == other.name and \
self.value == other.value and \
self.separator == other.separator
def update_args(self, **kwargs): def update_args(self, **kwargs):
self.__dict__.update(kwargs) self.__dict__.update(kwargs)
self.args.update(kwargs) self.args.update(kwargs)
@ -483,122 +497,86 @@ def shell_modifications(self, shell='sh'):
return cmds return cmds
@staticmethod @staticmethod
def from_sourcing_file(filename, *args, **kwargs): def from_sourcing_file(filename, *arguments, **kwargs):
"""Returns modifications that would be made by sourcing a file. """Constructs an instance of a
:py:class:`spack.util.environment.EnvironmentModifications` object
that has the same effect as sourcing a file.
Parameters: Args:
filename (str): The file to source filename (str): the file to be sourced
*args (list of str): Arguments to pass on the command line *arguments (list of str): arguments to pass on the command line
Keyword Arguments: Keyword Args:
shell (str): The shell to use (default: ``bash``) shell (str): the shell to use (default: ``bash``)
shell_options (str): Options passed to the shell (default: ``-c``) shell_options (str): options passed to the shell (default: ``-c``)
source_command (str): The command to run (default: ``source``) source_command (str): the command to run (default: ``source``)
suppress_output (str): Redirect used to suppress output of command suppress_output (str): redirect used to suppress output of command
(default: ``&> /dev/null``) (default: ``&> /dev/null``)
concatenate_on_success (str): Operator used to execute a command concatenate_on_success (str): operator used to execute a command
only when the previous command succeeds (default: ``&&``) only when the previous command succeeds (default: ``&&``)
blacklist ([str or re]): Ignore any modifications of these blacklist ([str or re]): ignore any modifications of these
variables (default: []) variables (default: [])
whitelist ([str or re]): Always respect modifications of these whitelist ([str or re]): always respect modifications of these
variables (default: []). Has precedence over blacklist. variables (default: []). has precedence over blacklist.
clean (bool): In addition to removing empty entries, clean (bool): in addition to removing empty entries,
also remove duplicate entries (default: False). also remove duplicate entries (default: False).
Returns:
EnvironmentModifications: an object that, if executed, has
the same effect on the environment as sourcing the file
""" """
# Check if the file actually exists # Check if the file actually exists
if not os.path.isfile(filename): if not os.path.isfile(filename):
msg = 'Trying to source non-existing file: {0}'.format(filename) msg = 'Trying to source non-existing file: {0}'.format(filename)
raise RuntimeError(msg) raise RuntimeError(msg)
# Kwargs parsing and default values # Prepare a whitelist and a blacklist of environment variable names
shell = kwargs.get('shell', '/bin/bash') blacklist = kwargs.get('blacklist', [])
shell_options = kwargs.get('shell_options', '-c') whitelist = kwargs.get('whitelist', [])
source_command = kwargs.get('source_command', 'source') clean = kwargs.get('clean', False)
suppress_output = kwargs.get('suppress_output', '&> /dev/null')
concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
blacklist = kwargs.get('blacklist', [])
whitelist = kwargs.get('whitelist', [])
clean = kwargs.get('clean', False)
source_file = [source_command, filename]
source_file.extend(args)
source_file = ' '.join(source_file)
dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
dump_environment = 'python -c "{0}"'.format(dump_cmd)
# Construct the command that will be executed
command = [
shell,
shell_options,
' '.join([
source_file, suppress_output,
concatenate_on_success, dump_environment,
]),
]
# Try to source the file
proc = subprocess.Popen(
command, stdout=subprocess.PIPE, env=os.environ)
proc.wait()
if proc.returncode != 0:
msg = 'Sourcing file {0} returned a non-zero exit code'.format(
filename)
raise RuntimeError(msg)
output = ''.join([line.decode('utf-8') for line in proc.stdout])
# Construct dictionaries of the environment before and after
# sourcing the file, so that we can diff them.
env_before = dict(os.environ)
env_after = json.loads(output)
# If we're in python2, convert to str objects instead of unicode
# like json gives us. We can't put unicode in os.environ anyway.
if sys.version_info[0] < 3:
env_after = dict((k.encode('utf-8'), v.encode('utf-8'))
for k, v in env_after.items())
# Other variables unrelated to sourcing a file # Other variables unrelated to sourcing a file
blacklist.extend(['SHLVL', '_', 'PWD', 'OLDPWD', 'PS2']) blacklist.extend([
# Bash internals
'SHLVL', '_', 'PWD', 'OLDPWD', 'PS1', 'PS2', 'ENV',
# Environment modules v4
'LOADEDMODULES', '_LMFILES_', 'BASH_FUNC_module()', 'MODULEPATH',
'MODULES_(.*)', r'(\w*)_mod(quar|share)'
])
def set_intersection(fullset, *args): # Compute the environments before and after sourcing
# A set intersection using string literals and regexs before = sanitize(
meta = '[' + re.escape('[$()*?[]^{|}') + ']' dict(os.environ), blacklist=blacklist, whitelist=whitelist
subset = fullset & set(args) # As literal )
for name in args: file_and_args = (filename,) + arguments
if re.search(meta, name): after = sanitize(
pattern = re.compile(name) environment_after_sourcing_files(file_and_args, **kwargs),
for k in fullset: blacklist=blacklist, whitelist=whitelist
if re.match(pattern, k): )
subset.add(k)
return subset
for d in env_after, env_before: # Delegate to the other factory
# Retain (whitelist) has priority over prune (blacklist) return EnvironmentModifications.from_environment_diff(
prune = set_intersection(set(d), *blacklist) before, after, clean
prune -= set_intersection(prune, *whitelist) )
for k in prune:
d.pop(k, None)
@staticmethod
def from_environment_diff(before, after, clean=False):
"""Constructs an instance of a
:py:class:`spack.util.environment.EnvironmentModifications` object
from the diff of two dictionaries.
Args:
before (dict): environment before the modifications are applied
after (dict): environment after the modifications are applied
clean (bool): in addition to removing empty entries, also remove
duplicate entries
"""
# Fill the EnvironmentModifications instance # Fill the EnvironmentModifications instance
env = EnvironmentModifications() env = EnvironmentModifications()
# New variables # New variables
new_variables = list(set(env_after) - set(env_before)) new_variables = list(set(after) - set(before))
# Variables that have been unset # Variables that have been unset
unset_variables = list(set(env_before) - set(env_after)) unset_variables = list(set(before) - set(after))
# Variables that have been modified # Variables that have been modified
common_variables = set(env_before).intersection(set(env_after)) common_variables = set(before).intersection(set(after))
modified_variables = [x for x in common_variables modified_variables = [x for x in common_variables
if env_before[x] != env_after[x]] if before[x] != after[x]]
# Consistent output order - looks nicer, easier comparison... # Consistent output order - looks nicer, easier comparison...
new_variables.sort() new_variables.sort()
unset_variables.sort() unset_variables.sort()
@ -616,21 +594,21 @@ def return_separator_if_any(*args):
# Assume that variables with 'PATH' in the name or that contain # Assume that variables with 'PATH' in the name or that contain
# separators like ':' or ';' are more likely to be paths # separators like ':' or ';' are more likely to be paths
for x in new_variables: for x in new_variables:
sep = return_separator_if_any(env_after[x]) sep = return_separator_if_any(after[x])
if sep: if sep:
env.prepend_path(x, env_after[x], separator=sep) env.prepend_path(x, after[x], separator=sep)
elif 'PATH' in x: elif 'PATH' in x:
env.prepend_path(x, env_after[x]) env.prepend_path(x, after[x])
else: else:
# We just need to set the variable to the new value # We just need to set the variable to the new value
env.set(x, env_after[x]) env.set(x, after[x])
for x in unset_variables: for x in unset_variables:
env.unset(x) env.unset(x)
for x in modified_variables: for x in modified_variables:
before = env_before[x] before = before[x]
after = env_after[x] after = after[x]
sep = return_separator_if_any(before, after) sep = return_separator_if_any(before, after)
if sep: if sep:
before_list = before.split(sep) before_list = before.split(sep)
@ -844,3 +822,108 @@ def preserve_environment(*variables):
msg += ' {0} was set to "{1}", will be unset' msg += ' {0} was set to "{1}", will be unset'
tty.debug(msg.format(var, os.environ[var])) tty.debug(msg.format(var, os.environ[var]))
del os.environ[var] del os.environ[var]
def environment_after_sourcing_files(*files, **kwargs):
"""Returns a dictionary with the environment that one would have
after sourcing the files passed as argument.
Args:
*files: each item can either be a string containing the path
of the file to be sourced or a sequence, where the first element
is the file to be sourced and the remaining are arguments to be
passed to the command line
Keyword Args:
env (dict): the initial environment (default: current environment)
shell (str): the shell to use (default: ``/bin/bash``)
shell_options (str): options passed to the shell (default: ``-c``)
source_command (str): the command to run (default: ``source``)
suppress_output (str): redirect used to suppress output of command
(default: ``&> /dev/null``)
concatenate_on_success (str): operator used to execute a command
only when the previous command succeeds (default: ``&&``)
"""
# Set the shell executable that will be used to source files
shell_cmd = kwargs.get('shell', '/bin/bash')
shell_options = kwargs.get('shell_options', '-c')
source_command = kwargs.get('source_command', 'source')
suppress_output = kwargs.get('suppress_output', '&> /dev/null')
concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
shell = executable.Executable(' '.join([shell_cmd, shell_options]))
def _source_single_file(file_and_args, environment):
source_file = [source_command]
source_file.extend(x for x in file_and_args)
source_file = ' '.join(source_file)
dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
dump_environment = 'python -c "{0}"'.format(dump_cmd)
# Try to source the file
source_file_arguments = ' '.join([
source_file, suppress_output,
concatenate_on_success, dump_environment,
])
output = shell(source_file_arguments, output=str, env=environment)
environment = json.loads(output)
# If we're in python2, convert to str objects instead of unicode
# like json gives us. We can't put unicode in os.environ anyway.
if sys.version_info[0] < 3:
environment = dict(
(k.encode('utf-8'), v.encode('utf-8'))
for k, v in environment.items()
)
return environment
current_environment = kwargs.get('env', dict(os.environ))
for f in files:
# Normalize the input to the helper function
if isinstance(f, six.string_types):
f = [f]
current_environment = _source_single_file(
f, environment=current_environment
)
return current_environment
def sanitize(environment, blacklist, whitelist):
"""Returns a copy of the input dictionary where all the keys that
match a blacklist pattern and don't match a whitelist pattern are
removed.
Args:
environment (dict): input dictionary
blacklist (list of str): literals or regex patterns to be
blacklisted
whitelist (list of str): literals or regex patterns to be
whitelisted
"""
def set_intersection(fullset, *args):
# A set intersection using string literals and regexs
meta = '[' + re.escape('[$()*?[]^{|}') + ']'
subset = fullset & set(args) # As literal
for name in args:
if re.search(meta, name):
pattern = re.compile(name)
for k in fullset:
if re.match(pattern, k):
subset.add(k)
return subset
# Don't modify input, make a copy instead
environment = dict(environment)
# Retain (whitelist) has priority over prune (blacklist)
prune = set_intersection(set(environment), *blacklist)
prune -= set_intersection(prune, *whitelist)
for k in prune:
environment.pop(k, None)
return environment