Ignore Modules v4 environment variables in from_sourcing_file (#10753)

* from_sourcing_file: fixed a bug + added a few ignored variables

closes #7536

Credits for this change goes to mgsternberg (original author of #7536)

The new variables being ignored are specific to Modules v4.

* Use Spack Executable in 'EnvironmentModifications.from_sourcing_file'

Using this class avoids duplicating lower level logic to decode
stdout and handle non-zero return codes

* Extracted a function that returns the environment after sourcing files

The logic in `EnvironmentModifications.from_sourcing_file` has been
simplified by extracting a function that returns a dictionary with the
environment one would have after sourcing the files passed as argument.

* Further refactoring of EnvironmentModifications.from_sourcing_file

Extracted a function that sanitizes a dictionary removing keys that are
blacklisted, but keeping those that are whitelisted. Blacklisting and
whitelisting can be done on literals or regex.

Extracted a new factory that creates an instance of
EnvironmentModifications from a diff of two environments.

* Added unit tests

* PS1 is blacklisted + more readable names for some variables
This commit is contained in:
Massimiliano Culpo 2019-07-16 17:54:33 +02:00 committed by Greg Becker
parent 5ed68d31a2
commit 2fe1ecbaa2
3 changed files with 307 additions and 113 deletions

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
#
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
unset UNSET_ME

View File

@ -14,6 +14,9 @@
from spack.util.environment import filter_system_paths, is_system_path
datadir = os.path.join(spack_root, 'lib', 'spack', 'spack', 'test', 'data')
def test_inspect_path(tmpdir):
inspections = {
'bin': ['PATH'],
@ -55,7 +58,7 @@ def test_exclude_paths_from_inspection():
@pytest.fixture()
def prepare_environment_for_tests():
def prepare_environment_for_tests(working_env):
"""Sets a few dummy variables in the current environment, that will be
useful for the tests below.
"""
@ -67,11 +70,6 @@ def prepare_environment_for_tests():
os.environ['PATH_LIST_WITH_SYSTEM_PATHS'] \
= '/usr/include:' + os.environ['REMOVE_PATH_LIST']
os.environ['PATH_LIST_WITH_DUPLICATES'] = os.environ['REMOVE_PATH_LIST']
yield
for x in ('UNSET_ME', 'EMPTY_PATH_LIST', 'PATH_LIST', 'REMOVE_PATH_LIST',
'PATH_LIST_WITH_SYSTEM_PATHS', 'PATH_LIST_WITH_DUPLICATES'):
if x in os.environ:
del os.environ[x]
@pytest.fixture
@ -109,19 +107,13 @@ def miscellaneous_paths():
@pytest.fixture
def files_to_be_sourced():
"""Returns a list of files to be sourced"""
datadir = os.path.join(
spack_root, 'lib', 'spack', 'spack', 'test', 'data'
)
files = [
return [
os.path.join(datadir, 'sourceme_first.sh'),
os.path.join(datadir, 'sourceme_second.sh'),
os.path.join(datadir, 'sourceme_parameters.sh'),
os.path.join(datadir, 'sourceme_unicode.sh')
]
return files
def test_set(env):
"""Tests setting values in the environment."""
@ -322,8 +314,119 @@ def test_preserve_environment(prepare_environment_for_tests):
assert os.environ['PATH_LIST'] == '/path/second:/path/third'
@pytest.mark.parametrize('files,expected,deleted', [
# Sets two variables
((os.path.join(datadir, 'sourceme_first.sh'),),
{'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
# Check if we can set a variable to different values depending
# on command line parameters
((os.path.join(datadir, 'sourceme_parameters.sh'),),
{'FOO': 'default'}, []),
(([os.path.join(datadir, 'sourceme_parameters.sh'), 'intel64'],),
{'FOO': 'intel64'}, []),
# Check unsetting variables
((os.path.join(datadir, 'sourceme_second.sh'),),
{'PATH_LIST': '/path/first:/path/second:/path/fourth'},
['EMPTY_PATH_LIST']),
# Check that order of sourcing matters
((os.path.join(datadir, 'sourceme_unset.sh'),
os.path.join(datadir, 'sourceme_first.sh')),
{'NEW_VAR': 'new', 'UNSET_ME': 'overridden'}, []),
((os.path.join(datadir, 'sourceme_first.sh'),
os.path.join(datadir, 'sourceme_unset.sh')),
{'NEW_VAR': 'new'}, ['UNSET_ME']),
])
@pytest.mark.usefixtures('prepare_environment_for_tests')
def test_environment_from_sourcing_files(files, expected, deleted):
env = environment.environment_after_sourcing_files(*files)
# Test that variables that have been modified are still there and contain
# the expected output
for name, value in expected.items():
assert name in env
assert value in env[name]
# Test that variables that have been unset are not there
for name in deleted:
assert name not in env
def test_clear(env):
env.set('A', 'dummy value')
assert len(env) > 0
env.clear()
assert len(env) == 0
@pytest.mark.parametrize('env,blacklist,whitelist', [
# Check we can blacklist a literal
({'SHLVL': '1'}, ['SHLVL'], []),
# Check whitelist takes precedence
({'SHLVL': '1'}, ['SHLVL'], ['SHLVL']),
])
def test_sanitize_literals(env, blacklist, whitelist):
after = environment.sanitize(env, blacklist, whitelist)
# Check that all the whitelisted variables are there
assert all(x in after for x in whitelist)
# Check that the blacklisted variables that are not
# whitelisted are there
blacklist = list(set(blacklist) - set(whitelist))
assert all(x not in after for x in blacklist)
@pytest.mark.parametrize('env,blacklist,whitelist,expected,deleted', [
# Check we can blacklist using a regex
({'SHLVL': '1'}, ['SH.*'], [], [], ['SHLVL']),
# Check we can whitelist using a regex
({'SHLVL': '1'}, ['SH.*'], ['SH.*'], ['SHLVL'], []),
# Check regex to blacklist Modules v4 related vars
({'MODULES_LMALTNAME': '1', 'MODULES_LMCONFLICT': '2'},
['MODULES_(.*)'], [], [], ['MODULES_LMALTNAME', 'MODULES_LMCONFLICT']),
({'A_modquar': '1', 'b_modquar': '2', 'C_modshare': '3'},
[r'(\w*)_mod(quar|share)'], [], [],
['A_modquar', 'b_modquar', 'C_modshare']),
])
def test_sanitize_regex(env, blacklist, whitelist, expected, deleted):
after = environment.sanitize(env, blacklist, whitelist)
assert all(x in after for x in expected)
assert all(x not in after for x in deleted)
@pytest.mark.parametrize('before,after,search_list', [
# Set environment variables
({}, {'FOO': 'foo'}, [environment.SetEnv('FOO', 'foo')]),
# Unset environment variables
({'FOO': 'foo'}, {}, [environment.UnsetEnv('FOO')]),
# Append paths to an environment variable
({'FOO_PATH': '/a/path'}, {'FOO_PATH': '/a/path:/b/path'},
[environment.AppendPath('FOO_PATH', '/b/path')]),
({}, {'FOO_PATH': '/a/path:/b/path'}, [
environment.AppendPath('FOO_PATH', '/a/path:/b/path')
]),
({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/b/path'}, [
environment.RemovePath('FOO_PATH', '/a/path')
]),
({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/a/path:/c/path'}, [
environment.RemovePath('FOO_PATH', '/b/path'),
environment.AppendPath('FOO_PATH', '/c/path')
]),
({'FOO_PATH': '/a/path:/b/path'}, {'FOO_PATH': '/c/path:/a/path'}, [
environment.RemovePath('FOO_PATH', '/b/path'),
environment.PrependPath('FOO_PATH', '/c/path')
])
])
def test_from_environment_diff(before, after, search_list):
mod = environment.EnvironmentModifications.from_environment_diff(
before, after
)
for item in search_list:
assert item in mod

View File

@ -12,9 +12,11 @@
import re
import sys
import os.path
import subprocess
import six
import llnl.util.tty as tty
import spack.util.executable as executable
from llnl.util.lang import dedupe
@ -171,6 +173,11 @@ def __init__(self, name, **kwargs):
self.args.update(kwargs)
def __eq__(self, other):
if not isinstance(other, NameModifier):
return False
return self.name == other.name
def update_args(self, **kwargs):
self.__dict__.update(kwargs)
self.args.update(kwargs)
@ -185,6 +192,13 @@ def __init__(self, name, value, **kwargs):
self.args = {'name': name, 'value': value, 'separator': self.separator}
self.args.update(kwargs)
def __eq__(self, other):
if not isinstance(other, NameValueModifier):
return False
return self.name == other.name and \
self.value == other.value and \
self.separator == other.separator
def update_args(self, **kwargs):
self.__dict__.update(kwargs)
self.args.update(kwargs)
@ -483,122 +497,86 @@ def shell_modifications(self, shell='sh'):
return cmds
@staticmethod
def from_sourcing_file(filename, *args, **kwargs):
"""Returns modifications that would be made by sourcing a file.
def from_sourcing_file(filename, *arguments, **kwargs):
"""Constructs an instance of a
:py:class:`spack.util.environment.EnvironmentModifications` object
that has the same effect as sourcing a file.
Parameters:
filename (str): The file to source
*args (list of str): Arguments to pass on the command line
Args:
filename (str): the file to be sourced
*arguments (list of str): arguments to pass on the command line
Keyword Arguments:
shell (str): The shell to use (default: ``bash``)
shell_options (str): Options passed to the shell (default: ``-c``)
source_command (str): The command to run (default: ``source``)
suppress_output (str): Redirect used to suppress output of command
Keyword Args:
shell (str): the shell to use (default: ``bash``)
shell_options (str): options passed to the shell (default: ``-c``)
source_command (str): the command to run (default: ``source``)
suppress_output (str): redirect used to suppress output of command
(default: ``&> /dev/null``)
concatenate_on_success (str): Operator used to execute a command
concatenate_on_success (str): operator used to execute a command
only when the previous command succeeds (default: ``&&``)
blacklist ([str or re]): Ignore any modifications of these
blacklist ([str or re]): ignore any modifications of these
variables (default: [])
whitelist ([str or re]): Always respect modifications of these
variables (default: []). Has precedence over blacklist.
clean (bool): In addition to removing empty entries,
whitelist ([str or re]): always respect modifications of these
variables (default: []). has precedence over blacklist.
clean (bool): in addition to removing empty entries,
also remove duplicate entries (default: False).
Returns:
EnvironmentModifications: an object that, if executed, has
the same effect on the environment as sourcing the file
"""
# Check if the file actually exists
if not os.path.isfile(filename):
msg = 'Trying to source non-existing file: {0}'.format(filename)
raise RuntimeError(msg)
# Kwargs parsing and default values
shell = kwargs.get('shell', '/bin/bash')
shell_options = kwargs.get('shell_options', '-c')
source_command = kwargs.get('source_command', 'source')
suppress_output = kwargs.get('suppress_output', '&> /dev/null')
concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
blacklist = kwargs.get('blacklist', [])
whitelist = kwargs.get('whitelist', [])
clean = kwargs.get('clean', False)
source_file = [source_command, filename]
source_file.extend(args)
source_file = ' '.join(source_file)
dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
dump_environment = 'python -c "{0}"'.format(dump_cmd)
# Construct the command that will be executed
command = [
shell,
shell_options,
' '.join([
source_file, suppress_output,
concatenate_on_success, dump_environment,
]),
]
# Try to source the file
proc = subprocess.Popen(
command, stdout=subprocess.PIPE, env=os.environ)
proc.wait()
if proc.returncode != 0:
msg = 'Sourcing file {0} returned a non-zero exit code'.format(
filename)
raise RuntimeError(msg)
output = ''.join([line.decode('utf-8') for line in proc.stdout])
# Construct dictionaries of the environment before and after
# sourcing the file, so that we can diff them.
env_before = dict(os.environ)
env_after = json.loads(output)
# If we're in python2, convert to str objects instead of unicode
# like json gives us. We can't put unicode in os.environ anyway.
if sys.version_info[0] < 3:
env_after = dict((k.encode('utf-8'), v.encode('utf-8'))
for k, v in env_after.items())
# Prepare a whitelist and a blacklist of environment variable names
blacklist = kwargs.get('blacklist', [])
whitelist = kwargs.get('whitelist', [])
clean = kwargs.get('clean', False)
# Other variables unrelated to sourcing a file
blacklist.extend(['SHLVL', '_', 'PWD', 'OLDPWD', 'PS2'])
blacklist.extend([
# Bash internals
'SHLVL', '_', 'PWD', 'OLDPWD', 'PS1', 'PS2', 'ENV',
# Environment modules v4
'LOADEDMODULES', '_LMFILES_', 'BASH_FUNC_module()', 'MODULEPATH',
'MODULES_(.*)', r'(\w*)_mod(quar|share)'
])
def set_intersection(fullset, *args):
# A set intersection using string literals and regexs
meta = '[' + re.escape('[$()*?[]^{|}') + ']'
subset = fullset & set(args) # As literal
for name in args:
if re.search(meta, name):
pattern = re.compile(name)
for k in fullset:
if re.match(pattern, k):
subset.add(k)
return subset
# Compute the environments before and after sourcing
before = sanitize(
dict(os.environ), blacklist=blacklist, whitelist=whitelist
)
file_and_args = (filename,) + arguments
after = sanitize(
environment_after_sourcing_files(file_and_args, **kwargs),
blacklist=blacklist, whitelist=whitelist
)
for d in env_after, env_before:
# Retain (whitelist) has priority over prune (blacklist)
prune = set_intersection(set(d), *blacklist)
prune -= set_intersection(prune, *whitelist)
for k in prune:
d.pop(k, None)
# Delegate to the other factory
return EnvironmentModifications.from_environment_diff(
before, after, clean
)
@staticmethod
def from_environment_diff(before, after, clean=False):
"""Constructs an instance of a
:py:class:`spack.util.environment.EnvironmentModifications` object
from the diff of two dictionaries.
Args:
before (dict): environment before the modifications are applied
after (dict): environment after the modifications are applied
clean (bool): in addition to removing empty entries, also remove
duplicate entries
"""
# Fill the EnvironmentModifications instance
env = EnvironmentModifications()
# New variables
new_variables = list(set(env_after) - set(env_before))
new_variables = list(set(after) - set(before))
# Variables that have been unset
unset_variables = list(set(env_before) - set(env_after))
unset_variables = list(set(before) - set(after))
# Variables that have been modified
common_variables = set(env_before).intersection(set(env_after))
common_variables = set(before).intersection(set(after))
modified_variables = [x for x in common_variables
if env_before[x] != env_after[x]]
if before[x] != after[x]]
# Consistent output order - looks nicer, easier comparison...
new_variables.sort()
unset_variables.sort()
@ -616,21 +594,21 @@ def return_separator_if_any(*args):
# Assume that variables with 'PATH' in the name or that contain
# separators like ':' or ';' are more likely to be paths
for x in new_variables:
sep = return_separator_if_any(env_after[x])
sep = return_separator_if_any(after[x])
if sep:
env.prepend_path(x, env_after[x], separator=sep)
env.prepend_path(x, after[x], separator=sep)
elif 'PATH' in x:
env.prepend_path(x, env_after[x])
env.prepend_path(x, after[x])
else:
# We just need to set the variable to the new value
env.set(x, env_after[x])
env.set(x, after[x])
for x in unset_variables:
env.unset(x)
for x in modified_variables:
before = env_before[x]
after = env_after[x]
before = before[x]
after = after[x]
sep = return_separator_if_any(before, after)
if sep:
before_list = before.split(sep)
@ -844,3 +822,108 @@ def preserve_environment(*variables):
msg += ' {0} was set to "{1}", will be unset'
tty.debug(msg.format(var, os.environ[var]))
del os.environ[var]
def environment_after_sourcing_files(*files, **kwargs):
"""Returns a dictionary with the environment that one would have
after sourcing the files passed as argument.
Args:
*files: each item can either be a string containing the path
of the file to be sourced or a sequence, where the first element
is the file to be sourced and the remaining are arguments to be
passed to the command line
Keyword Args:
env (dict): the initial environment (default: current environment)
shell (str): the shell to use (default: ``/bin/bash``)
shell_options (str): options passed to the shell (default: ``-c``)
source_command (str): the command to run (default: ``source``)
suppress_output (str): redirect used to suppress output of command
(default: ``&> /dev/null``)
concatenate_on_success (str): operator used to execute a command
only when the previous command succeeds (default: ``&&``)
"""
# Set the shell executable that will be used to source files
shell_cmd = kwargs.get('shell', '/bin/bash')
shell_options = kwargs.get('shell_options', '-c')
source_command = kwargs.get('source_command', 'source')
suppress_output = kwargs.get('suppress_output', '&> /dev/null')
concatenate_on_success = kwargs.get('concatenate_on_success', '&&')
shell = executable.Executable(' '.join([shell_cmd, shell_options]))
def _source_single_file(file_and_args, environment):
source_file = [source_command]
source_file.extend(x for x in file_and_args)
source_file = ' '.join(source_file)
dump_cmd = 'import os, json; print(json.dumps(dict(os.environ)))'
dump_environment = 'python -c "{0}"'.format(dump_cmd)
# Try to source the file
source_file_arguments = ' '.join([
source_file, suppress_output,
concatenate_on_success, dump_environment,
])
output = shell(source_file_arguments, output=str, env=environment)
environment = json.loads(output)
# If we're in python2, convert to str objects instead of unicode
# like json gives us. We can't put unicode in os.environ anyway.
if sys.version_info[0] < 3:
environment = dict(
(k.encode('utf-8'), v.encode('utf-8'))
for k, v in environment.items()
)
return environment
current_environment = kwargs.get('env', dict(os.environ))
for f in files:
# Normalize the input to the helper function
if isinstance(f, six.string_types):
f = [f]
current_environment = _source_single_file(
f, environment=current_environment
)
return current_environment
def sanitize(environment, blacklist, whitelist):
"""Returns a copy of the input dictionary where all the keys that
match a blacklist pattern and don't match a whitelist pattern are
removed.
Args:
environment (dict): input dictionary
blacklist (list of str): literals or regex patterns to be
blacklisted
whitelist (list of str): literals or regex patterns to be
whitelisted
"""
def set_intersection(fullset, *args):
# A set intersection using string literals and regexs
meta = '[' + re.escape('[$()*?[]^{|}') + ']'
subset = fullset & set(args) # As literal
for name in args:
if re.search(meta, name):
pattern = re.compile(name)
for k in fullset:
if re.match(pattern, k):
subset.add(k)
return subset
# Don't modify input, make a copy instead
environment = dict(environment)
# Retain (whitelist) has priority over prune (blacklist)
prune = set_intersection(set(environment), *blacklist)
prune -= set_intersection(prune, *whitelist)
for k in prune:
environment.pop(k, None)
return environment