targets: first pass at target detection for linux

Add llnl.util.cpu_name, with initial support for detecting different
microarchitectures on Linux.  This also adds preliminary changes for
compiler support and variants to control the optimization levels by
target.

This does not yet include translations of targets to particular
compilers; that is left to another PR.

Co-authored-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
This commit is contained in:
Gregory Becker 2017-02-09 14:48:55 -08:00 committed by Todd Gamblin
parent a940ff34d7
commit dfabf5d6b1
12 changed files with 349 additions and 16 deletions

4
lib/spack/env/cc vendored
View File

@ -32,6 +32,7 @@ parameters=(
SPACK_CXX_RPATH_ARG
SPACK_F77_RPATH_ARG
SPACK_FC_RPATH_ARG
SPACK_TARGET_ARGS
SPACK_SHORT_SPEC
SPACK_SYSTEM_DIRS
)
@ -78,7 +79,7 @@ function system_dir {
}
for param in "${parameters[@]}"; do
if [[ -z ${!param} ]]; then
if [[ -z ${!param+x} ]]; then
die "Spack compiler must be run from Spack! Input '$param' is missing."
fi
done
@ -373,6 +374,7 @@ case "$mode" in
CXX)
flags=("${flags[@]}" "${SPACK_CXXFLAGS[@]}") ;;
esac
args=(${SPACK_TARGET_ARGS[@]} "${args[@]}")
;;
esac

View File

@ -0,0 +1,226 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import platform
import re
import subprocess
import sys
# Intel microarchitecture tables.
#
# Each entry is a tuple of (name, flags added[, flags removed]); when the
# third element is omitted the removed-flags list defaults to [].
# get_intel_cpu_name walks _intel_32 and then _intel_64 in order and
# accumulates the flags, so every entry only lists what changes relative
# to the entries before it.
_intel_32 = [
    ('i686', []),
    ('pentium2', ['mmx']),
    ('pentium3', ['sse']),
    ('pentium4', ['sse2']),
    ('prescott', ['sse3']),
]
# Flags that aren't shown through sysctl are left out of the lists below
# and are only mentioned in the trailing comments.
_intel_64 = [  # commenting out the ones that aren't shown through sysctl
    ('nocona', ['mmx', 'sse', 'sse2', 'sse3']),  # lm
    # core2 is the only entry that also removes a flag ('sse3').
    ('core2', ['ssse3'], ['sse3']),
    ('nehalem', ['sse4_1', 'sse4_2', 'popcnt']),
    ('westmere', ['aes', 'pclmulqdq']),
    ('sandybridge', ['avx']),
    ('ivybridge', ['rdrand', 'f16c']),  # fsgsbase (is it RDWRFSGS on darwin?)
    ('haswell', ['movbe', 'fma', 'avx2', 'bmi1', 'bmi2']),
    ('broadwell', ['rdseed', 'adx']),
    ('skylake', ['xsavec', 'xsaves'])
]
# We will need to build on these and combine with names when intel releases
# further avx512 processors.
# _intel_avx12 = ['avx512f', 'avx512cd']
# AMD microarchitecture tables, one list per CPU family.
#
# Each entry is a tuple of (name, flags added).  get_amd_cpu_name walks a
# single family's list in order and accumulates the flags, so within a
# family every entry only lists what it adds to the entries before it.
_amd_10_names = [
    ('barcelona', ['mmx', 'sse', 'sse2', 'sse3', 'sse4a', 'abm'])
]
_amd_14_names = [
    ('btver1', ['mmx', 'sse', 'sse2', 'sse3', 'ssse3', 'sse4a', 'cx16',
                'abm']),  # lm
]
_amd_15_names = [
    ('bdver1', ['avx', 'aes', 'pclmulqdq', 'cx16', 'sse', 'sse2', 'sse3',
                'ssse3', 'sse4a', 'sse4_1', 'sse4_2', 'abm']),  # xop, lwp
    ('bdver2', ['bmi1', 'f16c', 'fma',]),  # tba?
    ('bdver3', ['fsgsbase']),
    ('bdver4', ['bmi2', 'movbe', 'avx2'])
]
_amd_16_names = [
    ('btver2', ['mmx', 'sse', 'sse2', 'sse3', 'ssse3', 'sse4a', 'cx16',
                'abm', 'movbe', 'f16c', 'bmi1', 'avx', 'pclmulqdq',
                'aes', 'sse4_1', 'sse4_2']),  # lm
]
# NOTE(review): the Intel broadwell entry spells its flag 'adx' while this
# list uses 'adcx' -- confirm which spelling the flag sources actually
# report, otherwise znver1 can never match.
_amd_17_names = [
    ('znver1', ['bmi1', 'bmi2', 'f16c', 'fma', 'fsgsbase', 'avx', 'avx2',
                'rdseed', 'mwaitx', 'clzero', 'aes', 'pclmulqdq', 'cx16',
                'movbe', 'mmx', 'sse', 'sse2', 'sse3', 'ssse3', 'sse4a',
                'sse4_1', 'sse4_2', 'abm', 'xsavec', 'xsaves',
                'clflushopt', 'popcnt', 'adcx'])
]
# Maps the integer CPU family number (the 'cpu family' field converted with
# int() in get_amd_cpu_name) to that family's table.
_amd_numbers = {
    0x10: _amd_10_names,
    0x14: _amd_14_names,
    0x15: _amd_15_names,
    0x16: _amd_16_names,
    0x17: _amd_17_names
}
def supported_target_names():
    """Return the set of target names known to this module.

    The result is the union of every name in the two Intel tables, every
    name in the per-family AMD tables, and 'power7' through 'power9'.
    """
    names = set('power' + str(generation) for generation in range(7, 10))
    for table in (_intel_64, _intel_32):
        names.update(entry[0] for entry in table)
    for table in _amd_numbers.values():
        names.update(entry[0] for entry in table)
    return names
def create_dict_from_cpuinfo():
    """Parse /proc/cpuinfo into a dictionary.

    Every non-blank line is split at its first ':'.  The stripped text
    before the colon becomes the key and the stripped text after it the
    value.  A key that occurs more than once keeps the value seen last.

    Returns:
        A dict of str to str, or None if the file cannot be read.
    """
    entries = {}
    try:
        with open('/proc/cpuinfo') as cpuinfo_file:
            for raw_line in cpuinfo_file:
                if not raw_line.strip():
                    continue
                field, _, content = raw_line.partition(':')
                entries[field.strip()] = content.strip()
    except IOError:
        return None
    return entries
def check_output(args):
    """Run a command and return what it wrote to stdout, as text.

    Args:
        args: the command and its arguments, as a list of strings.

    Returns:
        The command's standard output as a native ``str``.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
        OSError: if the command cannot be started.
    """
    # subprocess.check_output is available on Python 2.7 and on Python 3,
    # so no version switch is needed (the former Python 3 branch referred
    # to an undefined name, PIPE).  universal_newlines=True makes Python 3
    # return str instead of bytes, which is what callers concatenate with.
    return subprocess.check_output(  # nopyqver
        args, universal_newlines=True)
def _sysctl_value(name):
    """Return the stripped text value of the sysctl entry ``name``."""
    raw = check_output(['sysctl', '-n', name])
    if not isinstance(raw, str):
        # Python 3 subprocess output may be bytes; the callers below need
        # text so they can concatenate and search it.
        raw = raw.decode('utf-8')
    return raw.strip()


def create_dict_from_sysctl():
    """Collect CPU information from ``sysctl`` into a cpuinfo-style dict.

    The keys mirror the /proc/cpuinfo names used elsewhere in this module:
    'vendor_id', 'flags' (lower-cased, space separated), 'model' and
    'model name'.

    This is a best-effort probe: if any sysctl query fails, whatever was
    gathered up to that point is returned (possibly an empty dict).

    Returns:
        A dict of str to str.
    """
    cpuinfo = {}
    try:
        cpuinfo['vendor_id'] = _sysctl_value('machdep.cpu.vendor')
        cpuinfo['flags'] = _sysctl_value('machdep.cpu.features').lower()
        cpuinfo['flags'] += ' ' + _sysctl_value(
            'machdep.cpu.leaf7_features').lower()
        cpuinfo['model'] = _sysctl_value('machdep.cpu.model')
        cpuinfo['model name'] = _sysctl_value('machdep.cpu.brand_string')

        # Super hacky way to deal with slight representation differences
        # Would be better to somehow consider these "identical"
        if 'sse4.1' in cpuinfo['flags']:
            cpuinfo['flags'] += ' sse4_1'
        if 'sse4.2' in cpuinfo['flags']:
            cpuinfo['flags'] += ' sse4_2'
        if 'avx1.0' in cpuinfo['flags']:
            cpuinfo['flags'] += ' avx'
    except Exception:
        # Deliberately broad so that a missing or failing sysctl never
        # aborts target detection, but no longer a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass
    return cpuinfo
def get_cpu_name():
    """Return the detected CPU name for the current machine.

    Falls back to ``platform.machine()`` when get_cpu_name_helper returns
    an empty result for ``platform.system()``.
    """
    detected = get_cpu_name_helper(platform.system())
    if detected:
        return detected
    return platform.machine()
def get_cpu_name_helper(system):
    """Return the CPU name for ``system``, or '' if it cannot be determined.

    Args:
        system: platform name; 'Linux' reads /proc/cpuinfo and 'Darwin'
            queries sysctl.  Any other value yields ''.

    Returns:
        The name chosen by the vendor-specific helper, or '' when no CPU
        information is available, the vendor is not recognised, or the
        fields that helper needs are missing.
    """
    # TODO: Elsewhere create dict of codenames (targets) and flag sets.
    if system == 'Linux':
        info = create_dict_from_cpuinfo()
    elif system == 'Darwin':
        info = create_dict_from_sysctl()
    else:
        info = {}

    if not info:
        return ''

    vendor = info.get('vendor_id')
    if vendor == 'GenuineIntel':
        # Without both fields the microarchitecture cannot be determined.
        if 'model name' in info and 'flags' in info:
            return get_intel_cpu_name(info)
        return ''

    if vendor == 'AuthenticAMD':
        # Without both fields the microarchitecture cannot be determined.
        if 'cpu family' in info and 'flags' in info:
            return get_amd_cpu_name(info)
        return ''

    if 'POWER' in info.get('cpu', ''):
        return get_ibm_cpu_name(info['cpu'])

    return ''
def get_ibm_cpu_name(cpu):
    """Translate a POWER 'cpu' description into a target name.

    Args:
        cpu: the value of the 'cpu' field, e.g. a string containing
            'POWER8'.

    Returns:
        'power<N>' for the first 'POWER<N>' found in ``cpu``, with 'le'
        appended when ``platform.machine()`` contains 'le'; '' if ``cpu``
        contains no 'POWER<N>'.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence,
    # which newer Python versions warn about at compile time.
    power_match = re.search(r'POWER(\d+)', cpu)
    if not power_match:
        return ''

    name = 'power' + power_match.group(1)
    if 'le' in platform.machine():
        name += 'le'
    return name
def get_intel_cpu_name(cpuinfo):
    """Pick the Intel target name that matches ``cpuinfo``.

    Args:
        cpuinfo: dict with at least 'model name'; 'flags' (a space
            separated string) is read unless the model name already
            decides the answer.

    Returns:
        'atom', 'quark' or 'knl' when the model name identifies one of
        those; otherwise the last entry of the Intel tables whose
        accumulated flags are all present in ``cpuinfo['flags']``.
    """
    brand = cpuinfo['model name']
    if 'Atom' in brand:
        return 'atom'
    if 'Quark' in brand:
        return 'quark'
    if 'Xeon' in brand and 'Phi' in brand:
        # This is hacky and needs to be extended for newer avx512 chips
        return 'knl'

    available = cpuinfo['flags'].split()
    required = []
    best_match = ''
    for table in (_intel_32, _intel_64):
        for entry in table:
            name, added = entry[0], entry[1]
            # The third element (flags removed) is optional.
            removed = entry[2] if len(entry) > 2 else []
            required = [flag for flag in required if flag not in removed]
            required.extend(added)
            # No early exit: an entry that removes flags may match even
            # though an earlier entry did not.
            if all(flag in available for flag in required):
                best_match = name
    return best_match
def get_amd_cpu_name(cpuinfo):
    """Pick the AMD target name that matches ``cpuinfo``.

    Args:
        cpuinfo: dict with 'flags' (a space separated string) and
            'cpu family' (the family number as decimal text).

    Returns:
        The last entry of the family's table whose accumulated flags are
        all present in ``cpuinfo['flags']``, stopping at the first entry
        that does not match.  Returns '' when nothing matches, when the
        family has no table, or when 'cpu family' is not a number.
    """
    # TODO: Learn what the "canonical" granularity of naming
    # is for AMD processors, implement dict as for intel.
    available = set(cpuinfo['flags'].split())
    try:
        family = int(cpuinfo['cpu family'])
    except ValueError:
        # A malformed field means the name cannot be determined; '' lets
        # the caller fall back instead of crashing.
        return ''

    best_match = ''
    required = []
    # Families without a table (older or newer than the ones listed) yield
    # '' rather than raising KeyError.
    for proc, flags_added in _amd_numbers.get(family, ()):
        required.extend(flags_added)
        if not all(flag in available for flag in required):
            break
        best_match = proc
    return best_match
"""IDEA: In build_environment.setup_compiler_environment, include a
call to compiler.tuning_flags(spec.architecture.target). For gcc this
would return "-march=%s" % str(spec.architecture.target). We only call
this if the target is a valid tuning target (i.e. not
platform.machine(), but a more specific target we successfully
discovered).
Then set
SPACK_TUNING_FLAGS=compiler.tuning_flags(spec.architecture.target)
This way the compiler wrapper can just add $SPACK_TUNING_FLAGS to the
eventual command."""

View File

@ -60,6 +60,7 @@
import llnl.util.tty as tty
from llnl.util.lang import memoized, list_modules, key_ordering
from llnl.util.cpu_name import get_cpu_name
import spack.compiler
import spack.paths
@ -226,7 +227,7 @@ def __repr__(self):
return self.__str__()
def _cmp_key(self):
return self.name, self.version
return (self.name, self.version)
def to_dict(self):
return {

View File

@ -168,6 +168,7 @@ def clean_environment():
def set_compiler_environment_variables(pkg, env):
assert pkg.spec.concrete
compiler = pkg.compiler
spec = pkg.spec
# Set compiler variables used by CMake and autotools
assert all(key in compiler.link_paths for key in (
@ -199,6 +200,24 @@ def set_compiler_environment_variables(pkg, env):
env.set('SPACK_F77_RPATH_ARG', compiler.f77_rpath_arg)
env.set('SPACK_FC_RPATH_ARG', compiler.fc_rpath_arg)
# Set the tuning parameters that the compiler will add
isa_target = compiler.isa_name_for_target(spec.architecture.target)
if spec.variants['tuning'].value == 'generic':
tuning_target = 'generic'
else:
tuning_target = compiler.tuning_name_for_target(
spec.architecture.target
)
if compiler.isa_flag and isa_target:
isa_arg = '{0}={1}'.format(compiler.isa_flag, isa_target)
else:
isa_arg = ''
if compiler.tuning_flag and tuning_target:
tuning_arg = '{0}={1}'.format(compiler.tuning_flag, tuning_target)
else:
tuning_arg = ''
env.set('SPACK_TARGET_ARGS', '{0} {1}'.format(isa_arg, tuning_arg))
# Trap spack-tracked compiler flags as appropriate.
# env_flags are easy to accidentally override.
inject_flags = {}
@ -217,7 +236,7 @@ def set_compiler_environment_variables(pkg, env):
handler = pkg.flag_handler.__func__
else:
handler = pkg.flag_handler.im_func
injf, envf, bsf = handler(pkg, flag, pkg.spec.compiler_flags[flag])
injf, envf, bsf = handler(pkg, flag, spec.compiler_flags[flag])
inject_flags[flag] = injf or []
env_flags[flag] = envf or []
build_system_flags[flag] = bsf or []
@ -234,7 +253,7 @@ def set_compiler_environment_variables(pkg, env):
env.set(flag.upper(), ' '.join(f for f in env_flags[flag]))
pkg.flags_to_build_system_args(build_system_flags)
env.set('SPACK_COMPILER_SPEC', str(pkg.spec.compiler))
env.set('SPACK_COMPILER_SPEC', str(spec.compiler))
env.set('SPACK_SYSTEM_DIRS', ':'.join(system_dirs))

View File

@ -132,7 +132,8 @@ def parse_specs(args, **kwargs):
tests = kwargs.get('tests', False)
try:
specs = spack.spec.parse(args)
sargs = args if isinstance(args, basestring) else ' '.join(args)
specs = spack.spec.parse(sargs)
for spec in specs:
if concretize:
spec.concretize(tests=tests) # implies normalize

View File

@ -221,6 +221,15 @@ def f77_rpath_arg(self):
@property
def fc_rpath_arg(self):
return '-Wl,-rpath,'
@property
def isa_flag(self):
    """Flag name paired with the ISA target name in SPACK_TARGET_ARGS.

    The build environment formats it as ``<flag>=<name>``.
    """
    return '-march'
@property
def tuning_flag(self):
    """Flag name paired with the tuning target name in SPACK_TARGET_ARGS.

    The build environment formats it as ``<flag>=<name>``.
    """
    return '-mtune'
# Cray PrgEnv name that can be used to load this compiler
PrgEnv = None
# Name of module used to switch versions of this compiler
@ -419,6 +428,12 @@ def f77_version(cls, f77):
def fc_version(cls, fc):
return cls.default_version(fc)
def isa_name_for_target(self, target):
    """Return the name to use with ``isa_flag`` for ``target``.

    This implementation simply returns ``str(target)``.
    """
    return str(target)
def tuning_name_for_target(self, target):
    """Return the name to use with ``tuning_flag`` for ``target``.

    This implementation simply returns ``str(target)``.
    """
    return str(target)
@classmethod
def search_regexps(cls, language):
# Compile all the regular expressions used for files beforehand.

View File

@ -16,6 +16,7 @@
import llnl.util.lang
import llnl.util.filesystem as fs
import llnl.util.tty as tty
from llnl.util.cpu_name import get_cpu_name
import spack.paths
import spack.error
@ -646,7 +647,7 @@ def _default(cmp_id, paths):
spec = spack.spec.CompilerSpec(compiler_cls.name, version)
paths = [paths.get(l, None) for l in ('cc', 'cxx', 'f77', 'fc')]
compiler = compiler_cls(
spec, operating_system, py_platform.machine(), paths
spec, operating_system, get_cpu_name(), paths
)
return [compiler]

View File

@ -61,6 +61,10 @@
from spack.util.package_hash import package_hash
from spack.version import Version
from spack.package_prefs import get_package_dir_permissions, get_package_group
from spack.directives import variant
"""Allowed URL schemes for spack packages."""
_ALLOWED_URL_SCHEMES = ["http", "https", "ftp", "file", "git"]
# Filename for the Spack build/install log.
@ -504,6 +508,10 @@ class PackageBase(with_metaclass(PackageMeta, PackageViewMixin, object)):
metadata_attrs = ['homepage', 'url', 'list_url', 'extendable', 'parallel',
'make_jobs']
# Add the universal variant "tuning" with values generic | specific
variant('tuning', values=('generic', 'specific'), default='generic',
description='Set compiler tuning generic or to target')
def __init__(self, spec):
# this determines how the package should be built.
self.spec = spec

View File

@ -4,19 +4,44 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import platform
from llnl.util.cpu_name import get_cpu_name
from spack.architecture import Platform, Target
from spack.operating_systems.mac_os import MacOs
class Darwin(Platform):
priority = 89
front_end = 'x86_64'
back_end = 'x86_64'
default = 'x86_64'
def __init__(self):
super(Darwin, self).__init__('darwin')
self.add_target(self.default, Target(self.default))
# TODO: These are probably overkill
# Add Intel architectures
self.add_target('haswell', Target('haswell'))
self.add_target('broadwell', Target('broadwell'))
self.add_target('ivybridge', Target('ivybridge'))
self.add_target('sandybridge', Target('sandybridge'))
self.add_target('core2', Target('core2'))
# Add "basic" architectures
self.add_target('x86_64', Target('x86_64'))
self.add_target('ppc64le', Target('ppc64le'))
self.add_target('ppc64', Target('ppc64'))
# Add IBM architectures
self.add_target('power7', Target('power7'))
self.add_target('power8', Target('power8'))
self.add_target('power8le', Target('power8le'))
self.add_target('power9', Target('power9'))
self.add_target('power9le', Target('power9le'))
self.default = get_cpu_name()
self.front_end = self.default
self.back_end = self.default
if self.default not in self.targets:
self.add_target(self.default, Target(self.default))
mac_os = MacOs()
self.default_os = str(mac_os)

View File

@ -6,19 +6,45 @@
import platform
from spack.architecture import Platform, Target
from spack.operating_systems.linux_distro import LinuxDistro
from llnl.util.cpu_name import get_cpu_name
class Linux(Platform):
priority = 90
def __init__(self):
super(Linux, self).__init__('linux')
# Add "basic" architectures
self.add_target('x86_64', Target('x86_64'))
self.add_target('ppc64le', Target('ppc64le'))
self.add_target('ppc64', Target('ppc64'))
self.default = platform.machine()
self.front_end = platform.machine()
self.back_end = platform.machine()
# Add Intel architectures
self.add_target('haswell', Target('haswell'))
self.add_target('broadwell', Target('broadwell'))
self.add_target('ivybridge', Target('ivybridge'))
self.add_target('sandybridge', Target('sandybridge'))
self.add_target('knl', Target('knl'))
# Add IBM architectures
self.add_target('power7', Target('power7'))
self.add_target('power8', Target('power8'))
self.add_target('power8le', Target('power8le'))
self.add_target('power9', Target('power9'))
self.add_target('power9le', Target('power9le'))
# Eternal TODO: Add more architectures as needed.
# Get specific default
self.default = get_cpu_name()
self.front_end = self.default
self.back_end = self.default
if not self.default:
# Fall back on more general name.
# This will likely fall in "basic" architectures list
self.default = platform.machine()
self.front_end = self.default
self.back_end = self.default
if self.default not in self.targets:
self.add_target(self.default, Target(self.default))

View File

@ -42,14 +42,22 @@ def build_environment(working_env):
os.environ['SPACK_CXX_RPATH_ARG'] = "-Wl,-rpath,"
os.environ['SPACK_F77_RPATH_ARG'] = "-Wl,-rpath,"
os.environ['SPACK_FC_RPATH_ARG'] = "-Wl,-rpath,"
os.environ['SPACK_SYSTEM_DIRS'] = '/usr/include /usr/lib'
os.environ['SPACK_TARGET_ARGS'] = ''
if 'SPACK_DEPENDENCIES' in os.environ:
del os.environ['SPACK_DEPENDENCIES']
yield {'cc': cc, 'cxx': cxx, 'fc': fc}
for name in ('SPACK_CC', 'SPACK_CXX', 'SPACK_FC', 'SPACK_PREFIX',
'SPACK_ENV_PATH', 'SPACK_DEBUG_LOG_DIR',
'SPACK_COMPILER_SPEC', 'SPACK_SHORT_SPEC',
'SPACK_CC_RPATH_ARG', 'SPACK_CXX_RPATH_ARG',
'SPACK_F77_RPATH_ARG', 'SPACK_FC_RPATH_ARG',
'SPACK_TARGET_ARGS'):
del os.environ[name]
def test_static_to_shared_library(build_environment):
os.environ['SPACK_TEST_COMMAND'] = 'dump-args'

View File

@ -102,7 +102,8 @@ def wrapper_environment():
SPACK_FC_RPATH_ARG='-Wl,-rpath,',
SPACK_LINK_DIRS=None,
SPACK_INCLUDE_DIRS=None,
SPACK_RPATH_DIRS=None):
SPACK_RPATH_DIRS=None,
SPACK_TARGET_ARGS=''):
yield