random123: Add support for HIP/rocm. (#46284)
This commit is contained in:
parent
0b575f60a5
commit
d5b8b0600a
@ -16,6 +16,8 @@ class Random123(Package):
|
|||||||
homepage = "https://www.deshawresearch.com/resources_random123.html"
|
homepage = "https://www.deshawresearch.com/resources_random123.html"
|
||||||
url = "https://github.com/DEShawResearch/random123/archive/refs/tags/v1.14.0.tar.gz"
|
url = "https://github.com/DEShawResearch/random123/archive/refs/tags/v1.14.0.tar.gz"
|
||||||
|
|
||||||
|
maintainers("KineticTheory")
|
||||||
|
|
||||||
version("1.14.0", sha256="effafd8656b18030b2a5b995cd3650c51a7c45052e6e1c21e48b9fa7a59d926e")
|
version("1.14.0", sha256="effafd8656b18030b2a5b995cd3650c51a7c45052e6e1c21e48b9fa7a59d926e")
|
||||||
version(
|
version(
|
||||||
"1.13.2",
|
"1.13.2",
|
||||||
@ -39,6 +41,7 @@ class Random123(Package):
|
|||||||
patch("ibmxl.patch", when="@1.09")
|
patch("ibmxl.patch", when="@1.09")
|
||||||
patch("arm-gcc.patch", when="@1.09")
|
patch("arm-gcc.patch", when="@1.09")
|
||||||
patch("v1132-xl161.patch", when="@1.13.2")
|
patch("v1132-xl161.patch", when="@1.13.2")
|
||||||
|
patch("v1140-hip.patch", when="@1.14.0")
|
||||||
|
|
||||||
def install(self, spec, prefix):
|
def install(self, spec, prefix):
|
||||||
# Random123 doesn't have a build system.
|
# Random123 doesn't have a build system.
|
||||||
|
296
var/spack/repos/builtin/packages/random123/v1140-hip.patch
Normal file
296
var/spack/repos/builtin/packages/random123/v1140-hip.patch
Normal file
@ -0,0 +1,296 @@
|
|||||||
|
warning: refname 'v1.14.0' is ambiguous.
|
||||||
|
diff --git a/include/Random123/array.h b/include/Random123/array.h
|
||||||
|
index 8076f23..06650ec 100644
|
||||||
|
--- a/include/Random123/array.h
|
||||||
|
+++ b/include/Random123/array.h
|
||||||
|
@@ -81,7 +81,7 @@ inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
|
||||||
|
|
||||||
|
/** @endcond */
|
||||||
|
|
||||||
|
-#ifdef __CUDA_ARCH__
|
||||||
|
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
|
||||||
|
/* CUDA can't handle std::reverse_iterator. We *could* implement it
|
||||||
|
ourselves, but let's not bother until somebody really feels a need
|
||||||
|
to reverse-iterate through an r123array */
|
||||||
|
@@ -114,8 +114,8 @@ inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
|
||||||
|
enum {static_size = _N}; \
|
||||||
|
R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
|
||||||
|
R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
|
||||||
|
- R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
|
||||||
|
- R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
|
||||||
|
+ R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) {R123_THROW(std::out_of_range("array index out of range"));}; return (*this)[i]; } \
|
||||||
|
+ R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) {R123_THROW(std::out_of_range("array index out of range"));}; return (*this)[i]; } \
|
||||||
|
R123_CUDA_DEVICE size_type size() const { return _N; } \
|
||||||
|
R123_CUDA_DEVICE size_type max_size() const { return _N; } \
|
||||||
|
R123_CUDA_DEVICE bool empty() const { return _N==0; }; \
|
||||||
|
diff --git a/include/Random123/boxmuller.hpp b/include/Random123/boxmuller.hpp
|
||||||
|
index 9c91cf8..16d91f9 100644
|
||||||
|
--- a/include/Random123/boxmuller.hpp
|
||||||
|
+++ b/include/Random123/boxmuller.hpp
|
||||||
|
@@ -49,7 +49,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1);
|
||||||
|
// r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1);
|
||||||
|
-//
|
||||||
|
+//
|
||||||
|
// float2 and double2 are identical to their synonymous global-
|
||||||
|
// namespace structures in CUDA.
|
||||||
|
//
|
||||||
|
@@ -68,7 +68,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
namespace r123{
|
||||||
|
|
||||||
|
-#if !defined(__CUDACC__)
|
||||||
|
+#if !(defined(__CUDACC__) || defined(__HIPCC__))
|
||||||
|
typedef struct { float x, y; } float2;
|
||||||
|
typedef struct { double x, y; } double2;
|
||||||
|
#else
|
||||||
|
diff --git a/include/Random123/features/compilerfeatures.h b/include/Random123/features/compilerfeatures.h
|
||||||
|
index 0606dee..9ad3f82 100644
|
||||||
|
--- a/include/Random123/features/compilerfeatures.h
|
||||||
|
+++ b/include/Random123/features/compilerfeatures.h
|
||||||
|
@@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
The Random123 library is portable across C, C++, CUDA, OpenCL environments,
|
||||||
|
and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris).
|
||||||
|
This level of portability requires the abstraction of some features
|
||||||
|
-and idioms that are either not standardized (e.g., asm statments), or for which
|
||||||
|
+and idioms that are either not standardized (e.g., asm statements), or for which
|
||||||
|
different vendors have their own standards (e.g., SSE intrinsics) or for
|
||||||
|
which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>).
|
||||||
|
|
||||||
|
@@ -55,7 +55,7 @@ Most of the symbols are boolean valued. In general, they will
|
||||||
|
Library users can override any value by defining the pp-symbol with a compiler option,
|
||||||
|
e.g.,
|
||||||
|
|
||||||
|
- cc -DR123_USE_MULHILO64_C99
|
||||||
|
+ cc -DR123_USE_MULHILO64_C99
|
||||||
|
|
||||||
|
will use a strictly c99 version of the full-width 64x64->128-bit multiplication
|
||||||
|
function, even if it would be disabled by default.
|
||||||
|
@@ -84,8 +84,8 @@ All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.
|
||||||
|
CXX11_EXPLICIT_CONVERSIONS
|
||||||
|
CXX11_LONG_LONG
|
||||||
|
CXX11_STD_ARRAY
|
||||||
|
- CXX11
|
||||||
|
-
|
||||||
|
+ CXX11
|
||||||
|
+
|
||||||
|
X86INTRIN_H
|
||||||
|
IA32INTRIN_H
|
||||||
|
XMMINTRIN_H
|
||||||
|
@@ -102,7 +102,7 @@ All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.
|
||||||
|
MULHILO64_C99
|
||||||
|
|
||||||
|
U01_DOUBLE
|
||||||
|
-
|
||||||
|
+
|
||||||
|
@endverbatim
|
||||||
|
Most have obvious meanings. Some non-obvious ones:
|
||||||
|
|
||||||
|
@@ -141,11 +141,11 @@ There are also non-boolean valued symbols:
|
||||||
|
<ul>
|
||||||
|
<li>R123_STATIC_INLINE -
|
||||||
|
According to both C99 and GNU99, the 'static inline' declaration allows
|
||||||
|
- the compiler to not emit code if the function is not used.
|
||||||
|
+ the compiler to not emit code if the function is not used.
|
||||||
|
Note that the semantics of 'inline', 'static' and 'extern' in
|
||||||
|
gcc have changed over time and are subject to modification by
|
||||||
|
command line options, e.g., -std=gnu89, -fgnu-inline.
|
||||||
|
- Nevertheless, it appears that the meaning of 'static inline'
|
||||||
|
+ Nevertheless, it appears that the meaning of 'static inline'
|
||||||
|
has not changed over time and (with a little luck) the use of 'static inline'
|
||||||
|
here will be portable between versions of gcc and to other C99
|
||||||
|
compilers.
|
||||||
|
@@ -157,7 +157,7 @@ There are also non-boolean valued symbols:
|
||||||
|
embellishments to strongly encourage that the declared function be
|
||||||
|
inlined. If there is no such compiler-specific magic, it should
|
||||||
|
expand to decl, unadorned.
|
||||||
|
-
|
||||||
|
+
|
||||||
|
<li>R123_CUDA_DEVICE - which expands to __device__ (or something else with
|
||||||
|
sufficiently similar semantics) when CUDA is in use, and expands
|
||||||
|
to nothing in other cases.
|
||||||
|
@@ -192,7 +192,7 @@ There are also non-boolean valued symbols:
|
||||||
|
\cond HIDDEN_FROM_DOXYGEN
|
||||||
|
*/
|
||||||
|
|
||||||
|
-/*
|
||||||
|
+/*
|
||||||
|
N.B. When something is added to the list of features, it should be
|
||||||
|
added to each of the *features.h files, AND to examples/ut_features.cpp.
|
||||||
|
*/
|
||||||
|
@@ -204,6 +204,8 @@ added to each of the *features.h files, AND to examples/ut_features.cpp.
|
||||||
|
#include "openclfeatures.h"
|
||||||
|
#elif defined(__CUDACC__)
|
||||||
|
#include "nvccfeatures.h"
|
||||||
|
+#elif defined(__HIPCC__)
|
||||||
|
+#include "hipfeatures.h"
|
||||||
|
#elif defined(__ICC)
|
||||||
|
#include "iccfeatures.h"
|
||||||
|
#elif defined(__xlC__) || defined(__ibmxl__)
|
||||||
|
@@ -292,7 +294,7 @@ added to each of the *features.h files, AND to examples/ut_features.cpp.
|
||||||
|
|
||||||
|
#ifndef R123_USE_64BIT
|
||||||
|
#define R123_USE_64BIT 1
|
||||||
|
-#endif
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#ifndef R123_USE_PHILOX_64BIT
|
||||||
|
#define R123_USE_PHILOX_64BIT (R123_USE_64BIT && (R123_USE_MULHILO64_ASM || R123_USE_MULHILO64_MSVC_INTRIN || R123_USE_MULHILO64_CUDA_INTRIN || R123_USE_GNU_UINT128 || R123_USE_MULHILO64_C99 || R123_USE_MULHILO64_OPENCL_INTRIN || R123_USE_MULHILO64_MULHI_INTRIN))
|
||||||
|
@@ -327,7 +329,7 @@ added to each of the *features.h files, AND to examples/ut_features.cpp.
|
||||||
|
#ifndef R123_METAL_CONSTANT_ADDRESS_SPACE
|
||||||
|
#define R123_METAL_CONSTANT_ADDRESS_SPACE
|
||||||
|
#endif
|
||||||
|
-
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Windows.h (and perhaps other "well-meaning" code define min and
|
||||||
|
* max, so there's a high chance that our definition of min, max
|
||||||
|
diff --git a/include/Random123/features/hipfeatures.h b/include/Random123/features/hipfeatures.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..f3ac0ed
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/Random123/features/hipfeatures.h
|
||||||
|
@@ -0,0 +1,129 @@
|
||||||
|
+/*
|
||||||
|
+Copyright 2010-2011, D. E. Shaw Research.
|
||||||
|
+All rights reserved.
|
||||||
|
+
|
||||||
|
+Redistribution and use in source and binary forms, with or without
|
||||||
|
+modification, are permitted provided that the following conditions are
|
||||||
|
+met:
|
||||||
|
+
|
||||||
|
+* Redistributions of source code must retain the above copyright
|
||||||
|
+ notice, this list of conditions, and the following disclaimer.
|
||||||
|
+
|
||||||
|
+* Redistributions in binary form must reproduce the above copyright
|
||||||
|
+ notice, this list of conditions, and the following disclaimer in the
|
||||||
|
+ documentation and/or other materials provided with the distribution.
|
||||||
|
+
|
||||||
|
+* Neither the name of D. E. Shaw Research nor the names of its
|
||||||
|
+ contributors may be used to endorse or promote products derived from
|
||||||
|
+ this software without specific prior written permission.
|
||||||
|
+
|
||||||
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
+*/
|
||||||
|
+#ifndef __r123_hip_features_dot_h__
|
||||||
|
+#define __r123_hip_features_dot_h__
|
||||||
|
+
|
||||||
|
+#if !(defined(CUDART_VERSION) || defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H))
|
||||||
|
+#error "why are we in hipfeatures.h if neither CUDART_VERSION NOR HIP_PLATFORM?"
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#if CUDART_VERSION < 4010 && !defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H)
|
||||||
|
+#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces. Random123 is unsupported. See comments in nvccfeatures.h"
|
||||||
|
+// This test was added in Random123-1.08 (August, 2013) because we
|
||||||
|
+// discovered that Ftype(maxTvalue<T>()) with Ftype=double and
|
||||||
|
+// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and
|
||||||
|
+// earlier. We can't be sure this bug doesn't also affect invocations
|
||||||
|
+// of other templated functions, e.g., essentially all of Random123.
|
||||||
|
+// Thus, we no longer trust CUDA versions earlier than 4.1 even though
|
||||||
|
+// we had previously tested and timed Random123 with CUDA 3.x and 4.0.
|
||||||
|
+// If you feel lucky or desperate, you can change #error to #warning, but
|
||||||
|
+// please take extra care to be sure that you are getting correct
|
||||||
|
+// results.
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+// nvcc and hipcc fall through to gcc or msvc. So first define
|
||||||
|
+// a couple of things and then include either gccfeatures.h
|
||||||
|
+// or msvcfeatures.h
|
||||||
|
+
|
||||||
|
+// Testing __CUDA_ARCH__ / __HIP_DEVICE_COMPILE__ allows Philox32 and Philox64 to be compiled
|
||||||
|
+//for both device and host functions in CUDA by setting compiler flags
|
||||||
|
+//for the device function
|
||||||
|
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
|
||||||
|
+#ifndef R123_CUDA_DEVICE
|
||||||
|
+#define R123_CUDA_DEVICE __host__ __device__
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_USE_MULHILO64_CUDA_INTRIN
|
||||||
|
+#define R123_USE_MULHILO64_CUDA_INTRIN 1
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_THROW
|
||||||
|
+// No exceptions in CUDA, at least up to 4.0
|
||||||
|
+#define R123_THROW(x) R123_ASSERT(0)
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_ASSERT
|
||||||
|
+# if defined(__CUDA_ARCH__)
|
||||||
|
+# define R123_ASSERT(x) if((x)); else asm("trap;")
|
||||||
|
+# elif defined(__HIP_DEVICE_COMPILE__)
|
||||||
|
+# define R123_ASSERT(x) if((x)); else asm("s_trap 2;")
|
||||||
|
+# endif
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#else // ! ( defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) )
|
||||||
|
+// If we're using nvcc or hipcc but not compiling device code,
|
||||||
|
+// then we must be compiling for the host. In that case,
|
||||||
|
+// tell the philox code to use the mulhilo64 asm because
|
||||||
|
+// nvcc doesn't grok uint128_t.
|
||||||
|
+#ifndef R123_USE_MULHILO64_ASM
|
||||||
|
+#define R123_USE_MULHILO64_ASM 1
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#endif // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
|
||||||
|
+
|
||||||
|
+#ifndef R123_BUILTIN_EXPECT
|
||||||
|
+#define R123_BUILTIN_EXPECT(expr,likely) expr
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_USE_AES_NI
|
||||||
|
+#define R123_USE_AES_NI 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_USE_SSE4_2
|
||||||
|
+#define R123_USE_SSE4_2 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_USE_SSE4_1
|
||||||
|
+#define R123_USE_SSE4_1 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_USE_SSE
|
||||||
|
+#define R123_USE_SSE 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_USE_GNU_UINT128
|
||||||
|
+#define R123_USE_GNU_UINT128 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef R123_ULONG_LONG
|
||||||
|
+// uint64_t, which is what we'd get without this, is
|
||||||
|
+// not the same as unsigned long long
|
||||||
|
+#define R123_ULONG_LONG unsigned long long
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#if defined(__GNUC__)
|
||||||
|
+#include "gccfeatures.h"
|
||||||
|
+#elif defined(_MSC_FULL_VER)
|
||||||
|
+#include "msvcfeatures.h"
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#endif
|
||||||
|
diff --git a/include/Random123/uniform.hpp b/include/Random123/uniform.hpp
|
||||||
|
index ee4ddfb..d40d0a4 100644
|
||||||
|
--- a/include/Random123/uniform.hpp
|
||||||
|
+++ b/include/Random123/uniform.hpp
|
||||||
|
@@ -125,7 +125,7 @@ R123_MK_SIGNED_UNSIGNED(__int128_t, __uint128_t);
|
||||||
|
#undef R123_MK_SIGNED_UNSIGNED
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-#if defined(__CUDACC__) || defined(_LIBCPP_HAS_NO_CONSTEXPR)
|
||||||
|
+#if defined(__CUDACC__) || defined(_LIBCPP_HAS_NO_CONSTEXPR) || defined(__HIPCC__)
|
||||||
|
// Amazing! cuda thinks numeric_limits::max() is a __host__ function, so
|
||||||
|
// we can't use it in a device function.
|
||||||
|
//
|
Loading…
Reference in New Issue
Block a user