add a phist patch to avoid trying to compile SSE code if that is not … (#38806)
* add a phist patch to avoid trying to compile SSE code if that is not available. * phist: make the avoid-sse patch more robust because compiler on ARM system still tried to compile SSE code
This commit is contained in:
parent
1f58ac5ed3
commit
5672c64356
346
var/spack/repos/builtin/packages/phist/avoid-sse.patch
Normal file
346
var/spack/repos/builtin/packages/phist/avoid-sse.patch
Normal file
@ -0,0 +1,346 @@
|
||||
commit eaef462cc07509fe8f380fbf520a2617b910b139
|
||||
Author: Jonas Thies <16190001+jthies@users.noreply.github.com>
|
||||
Date: Sun Jul 9 21:33:30 2023 +0200
|
||||
|
||||
exit early from builtin kernels requiring SSE so that they are not compiled if it is not available
|
||||
(this broke phist compilation on ARM systems, even though we never called these kernels if SSE was disabled)
|
||||
|
||||
diff --git a/src/kernels/builtin/axpy_kernels_nt.c b/src/kernels/builtin/axpy_kernels_nt.c
|
||||
index 64d5fbd0..17c5024a 100644
|
||||
--- a/src/kernels/builtin/axpy_kernels_nt.c
|
||||
+++ b/src/kernels/builtin/axpy_kernels_nt.c
|
||||
@@ -19,7 +19,9 @@
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
+#ifdef PHIST_HAVE_SSE
|
||||
#include <emmintrin.h>
|
||||
+#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count)
|
||||
@@ -30,6 +32,10 @@ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count)
|
||||
|
||||
void daxpy_nt_2_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(y,16) )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
|
||||
@@ -54,11 +60,16 @@ void daxpy_nt_2_c(int nrows, const double *restrict alpha, const double *restric
|
||||
// non-temporal store
|
||||
_mm_stream_pd(y+2*i, y_);
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void daxpy_nt_4_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(y,16) )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
|
||||
@@ -86,11 +97,16 @@ void daxpy_nt_4_c(int nrows, const double *restrict alpha, const double *restric
|
||||
_mm_stream_pd(y+4*i+2*k, y_);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void daxpy_nt_8_c(int nrows, const double *restrict alpha, const double *restrict x, double *restrict y)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(y,16) )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
|
||||
@@ -118,11 +134,16 @@ void daxpy_nt_8_c(int nrows, const double *restrict alpha, const double *restric
|
||||
_mm_stream_pd(y+8*i+2*k, y_);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void daxpy_nt_strided_2_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(y,16) )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
|
||||
@@ -140,11 +161,16 @@ void daxpy_nt_strided_2_c(int nrows, const double *restrict alpha, const double
|
||||
// non-temporal store
|
||||
_mm_stream_pd(y+ldy*i, y_);
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void daxpy_nt_strided_4_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(y,16) || ldy % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
|
||||
@@ -165,11 +191,16 @@ void daxpy_nt_strided_4_c(int nrows, const double *restrict alpha, const double
|
||||
_mm_stream_pd(y+ldy*i+2*k, y_);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void daxpy_nt_strided_8_c(int nrows, const double *restrict alpha, const double *restrict x, int ldx, double *restrict y, int ldy)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(y,16) || ldy % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)y);
|
||||
@@ -190,11 +221,16 @@ void daxpy_nt_strided_8_c(int nrows, const double *restrict alpha, const double
|
||||
_mm_stream_pd(y+ldy*i+2*k, y_);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void dcopy_general_nt_c(int nrows, int nvec, const double *restrict x, int ldx, double *restrict y, int ldy)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( nvec % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)x);
|
||||
@@ -217,5 +253,6 @@ void dcopy_general_nt_c(int nrows, int nvec, const double *restrict x, int ldx,
|
||||
_mm_stream_pd(y+i*ldy+2*j, tmp);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
diff --git a/src/kernels/builtin/spmvm_kernels_nt.c b/src/kernels/builtin/spmvm_kernels_nt.c
|
||||
index d4d30bff..5d858878 100644
|
||||
--- a/src/kernels/builtin/spmvm_kernels_nt.c
|
||||
+++ b/src/kernels/builtin/spmvm_kernels_nt.c
|
||||
@@ -19,7 +19,9 @@
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
+#ifdef PHIST_HAVE_SSE
|
||||
#include <emmintrin.h>
|
||||
+#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
@@ -35,6 +37,10 @@ static inline _Bool is_aligned(const void *restrict pointer, size_t byte_count)
|
||||
void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,16) )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -123,7 +129,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
#endif
|
||||
|
||||
// last row
|
||||
-#ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
+# ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
if( nrows % 2 != 0 )
|
||||
{
|
||||
double lhs, lhsC;
|
||||
@@ -136,7 +142,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
|
||||
lhsv[nrows-1] = alpha*(lhs+lhsC);
|
||||
}
|
||||
-#else
|
||||
+# else
|
||||
if( nrows % 2 != 0 )
|
||||
{
|
||||
lhsv[nrows-1] = shifts[0]*rhsv[nrows-1];
|
||||
@@ -146,6 +152,7 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
lhsv[nrows-1] += val[j]*halo[ (col_idx[j]-1) ];
|
||||
lhsv[nrows-1] *= alpha;
|
||||
}
|
||||
+# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -153,6 +160,10 @@ void dspmvm_nt_1_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,32) || ldl % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -176,7 +187,7 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
__m128d shifts_ = _mm_loadu_pd(shifts);
|
||||
__m128d alpha_ = _mm_set1_pd(alpha);
|
||||
|
||||
-#ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
+# ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int i = 0; i < nrows; i++)
|
||||
{
|
||||
@@ -204,7 +215,7 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
// non-temporal store
|
||||
_mm_stream_pd(lhsv+i*ldl, lhs);
|
||||
}
|
||||
-#else
|
||||
+# else
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int i = 0; i < nrows; i++)
|
||||
{
|
||||
@@ -232,16 +243,21 @@ void dspmvm_nt_2_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
// multiply with alpha
|
||||
__m128d alpha_ = _mm_set1_pd(alpha);
|
||||
lhs_ = _mm_mul_pd(alpha_,lhs_);
|
||||
-
|
||||
+
|
||||
// non-temporal store
|
||||
_mm_stream_pd(lhsv+i*ldl, lhs_);
|
||||
}
|
||||
+# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,32) || ldl % 4 != 0 )
|
||||
{
|
||||
printf("%s: lhsv not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -261,7 +277,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
}
|
||||
|
||||
|
||||
-#ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
+# ifdef PHIST_HIGH_PRECISION_KERNELS
|
||||
|
||||
__m256d shifts_ = _mm256_loadu_pd(shifts);
|
||||
__m256d alpha_ = _mm256_set1_pd(alpha);
|
||||
@@ -294,7 +310,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
_mm256_stream_pd(lhsv+i*ldl, lhs);
|
||||
}
|
||||
|
||||
-#else
|
||||
+# else
|
||||
|
||||
__m128d shifts_[2];
|
||||
shifts_[0] = _mm_loadu_pd(shifts);
|
||||
@@ -341,6 +357,7 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
}
|
||||
}
|
||||
|
||||
+# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -348,6 +365,10 @@ void dspmvm_nt_4_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
void dspmvm_nt_8_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, const double *restrict halo, double *restrict lhsv, int ldl)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -412,12 +433,17 @@ void dspmvm_nt_8_c(int nrows, double alpha, const long *restrict row_ptr, const
|
||||
_mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
||||
void dspmvm_nt_strided_2_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -460,15 +486,20 @@ void dspmvm_nt_strided_2_c(int nrows, double alpha, const long *restrict row_ptr
|
||||
// multiply with alpha
|
||||
__m128d alpha_ = _mm_set1_pd(alpha);
|
||||
lhs_ = _mm_mul_pd(alpha_,lhs_);
|
||||
-
|
||||
+
|
||||
// non-temporal store
|
||||
_mm_stream_pd(lhsv+i*ldl, lhs_);
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void dspmvm_nt_strided_4_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -526,11 +557,16 @@ void dspmvm_nt_strided_4_c(int nrows, double alpha, const long *restrict row_ptr
|
||||
_mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void dspmvm_nt_strided_8_c(int nrows, double alpha, const long *restrict row_ptr, const long *restrict halo_ptr, const int *restrict col_idx, const double *restrict val,
|
||||
const double *restrict shifts, const double *restrict rhsv, int ldr, const double *restrict halo, double *restrict lhsv, int ldl)
|
||||
{
|
||||
+#ifndef PHIST_HAVE_SSE
|
||||
+ printf("%s: must not be called on platforms without SSE.", __FUNCTION__);
|
||||
+ exit(1);
|
||||
+#else
|
||||
if( !is_aligned(lhsv,16) || ldl % 2 != 0 )
|
||||
{
|
||||
printf("%s: not aligned %lx\n", __FUNCTION__, (uintptr_t)(void*)lhsv);
|
||||
@@ -589,6 +625,7 @@ void dspmvm_nt_strided_8_c(int nrows, double alpha, const long *restrict row_ptr
|
||||
_mm_stream_pd(lhsv+i*ldl+2*k, lhs_[k]);
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
|
@ -150,6 +150,10 @@ class Phist(CMakePackage):
|
||||
|
||||
# ###################### Patches ##########################
|
||||
|
||||
# Avoid trying to compile some SSE code if SSE is not available
|
||||
# This patch will be part of phist 1.11.3 and greater and only affects
|
||||
# the 'builtin' kernel_lib.
|
||||
patch("avoid-sse.patch", when="@:1.11.2 kernel_lib=builtin")
|
||||
# Only applies to 1.9.4: While SSE instructions are handled correctly,
|
||||
# build fails on ppc64le unless -DNO_WARN_X86_INTRINSICS is defined.
|
||||
patch("ppc64_sse.patch", when="@1.9.4")
|
||||
|
Loading…
Reference in New Issue
Block a user