225 lines
7.2 KiB
Diff
225 lines
7.2 KiB
Diff
From 8c5d5eca51d9e4cd9aa046dba8f939b3f4012256 Mon Sep 17 00:00:00 2001
|
|
From: Hans Kristian Rosbach <hk-git@circlestorm.org>
|
|
Date: Fri, 21 Jul 2023 13:43:15 +0200
|
|
Subject: [PATCH 1/3] Clean up SSE4.2 support, and no longer use asm fallback
|
|
or gcc builtin.
|
|
|
|
Defines changing meaning:
|
|
X86_SSE42 used to mean the compiler supports the CRC inline-asm fallback.
|
|
X86_SSE42_CRC_INTRIN used to mean the compiler supports SSE4.2 intrinsics.
|
|
|
|
X86_SSE42 now means the compiler supports SSE4.2 intrinsics.
|
|
|
|
This therefore also fixes the adler32_sse42 checks, since those were depending
|
|
on SSE4.2 intrinsics but were mistakenly checking the X86_SSE42 define.
|
|
Now the X86_SSE42 define actually means what it appears to.
|
|
---
|
|
CMakeLists.txt | 5 +----
|
|
arch/x86/insert_string_sse42.c | 36 +++++----------------------------
|
|
cmake/detect-intrinsics.cmake | 23 +++------------------
|
|
configure | 37 ++++++++--------------------------
|
|
win32/Makefile.msc | 1 -
|
|
5 files changed, 17 insertions(+), 85 deletions(-)
|
|
|
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index 420a5c78..1e42239a 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -827,15 +827,12 @@ if(WITH_OPTIM)
|
|
endif()
|
|
if(WITH_SSE42)
|
|
check_sse42_intrinsics()
|
|
- if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)
|
|
+ if(HAVE_SSE42_INTRIN)
|
|
add_definitions(-DX86_SSE42)
|
|
set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c)
|
|
add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"")
|
|
list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS})
|
|
set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}")
|
|
- if(HAVE_SSE42CRC_INTRIN)
|
|
- add_definitions(-DX86_SSE42_CRC_INTRIN)
|
|
- endif()
|
|
else()
|
|
set(WITH_SSE42 OFF)
|
|
endif()
|
|
diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c
|
|
index 565d92f9..ae092a7e 100644
|
|
--- a/arch/x86/insert_string_sse42.c
|
|
+++ b/arch/x86/insert_string_sse42.c
|
|
@@ -5,38 +5,13 @@
|
|
*
|
|
*/
|
|
|
|
+#ifdef X86_SSE42
|
|
#include "../../zbuild.h"
|
|
-#include <immintrin.h>
|
|
-#ifdef _MSC_VER
|
|
-# include <nmmintrin.h>
|
|
-#endif
|
|
+#include <nmmintrin.h>
|
|
#include "../../deflate.h"
|
|
|
|
-#ifdef X86_SSE42_CRC_INTRIN
|
|
-# ifdef _MSC_VER
|
|
-# define HASH_CALC(s, h, val)\
|
|
- h = _mm_crc32_u32(h, val)
|
|
-# else
|
|
-# define HASH_CALC(s, h, val)\
|
|
- h = __builtin_ia32_crc32si(h, val)
|
|
-# endif
|
|
-#else
|
|
-# ifdef _MSC_VER
|
|
-# define HASH_CALC(s, h, val) {\
|
|
- __asm mov edx, h\
|
|
- __asm mov eax, val\
|
|
- __asm crc32 eax, edx\
|
|
- __asm mov h, eax\
|
|
- }
|
|
-# else
|
|
-# define HASH_CALC(s, h, val) \
|
|
- __asm__ __volatile__ (\
|
|
- "crc32 %1,%0\n\t"\
|
|
- : "+r" (h)\
|
|
- : "r" (val)\
|
|
- );
|
|
-# endif
|
|
-#endif
|
|
+#define HASH_CALC(s, h, val)\
|
|
+ h = _mm_crc32_u32(h, val)
|
|
|
|
#define HASH_CALC_VAR h
|
|
#define HASH_CALC_VAR_INIT uint32_t h = 0
|
|
@@ -45,6 +20,5 @@
|
|
#define INSERT_STRING insert_string_sse42
|
|
#define QUICK_INSERT_STRING quick_insert_string_sse42
|
|
|
|
-#ifdef X86_SSE42
|
|
-# include "../../insert_string_tpl.h"
|
|
+#include "../../insert_string_tpl.h"
|
|
#endif
|
|
diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake
|
|
index 9cbc5908..52c54dc8 100644
|
|
--- a/cmake/detect-intrinsics.cmake
|
|
+++ b/cmake/detect-intrinsics.cmake
|
|
@@ -481,35 +481,18 @@ macro(check_sse42_intrinsics)
|
|
set(SSE42FLAG "-msse4.2")
|
|
endif()
|
|
endif()
|
|
- # Check whether compiler supports SSE4.2 CRC inline asm
|
|
+ # Check whether compiler supports SSE4.2 intrinsics
|
|
set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")
|
|
check_c_source_compile_or_run(
|
|
- "int main(void) {
|
|
- unsigned val = 0, h = 0;
|
|
- #if defined(_MSC_VER)
|
|
- { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax }
|
|
- #else
|
|
- __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) );
|
|
- #endif
|
|
- return (int)h;
|
|
- }"
|
|
- HAVE_SSE42CRC_INLINE_ASM
|
|
- )
|
|
- # Check whether compiler supports SSE4.2 CRC intrinsics
|
|
- check_c_source_compile_or_run(
|
|
- "#include <immintrin.h>
|
|
+ "#include <nmmintrin.h>
|
|
int main(void) {
|
|
unsigned crc = 0;
|
|
char c = 'c';
|
|
- #if defined(_MSC_VER)
|
|
crc = _mm_crc32_u32(crc, c);
|
|
- #else
|
|
- crc = __builtin_ia32_crc32qi(crc, c);
|
|
- #endif
|
|
(void)crc;
|
|
return 0;
|
|
}"
|
|
- HAVE_SSE42CRC_INTRIN
|
|
+ HAVE_SSE42_INTRIN
|
|
)
|
|
set(CMAKE_REQUIRED_FLAGS)
|
|
endmacro()
|
|
diff --git a/configure b/configure
|
|
index 8714590e..6b4e7fff 100755
|
|
--- a/configure
|
|
+++ b/configure
|
|
@@ -1431,38 +1431,23 @@ EOF
|
|
}
|
|
|
|
check_sse42_intrinsics() {
|
|
- # Check whether compiler supports SSE4.2 CRC inline asm
|
|
- cat > $test.c << EOF
|
|
-int main(void) {
|
|
- unsigned val = 0, h = 0;
|
|
- __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
|
|
- return (int) h;
|
|
-}
|
|
-EOF
|
|
- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
|
|
- echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
|
|
- HAVE_SSE42CRC_INLINE_ASM=1
|
|
- else
|
|
- echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
|
|
- HAVE_SSE42CRC_INLINE_ASM=0
|
|
- fi
|
|
-
|
|
- # Check whether compiler supports SSE4.2 CRC intrinsics
|
|
+ # Check whether compiler supports SSE4.2 intrinsics
|
|
cat > $test.c << EOF
|
|
+#include <nmmintrin.h>
|
|
int main(void) {
|
|
unsigned crc = 0;
|
|
char c = 'c';
|
|
- crc = __builtin_ia32_crc32qi(crc, c);
|
|
+ crc = _mm_crc32_u32(crc, c);
|
|
(void)crc;
|
|
return 0;
|
|
}
|
|
EOF
|
|
if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
|
|
- echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log
|
|
- HAVE_SSE42CRC_INTRIN=1
|
|
+ echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log
|
|
+ HAVE_SSE42_INTRIN=1
|
|
else
|
|
- echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
|
|
- HAVE_SSE42CRC_INTRIN=0
|
|
+ echo "Checking for SSE4.2 intrinsics ... No." | tee -a configure.log
|
|
+ HAVE_SSE42_INTRIN=0
|
|
fi
|
|
}
|
|
|
|
@@ -1606,15 +1591,9 @@ case "${ARCH}" in
|
|
|
|
check_sse42_intrinsics
|
|
|
|
- if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
|
|
+ if test ${HAVE_SSE42_INTRIN} -eq 1; then
|
|
CFLAGS="${CFLAGS} -DX86_SSE42"
|
|
SFLAGS="${SFLAGS} -DX86_SSE42"
|
|
-
|
|
- if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
|
|
- CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
|
|
- SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN"
|
|
- fi
|
|
-
|
|
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o"
|
|
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo"
|
|
fi
|
|
diff --git a/win32/Makefile.msc b/win32/Makefile.msc
|
|
index 9ed26f28..3035072b 100644
|
|
--- a/win32/Makefile.msc
|
|
+++ b/win32/Makefile.msc
|
|
@@ -31,7 +31,6 @@ WFLAGS = \
|
|
-DX86_PCLMULQDQ_CRC \
|
|
-DX86_SSE2 \
|
|
-DX86_SSE42 \
|
|
- -DX86_SSE42_CRC_INTRIN \
|
|
-DX86_SSSE3 \
|
|
-DX86_AVX2
|
|
|
|
--
|
|
2.39.2
|
|
|