Fp8 conversion (#2686)

* add fp8 e4m3 converters * add cuda * default saturate to min/max * fix for older OS * fix no gpu/cpu * fix saturate * fix compile
2025-12-16 01:49:05 +08:00 · 2025-10-27 16:35:50 -07:00
parent d1e06117e8
commit 969924cc69
23 changed files with 363 additions and 117 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,6 +88,11 @@ cmake_policy(SET CMP0135 NEW)

 add_library(mlx)

+# Silence GCC's psabi note ("parameter passing for argument of type
+# ‘std::pair<float, float>’ ... changed"); C++ on GCC/Clang only, not MSVC/CUDA.
+target_compile_options(
+  mlx PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang,AppleClang>:-Wno-psabi>)
+
 if(MLX_BUILD_CUDA)
  enable_language(CUDA)
 endif()