FP8 conversion (#2686)

* add FP8 E4M3 converters

* add CUDA

* default saturate to min/max

* fix for older OS

* fix no gpu/cpu

* fix saturate

* fix compile
This commit is contained in:
Awni Hannun
2025-10-27 16:35:50 -07:00
committed by GitHub
parent d1e06117e8
commit 969924cc69
23 changed files with 363 additions and 117 deletions

View File

@@ -88,6 +88,11 @@ cmake_policy(SET CMP0135 NEW)
# Declare the mlx library target; no sources are listed in this call.
add_library(mlx)

# GCC 10.1 changed how an argument of type std::pair<float, float> is passed
# when C++17 is enabled (it now matches C++14) and prints a "parameter passing"
# note about it. Suppress that note with -Wno-psabi.
target_compile_options(
  mlx
  PRIVATE -Wno-psabi)

# Enable the CUDA language only when MLX_BUILD_CUDA is truthy.
if(MLX_BUILD_CUDA)
  enable_language(CUDA)
endif()