mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Fp8 conversion (#2686)
* add fp8 e4m3 converters * add cuda * default saturate to min/max * fix for older OS * fix no gpu/cpu * fix saturate * fix compile
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <simd/math.h>
|
||||
#include <simd/vector.h>
|
||||
|
||||
@@ -200,6 +201,15 @@ SIMD_DEFAULT_COMPARISONS(<=)
|
||||
SIMD_DEFAULT_COMPARISONS(==)
|
||||
SIMD_DEFAULT_COMPARISONS(!=)
|
||||
|
||||
template <typename T, int N>
|
||||
Simd<T, N> clz(Simd<T, N> x) {
|
||||
auto a = *(uint32x4_t*)(&x);
|
||||
auto b = *((uint32x4_t*)(&x) + 1);
|
||||
a = vclzq_u32(a);
|
||||
b = vclzq_u32(b);
|
||||
return asd::make_uint8(a, b);
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
Simd<T, N> atan2(Simd<T, N> a, Simd<T, N> b) {
|
||||
return asd::atan2(a.value, b.value);
|
||||
|
||||
@@ -171,6 +171,11 @@ DEFAULT_BINARY(&)
|
||||
DEFAULT_BINARY(&&)
|
||||
DEFAULT_BINARY(||)
|
||||
|
||||
template <typename T>
|
||||
Simd<T, 1> clz(Simd<T, 1> x_) {
|
||||
return __builtin_clz(x_.value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Simd<T, 1> remainder(Simd<T, 1> a_, Simd<T, 1> b_) {
|
||||
T a = a_.value;
|
||||
|
||||
Reference in New Issue
Block a user