21 Simd<float16_t, N>(float16x8_t v) :
value(v){};
24 auto f32x4_a = *(float32x4_t*)(&other);
25 auto f32x4_b = *((float32x4_t*)(&other) + 1);
26 value = vcvt_high_f16_f32(vcvt_f16_f32(f32x4_a), f32x4_b);
30 value = vcvtq_f16_u16(*(uint16x8_t*)(&other.value));
34 auto v = vcvtq_s16_f16(
value);
40 v.val[0] = vcvt_f32_f16(*(float16x4_t*)(&
value));
41 v.val[1] = vcvt_high_f32_f16(
value);
55#define DEFINE_NEON_UNARY_OP(name, op) \
56 inline Simd<float16_t, N> name(Simd<float16_t, N> a) { \
57 return Simd<float16_t, N>{op(a.value)}; \
68#define DEFINE_NEON_BINARY_OP(name, op) \
69 inline Simd<float16_t, N> name(Simd<float16_t, N> a, Simd<float16_t, N> b) { \
70 return op(a.value, b.value); \
72 template <typename T> \
73 Simd<float16_t, N> name(Simd<float16_t, N> a, T b) { \
74 return op(a.value, Simd<float16_t, N>(b).value); \
76 template <typename T> \
77 Simd<float16_t, N> name(T a, Simd<float16_t, N> b) { \
78 return op(Simd<float16_t, N>(a).value, b.value); \
82 auto out = vceqzq_f16(v.
value);
87 return vnegq_f16(v.
value);
97#define DEFINE_NEON_COMPARISON(Op, op) \
98 template <typename T> \
99 Simd<bool, N> operator Op(Simd<float16_t, N> a, T b) { \
100 auto out = op(a.value, Simd<float16_t, N>(b).value); \
101 return Simd<uint16_t, N>(*(uint16_t*)(&out)); \
103 template <typename T> \
104 Simd<bool, N> operator Op(T a, Simd<float16_t, N> b) { \
105 auto out = op(Simd<float16_t, N>(a).value, b.value); \
106 return Simd<uint16_t, N>(*(uint16_t*)(&out)); \
108 inline Simd<bool, N> operator Op( \
109 Simd<float16_t, N> a, Simd<float16_t, N> b) { \
110 auto out = op(a.value, b.value); \
111 return Simd<uint16_t, N>(*(uint16_t*)(&out)); \
165inline Simd<float16_t, N>
175template <
typename MaskT>
184 y = vpmax_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
187 return vget_lane_f16(y, 0);
191 y = vpmin_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
194 return vget_lane_f16(y, 0);
198 y = vpadd_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
201 return vget_lane_f16(y, 0);
204 auto hx = vmul_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
Definition accelerate_fp16_simd.h:9
Simd< bool, N > isnan(Simd< T, N > v)
Definition accelerate_simd.h:146
constexpr int N
Definition neon_fp16_simd.h:9
Simd< T, N > minimum(Simd< T, N > a, Simd< T, N > b)
Definition accelerate_simd.h:215
T prod(Simd< T, N > x)
Definition accelerate_simd.h:297
Simd< T, N > rint(Simd< T, N > v)
Definition accelerate_simd.h:127
Simd< T, N > load(const T *x)
Definition base_simd.h:28
Simd< bool, N > operator!=(Simd< T, N > a, U b)
Definition accelerate_simd.h:201
Simd< T, N > abs(Simd< T, N > v)
Definition accelerate_simd.h:112
T sum(Simd< T, N > x)
Definition accelerate_simd.h:284
T max(Simd< T, N > x)
Definition accelerate_simd.h:288
Simd< bool, N > operator!(Simd< T, N > v)
Definition accelerate_simd.h:152
Simd< T, N > maximum(Simd< T, N > a, Simd< T, N > b)
Definition accelerate_simd.h:209
Simd< T, N > operator&&(Simd< T, N > x, U y)
Definition accelerate_simd.h:179
Simd< T, N > floor(Simd< T, N > v)
Definition accelerate_simd.h:113
Simd< T, N > fma(Simd< T, N > x, Simd< T, N > y, U z)
Definition accelerate_simd.h:269
Simd< T, N > operator||(Simd< T, N > x, U y)
Definition accelerate_simd.h:180
T min(Simd< T, N > x)
Definition accelerate_simd.h:292
Simd< T, N > ceil(Simd< T, N > v)
Definition accelerate_simd.h:120
Simd< T, N > recip(Simd< T, N > v)
Definition accelerate_simd.h:131
Simd< T, N > sqrt(Simd< T, N > v)
Definition accelerate_simd.h:129
Simd< T, N > clamp(Simd< T, N > v, Simd< T, N > min, Simd< T, N > max)
Definition accelerate_simd.h:264
Simd< T, N > rsqrt(Simd< T, N > v)
Definition accelerate_simd.h:130
Simd< T, N > operator-(Simd< T, N > v)
Definition accelerate_simd.h:136
Simd< T1, N > select(Simd< MaskT, N > mask, Simd< T1, N > x, Simd< T2, N > y)
Definition accelerate_simd.h:236
struct _MLX_Float16 float16_t
Definition half_types.h:17
#define DEFINE_NEON_BINARY_OP(name, op)
Definition neon_fp16_simd.h:68
#define DEFINE_NEON_COMPARISON(Op, op)
Definition neon_fp16_simd.h:97
#define DEFINE_NEON_UNARY_OP(name, op)
Definition neon_fp16_simd.h:55
Simd()
Definition neon_fp16_simd.h:16
static constexpr int size
Definition neon_fp16_simd.h:13
float16_t scalar_t
Definition neon_fp16_simd.h:14
float16_t operator[](int idx) const
Definition neon_fp16_simd.h:44
float16_t & operator[](int idx)
Definition neon_fp16_simd.h:48
float16x8_t value
Definition neon_fp16_simd.h:52
Definition accelerate_simd.h:55
asd::Vector< scalar_t, N >::packed_t value
Definition accelerate_simd.h:80
Simd()
Definition accelerate_simd.h:59