21  Simd<float16_t, N>(float16x8_t v) : 
value(v){};
 
   24    auto f32x4_a = *(float32x4_t*)(&other);
 
   25    auto f32x4_b = *((float32x4_t*)(&other) + 1);
 
   26    value = vcvt_high_f16_f32(vcvt_f16_f32(f32x4_a), f32x4_b);
 
   30    value = vcvtq_f16_u16(*(uint16x8_t*)(&other.value));
 
   34    auto v = vcvtq_s16_f16(
value);
 
 
   40    v.val[0] = vcvt_f32_f16(*(float16x4_t*)(&
value));
 
   41    v.val[1] = vcvt_high_f32_f16(
value);
 
 
 
   55#define DEFINE_NEON_UNARY_OP(name, op)                   \ 
   56  inline Simd<float16_t, N> name(Simd<float16_t, N> a) { \ 
   57    return Simd<float16_t, N>{op(a.value)};              \ 
 
   68#define DEFINE_NEON_BINARY_OP(name, op)                                        \ 
   69  inline Simd<float16_t, N> name(Simd<float16_t, N> a, Simd<float16_t, N> b) { \ 
   70    return op(a.value, b.value);                                               \ 
   72  template <typename T>                                                        \ 
   73  Simd<float16_t, N> name(Simd<float16_t, N> a, T b) {                         \ 
   74    return op(a.value, Simd<float16_t, N>(b).value);                           \ 
   76  template <typename T>                                                        \ 
   77  Simd<float16_t, N> name(T a, Simd<float16_t, N> b) {                         \ 
   78    return op(Simd<float16_t, N>(a).value, b.value);                           \ 
 
   82  auto out = vceqzq_f16(v.
value);
 
 
   87  return vnegq_f16(v.
value);
 
 
   97#define DEFINE_NEON_COMPARISON(Op, op)                   \ 
   98  template <typename T>                                  \ 
   99  Simd<bool, N> operator Op(Simd<float16_t, N> a, T b) { \ 
  100    auto out = op(a.value, Simd<float16_t, N>(b).value); \ 
  101    return Simd<uint16_t, N>(*(uint16_t*)(&out));        \ 
  103  template <typename T>                                  \ 
  104  Simd<bool, N> operator Op(T a, Simd<float16_t, N> b) { \ 
  105    auto out = op(Simd<float16_t, N>(a).value, b.value); \ 
  106    return Simd<uint16_t, N>(*(uint16_t*)(&out));        \ 
  108  inline Simd<bool, N> operator Op(                      \ 
  109      Simd<float16_t, N> a, Simd<float16_t, N> b) {      \ 
  110    auto out = op(a.value, b.value);                     \ 
  111    return Simd<uint16_t, N>(*(uint16_t*)(&out));        \ 
 
  165inline Simd<float16_t, N>
 
  175template <
typename MaskT>
 
  184  y = vpmax_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
 
  187  return vget_lane_f16(y, 0);
 
 
  191  y = vpmin_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
 
  194  return vget_lane_f16(y, 0);
 
 
  198  y = vpadd_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
 
  201  return vget_lane_f16(y, 0);
 
 
  204  auto hx = vmul_f16(vget_low_f16(x.
value), vget_high_f16(x.
value));
 
 
Definition accelerate_fp16_simd.h:9
 
Simd< bool, N > isnan(Simd< T, N > v)
Definition accelerate_simd.h:146
 
constexpr int N
Definition neon_fp16_simd.h:9
 
Simd< T, N > minimum(Simd< T, N > a, Simd< T, N > b)
Definition accelerate_simd.h:215
 
T prod(Simd< T, N > x)
Definition accelerate_simd.h:297
 
Simd< T, N > rint(Simd< T, N > v)
Definition accelerate_simd.h:127
 
Simd< T, N > load(const T *x)
Definition base_simd.h:28
 
Simd< bool, N > operator!=(Simd< T, N > a, U b)
Definition accelerate_simd.h:201
 
Simd< T, N > abs(Simd< T, N > v)
Definition accelerate_simd.h:112
 
T sum(Simd< T, N > x)
Definition accelerate_simd.h:284
 
T max(Simd< T, N > x)
Definition accelerate_simd.h:288
 
Simd< bool, N > operator!(Simd< T, N > v)
Definition accelerate_simd.h:152
 
Simd< T, N > maximum(Simd< T, N > a, Simd< T, N > b)
Definition accelerate_simd.h:209
 
Simd< T, N > operator&&(Simd< T, N > x, U y)
Definition accelerate_simd.h:179
 
Simd< T, N > floor(Simd< T, N > v)
Definition accelerate_simd.h:113
 
Simd< T, N > fma(Simd< T, N > x, Simd< T, N > y, U z)
Definition accelerate_simd.h:269
 
Simd< T, N > operator||(Simd< T, N > x, U y)
Definition accelerate_simd.h:180
 
T min(Simd< T, N > x)
Definition accelerate_simd.h:292
 
Simd< T, N > ceil(Simd< T, N > v)
Definition accelerate_simd.h:120
 
Simd< T, N > recip(Simd< T, N > v)
Definition accelerate_simd.h:131
 
Simd< T, N > sqrt(Simd< T, N > v)
Definition accelerate_simd.h:129
 
Simd< T, N > clamp(Simd< T, N > v, Simd< T, N > min, Simd< T, N > max)
Definition accelerate_simd.h:264
 
Simd< T, N > rsqrt(Simd< T, N > v)
Definition accelerate_simd.h:130
 
Simd< T, N > operator-(Simd< T, N > v)
Definition accelerate_simd.h:136
 
Simd< T1, N > select(Simd< MaskT, N > mask, Simd< T1, N > x, Simd< T2, N > y)
Definition accelerate_simd.h:236
 
struct _MLX_Float16 float16_t
Definition half_types.h:17
 
#define DEFINE_NEON_BINARY_OP(name, op)
Definition neon_fp16_simd.h:68
 
#define DEFINE_NEON_COMPARISON(Op, op)
Definition neon_fp16_simd.h:97
 
#define DEFINE_NEON_UNARY_OP(name, op)
Definition neon_fp16_simd.h:55
 
Simd()
Definition neon_fp16_simd.h:16
 
static constexpr int size
Definition neon_fp16_simd.h:13
 
float16_t scalar_t
Definition neon_fp16_simd.h:14
 
float16_t operator[](int idx) const
Definition neon_fp16_simd.h:44
 
float16_t & operator[](int idx)
Definition neon_fp16_simd.h:48
 
float16x8_t value
Definition neon_fp16_simd.h:52
 
Definition accelerate_simd.h:55
 
asd::Vector< scalar_t, N >::packed_t value
Definition accelerate_simd.h:80
 
Simd()
Definition accelerate_simd.h:59