Remove cexpf

2025-09-01 21:04:41 +08:00 · 2025-07-11 04:46:09 +00:00
parent c55e0fb083
commit 15390f80a3
3 changed files with 3 additions and 142 deletions
--- a/mlx/backend/cuda/device/cexpf.cuh
+++ b/mlx/backend/cuda/device/cexpf.cuh
@@ -1,138 +0,0 @@
-// Copyright © 2025 Apple Inc.
-// Copyright © 2008-2013 NVIDIA Corporation
-// Copyright © 2013 Filipe RNC Maia
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Forked from
-// https://github.com/NVIDIA/cccl/blob/main/thrust/thrust/detail/complex/cexpf.h
-
-// TODO: We should use thrust::exp but the thrust header in old CUDA versions
-// can not be used in JIT.
-
-#pragma once
-
-#include <cuComplex.h>
-#include <cuda/std/cstdint>
-
-namespace mlx::core::cu::detail {
-
-using ieee_float_shape_type = union {
-  float value;
-  uint32_t word;
-};
-
-inline __device__ void get_float_word(uint32_t& i, float d) {
-  ieee_float_shape_type gf_u;
-  gf_u.value = (d);
-  (i) = gf_u.word;
-}
-
-inline __device__ void get_float_word(int32_t& i, float d) {
-  ieee_float_shape_type gf_u;
-  gf_u.value = (d);
-  (i) = gf_u.word;
-}
-
-inline __device__ void set_float_word(float& d, uint32_t i) {
-  ieee_float_shape_type sf_u;
-  sf_u.word = (i);
-  (d) = sf_u.value;
-}
-
-inline __device__ float frexp_expf(float x, int* expt) {
-  const uint32_t k = 235;
-  const float kln2 = 162.88958740F;
-
-  float exp_x;
-  uint32_t hx;
-
-  exp_x = expf(x - kln2);
-  get_float_word(hx, exp_x);
-  *expt = (hx >> 23) - (0x7f + 127) + k;
-  set_float_word(exp_x, (hx & 0x7fffff) | ((0x7f + 127) << 23));
-  return exp_x;
-}
-
-inline __device__ cuComplex ldexp_cexpf(cuComplex z, int expt) {
-  float x, y, exp_x, scale1, scale2;
-  int ex_expt, half_expt;
-
-  x = cuCrealf(z);
-  y = cuCimagf(z);
-  exp_x = frexp_expf(x, &ex_expt);
-  expt += ex_expt;
-
-  half_expt = expt / 2;
-  set_float_word(scale1, (0x7f + half_expt) << 23);
-  half_expt = expt - half_expt;
-  set_float_word(scale2, (0x7f + half_expt) << 23);
-
-  return cuComplex{
-      cosf(y) * exp_x * scale1 * scale2, sinf(y) * exp_x * scale1 * scale2};
-}
-
-inline __device__ cuComplex cexpf(const cuComplex& z) {
-  float x, y, exp_x;
-  uint32_t hx, hy;
-
-  const uint32_t exp_ovfl = 0x42b17218, cexp_ovfl = 0x43400074;
-
-  x = cuCrealf(z);
-  y = cuCimagf(z);
-
-  get_float_word(hy, y);
-  hy &= 0x7fffffff;
-
-  /* cexp(x + I 0) = exp(x) + I 0 */
-  if (hy == 0) {
-    return cuComplex{expf(x), y};
-  }
-  get_float_word(hx, x);
-  /* cexp(0 + I y) = cos(y) + I sin(y) */
-  if ((hx & 0x7fffffff) == 0) {
-    return cuComplex{cosf(y), sinf(y)};
-  }
-  if (hy >= 0x7f800000) {
-    if ((hx & 0x7fffffff) != 0x7f800000) {
-      /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
-      return cuComplex{y - y, y - y};
-    } else if (hx & 0x80000000) {
-      /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */
-      return cuComplex{0.0, 0.0};
-    } else {
-      /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */
-      return cuComplex{x, y - y};
-    }
-  }
-
-  if (hx >= exp_ovfl && hx <= cexp_ovfl) {
-    /*
-     * x is between 88.7 and 192, so we must scale to avoid
-     * overflow in expf(x).
-     */
-    return ldexp_cexpf(z, 0);
-  } else {
-    /*
-     * Cases covered here:
-     *  -  x < exp_ovfl and exp(x) won't overflow (common case)
-     *  -  x > cexp_ovfl, so exp(x) * s overflows for all s > 0
-     *  -  x = +-Inf (generated by exp())
-     *  -  x = NaN (spurious inexact exception from y)
-     */
-    exp_x = expf(x);
-    return cuComplex{exp_x * cosf(y), exp_x * sinf(y)};
-  }
-}
-
-} // namespace mlx::core::cu::detail
--- a/mlx/backend/cuda/device/unary_ops.cuh
+++ b/mlx/backend/cuda/device/unary_ops.cuh
@@ -2,12 +2,12 @@

 #pragma once

-#include "mlx/backend/cuda/device/cexpf.cuh"
 #include "mlx/backend/cuda/device/cucomplex_math.cuh"
 #include "mlx/backend/cuda/device/fp16_math.cuh"
 #include "mlx/backend/cuda/device/utils.cuh"

 #include <math_constants.h>
+#include <cuda/std/complex>

 namespace mlx::core::cu {

@@ -152,7 +152,8 @@ struct Exp {
  template <typename T>
  __device__ T operator()(T x) {
    if constexpr (cuda::std::is_same_v<T, cuComplex>) {
-      return detail::cexpf(x);
+      auto r = exp(cuda::std::complex<float>{cuCrealf(x), cuCimagf(x)});
+      return cuComplex{r.real(), r.imag()};
    } else {
      return exp(x);
    }
--- a/mlx/backend/cuda/jit_module.cpp
+++ b/mlx/backend/cuda/jit_module.cpp
@@ -172,7 +172,6 @@ constexpr const char* g_include_names[] = {
    INCLUDE_PREFIX "atomic_ops.cuh",
    INCLUDE_PREFIX "binary_ops.cuh",
    INCLUDE_PREFIX "cast_op.cuh",
-    INCLUDE_PREFIX "cexpf.cuh",
    INCLUDE_PREFIX "config.h",
    INCLUDE_PREFIX "cucomplex_math.cuh",
    INCLUDE_PREFIX "fp16_math.cuh",
@@ -189,7 +188,6 @@ constexpr const char* g_headers[] = {
    jit_source_atomic_ops,
    jit_source_binary_ops,
    jit_source_cast_op,
-    jit_source_cexpf,
    jit_source_config,
    jit_source_cucomplex_math,
    jit_source_fp16_math,