mirror of https://github.com/ml-explore/mlx.git
synced 2025-11-05 11:28:12 +08:00
rebase
@@ -127,7 +127,8 @@ relying on a copy from ``ensure_row_contiguous``:
     name="myexp_strided",
     input_names=["inp"],
     output_names=["out"],
-    source=source
+    source=source,
+    ensure_row_contiguous=False,
 )

 def exp_elementwise(a: mx.array):
@@ -138,7 +139,6 @@ relying on a copy from ``ensure_row_contiguous``:
         threadgroup=(256, 1, 1),
         output_shapes=[a.shape],
         output_dtypes=[a.dtype],
-        ensure_row_contiguous=False,
     )
     return outputs[0]

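The two hunks above move ``ensure_row_contiguous=False`` from the kernel call to the kernel constructor, so the flag now lives with the kernel definition and every call site behaves consistently. As a hedged sketch of how the surrounding example reads after the change: the hunks do not show the constructor name or the kernel source, so the use of ``mx.fast.metal_kernel`` and the Metal body below are assumptions, and only the keyword arguments visible in the diff are taken from it.

    import mlx.core as mx

    # Placeholder Metal body (assumption): the real docs example indexes a
    # strided input explicitly, which is why ensure_row_contiguous is disabled.
    source = """
        uint elem = thread_position_in_grid.x;
        out[elem] = metal::exp(inp[elem]);
    """

    kernel = mx.fast.metal_kernel(
        name="myexp_strided",
        input_names=["inp"],
        output_names=["out"],
        source=source,
        ensure_row_contiguous=False,  # now set when the kernel is built
    )

    def exp_elementwise(a: mx.array):
        outputs = kernel(
            inputs=[a],
            grid=(a.size, 1, 1),
            threadgroup=(256, 1, 1),
            output_shapes=[a.shape],
            output_dtypes=[a.dtype],
            # ensure_row_contiguous is no longer passed at call time
        )
        return outputs[0]
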
docs/build/html/_sources/index.rst (vendored, 1 changed line)
@@ -70,6 +70,7 @@ are the CPU and GPU.
    python/fft
    python/linalg
    python/metal
+   python/cuda
    python/memory_management
    python/nn
    python/optimizers

docs/build/html/_sources/install.rst (vendored, 2 changed lines)
@@ -271,7 +271,7 @@ and the CUDA toolkit. For example on Ubuntu, run the following:
    dpkg -i cuda-keyring_1.1-1_all.deb
    apt-get update -y
    apt-get -y install cuda-toolkit-12-9
-   apt-get install libblas-dev liblapack-dev liblapacke-dev -y
+   apt-get install libblas-dev liblapack-dev liblapacke-dev libcudnn9-dev-cuda-12 -y


 When building either the Python or C++ APIs make sure to pass the cmake flag
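The new ``libcudnn9-dev-cuda-12`` package joins the BLAS/LAPACK dependencies required for the CUDA build. After installing these and building MLX with the cmake flag mentioned at the end of the hunk, one hedged way to smoke-test the result from Python (not part of the install docs) is to force a small computation onto the GPU stream:

    import mlx.core as mx

    # A small matmul forced onto the GPU stream; if MLX was built without a
    # working GPU backend, using mx.gpu here is expected to fail rather than
    # silently run on the CPU.
    a = mx.ones((4, 4))
    b = mx.ones((4, 4))
    c = mx.matmul(a, b, stream=mx.gpu)
    mx.eval(c)
    print(c)
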
docs/build/html/_sources/python/_autosummary/mlx.core.cuda.is_available.rst (vendored, new file, 6 lines)
@@ -0,0 +1,6 @@
+mlx.core.cuda.is\_available
+===========================
+
+.. currentmodule:: mlx.core.cuda
+
+.. autofunction:: is_available
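The stub above wires ``mlx.core.cuda.is_available`` into the generated API docs. As a usage illustration only (assuming it returns a bool, mirroring the existing ``mx.metal.is_available``), a script can use it to decide whether to pin work to the GPU:

    import mlx.core as mx

    # Prefer the GPU when either the CUDA or the Metal backend reports itself
    # available; otherwise stay on the CPU. Assumes both helpers return a bool.
    use_gpu = mx.cuda.is_available() or mx.metal.is_available()
    mx.set_default_device(mx.gpu if use_gpu else mx.cpu)
    print(mx.default_device())
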
docs/build/html/_sources/python/_autosummary/mlx.core.fast.cuda_kernel.rst (vendored, new file, 6 lines)
@@ -0,0 +1,6 @@
+mlx.core.fast.cuda\_kernel
+==========================
+
+.. currentmodule:: mlx.core.fast
+
+.. autofunction:: cuda_kernel
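This stub adds ``mlx.core.fast.cuda_kernel`` to the API docs. The sketch below assumes it mirrors ``mx.fast.metal_kernel`` as the earlier hunks suggest (constructor taking name/input_names/output_names/source, call taking grid, threadgroup, output_shapes and output_dtypes); none of that, nor the CUDA body, is confirmed by this diff, so treat every name here as an assumption to be checked against the generated docs.

    import mlx.core as mx

    # Hypothetical CUDA body: how the element index is exposed depends on what
    # cuda_kernel injects around the source, so read this as pseudocode. A real
    # kernel would also guard against out-of-range threads.
    source = """
        auto elem = blockIdx.x * blockDim.x + threadIdx.x;
        out[elem] = exp(inp[elem]);
    """

    # Assumed to mirror mx.fast.metal_kernel's construction and call signature.
    kernel = mx.fast.cuda_kernel(
        name="myexp",
        input_names=["inp"],
        output_names=["out"],
        source=source,
    )

    def exp_elementwise(a: mx.array):
        outputs = kernel(
            inputs=[a],
            grid=(a.size, 1, 1),
            threadgroup=(256, 1, 1),
            output_shapes=[a.shape],
            output_dtypes=[a.dtype],
        )
        return outputs[0]
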
docs/build/html/_sources/python/cuda.rst (vendored, new file, 9 lines)
@@ -0,0 +1,9 @@
+CUDA
+=====
+
+.. currentmodule:: mlx.core.cuda
+
+.. autosummary::
+   :toctree: _autosummary
+
+   is_available
docs/build/html/_sources/python/fast.rst (vendored, 1 changed line)
@@ -13,3 +13,4 @@ Fast
    rope
    scaled_dot_product_attention
    metal_kernel
+   cuda_kernel
docs/build/html/_sources/usage/compile.rst (vendored, 2 changed lines)
@@ -225,7 +225,7 @@ In some cases returning updated state can be pretty inconvenient. Hence,
    def fun(x, y):
        z = x + y
        state.append(z)
-       return mx.exp(z), state
+       return mx.exp(z)

    fun(mx.array(1.0), mx.array(2.0))
    # Prints [array(3, dtype=float32)]
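The hunk above trims the return statement in the compile docs' implicit-state example: once ``state`` is registered with ``mx.compile``, the function no longer needs to return it. Reconstructed as a runnable sketch, assuming the lines outside the hunk use ``functools.partial`` with ``outputs=state`` (those lines are not shown in this diff):

    from functools import partial
    import mlx.core as mx

    state = []

    # Registering `state` as an implicit output lets the compiled function
    # update it without returning it, which is what the fixed line reflects.
    @partial(mx.compile, outputs=state)
    def fun(x, y):
        z = x + y
        state.append(z)
        return mx.exp(z)

    fun(mx.array(1.0), mx.array(2.0))
    print(state)
    # Prints [array(3, dtype=float32)]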