mirror of
https://github.com/ml-explore/mlx.git
synced 2025-07-28 21:21:21 +08:00

* Link with cuDNN * Initial implementation * Remove backend apis * Fix recording cudnn conv * More unused backend apis * Fix C++ conv tests * include cudnn as python dep * Install libcudnn9-dev-cuda-12 in CI * cudnn only accepts contiguous inputs * Switch to backend apis * Plan needs to be kept alive * Turn off tf32 * Add cache * Test the native cuda graph api * Set cudnn stream before execution * Make LRUCache more like a normal container * Do error check for cublas handle * Zero-initializing array * Use tf32 for conv * Skip TestConv.test_torch_conv_2D test --------- Co-authored-by: Awni Hannun <awni@apple.com>
25 lines
650 B
Bash
25 lines
650 B
Bash
#!/bin/bash
#
# Repair the wheel(s) found in dist/ with auditwheel, extend the rpath of
# the bundled libmlx.so so it also searches the sibling "nvidia" package
# directories, and re-pack the result into wheelhouse/.
#
# Relative paths used below (dist/, wheel_tmp/, wheelhouse/ and
# ../python/scripts/repair_record.py as seen from wheel_tmp/) mean the
# script expects to be started from the directory that contains dist/ and
# python/ -- presumably the repository root.

# Stop at the first failing step; otherwise a failed repair or `cd` would
# let the later commands operate on the wrong directory or on missing files.
set -euo pipefail

# The --exclude patterns are quoted so they reach auditwheel verbatim
# instead of being expanded by the shell against files in the current
# directory.
# NOTE(review): cudnn/lib is added to the rpath further down, but libcudnn*
# is not excluded here -- confirm whether bundling it is intended.
auditwheel repair dist/* \
  --plat manylinux_2_35_x86_64 \
  --exclude 'libcublas*' \
  --exclude 'libnvrtc*' \
  --exclude 'libcuda*' \
  -w wheel_tmp

# -p: do not fail when wheelhouse/ is left over from a previous run.
mkdir -p wheelhouse
cd wheel_tmp

# Pick the first wheel auditwheel wrote and unpack it in place.
repaired_wheel=$(find . -name "*.whl" -print -quit)
if [[ -z "${repaired_wheel}" ]]; then
  echo "error: no repaired wheel found in wheel_tmp" >&2
  exit 1
fi
unzip -q "${repaired_wheel}"
rm -- "${repaired_wheel}"

# Append the nvidia library directories (relative to the library's own
# location via $ORIGIN) to whatever rpath libmlx.so already carries.
mlx_so="mlx/lib/libmlx.so"
rpath=$(patchelf --print-rpath "${mlx_so}")
base="\$ORIGIN/../../nvidia"
nvidia_rpath="${base}/cublas/lib:${base}/cuda_nvrtc/lib:${base}/cudnn/lib"
# Only insert the ':' separator when there is an existing rpath; a leading
# ':' would create an empty search-path entry.
rpath="${rpath:+${rpath}:}${nvidia_rpath}"
patchelf --force-rpath --set-rpath "${rpath}" "${mlx_so}"

# The library was modified after the wheel was built; repair_record.py is
# presumably what brings the wheel's RECORD entry back in sync -- see that
# script for details.
python ../python/scripts/repair_record.py "${mlx_so}"

# Re-zip the repaired wheel
zip -r -q "../wheelhouse/${repaired_wheel}" .