Normalize README bullet formatting (#2671)

speed up scalars (#2669)
2025-12-16 01:49:05 +08:00 · 2025-10-13 12:13:30 -07:00 · 2025-10-13 12:10:15 -07:00
2 changed files with 30 additions and 25 deletions
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ Hannun, Jagrit Digani, Angelos Katharopoulos, and Ronan Collobert. If you find
 MLX useful in your research and wish to cite it, please use the following
 BibTex entry:
-```
+```text
@software{mlx2023,
  author = {Awni Hannun and Jagrit Digani and Angelos Katharopoulos and Ronan Collobert},
  title = {{MLX}: Efficient and flexible machine learning on Apple silicon},
--- a/mlx/backend/cuda/allocator.cpp
+++ b/mlx/backend/cuda/allocator.cpp
@@ -30,15 +30,20 @@ SmallSizePool::SmallSizePool() {
  next_free_ = buffer_;
  CHECK_CUDA_ERROR(cudaMallocManaged(&data_, small_pool_size));
  int device_count = 0;
  CHECK_CUDA_ERROR(cudaGetDeviceCount(&device_count));
  for (int i = 0; i < device_count; ++i) {
 #if CUDART_VERSION >= 13000
    cudaMemLocation loc;
    loc.type = cudaMemLocationTypeDevice;
-  loc.id = 0;
+    loc.id = i;
 #else
-  int loc = 0;
+    int loc = i;
 #endif // CUDART_VERSION >= 13000
    CHECK_CUDA_ERROR(
-      cudaMemAdvise(data_, small_pool_size, cudaMemAdviseSetReadMostly, loc));
+        cudaMemAdvise(data_, small_pool_size, cudaMemAdviseSetAccessedBy, loc));
  }
  auto curr = next_free_;
  for (size_t i = 1; i < num_blocks; ++i) {
Author	SHA1	Message	Date
Fabrizio Milo	9bfc476d72	Normalize README bullet formatting (#2671)	2025-10-13 12:13:30 -07:00
Awni Hannun	25e2356316	speed up scalars (#2669)	2025-10-13 12:10:15 -07:00