Awni Hannun
							
						 
					 | 
					
						
						
							
						
						f5f65ef48c
					 | 
					
						
						
							
							Make sliceUpdate general (#2282)
						
						
						
						
						
						
						
						* Make sliceUpdate general
* fix 
						
						
							
						
					 | 
					
						2025-06-12 16:48:54 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						c2dd81a8aa
					 | 
					
						
						
							
							Fix warnings from latest CUDA toolkit (#2275)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-12 06:03:01 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						d7e680ffe4
					 | 
					
						
						
							
							CUDA backend: layernorm (#2271)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-11 15:48:32 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						c371baf53a
					 | 
					
						
						
							
							CUDA backend: softmax (#2272)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-11 13:55:22 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						ccf78f566c
					 | 
					
						
						
							
							CUDA backend: argreduce (#2270)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-11 13:26:17 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						c9fa68664a
					 | 
					
						
						
							
							CUDA backend: reduce (#2269)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-11 11:22:25 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						c35f4d089a
					 | 
					
						
						
							
							start cuda circle config (#2256)
						
						
						
						
						
						
						
						* rebase
* fix metal kernel linking issue on cuda
* start cuda circle config 
						
						
							
						
					 | 
					
						2025-06-10 21:19:47 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						8590c0941e
					 | 
					
						
						
							
							Add load_safe to the general conv loaders (#2258)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-10 20:58:16 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						095163b8d1
					 | 
					
						
						
							
							Fix building cpp benchmarks on Linux (#2268)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-10 17:10:24 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						99c33d011d
					 | 
					
						
						
							
							rebase + nit (#2260)
						
						
						
						
						
						
						
						Co-authored-by: Awni Hannun <awni@apple.com> 
						
						
							
						
					 | 
					
						2025-06-10 10:51:51 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						62fecf3e13
					 | 
					
						
						
							
							fix conv export (#2265)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-10 09:34:01 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						7c4eb5d03e
					 | 
					
						
						
							
							CUDA backend: random (#2261)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-10 08:59:56 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						bae9a6b404
					 | 
					
						
						
							
							CUDA backend: sort (#2262)
						
						
						
						
						
						
						
						Co-authored-by: Awni Hannun <awni@apple.com> 
						
						
							
						
					 | 
					
						2025-06-10 08:59:47 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Christopher Fleetwood
							
						 
					 | 
					
						
						
							
						
						004c1d8ef2
					 | 
					
						
						
							
							Report number of missing parameters (#2264)
						
						
						
						
						
						
						
						* chore: inform
* chore: format
---------
Co-authored-by: FL33TW00D <FL33TW00D@users.noreply.github.com> 
						
						
							
						
					 | 
					
						2025-06-10 06:37:50 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						7ebb2e0193
					 | 
					
						
						
							
							CUDA backend: binary ops (#2259)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-10 06:37:40 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						9ce77798b1
					 | 
					
						
						
							
							fix export to work with gather/scatter axis (#2263)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-09 20:37:27 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						f8bad60609
					 | 
					
						
						
							
							CUDA backend: unary ops (#2158)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-09 06:45:08 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Emmanuel Ferdman
							
						 
					 | 
					
						
						
							
						
						5866b3857b
					 | 
					
						
						
							
							Refactor the lu test (#2250)
						
						
						
						
						
						
						
						Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com> 
						
						
							
						
					 | 
					
						2025-06-07 06:12:08 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						1ca616844b
					 | 
					
						
						
							
							Fix unintuitive metal kernel caching (#2242)
						
						
						
						
						
						
						
						* Fix unintuitive metal kernel caching
* alternative solution 
						
						
							
						
					 | 
					
						2025-06-06 20:08:15 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						2e8cf0b450
					 | 
					
						
						
							
							Change layernorms to two pass algorithm (#2246)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-06 13:34:56 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						24f89173d1
					 | 
					
						
						
							
							CUDA backend: matmul (#2241)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-06 12:24:04 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						c6a20b427a
					 | 
					
						
						
							
							Improve metal elementwise kernels (#2247)
						
						
						
						
						
						
						
						* improve metal elementwise kernels
* compile and copy
* fix jit 
						
						
							
						
					 | 
					
						2025-06-06 11:37:40 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						a5ac9244c4
					 | 
					
						
						
							
							fix linux linking error (#2248)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-06 10:41:51 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						c763fe1be0
					 | 
					
						
						
							
							default strict mode for module update and update_modules (#2239)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-05 15:27:02 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						52dc8c8cd5
					 | 
					
						
						
							
							Add profiler annotations in common primitives for CUDA backend (#2244)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-04 19:55:12 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						aede70e81d
					 | 
					
						
						
							
							Perf regression fix (#2243)
						
						
						
						
						
						
							
 v0.26.1
						
					 | 
					
						2025-06-03 17:55:12 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						85a8beb5e4
					 | 
					
						
						
							
							Avoid atomic updates across CPU/GPU in CUDA event (#2231)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-03 16:49:06 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						0bb89e9e5f
					 | 
					
						
						
							
							Share more common code in Compiled (#2240)
						
						
						
						
						
						
						
						* Share more common code in Compiled
* Remove build_lib_name 
						
						
							
						
					 | 
					
						2025-06-03 16:48:50 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						5685ceb3c7
					 | 
					
						
						
							
							Avoid invoking allocator::malloc when creating CUDA event (#2232)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-03 16:48:40 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Suryash Malviya
							
						 
					 | 
					
						
						
							
						
						0408ba0a76
					 | 
					
						
						
							
							Optimizing Complex Matrix Multiplication using Karatsuba’s Algorithm  (#2220)
						
						
						
						
						
						
						
						* Implementing Complex Matmul using Karatsuba Algorithm
* Implemented Karatsuba's Algorithm for complex matmul and pre-commit them
* fix
---------
Co-authored-by: Awni Hannun <awni@apple.com> 
						
						
							
 v0.26.0
						
					 | 
					
						2025-06-02 15:58:46 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						cbad6c3093
					 | 
					
						
						
							
							version (#2237)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-02 15:58:33 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						1b021f6984
					 | 
					
						
						
							
							Fast primitives decide when to use the fallback (#2216)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-02 13:26:37 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						95b7551d65
					 | 
					
						
						
							
							Do not check event.is_signaled() in eval_impl (#2230)
						
						
						
						
						
						
							
						
					 | 
					
						2025-06-02 13:23:34 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						db5a7c6192
					 | 
					
						
						
							
							Add memory cache to CUDA backend (#2221)
						
						
						
						
						
						
						
						* Move BufferCache out of allocator
* Add memory cache to cuda backend allocator
* Simplify BufferCache assuming buf can not be null 
						
						
							
						
					 | 
					
						2025-05-30 12:12:54 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						6ef2f67e7f
					 | 
					
						
						
							
							5bit quants (#2226)
						
						
						
						
						
						
						
						* 5bit quants
* 5bit quants 
						
						
							
						
					 | 
					
						2025-05-30 12:12:10 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						f76ee1ffd2
					 | 
					
						
						
							
							Move some dims utils to common (#2223)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-29 06:48:30 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						54a71f270a
					 | 
					
						
						
							
							Remove unused defines (#2217)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-23 06:14:58 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						55b4062dd8
					 | 
					
						
						
							
							copyright in docs (#2214)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-21 17:13:04 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						79071bfba4
					 | 
					
						
						
							
							Fix out-of-bounds default value in logsumexp/softmax (#2213)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-21 07:25:16 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						7774b87cbd
					 | 
					
						
						
							
							Remove redundant simd_sum in logsumexp (#2210)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-21 07:25:03 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						35c87741cf
					 | 
					
						
						
							
							Build for compute capability 70 instead of 75 (#2209)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-20 19:42:48 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jack Wind
							
						 
					 | 
					
						
						
							
						
						4cbe605214
					 | 
					
						
						
							
							Feat: Allow per-target Metal debug flags (#2201)
						
						
						
						
						
						
						
						* feat: allow per-target Metal debug flags
* formatting fix 
						
						
							
						
					 | 
					
						2025-05-20 10:22:26 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Clement Liaw
							
						 
					 | 
					
						
						
							
						
						ab8883dd55
					 | 
					
						
						
							
							include mlx::core::version() symbols in the mlx static library (#2207)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-20 07:39:11 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						eebe73001a
					 | 
					
						
						
							
							fix large arg reduce (#2206)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-19 13:10:44 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						0359bf02c9
					 | 
					
						
						
							
							Nearest upsample (#2202)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-19 11:23:38 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						237f9e58a8
					 | 
					
						
						
							
							Fix BEFORE keyword in target_include_directories (#2204)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-19 06:10:44 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						8576e6fe36
					 | 
					
						
						
							
							fix conv2d bug + faster conv 1d (#2195)
						
						
						
						
						
						
						
						* fix conv2d bug + faster conv 1d
* revert sort + flaky test 
						
						
							
						
					 | 
					
						2025-05-18 06:05:11 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						0654543dcc
					 | 
					
						
						
							
							Add complex eigh (#2191)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-18 00:18:43 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						48ef3e74e2
					 | 
					
						
						
							
							reduce vjp for all and any (#2193)
						
						
						
						
						
						
							
						
					 | 
					
						2025-05-16 08:38:49 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Cheng
							
						 
					 | 
					
						
						
							
						
						7d4b378952
					 | 
					
						
						
							
							Include cuda_bf16.h for bfloat16 overloads (#2192)
						
						
						
						
						
						
						
						* Include cuda_bf16.h for bfloat16 overloads
* Add NO_GPU_MULTI(Eig) in cuda backend 
						
						
							
						
					 | 
					
						2025-05-16 06:44:42 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
						
					 |