Awni Hannun
							
						 
					 | 
					
						
						
							
						
						7bb063bcb3
					 | 
					
						
						
							
							Enable vjp for quantized scale and bias (#2129)
						
						
						
						
						
						
						
						* Enable vjp for quantized scale and bias
* higher tol 
						
						
					 | 
					
						2025-04-29 13:03:09 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						fbc89e3ced
					 | 
					
						
						
							
							fix pinv (#2110)
						
						
						
						
						
						
					 | 
					
						2025-04-23 13:08:28 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Param Thakkar
							
						 
					 | 
					
						
						
							
						
						600e87e03c
					 | 
					
						
						
							
							Added output_padding parameters in conv_transpose (#2092)
						
						
						
						
						
						
					 | 
					
						2025-04-23 09:26:33 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Hyunsung Lee
							
						 
					 | 
					
						
						
							
						
						3836445241
					 | 
					
						
						
							
							Add broadcast_shapes in python API (#2091)
						
						
						
						
						
						
					 | 
					
						2025-04-22 18:57:39 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Yury Popov
							
						 
					 | 
					
						
						
							
						
						1d2c9d6a07
					 | 
					
						
						
							
							Complex scan (#2094)
						
						
						
						
						
						
					 | 
					
						2025-04-22 18:56:28 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						e8ac6bd2f5
					 | 
					
						
						
							
							irfft throws instead of segfaults on scalars (#2109)
						
						
						
						
						
						
					 | 
					
						2025-04-22 10:25:55 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						fdadc4f22c
					 | 
					
						
						
							
							Add more complex unary ops (#2101)
						
						
						
						
						
						
					 | 
					
						2025-04-21 13:04:54 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						79b527f45f
					 | 
					
						
						
							
							conv vmap (#2102)
						
						
						
						
						
						
					 | 
					
						2025-04-21 13:04:39 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						5de6d94a90
					 | 
					
						
						
							
							Gather qmm batched kernel and refactoring of quantized (#2078)
						
						
						
						
						
						
					 | 
					
						2025-04-17 13:53:11 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						99eefd2ec0
					 | 
					
						
						
							
							Gather mm new kernel and small refactoring (#2040)
						
						
						
						
						
						
					 | 
					
						2025-04-14 16:37:36 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Yury Popov
							
						 
					 | 
					
						
						
							
						
						e9e268336b
					 | 
					
						
						
							
							LogCumSumExp (#2069)
						
						
						
						
						
						
					 | 
					
						2025-04-13 01:27:29 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						c4189a38e4
					 | 
					
						
						
							
							Add float mask to sdpa vector (#2068)
						
						
						
						
						
						
					 | 
					
						2025-04-11 17:29:40 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						ef7ece9851
					 | 
					
						
						
							
							fix fft bug (#2062)
						
						
						
						
						
						
					 | 
					
						2025-04-10 19:41:27 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						ddaa4b7dcb
					 | 
					
						
						
							
							Fix the test and add custom min/max reductions for uncommon MPI types (#2060)
						
						
						
						
						
						
					 | 
					
						2025-04-10 17:01:17 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Anastasiia Filippova
							
						 
					 | 
					
						
						
							
						
						515f104926
					 | 
					
						
						
							
							Min / max reductions (#2041)
						
						
						
						
						
						
					 | 
					
						2025-04-09 23:22:20 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						00794c42bc
					 | 
					
						
						
							
							Fix causal mask sdpa vec (#2053)
						
						
						
						
						
						
						
						* fix sdpa vector causal mask
* test 
						
						
					 | 
					
						2025-04-08 09:11:23 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						f2c85308c1
					 | 
					
						
						
							
							add a half simd gemm fallback (#2046)
						
						
						
						
						
						
						
						* add a half simd gemm fallback
* nit 
						
						
					 | 
					
						2025-04-07 09:31:29 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jagrit Digani
							
						 
					 | 
					
						
						
							
						
						8777fd104f
					 | 
					
						
						
							
							Depthwise Conv2D optimization (#2036)
						
						
						
						
						
						
						
						- Add new specialized kernel for small kernel (kernels size <= 7), small strides (strides <= 2) depthwise 2d convolutions
- Add related tests 
						
						
					 | 
					
						2025-04-03 09:42:04 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						de5f38fd48
					 | 
					
						
						
							
							Custom logsumexp (#2028)
						
						
						
						
						
						
						
						* initial custom logsumexp
* more tests
* comments + fix 
						
						
					 | 
					
						2025-03-31 07:36:55 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						ec2854b13a
					 | 
					
						
						
							
							Swap -inf for finite_minimum value (#2029)
						
						
						
						
						
						
					 | 
					
						2025-03-30 21:55:04 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						28f39e9038
					 | 
					
						
						
							
							Log for complex numbers in Metal (#2025)
						
						
						
						
						
						
						
						* Log for complex numbers in Metal
* fix log2 
						
						
					 | 
					
						2025-03-30 17:04:38 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						05d7118561
					 | 
					
						
						
							
							causal vector sdpa (#2018)
						
						
						
						
						
						
						
						* causal vector sdpa
* get rid of memory threshold 
						
						
					 | 
					
						2025-03-28 12:36:13 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						98b901ad66
					 | 
					
						
						
							
							enable complex gemm (#2017)
						
						
						
						
						
						
					 | 
					
						2025-03-28 10:45:13 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						5580b47291
					 | 
					
						
						
							
							iinfo and scalar overflow detection (#2009)
						
						
						
						
						
						
					 | 
					
						2025-03-27 19:54:56 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						a84cc0123f
					 | 
					
						
						
							
							promote mask when needed (#1998)
						
						
						
						
						
						
					 | 
					
						2025-03-23 19:58:28 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						4eef8102c9
					 | 
					
						
						
							
							Distributed layers (#1270)
						
						
						
						
						
						
					 | 
					
						2025-03-21 13:52:17 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						69e4dd506b
					 | 
					
						
						
							
							Add a ring all gather (#1985)
						
						
						
						
						
						
					 | 
					
						2025-03-21 13:36:51 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						2a980a76ce
					 | 
					
						
						
							
							Add stats and limit to common allocator and enable tests (#1988)
						
						
						
						
						
						
						
						* add stats to common allocator and enable tests
* linux memory and default
* fix 
						
						
					 | 
					
						2025-03-21 12:28:36 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						4e1994e9d7
					 | 
					
						
						
							
							move memory APIs into top level mlx.core (#1982)
						
						
						
						
						
						
					 | 
					
						2025-03-21 07:25:12 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						7b7e2352cd
					 | 
					
						
						
							
							fix malloc or wait deadlock (#1976)
						
						
						
						
						
						
					 | 
					
						2025-03-20 16:48:43 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						005e7efa64
					 | 
					
						
						
							
							fix mask in sdpa (#1980)
						
						
						
						
						
						
						
						* fix mask in sdpa
* fix attention mask
* Re-enable routing for array mask
---------
Co-authored-by: Jagrit Digani <digani@apple.com> 
						
						
					 | 
					
						2025-03-20 14:53:12 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jagrit Digani
							
						 
					 | 
					
						
						
							
						
						b42d13ec84
					 | 
					
						
						
							
							Update attention tests to show diff, disable array masks (#1978)
						
						
						
						
						
						
					 | 
					
						2025-03-20 14:25:38 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jagrit Digani
							
						 
					 | 
					
						
						
							
						
						9adcd1a650
					 | 
					
						
						
							
							Support fused masking in Attention (#1924)
						
						
						
						
						
						
						
						* Update API to allow mask='causal' in fast::sdpa
* Add fallback
* Update steel::AttnParams
* Fix typo
* WIP, basic causal
* Update tests
* Update benchmarking
* Update masking loop limits
* Add bool masking and update tests
* Update additive mask
* Update benchmarks
* Update benchmarks
* Update tests
* Update for bfloat error
* Update early exit
* Add random seed to tests 
						
						
					 | 
					
						2025-03-20 11:01:32 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						3c164fca8c
					 | 
					
						
						
							
							Fix multistream GPU deadlock (#1969)
						
						
						
						
						
						
						
						* fix multistream GPU deadlock
* comments 
						
						
					 | 
					
						2025-03-20 07:19:47 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						c6ea2ba329
					 | 
					
						
						
							
							Use same accumulation precision in gemv as gemm (#1962)
						
						
						
						
						
						
						
						* use same accumulation precision in gemv as gemm
* faster
* fix compile 
						
						
					 | 
					
						2025-03-16 07:13:24 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						2770a10240
					 | 
					
						
						
							
							fix grad with inplace updates (#1961)
						
						
						
						
						
						
					 | 
					
						2025-03-13 19:13:09 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						32da94507a
					 | 
					
						
						
							
							fix vmap for flatten (#1955)
						
						
						
						
						
						
					 | 
					
						2025-03-11 10:42:22 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						3c3e558c60
					 | 
					
						
						
							
							Support transposed head/seq for kv (#1950)
						
						
						
						
						
						
						
						* support transposed head/seq for kv
* fix flaky test
* nit 
						
						
					 | 
					
						2025-03-10 10:53:45 -07:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Abe Leininger
							
						 
					 | 
					
						
						
							
						
						3835a428c5
					 | 
					
						
						
							
							Adds nuclear norm support (#1894)
						
						
						
						
						
						
						
						* adjust norm unit test tolerance 
						
						
					 | 
					
						2025-03-04 13:26:02 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						9680f72cca
					 | 
					
						
						
							
							Add a multi optimizer (#1916)
						
						
						
						
						
						
					 | 
					
						2025-03-04 13:16:35 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						e613d0eaf0
					 | 
					
						
						
							
							SDPA support for small batch (over sequence) queries (#1922)
						
						
						
						
						
						
						
						* batch query sdpa
* batch sdpa for query 
						
						
					 | 
					
						2025-03-04 10:59:04 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						6bcd6bcf70
					 | 
					
						
						
							
							fix donation in scan (#1917)
						
						
						
						
						
						
					 | 
					
						2025-03-03 11:30:59 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						4e7cd31d12
					 | 
					
						
						
							
							Fix slice data size (#1913)
						
						
						
						
						
						
						
						* fix slice data size
* add test 
						
						
					 | 
					
						2025-03-02 21:50:42 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						5e6c130d93
					 | 
					
						
						
							
							RMS norm without scaling (#1915)
						
						
						
						
						
						
					 | 
					
						2025-02-28 20:26:57 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						7d042f17fe
					 | 
					
						
						
							
							Double for lapack (#1904)
						
						
						
						
						
						
						
						* double for lapack ops
* add double support for lapack ops 
						
						
					 | 
					
						2025-02-25 11:39:36 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						28b8079e30
					 | 
					
						
						
							
							fix double type promotion (#1901)
						
						
						
						
						
						
					 | 
					
						2025-02-25 06:00:53 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						7face5d9fd
					 | 
					
						
						
							
							fix cpu compile (#1897)
						
						
						
						
						
						
					 | 
					
						2025-02-24 14:10:30 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						2d0f384b6f
					 | 
					
						
						
							
							fix simd erf_inv (#1896)
						
						
						
						
						
						
					 | 
					
						2025-02-24 13:57:47 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Angelos Katharopoulos
							
						 
					 | 
					
						
						
							
						
						10b271d963
					 | 
					
						
						
							
							Ring update (#1885)
						
						
						
						
						
						
					 | 
					
						2025-02-20 14:32:31 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Awni Hannun
							
						 
					 | 
					
						
						
							
						
						bbda0fdbdb
					 | 
					
						
						
							
							Allow non-square lu (#1889)
						
						
						
						
						
						
					 | 
					
						2025-02-20 08:13:23 -08:00 | 
					
					
						
						
						
							
							
							
							
							
							
							
							
						
					 |