mirror of
				https://github.com/ml-explore/mlx.git
				synced 2025-11-01 00:28:11 +08:00 
			
		
		
		
	non square qr (#1783)
This commit is contained in:
		| @@ -41,7 +41,7 @@ template <typename T> | |||||||
| void qrf_impl(const array& a, array& q, array& r) { | void qrf_impl(const array& a, array& q, array& r) { | ||||||
|   const int M = a.shape(-2); |   const int M = a.shape(-2); | ||||||
|   const int N = a.shape(-1); |   const int N = a.shape(-1); | ||||||
|   const int lda = std::max(M, N); |   const int lda = M; | ||||||
|   size_t num_matrices = a.size() / (M * N); |   size_t num_matrices = a.size() / (M * N); | ||||||
|   int num_reflectors = std::min(M, N); |   int num_reflectors = std::min(M, N); | ||||||
|   auto tau = |   auto tau = | ||||||
| @@ -89,13 +89,16 @@ void qrf_impl(const array& a, array& q, array& r) { | |||||||
|   allocator::free(work); |   allocator::free(work); | ||||||
|  |  | ||||||
|   r.set_data(allocator::malloc_or_wait(r.nbytes())); |   r.set_data(allocator::malloc_or_wait(r.nbytes())); | ||||||
|   copy_inplace(in, r, CopyType::General); |  | ||||||
|  |  | ||||||
|   for (int i = 0; i < num_matrices; ++i) { |   for (int i = 0; i < num_matrices; ++i) { | ||||||
|     // Zero lower triangle |     /// num_reflectors x N | ||||||
|     for (int j = 0; j < r.shape(-2); ++j) { |     for (int j = 0; j < r.shape(-2); ++j) { | ||||||
|       for (int k = 0; k < j; ++k) { |       for (int k = 0; k < j; ++k) { | ||||||
|         r.data<T>()[i * N * M + j * N + k] = 0; |         r.data<T>()[i * N * num_reflectors + j * N + k] = 0; | ||||||
|  |       } | ||||||
|  |       for (int k = j; k < r.shape(-1); ++k) { | ||||||
|  |         r.data<T>()[i * N * num_reflectors + j * N + k] = | ||||||
|  |             in.data<T>()[i * N * M + j + k * M]; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -104,7 +107,7 @@ void qrf_impl(const array& a, array& q, array& r) { | |||||||
|   lwork = -1; |   lwork = -1; | ||||||
|   lpack<T>::xorgqr( |   lpack<T>::xorgqr( | ||||||
|       &M, |       &M, | ||||||
|       &N, |       &num_reflectors, | ||||||
|       &num_reflectors, |       &num_reflectors, | ||||||
|       nullptr, |       nullptr, | ||||||
|       &lda, |       &lda, | ||||||
| @@ -120,7 +123,7 @@ void qrf_impl(const array& a, array& q, array& r) { | |||||||
|     // Compute Q |     // Compute Q | ||||||
|     lpack<T>::xorgqr( |     lpack<T>::xorgqr( | ||||||
|         &M, |         &M, | ||||||
|         &N, |         &num_reflectors, | ||||||
|         &num_reflectors, |         &num_reflectors, | ||||||
|         in.data<float>() + M * N * i, |         in.data<float>() + M * N * i, | ||||||
|         &lda, |         &lda, | ||||||
| @@ -131,7 +134,15 @@ void qrf_impl(const array& a, array& q, array& r) { | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   q.set_data(allocator::malloc_or_wait(q.nbytes())); |   q.set_data(allocator::malloc_or_wait(q.nbytes())); | ||||||
|   copy_inplace(in, q, CopyType::General); |   for (int i = 0; i < num_matrices; ++i) { | ||||||
|  |     // M x num_reflectors | ||||||
|  |     for (int j = 0; j < q.shape(-2); ++j) { | ||||||
|  |       for (int k = 0; k < q.shape(-1); ++k) { | ||||||
|  |         q.data<T>()[i * M * num_reflectors + j * num_reflectors + k] = | ||||||
|  |             in.data<T>()[i * N * M + j + k * M]; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|   // Cleanup |   // Cleanup | ||||||
|   allocator::free(work); |   allocator::free(work); | ||||||
|   | |||||||
| @@ -187,13 +187,13 @@ std::pair<array, array> qr(const array& a, StreamOrDevice s /* = {} */) { | |||||||
|         << a.ndim() << " dimensions."; |         << a.ndim() << " dimensions."; | ||||||
|     throw std::invalid_argument(msg.str()); |     throw std::invalid_argument(msg.str()); | ||||||
|   } |   } | ||||||
|   if (a.shape(-1) != a.shape(-2)) { |   int k = std::min(a.shape(-2), a.shape(-1)); | ||||||
|     throw std::invalid_argument( |   auto q_shape = a.shape(); | ||||||
|         "[linalg::qr] Support for non-square matrices NYI."); |   q_shape.back() = k; | ||||||
|   } |   auto r_shape = a.shape(); | ||||||
|  |   r_shape[r_shape.size() - 2] = k; | ||||||
|   auto out = array::make_arrays( |   auto out = array::make_arrays( | ||||||
|       {a.shape(), a.shape()}, |       {std::move(q_shape), std::move(r_shape)}, | ||||||
|       {a.dtype(), a.dtype()}, |       {a.dtype(), a.dtype()}, | ||||||
|       std::make_shared<QRF>(to_stream(s)), |       std::make_shared<QRF>(to_stream(s)), | ||||||
|       {astype(a, a.dtype(), s)}); |       {astype(a, a.dtype(), s)}); | ||||||
|   | |||||||
| @@ -103,7 +103,7 @@ class TestLinalg(mlx_tests.MLXTestCase): | |||||||
|         Q, R = mx.linalg.qr(A, stream=mx.cpu) |         Q, R = mx.linalg.qr(A, stream=mx.cpu) | ||||||
|         out = Q @ R |         out = Q @ R | ||||||
|         self.assertTrue(mx.allclose(out, A)) |         self.assertTrue(mx.allclose(out, A)) | ||||||
|         out = Q @ Q |         out = Q.T @ Q | ||||||
|         self.assertTrue(mx.allclose(out, mx.eye(2), rtol=1e-5, atol=1e-7)) |         self.assertTrue(mx.allclose(out, mx.eye(2), rtol=1e-5, atol=1e-7)) | ||||||
|         self.assertTrue(mx.allclose(mx.tril(R, -1), mx.zeros_like(R))) |         self.assertTrue(mx.allclose(mx.tril(R, -1), mx.zeros_like(R))) | ||||||
|         self.assertEqual(Q.dtype, mx.float32) |         self.assertEqual(Q.dtype, mx.float32) | ||||||
| @@ -116,10 +116,21 @@ class TestLinalg(mlx_tests.MLXTestCase): | |||||||
|         for a, q, r in zip(A, Q, R): |         for a, q, r in zip(A, Q, R): | ||||||
|             out = q @ r |             out = q @ r | ||||||
|             self.assertTrue(mx.allclose(out, a)) |             self.assertTrue(mx.allclose(out, a)) | ||||||
|             out = q @ q |             out = q.T @ q | ||||||
|             self.assertTrue(mx.allclose(out, mx.eye(2), rtol=1e-5, atol=1e-7)) |             self.assertTrue(mx.allclose(out, mx.eye(2), rtol=1e-5, atol=1e-7)) | ||||||
|             self.assertTrue(mx.allclose(mx.tril(r, -1), mx.zeros_like(r))) |             self.assertTrue(mx.allclose(mx.tril(r, -1), mx.zeros_like(r))) | ||||||
|  |  | ||||||
|  |         # Non square matrices | ||||||
|  |         for shape in [(4, 8), (8, 4)]: | ||||||
|  |             A = mx.random.uniform(shape=shape) | ||||||
|  |             Q, R = mx.linalg.qr(A, stream=mx.cpu) | ||||||
|  |             out = Q @ R | ||||||
|  |             self.assertTrue(mx.allclose(out, A, rtol=1e-4, atol=1e-6)) | ||||||
|  |             out = Q.T @ Q | ||||||
|  |             self.assertTrue( | ||||||
|  |                 mx.allclose(out, mx.eye(min(A.shape)), rtol=1e-4, atol=1e-6) | ||||||
|  |             ) | ||||||
|  |  | ||||||
|     def test_svd_decomposition(self): |     def test_svd_decomposition(self): | ||||||
|         A = mx.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=mx.float32) |         A = mx.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=mx.float32) | ||||||
|         U, S, Vt = mx.linalg.svd(A, stream=mx.cpu) |         U, S, Vt = mx.linalg.svd(A, stream=mx.cpu) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Awni Hannun
					Awni Hannun