mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-29 01:58:12 +08:00
GGUF: Avoid dequantization when format is compatible (#426)
* GGUF: Don't dequantize q4_1
* Fix weight order. First in low bits
* Add unpacking for q4_0
* Don't dequantize q8_0
* rebase quants and split file
* don't quantize every weight
* reapply patch
* error handling

---------

Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
@@ -500,7 +500,6 @@ TEST_CASE("test metal enable/disable cache") {
|
||||
auto buf = a.malloc(size, false);
|
||||
auto buf_ptr = static_cast<MTL::Buffer*>(buf.ptr());
|
||||
unsigned char first_byte = *reinterpret_cast<unsigned char*>(buf_ptr);
|
||||
printf("first byte: %d\n", first_byte);
|
||||
|
||||
// Release a
|
||||
a.free(buf);
|
||||
@@ -508,7 +507,6 @@ TEST_CASE("test metal enable/disable cache") {
|
||||
// If released successfully, the first byte should be different from the
|
||||
// first byte before release
|
||||
unsigned char new_first_byte = *reinterpret_cast<unsigned char*>(buf_ptr);
|
||||
printf("new first byte: %d\n", new_first_byte);
|
||||
|
||||
CHECK_NE(new_first_byte, first_byte);
|
||||
}
|
||||
|
Reference in New Issue
Block a user