diff --git a/gguflib.c b/gguflib.c
index 5e71ecf..16e3f3a 100644
--- a/gguflib.c
+++ b/gguflib.c
@@ -521,16 +521,16 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
             i++;
         }
     } else if (tensor->type == GGUF_TYPE_Q8_0) {
-        /* Very simple layout: |16 bit delta|32 x 8bit weights|
-         * Each weight is delta * quantized_weight[0..31] */
+        /* Very simple layout: |16 bit scale|32 x 8bit weights|
+         * Each weight is scale * quantized_weight[0..31] */
         int8_t *block = (int8_t*)tensor->weights_data;
         uint64_t i = 0; // i-th weight to dequantize.
         while(i < tensor->num_weights) {
-            /* For each block get the delta and convert all the
+            /* For each block get the scale and convert all the
              * weights in the block. */
-            float delta = from_half(*((uint16_t*)block));
+            float scale = from_half(*((uint16_t*)block));
             for (uint32_t j = 0; j < tf->items_per_block; j++) {
-                f[i++] = block[j+2] * delta; // j+2 to skip the delta bytes.
+                f[i++] = block[j+2] * scale; // j+2 to skip the scale bytes.
                 if (i == tensor->num_weights) break;
             }
             block += block_size; // Go to the next block.
@@ -540,22 +540,26 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
         uint64_t i = 0; // i-th weight to dequantize.
         while(i < tensor->num_weights) {
             /* Q4_K super-blocks have 256 total weights, split in 8 sub-block.
-             * Each 8 sub-blocks have a different set of deltas/mins, so
-             * there are 16 total values for deltas/mins, but the deltas/mins
-             * are also quantized (6 bits each) using two different deltas:
-             * delta_of_deltas and delta_of_mins, that are two FP16 values
+             * Each of the 8 sub-blocks has its own scale and min, so
+             * there are 16 total values for scales/mins, but the scales/mins
+             * are also quantized (6 bits each) using two different scales:
+             * scale_of_scales and scale_of_mins, that are two FP16 values
              * at the start of the super block, so:
              *
-             * |FP16 d_of_deltas  | + 
-             * |FP16 d_of_mins    | +
+             * |FP16 s_of_scales | + 
+             * |FP16 s_of_mins   | +
              * |16 6 bit integers d,m pairs, one per sub-block of 32 ele | +
              * |256 x 4bit weights|
+             *
+             * Each quantized weight 'q' is restored as:
+             *
+             *      w = q * scale - min;
              */
-            float deltas_delta = from_half(*((uint16_t*)block));
-            float mins_delta  = from_half(*((uint16_t*)(block+2)));
+            float scales_scale = from_half(*((uint16_t*)block));
+            float mins_scale  = from_half(*((uint16_t*)(block+2)));
             block += 4;
             
-            /* Extract the 16 x 6 bit values deltas-mins pairs. The
+            /* Extract the 16 x 6 bit values scales-mins pairs. The
              * encoding of those values is odd because of performance
              * reasons:
              *
@@ -566,10 +570,10 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
              *  66222222|77333333|44444444|55555555|66666666|77777777
              *
              * In the above diagram you can see the 12 bytes and the
-             * deltas/mins 6 bits encodings. */
+             * scales/mins 6 bits encodings. */
 
-            /* Scale deltas/mins. */
-            float deltas[8], mins[8];
+            /* Dequantize the 6-bit scales/mins via the super-block scales. */
+            float scales[8], mins[8];
             for (int j = 0; j < 8; j++) {
                 uint8_t d,m;
 		if (j < 4) {
@@ -579,8 +583,8 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
 		    d = (block[j+4] & 0xF) | ((block[j-4] >> 6) << 4);
 		    m = (block[j+4] >> 4) | ((block[j-0] >> 6) << 4);
 		}
-                deltas[j] = d * deltas_delta;
-                mins[j] = m * mins_delta;
+                scales[j] = d * scales_scale;
+                mins[j] = m * mins_scale;
             }
             block += 12; // Seek 4-bit weights start.
 
@@ -591,23 +595,94 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
              * bits of each byte. Second 32 weights of the second
              * block are lower 4 bits of each byte. */
             for (uint32_t b = 0; b < 8; b += 2) {
-                float delta = deltas[b];
+                float scale = scales[b];
                 float min = mins[b];
                 /* First set: higher bits. */
                 for (uint32_t j = 0; j < 32; j++) {
                     uint8_t w = block[j] & 0xf;
-                    f[i++] = w * delta - min;
+                    f[i++] = w * scale - min;
                     if (i == tensor->num_weights) return f;
                 }
                 /* Second set: lower bits. */
                 for (uint32_t j = 0; j < 32; j++) {
                     uint8_t w = block[j] >> 4;
-                    f[i++] = w * delta - min;
+                    f[i++] = w * scale - min;
                     if (i == tensor->num_weights) return f;
                 }
                 block += 32; // Skip the two processed blocks.
             }
         }
+    } else if (tensor->type == GGUF_TYPE_Q6_K) {
+        uint8_t *block = (uint8_t*)tensor->weights_data;
+        uint64_t i = 0; // i-th weight to dequantize.
+        while(i < tensor->num_weights) {
+            /* Q6_K super-blocks have 256 total weights, split in 16 sub-blocks
+             * of 16 elements each. There are no mins, just scales. Each
+             * sub-block has its own scale, quantized to 8 bits and in turn
+             * scaled by a single FP16 scale-of-scales.
+             *
+             * |128 bytes of lower 4 bits of quants| +
+             * |64 bytes of higher 2 bits of quants| +
+             * |16 bytes of 8-bit block scales | +
+             * |A single FP16 value: the scale of the scales above |
+             *
+             * Let's call "L" the lower 4 bits array (128 bytes)
+             * and "H" the higher 2 bits array (64 bytes)
+             *
+             * Values are logically encoded in two 128 weights clusters
+             * where the first cluster is the first 64 bytes of "L" and
+             * the first 32 bytes of "H".
+             *
+             * Lower 4 bits of the i-th weight from 0 to 63 are stored in the
+             * lower 4 bits of L[i], while lower 4 bits of the i-th weight
+             * from 64 to 127 are stored in the higher 4 bits of L[i-64]:
+             *
+             * L = |64640000|65650101|66660202|...
+             *
+             * So this actually is: w_low = (L[i%64] >> i/64*4) & 15
+             *
+             * H = |96643200|97653301|98663402|...
+             *
+             * Higher bits of the i-th weight are arranged like that:
+             *
+             * From 0 to 31,  bits 0,1 of H[i]
+             * From 32 to 63, bits 3,2 of H[i-32]
+             * From 64 to 95, bits 5,4 of H[i-64]
+             * From 96 to 127, bits 7,6 of H[i-96]
+             *
+             * So this actually is: w_high = ((H[i%32] >> i/32*2) & 3) << 4
+             * The same is true with the next 128 weights cluster, but
+             * everything is relative to the second half of H and L.
+             *
+             * Finally, we need to pick the right scale from the
+             * 16 sub-block scales array. Scales are just sequential,
+             * so the i-th weight uses scale[i/16].
+             *
+             * Important: In Q6_K the 6-bit quants are wisely stored
+             * as unsigned integers + 32, so that there is no need to
+             * do sign bit extension in order to convert the 6-bit value
+             * into 8 bit value. Instead the values from -32 to 31 are
+             * remapped in the 0-63 range (just adding 32).
+             */
+            float super_scale = from_half(*((uint16_t*)(block+128+64+16)));
+            uint8_t *L = block;
+            uint8_t *H = block+128;
+            int8_t *scales = (int8_t*)block+128+64;
+            for (int cluster = 0; cluster < 2; cluster++) {
+                for (uint64_t j = 0; j < 128; j++) {
+                    f[i] = (super_scale * scales[j/16]) *
+                           ((int8_t)
+                            ((((L[j%64] >> (j/64*4)) & 0xF) |
+                             (((H[j%32] >> (j/32*2)) & 3) << 4)))-32);
+                    i++;
+                    if (i == tensor->num_weights) return f;
+                }
+                L += 64;
+                H += 32;
+                scales += 8;
+            }
+            block += 128+64+16+2; // Go to the next block.
+        }
     } else {
         errno = EINVAL;
         return NULL;