mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 21:01:32 +08:00
Add support for cohere2 (#1157)
* add support for cohere2 * revert to act_fn to silu * fix tests and sliding window attention * add tests * add to tuner * fix sliding window * add coauthor :) Co-authored-by: n8programs <43304488+N8python@users.noreply.github.com> * Add rotating kvcache to save space * some nits * style * nits --------- Co-authored-by: n8programs <43304488+N8python@users.noreply.github.com> Co-authored-by: N8 <n8@n8programs.com> Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
@@ -187,9 +187,10 @@ def maybe_quantize_kv_cache(prompt_cache, quantized_kv_start, kv_group_size, kv_
|
||||
and prompt_cache[0].offset > quantized_kv_start
|
||||
):
|
||||
for i in range(len(prompt_cache)):
|
||||
prompt_cache[i] = prompt_cache[i].to_quantized(
|
||||
group_size=kv_group_size, bits=kv_bits
|
||||
)
|
||||
if isinstance(prompt_cache[i], cache.KVCache):
|
||||
prompt_cache[i] = prompt_cache[i].to_quantized(
|
||||
group_size=kv_group_size, bits=kv_bits
|
||||
)
|
||||
|
||||
|
||||
def generate_step(
|
||||
|
Reference in New Issue
Block a user