diff --git a/speculative_decoding/README.md b/llms/speculative_decoding/README.md similarity index 100% rename from speculative_decoding/README.md rename to llms/speculative_decoding/README.md diff --git a/speculative_decoding/decoder.py b/llms/speculative_decoding/decoder.py similarity index 100% rename from speculative_decoding/decoder.py rename to llms/speculative_decoding/decoder.py index 9cdab1bb..0a426828 100644 --- a/speculative_decoding/decoder.py +++ b/llms/speculative_decoding/decoder.py @@ -1,12 +1,12 @@ -import transformers -from dataclasses import dataclass, field -from model import Llama -import mlx.core as mx -import mlx.nn as nn import time -import numpy as np +from dataclasses import dataclass, field from typing import List, Optional +import mlx.core as mx +import mlx.nn as nn +import numpy as np +import transformers +from model import Llama from prompts import create_urial_prompt diff --git a/speculative_decoding/main.py b/llms/speculative_decoding/main.py similarity index 99% rename from speculative_decoding/main.py rename to llms/speculative_decoding/main.py index b825b922..4a56b686 100644 --- a/speculative_decoding/main.py +++ b/llms/speculative_decoding/main.py @@ -1,4 +1,5 @@ import time + from decoder import SpeculativeDecoder # This will use the chat template from the primary model diff --git a/speculative_decoding/model.py b/llms/speculative_decoding/model.py similarity index 97% rename from speculative_decoding/model.py rename to llms/speculative_decoding/model.py index 0ccab438..1eb76553 100644 --- a/speculative_decoding/model.py +++ b/llms/speculative_decoding/model.py @@ -1,11 +1,10 @@ -from transformers import LlamaConfig, AutoModelForCausalLM -import mlx.core as mx -import mlx.nn as nn -from mlx.utils import tree_unflatten, tree_map -import mlx.core as mx -import mlx.nn as nn from typing import Optional, Tuple +import mlx.core as mx +import mlx.nn as nn +from mlx.utils import tree_map, tree_unflatten +from transformers import AutoModelForCausalLM, LlamaConfig + def create_additive_causal_mask(N: int, offset: int = 0, dtype: mx.Dtype = mx.float32): rinds = mx.arange(offset + N) diff --git a/speculative_decoding/prompts.py b/llms/speculative_decoding/prompts.py similarity index 99% rename from speculative_decoding/prompts.py rename to llms/speculative_decoding/prompts.py index b052ee63..4d357143 100644 --- a/speculative_decoding/prompts.py +++ b/llms/speculative_decoding/prompts.py @@ -24,5 +24,6 @@ Sure, here are some of the most common types of renewable energy sources: Each type of renewable energy source has its own set of advantages and challenges, but collectively, they represent our best hope at achieving sustainable and environmentally friendly energy consumption. Please let me know if you have any other questions! ```""" + def create_urial_prompt(message: str): return URIAL_1_SHOT + "\n\n# Query:\n```" + message + "```\n\n# Answer:\n```\n" diff --git a/speculative_decoding/requirements.txt b/llms/speculative_decoding/requirements.txt similarity index 100% rename from speculative_decoding/requirements.txt rename to llms/speculative_decoding/requirements.txt