From 1613e608a90c80d96055acb0455258235cd31d3a Mon Sep 17 00:00:00 2001 From: Awni Hannun Date: Thu, 14 Dec 2023 08:18:01 -0800 Subject: [PATCH] fix args, update README, remove extra files --- phi2/README.md | 44 +++++++++++++++++++++++------- phi2/hf_model.py | 23 ---------------- phi2/model.py | 5 +++- phi2/phi2_outputs.txt | 63 ------------------------------------------- 4 files changed, 38 insertions(+), 97 deletions(-) delete mode 100644 phi2/hf_model.py delete mode 100644 phi2/phi2_outputs.txt diff --git a/phi2/README.md b/phi2/README.md index c38f8a74..46a7c589 100644 --- a/phi2/README.md +++ b/phi2/README.md @@ -1,24 +1,48 @@ # Phi-2 -Phi-2 is a 2.7B parameter model released by Microsoft and trained on a mixture of GPT-4 outputs and clean web-text. -Its performance theoretically rivals much, much stronger models. +Phi-2 is a 2.7B parameter model released by Microsoft[^1] and trained on a mixture +of GPT-4 outputs and clean web-text. Its performance rivals +much, much stronger models. -## Downloading and Converting Weights +## Setup -To download and convert the model: +Download and convert the model: ```sh -python phi2/convert.py +python convert.py ``` -That will fill in `weights/phi-2.npz`. +which will make a file `weights.npz`. -## Running the Model +## Generate -🚧 (Not yet done) To run the model: +To generate text with the default prompt: ```sh -python phi2/generate.py +python model.py ``` -Layer-by-layer forward pass outputs are currently shown in the outputs.txt files. +Should give the output: + +``` +Answer: Mathematics is like a lighthouse that guides us through the darkness of +uncertainty. Just as a lighthouse emits a steady beam of light, mathematics +provides us with a clear path to navigate through complex problems. It +illuminates our understanding and helps us make sense of the world around us. + +Exercise 2: +Compare and contrast the role of logic in mathematics and the role of a compass +in navigation. 
+ +Answer: Logic in mathematics is like a compass in navigation. It helps +``` + +To use your own prompt: + +```sh +python model.py --prompt <your prompt here> --max_tokens <max_tokens_to_generate> +``` + +[^1]: For more details on the model see the [blog post]( +https://www.microsoft.com/en-us/research/blog/phi-2-the-surprising-power-of-small-language-models/) +and the [Hugging Face repo](https://huggingface.co/microsoft/phi-2) diff --git a/phi2/hf_model.py b/phi2/hf_model.py deleted file mode 100644 index d09ff108..00000000 --- a/phi2/hf_model.py +++ /dev/null @@ -1,23 +0,0 @@ -from transformers import AutoModelForCausalLM, AutoTokenizer - - -if __name__ == "__main__": - model = AutoModelForCausalLM.from_pretrained( - "microsoft/phi-2", torch_dtype="auto", trust_remote_code=True - ) - tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True) - - inputs = tokenizer( - '''def print_prime(n): - """ - Print all primes between 1 and n - """''', - return_tensors="pt", - return_attention_mask=False, - ) - - print(model(**inputs)) - - # outputs = model.generate(**inputs, max_length=200) - # text = tokenizer.batch_decode(outputs)[0] - # print(text) diff --git a/phi2/model.py b/phi2/model.py index 52bda27e..a99d3d5d 100644 --- a/phi2/model.py +++ b/phi2/model.py @@ -203,11 +203,14 @@ if __name__ == "__main__": prompt = mx.array(prompt) + print("[INFO] Generating with Phi-2...", flush=True) + print(args.prompt, end="", flush=True) + tokens = [] for token, _ in zip(generate(prompt, model), range(args.max_tokens)): tokens.append(token) - if (len(tokens) % args.tokens_per_eval) == 0: + if (len(tokens) % 10) == 0: mx.eval(tokens) s = tokenizer.decode([t.item() for t in tokens]) print(s, end="", flush=True) diff --git a/phi2/phi2_outputs.txt b/phi2/phi2_outputs.txt deleted file mode 100644 index 4f27e44b..00000000 --- a/phi2/phi2_outputs.txt +++ /dev/null @@ -1,63 +0,0 @@ -(HF) Output of Embeddings - -tensor([[[-0.0353, 0.0045, 0.0208, ..., -0.0117, 0.0041, 0.0075], - [-0.0172, 0.0236, -0.0051, 
..., 0.0141, 0.0115, 0.0058], - [-0.0148, 0.0043, -0.0252, ..., 0.0179, 0.0025, -0.0008], - ..., - [ 0.0003, 0.0051, 0.0002, ..., 0.0043, 0.0075, 0.0049], - [-0.0110, 0.0472, 0.0030, ..., 0.0098, -0.0075, 0.0146], - [-0.0085, -0.0219, -0.0016, ..., -0.0059, 0.0109, -0.0016]]], - device='cuda:0', dtype=torch.float16, grad_fn=) - -(MLX) Output of Embeddings - -array([[[-0.0352783, 0.00445175, 0.020813, ..., -0.0117188, 0.00411606, 0.00748444], - [-0.0171509, 0.0236053, -0.00508881, ..., 0.0141144, 0.0115204, 0.00582504], - [-0.0147858, 0.00426102, -0.0252075, ..., 0.0179443, 0.0024662, -0.00076437], - ..., - [0.000337124, 0.00508499, 0.000193119, ..., 0.00427628, 0.00753403, 0.00492477], - [-0.0110092, 0.0472107, 0.00295448, ..., 0.00982666, -0.00747681, 0.0145721], - [-0.00852203, -0.0218964, -0.00161839, ..., -0.00592422, 0.0108643, -0.00162697]]], dtype=float16) - -(HF) Output of First Attention Layer - -tensor([[[-0.2000, 0.4849, 0.9863, ..., -0.2209, 0.1355, 0.3469], - [ 0.4922, -0.3865, 0.8428, ..., 0.5894, -0.0069, -0.5278], - [ 0.0902, 0.1028, 0.6826, ..., 0.1394, -0.8145, -0.1880], - ..., - [ 0.2380, 0.0555, -0.3005, ..., 0.0372, -0.0895, 0.0255], - [ 0.2512, 0.1949, 0.3401, ..., 0.3625, -0.3103, -0.1064], - [-0.0905, 0.0665, 0.5210, ..., -0.0767, -0.2460, -0.1449]]], - device='cuda:0', dtype=torch.float16, grad_fn=) -torch.Size([1, 23, 2560]) - -(MLX) Output of First Attention Layer - -array([[[-0.199973, 0.485224, 0.987237, ..., -0.220847, 0.13511, 0.346074], - [0.44883, -0.271683, 0.877478, ..., 0.653217, -0.0929724, -0.711176], - [-0.233398, 5.7824e-05, 0.435001, ..., 0.0504494, -0.623998, -0.438785], - ..., - [0.123587, -0.237459, -0.447518, ..., 0.0653363, -0.0767153, -0.341505], - [0.187798, 0.331209, 0.0827338, ..., 0.529453, -0.582141, -0.165316], - [-0.413614, 0.134572, 0.685769, ..., 0.0796088, 0.0217719, -0.118885]]], dtype=float32) -[1, 23, 2560] - -(HF) Overall Output of Inputs: - -tensor([[[ 6.4688, 5.1016, 1.9658, ..., -2.9043, -2.9043, 
-2.9043], - [ 5.2188, 6.4414, 5.1914, ..., -0.1852, -0.1862, -0.1866], - [ 4.3516, 5.3281, 5.9922, ..., -0.3689, -0.3699, -0.3696], - ..., - [10.4141, 11.7031, 12.5859, ..., 0.7778, 0.7769, 0.7754], - [10.7188, 11.7891, 13.3125, ..., 1.6123, 1.6113, 1.6104], - [10.8047, 12.0234, 12.4375, ..., 0.2321, 0.2314, 0.2317]]], - -(MLX) Overall Output of Inputs: - -array([[[6.46632, 5.10102, 1.96306, ..., -2.90427, -2.90341, -2.90392], - [4.5092, 5.90938, 4.98036, ..., -0.411165, -0.412062, -0.412547], - [4.34246, 5.7794, 6.13245, ..., -0.40106, -0.402052, -0.401838], - ..., - [6.61827, 10.4022, 12.1672, ..., 0.602787, 0.602138, 0.600666], - [7.96546, 12.9569, 14.7947, ..., -0.347764, -0.348587, -0.34937], - [8.22272, 10.6631, 11.5968, ..., -1.12037, -1.12025, -1.12152]]], dtype=float32) \ No newline at end of file