mlx-examples (mirror of https://github.com/ml-explore/mlx-examples.git)

Merge branch 'ml-explore:main' into fix-unsupported-scalartype
commit 85345d42cb

.gitignore (vendored)
@@ -127,3 +127,5 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+.idea/
+.vscode/

@@ -17,6 +17,9 @@ weights you will need to [request
 access](https://docs.google.com/forms/d/e/1FAIpQLSfqNECQnMkycAp2jP4Z9TFX0cGR4uf7b_fBxjY_OjhJILlKGA/viewform)
 from Meta.
 
+Alternatively, you can also download select converted checkpoints from the
+[mlx-llama](https://huggingface.co/mlx-llama) community organisation on
+Hugging Face and skip the conversion step.
 
 Convert the weights with:

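For readers who take the download route mentioned in the hunk above, a minimal sketch of fetching one of the converted checkpoints with the `huggingface_hub` Python client follows. The repository id used here is only an illustrative placeholder; check the mlx-llama organisation page for the names that actually exist.

```
# Sketch: download a converted checkpoint from the mlx-llama organisation
# and skip the local conversion step. The repo id below is hypothetical;
# browse https://huggingface.co/mlx-llama for the real checkpoint names.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="mlx-llama/Llama-2-7b-chat-mlx",  # placeholder repo id
    local_dir="mlx_llama_weights",
)
print(f"Checkpoint files downloaded to {local_dir}")
```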
@@ -1,3 +1,4 @@
 mlx
 sentencepiece
 torch
+numpy

@@ -65,8 +65,9 @@ Performance
 -----------
 
 The following table compares the performance of the UNet in stable diffusion.
-We report throughput in images per second for the provided `txt2image.py`
-script and the `diffusers` library using the MPS PyTorch backend.
+We report throughput in images per second **processed by the UNet** for the
+provided `txt2image.py` script and the `diffusers` library using the MPS
+PyTorch backend.
 
 At the time of writing this comparison convolutions are still some of the least
 optimized operations in MLX. Despite that, MLX still achieves **~40% higher
@@ -93,3 +94,7 @@ The above experiments were made on an M2 Ultra with PyTorch version 2.1,
 diffusers version 0.21.4 and transformers version 4.33.3. For the generation we
 used classifier free guidance which means that the above batch sizes result in
 double the images processed by the UNet.
+
+Note that the above table means that it takes about 90 seconds to fully
+generate 16 images with MLX and 50 diffusion steps with classifier free
+guidance and about 120 for PyTorch.

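As a sanity check on the numbers quoted in that hunk: with classifier-free guidance each of the 16 images passes through the UNet twice per step, so 50 steps amount to 16 * 2 * 50 = 1600 UNet evaluations. The short sketch below only redoes that arithmetic; the ~17.8 images/s it prints is implied by the quoted 90-second figure, not an independently measured throughput.

```
# Back-of-the-envelope check of the stable diffusion timing claim.
batch_size = 16   # images generated
cfg_factor = 2    # classifier-free guidance doubles the UNet work
steps = 50        # diffusion steps

unet_images = batch_size * cfg_factor * steps  # 1600 UNet evaluations
reported_seconds_mlx = 90                      # from the README text

implied_throughput = unet_images / reported_seconds_mlx
print(f"Implied UNet throughput: {implied_throughput:.1f} images/s")  # ~17.8
```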
@@ -1,3 +1,4 @@
 mlx
 safetensors
 huggingface-hub
+regex

@@ -11,4 +11,4 @@ python main.py --gpu
 
 By default the dataset is the [PTB corpus](https://paperswithcode.com/dataset/penn-treebank). Choose a different dataset with the `--dataset` option.
 
-To run the PyTorch, Jax or TensorFlowexamples install the respective framework.
+To run the PyTorch, Jax or TensorFlow examples install the respective framework.

@@ -81,13 +81,13 @@ def main(args):
     optimizer = optim.SGD(learning_rate=args.learning_rate)
     loss_and_grad_fn = nn.value_and_grad(model, model.loss)
 
-    def eval_fn(params, dataset):
+    def eval_fn(model, dataset):
         inputs, targets = map(mx.array, to_samples(context_size, dataset))
         loss = 0
         for s in range(0, targets.shape[0], batch_size):
             bx, by = inputs[s : s + batch_size], targets[s : s + batch_size]
             bx, by = map(mx.array, (bx, by))
-            losses = self.loss(bx, by, reduce=False)
+            losses = model.loss(bx, by, reduce=False)
             loss += mx.sum(losses).item()
         return loss / len(targets)
 
@@ -110,9 +110,8 @@ def main(args):
             )
             losses = []
             tic = time.perf_counter()
 
         if (it + 1) % steps_per_eval == 0:
-            val_loss = eval_fn(params, valid)
+            val_loss = eval_fn(model, valid)
             toc = time.perf_counter()
             print(
                 f"Iter {it + 1}: "
@@ -123,7 +122,7 @@ def main(args):
             tic = time.perf_counter()
 
     if args.eval_test:
-        test_loss = eval_fn(params, test)
+        test_loss = eval_fn(model, test)
         test_ppl = math.exp(test_loss)
         print(f"Test loss {test_loss:.3f}, Test ppl {test_ppl:.3f}.")
 
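The three hunks above all make the same fix: the evaluation helper now takes the live `model` object instead of a stale `params` snapshot, and the out-of-scope `self.loss` becomes `model.loss`, so evaluation always runs against the parameters the optimizer has just updated. The sketch below illustrates that pattern with a toy MLX module and random data; it is not the transformer from the example, just a minimal reproduction of the corrected structure.

```
# Minimal sketch of the corrected pattern: evaluation closes over the live
# `model`, whose parameters the optimizer updates in place. Toy module and
# data only; not the example's transformer.
import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim


class TinyRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(4, 1)

    def loss(self, x, y, reduce=True):
        losses = (self.layer(x).squeeze(-1) - y) ** 2
        return mx.mean(losses) if reduce else losses


model = TinyRegressor()
optimizer = optim.SGD(learning_rate=0.1)
loss_and_grad_fn = nn.value_and_grad(model, model.loss)


def eval_fn(model, x, y):
    # Uses model.loss directly, so it always sees the current parameters.
    return mx.mean(model.loss(x, y, reduce=False)).item()


x = mx.random.normal((32, 4))
y = mx.random.normal((32,))
for _ in range(5):
    loss, grads = loss_and_grad_fn(x, y)
    optimizer.update(model, grads)
    mx.eval(model.parameters(), optimizer.state)
print(f"Eval loss {eval_fn(model, x, y):.3f}")
```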
@@ -1,5 +1,6 @@
 # Copyright © 2023 Apple Inc.
 
+import sys
 import time
 
 import mlx.core as mx
@@ -48,46 +49,58 @@ def everything():
 
 
 if __name__ == "__main__":
-    feat_time = timer(feats)
-    print(f"Feature time {feat_time:.3f}")
-    mels = feats()[None]
-    tokens = mx.array(
-        [
-            50364, 1396, 264, 665, 5133, 23109, 25462, 264, 6582, 293, 750,
-            632, 42841, 292, 370, 938, 294, 4054, 293, 12653, 356, 50620,
-            50620, 23563, 322, 3312, 13, 50680,
-        ],
-        mx.int32,
-    )[None]
-    model = load_models.load_model("tiny")
-    model_forward_time = timer(model_forward, model, mels, tokens)
-    print(f"Model forward time {model_forward_time:.3f}")
-    decode_time = timer(decode, model, mels)
-    print(f"Decode time {decode_time:.3f}")
-    everything_time = timer(everything)
-    print(f"Everything time {everything_time:.3f}")
+    # get command line arguments without 3rd party libraries
+    # the command line argument to benchmark all models is "all"
+    models = ["tiny"]
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--all":
+            models = ["tiny", "small", "medium", "large"]
+
+    for model_name in models:
+        feat_time = timer(feats)
+
+        print(f"\nModel: {model_name.upper()}")
+        print(f"\nFeature time {feat_time:.3f}")
+        mels = feats()[None]
+        tokens = mx.array(
+            [
+                50364, 1396, 264, 665, 5133, 23109, 25462, 264, 6582, 293,
+                750, 632, 42841, 292, 370, 938, 294, 4054, 293, 12653, 356,
+                50620, 50620, 23563, 322, 3312, 13, 50680,
+            ],
+            mx.int32,
+        )[None]
+        model = load_models.load_model(f"{model_name}")
+        model_forward_time = timer(model_forward, model, mels, tokens)
+        print(f"Model forward time {model_forward_time:.3f}")
+        decode_time = timer(decode, model, mels)
+        print(f"Decode time {decode_time:.3f}")
+        everything_time = timer(everything)
+        print(f"Everything time {everything_time:.3f}")
+        print(f"\n{'-----' * 10}\n")
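The new driver parses `sys.argv` by hand, per its comment, to avoid third-party dependencies. The same choice could be expressed with the standard-library `argparse` module, which is also dependency-free; the sketch below is an alternative illustration of the "--all" flag handling, not part of the benchmark script itself.

```
# Alternative sketch using the standard library's argparse instead of raw
# sys.argv handling; functionally equivalent to the "--all" check above.
import argparse

parser = argparse.ArgumentParser(description="Benchmark whisper models.")
parser.add_argument(
    "--all",
    action="store_true",
    help="Benchmark the tiny, small, medium, and large models.",
)
args = parser.parse_args()

models = ["tiny", "small", "medium", "large"] if args.all else ["tiny"]
print(f"Benchmarking: {', '.join(models)}")
```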