mirror of https://github.com/ml-explore/mlx-examples.git
synced 2025-06-26 02:33:23 +08:00

Merge branch 'ml-explore:main' into fix-unsupported-scalartype

commit 85345d42cb
.gitignore (vendored): 2 additions
@@ -127,3 +127,5 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+.idea/
+.vscode/
@@ -17,6 +17,9 @@ weights you will need to [request
 access](https://docs.google.com/forms/d/e/1FAIpQLSfqNECQnMkycAp2jP4Z9TFX0cGR4uf7b_fBxjY_OjhJILlKGA/viewform)
 from Meta.
 
+Alternatively, you can also download a select converted checkpoints from the
+[mlx-llama](https://huggingface.co/mlx-llama) community organisation on Hugging Face and skip the conversion step.
+
 Convert the weights with:
 
 ```
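For readers following the README change above: a minimal sketch of skipping the conversion step by pulling an already-converted checkpoint from the mlx-llama organisation with `huggingface_hub`. The repository id below is an assumption for illustration, not a name taken from this diff; check https://huggingface.co/mlx-llama for the checkpoints that are actually published.

```python
# Minimal sketch: download a pre-converted checkpoint instead of converting
# the Meta weights yourself. Requires `pip install huggingface-hub`.
from huggingface_hub import snapshot_download

path = snapshot_download(
    repo_id="mlx-llama/Llama-2-7b-chat-mlx",  # hypothetical repo id, verify on the Hub
    local_dir="Llama-2-7b-chat-mlx",
)
print(f"Checkpoint downloaded to {path}")
```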
@@ -1,3 +1,4 @@
 mlx
 sentencepiece
 torch
+numpy
@@ -65,8 +65,9 @@ Performance
 -----------
 
 The following table compares the performance of the UNet in stable diffusion.
-We report throughput in images per second for the provided `txt2image.py`
-script and the `diffusers` library using the MPS PyTorch backend.
+We report throughput in images per second **processed by the UNet** for the
+provided `txt2image.py` script and the `diffusers` library using the MPS
+PyTorch backend.
 
 At the time of writing this comparison convolutions are still some of the least
 optimized operations in MLX. Despite that, MLX still achieves **~40% higher
@@ -93,3 +94,7 @@ The above experiments were made on an M2 Ultra with PyTorch version 2.1,
 diffusers version 0.21.4 and transformers version 4.33.3. For the generation we
 used classifier free guidance which means that the above batch sizes result
 double the images processed by the UNet.
+
+Note that the above table means that it takes about 90 seconds to fully
+generate 16 images with MLX and 50 diffusion steps with classifier free
+guidance and about 120 for PyTorch.
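To make the classifier-free-guidance bookkeeping in the added note concrete, here is a small back-of-the-envelope helper. The throughput value is a labelled placeholder, not a number from the README's table (which is not reproduced in this diff).

```python
# With classifier-free guidance each image needs two UNet evaluations per
# diffusion step (conditional + unconditional), so a batch of B images run
# for S steps costs 2 * B * S UNet images in total.
def unet_images(batch_size: int, steps: int, cfg: bool = True) -> int:
    return batch_size * steps * (2 if cfg else 1)

# Placeholder throughput in UNet images/second; read the real value from the
# table in the README, it is hypothetical here.
throughput = 18.0
total = unet_images(batch_size=16, steps=50)  # 1600 UNet images
print(f"{total} UNet images ~= {total / throughput:.0f} s at {throughput} im/s")
```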
@@ -1,3 +1,4 @@
+mlx
 safetensors
 huggingface-hub
 regex
@@ -81,13 +81,13 @@ def main(args):
     optimizer = optim.SGD(learning_rate=args.learning_rate)
     loss_and_grad_fn = nn.value_and_grad(model, model.loss)
 
-    def eval_fn(params, dataset):
+    def eval_fn(model, dataset):
         inputs, targets = map(mx.array, to_samples(context_size, dataset))
         loss = 0
         for s in range(0, targets.shape[0], batch_size):
             bx, by = inputs[s : s + batch_size], targets[s : s + batch_size]
             bx, by = map(mx.array, (bx, by))
-            losses = self.loss(bx, by, reduce=False)
+            losses = model.loss(bx, by, reduce=False)
             loss += mx.sum(losses).item()
         return loss / len(targets)
 
@@ -110,9 +110,8 @@ def main(args):
             )
             losses = []
             tic = time.perf_counter()
-
         if (it + 1) % steps_per_eval == 0:
-            val_loss = eval_fn(params, valid)
+            val_loss = eval_fn(model, valid)
             toc = time.perf_counter()
             print(
                 f"Iter {it + 1}: "
@@ -123,7 +122,7 @@ def main(args):
             tic = time.perf_counter()
 
     if args.eval_test:
-        test_loss = eval_fn(params, test)
+        test_loss = eval_fn(model, test)
         test_ppl = math.exp(test_loss)
         print(f"Test loss {test_loss:.3f}, Test ppl {test_ppl:.3f}.")
 
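The three hunks above are a single fix: `eval_fn` used to take `params` but its body called `self.loss(...)`, and since `self` is not defined inside `main`, the first evaluation crashed with a `NameError`. Passing the model and calling `model.loss` fixes it. A stand-alone sketch of the corrected pattern, with a toy module standing in for the repository's transformer language model:

```python
# Sketch of the bug and the fix; ToyModel stands in for the real model.
import mlx.core as mx
import mlx.nn as nn

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(4, 4)

    def loss(self, x, y, reduce=True):
        # cross_entropy returns per-example losses by default
        losses = nn.losses.cross_entropy(self.lin(x), y)
        return mx.mean(losses) if reduce else losses

def eval_fn(model, dataset):
    # Old signature was eval_fn(params, dataset) with self.loss(...) in the
    # body; `self` did not exist in that scope, hence the NameError.
    x, y = dataset
    return mx.mean(model.loss(x, y, reduce=False)).item()

model = ToyModel()
x = mx.random.normal((8, 4))
y = mx.random.randint(0, 4, (8,))
print(f"val loss: {eval_fn(model, (x, y)):.3f}")
```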
@@ -1,5 +1,6 @@
 # Copyright © 2023 Apple Inc.
 
+import sys
 import time
 
 import mlx.core as mx
@@ -48,8 +49,19 @@ def everything():
 
 
 if __name__ == "__main__":
+
+    # get command line arguments without 3rd party libraries
+    # the command line argument to benchmark all models is "--all"
+    models = ["tiny"]
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--all":
+            models = ["tiny", "small", "medium", "large"]
+
+    for model_name in models:
         feat_time = timer(feats)
-    print(f"Feature time {feat_time:.3f}")
+
+        print(f"\nModel: {model_name.upper()}")
+        print(f"\nFeature time {feat_time:.3f}")
         mels = feats()[None]
         tokens = mx.array(
             [
@@ -84,10 +96,11 @@ if __name__ == "__main__":
             ],
             mx.int32,
         )[None]
-        model = load_models.load_model("tiny")
+        model = load_models.load_model(f"{model_name}")
         model_forward_time = timer(model_forward, model, mels, tokens)
         print(f"Model forward time {model_forward_time:.3f}")
         decode_time = timer(decode, model, mels)
         print(f"Decode time {decode_time:.3f}")
         everything_time = timer(everything)
         print(f"Everything time {everything_time:.3f}")
+        print(f"\n{'-----' * 10}\n")
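As the new comments in the hunks above say, the benchmark keeps its argument handling free of third-party libraries and reads `sys.argv` directly: with no arguments it times only the `tiny` model, and with `--all` it sweeps `tiny`, `small`, `medium`, and `large`. Assuming the script keeps its upstream name (the file name is not shown in this diff, so `benchmark.py` is a guess), invocation would look like:

```
python benchmark.py          # time the default "tiny" model only
python benchmark.py --all    # time tiny, small, medium, and large
```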