mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 17:31:18 +08:00
[Whisper] Large-v3 requires 128 Mel frequency bins (#193)
* Large-v3 requires 128 Mel frequency bins * extract correct model dimensions and use argparse * format * format --------- Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
parent
e1e56a625b
commit
07c163d9d9
@ -1,5 +1,5 @@
|
||||
# Copyright © 2023 Apple Inc.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import time
|
||||
|
||||
@ -10,6 +10,22 @@ from whisper import audio, decoding, load_models, transcribe
|
||||
audio_file = "whisper/assets/ls_test.flac"
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser(description="Benchmark script.")
|
||||
parser.add_argument(
|
||||
"--all",
|
||||
action="store_true",
|
||||
help="Use all available models, i.e. tiny,small,medium,large-v3",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--models",
|
||||
type=str,
|
||||
help="Specify models as a comma-separated list (e.g., tiny,small,medium)",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def timer(fn, *args):
|
||||
for _ in range(5):
|
||||
fn(*args)
|
||||
@ -23,10 +39,10 @@ def timer(fn, *args):
|
||||
return (toc - tic) / num_its
|
||||
|
||||
|
||||
def feats():
|
||||
def feats(n_mels: int = 80):
|
||||
data = audio.load_audio(audio_file)
|
||||
data = audio.pad_or_trim(data)
|
||||
mels = audio.log_mel_spectrogram(data)
|
||||
mels = audio.log_mel_spectrogram(data, n_mels)
|
||||
mx.eval(mels)
|
||||
return mels
|
||||
|
||||
@ -46,20 +62,20 @@ def everything(model_name):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# get command line arguments without 3rd party libraries
|
||||
# the command line argument to benchmark all models is "all"
|
||||
args = parse_arguments()
|
||||
if args.all:
|
||||
models = ["tiny", "small", "medium", "large-v3"]
|
||||
elif args.models:
|
||||
models = args.models.split(",")
|
||||
else:
|
||||
models = ["tiny"]
|
||||
if len(sys.argv) > 1:
|
||||
if sys.argv[1] == "--all":
|
||||
models = ["tiny", "small", "medium", "large"]
|
||||
|
||||
print("Selected models:", models)
|
||||
|
||||
feat_time = timer(feats)
|
||||
print(f"\nFeature time {feat_time:.3f}")
|
||||
mels = feats()[None].astype(mx.float16)
|
||||
|
||||
for model_name in models:
|
||||
|
||||
print(f"\nModel: {model_name.upper()}")
|
||||
tokens = mx.array(
|
||||
[
|
||||
@ -95,6 +111,7 @@ if __name__ == "__main__":
|
||||
mx.int32,
|
||||
)[None]
|
||||
model = load_models.load_model(f"{model_name}", dtype=mx.float16)
|
||||
mels = feats(model.dims.n_mels)[None].astype(mx.float16)
|
||||
model_forward_time = timer(model_forward, model, mels, tokens)
|
||||
print(f"Model forward time {model_forward_time:.3f}")
|
||||
decode_time = timer(decode, model, mels)
|
||||
|
Loading…
Reference in New Issue
Block a user