* initial encodec

* works

* nits

* use fast group norm

* fix for rnn layer

* fix mlx version

* use custom LSTM kernel

* audio encodec

* fix example, support batched inference

* nits
This commit is contained in:
Awni Hannun
2024-09-23 11:39:25 -07:00
committed by GitHub
parent 796d5e40e4
commit 9bb2dd62f3
10 changed files with 1267 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
# Copyright © 2024 Apple Inc.
import time
import mlx.core as mx
from utils import load
model, processor = load("mlx-community/encodec-48khz-float32")
audio = mx.random.uniform(shape=(288000, 2))
feats, mask = processor(audio)
mx.eval(model, feats, mask)
@mx.compile
def fun():
codes, scales = model.encode(feats, mask, bandwidth=3)
reconstructed = model.decode(codes, scales, mask)
return reconstructed
for _ in range(5):
mx.eval(fun())
tic = time.time()
for _ in range(10):
mx.eval(fun())
toc = time.time()
ms = 1000 * (toc - tic) / 10
print(f"Time per it: {ms:.3f}")