From 4111473f9d1fa05de9aacee52ab1d53b1252b159 Mon Sep 17 00:00:00 2001 From: Awni Hannun Date: Mon, 23 Sep 2024 11:38:00 -0700 Subject: [PATCH] nits --- README.md | 1 + encodec/encodec.py | 5 ++--- encodec/utils.py | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 00e57803..bd180975 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Some more useful examples are listed below. ### Audio Models - Speech recognition with [OpenAI's Whisper](whisper). +- Audio compression and generation with [Meta's EnCodec](encodec). ### Multimodal models diff --git a/encodec/encodec.py b/encodec/encodec.py index a796491d..3ef47369 100644 --- a/encodec/encodec.py +++ b/encodec/encodec.py @@ -474,7 +474,6 @@ class EncodecModel(nn.Module): self.encoder = EncodecEncoder(config) self.decoder = EncodecDecoder(config) self.quantizer = EncodecResidualVectorQuantizer(config) - self.bits_per_codebook = int(math.log2(self.config.codebook_size)) def _encode_frame( self, input_values: mx.array, bandwidth: float, padding_mask: mx.array @@ -527,8 +526,8 @@ class EncodecModel(nn.Module): A list of frames containing the discrete encoded codes for the input audio waveform, along with rescaling factors for each chunk when ``config.normalize==True``. Each frame is a tuple ``(codebook, - scale)``, with ``codebook`` of shape ``[batch_size, num_codebooks, - frames]``. + scale)``, with ``codebook`` of shape ``(batch_size, num_codebooks, + frames)``. """ if bandwidth is None: diff --git a/encodec/utils.py b/encodec/utils.py index 952b92bd..18b3f063 100644 --- a/encodec/utils.py +++ b/encodec/utils.py @@ -2,7 +2,6 @@ import functools import json -import math from pathlib import Path from types import SimpleNamespace from typing import List, Optional, Union