diff --git a/README.md b/README.md index 00e57803..bd180975 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Some more useful examples are listed below. ### Audio Models - Speech recognition with [OpenAI's Whisper](whisper). +- Audio compression and generation with [Meta's EnCodec](encodec). ### Multimodal models diff --git a/encodec/encodec.py b/encodec/encodec.py index a796491d..3ef47369 100644 --- a/encodec/encodec.py +++ b/encodec/encodec.py @@ -474,7 +474,6 @@ class EncodecModel(nn.Module): self.encoder = EncodecEncoder(config) self.decoder = EncodecDecoder(config) self.quantizer = EncodecResidualVectorQuantizer(config) - self.bits_per_codebook = int(math.log2(self.config.codebook_size)) def _encode_frame( self, input_values: mx.array, bandwidth: float, padding_mask: mx.array @@ -527,8 +526,8 @@ class EncodecModel(nn.Module): A list of frames containing the discrete encoded codes for the input audio waveform, along with rescaling factors for each chunk when ``config.normalize==True``. Each frame is a tuple ``(codebook, - scale)``, with ``codebook`` of shape ``[batch_size, num_codebooks, - frames]``. + scale)``, with ``codebook`` of shape ``(batch_size, num_codebooks, + frames)``. """ if bandwidth is None: diff --git a/encodec/utils.py b/encodec/utils.py index 952b92bd..18b3f063 100644 --- a/encodec/utils.py +++ b/encodec/utils.py @@ -2,7 +2,6 @@ import functools import json -import math from pathlib import Path from types import SimpleNamespace from typing import List, Optional, Union