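nits: list EnCodec in the README, drop the bits_per_codebook attribute and a math import, use tuple notation for the codebook shape in the docstring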

2025-09-01 04:14:38 +08:00 · 2024-09-23 11:38:00 -07:00
parent f3c6ed99c8
commit 4111473f9d
3 changed files with 3 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ Some more useful examples are listed below.
 ### Audio Models

 - Speech recognition with [OpenAI's Whisper](whisper).
+- Audio compression and generation with [Meta's EnCodec](encodec).

 ### Multimodal models

--- a/encodec/encodec.py
+++ b/encodec/encodec.py
@@ -474,7 +474,6 @@ class EncodecModel(nn.Module):
        self.encoder = EncodecEncoder(config)
        self.decoder = EncodecDecoder(config)
        self.quantizer = EncodecResidualVectorQuantizer(config)
-        self.bits_per_codebook = int(math.log2(self.config.codebook_size))

    def _encode_frame(
        self, input_values: mx.array, bandwidth: float, padding_mask: mx.array
@@ -527,8 +526,8 @@ class EncodecModel(nn.Module):
            A list of frames containing the discrete encoded codes for the
            input audio waveform, along with rescaling factors for each chunk
            when ``config.normalize==True``. Each frame is a tuple ``(codebook,
-            scale)``, with ``codebook`` of shape ``[batch_size, num_codebooks,
-            frames]``.
+            scale)``, with ``codebook`` of shape ``(batch_size, num_codebooks,
+            frames)``.
        """

        if bandwidth is None:
--- a/encodec/utils.py
+++ b/encodec/utils.py
@@ -2,7 +2,6 @@

 import functools
 import json
-import math
 from pathlib import Path
 from types import SimpleNamespace
 from typing import List, Optional, Union