This commit is contained in:
Awni Hannun
2024-09-23 11:38:00 -07:00
parent f3c6ed99c8
commit 4111473f9d
3 changed files with 3 additions and 4 deletions

View File

@@ -27,6 +27,7 @@ Some more useful examples are listed below.
### Audio Models
- Speech recognition with [OpenAI's Whisper](whisper).
- Audio compression and generation with [Meta's EnCodec](encodec).
### Multimodal Models

View File

@@ -474,7 +474,6 @@ class EncodecModel(nn.Module):
self.encoder = EncodecEncoder(config)
self.decoder = EncodecDecoder(config)
self.quantizer = EncodecResidualVectorQuantizer(config)
self.bits_per_codebook = int(math.log2(self.config.codebook_size))
def _encode_frame(
self, input_values: mx.array, bandwidth: float, padding_mask: mx.array
@@ -527,8 +526,8 @@ class EncodecModel(nn.Module):
A list of frames containing the discrete encoded codes for the
input audio waveform, along with rescaling factors for each chunk
when ``config.normalize==True``. Each frame is a tuple ``(codebook,
scale)``, with ``codebook`` of shape ``[batch_size, num_codebooks,
frames]``.
scale)``, with ``codebook`` of shape ``(batch_size, num_codebooks,
frames)``.
"""
if bandwidth is None:

View File

@@ -2,7 +2,6 @@
import functools
import json
import math
from pathlib import Path
from types import SimpleNamespace
from typing import List, Optional, Union