Merge pull request #90 from bofenghuang/fix-fp16

Fix whisper fp16 inference
2025-08-09 02:16:37 +08:00 · 2023-12-13 07:29:10 -08:00 · 2023-12-13 07:29:10 -08:00 · 700b67fa3a
commit 700b67fa3a
parent 3b7cfeb8ed 4b1a06c0cb
1 changed files with 3 additions and 3 deletions
--- a/whisper/whisper/whisper.py
+++ b/whisper/whisper/whisper.py
@ -117,7 +117,7 @@ class ResidualAttentionBlock(nn.Module):
        if self.cross_attn:
            y, cross_kv = self.cross_attn(self.cross_attn_ln(x), xa, kv_cache=cross_kv)
            x += y
-        x = x + self.mlp2(nn.gelu(self.mlp1(self.mlp_ln(x))))
+        x = x + self.mlp2(nn.gelu(self.mlp1(self.mlp_ln(x))).astype(x.dtype))
        return x, (kv, cross_kv)
@ -134,8 +134,8 @@ class AudioEncoder(nn.Module):
        self.ln_post = LayerNorm(n_state)
    def __call__(self, x):
-        x = nn.gelu(self.conv1(x))
+        x = nn.gelu(self.conv1(x)).astype(x.dtype)
-        x = nn.gelu(self.conv2(x))
+        x = nn.gelu(self.conv2(x)).astype(x.dtype)
        assert x.shape[1:] == self._positional_embedding.shape, "incorrect audio shape"
        x = x + self._positional_embedding