From bd6b08e8139408a9b53d709b12ac4f0f53abc7d5 Mon Sep 17 00:00:00 2001
From: Awni Hannun <awni@apple.com>
Date: Mon, 4 Nov 2024 13:59:53 -0800
Subject: [PATCH] whisper: simplify --output-name, tidy stdin docs, remove test_cli.sh

---
 whisper/README.md              | 15 +++-----
 whisper/mlx_whisper/audio.py   |  2 +-
 whisper/mlx_whisper/cli.py     | 17 ++++++---
 whisper/mlx_whisper/writers.py | 23 +++---------
 whisper/test_cli.sh            | 69 ----------------------------------
 5 files changed, 24 insertions(+), 102 deletions(-)
 delete mode 100755 whisper/test_cli.sh

diff --git a/whisper/README.md b/whisper/README.md
index 23d37783..cd3bc684 100644
--- a/whisper/README.md
+++ b/whisper/README.md
@@ -26,7 +26,7 @@ pip install mlx-whisper
 At its simplest:
 
 ```sh
-mlx_whisper audio_file.mp3  # output name will re-use basename of audio file path
+mlx_whisper audio_file.mp3
 ```
 
 This will make a text file `audio_file.txt` with the results.
@@ -35,19 +35,14 @@ Use `-f` to specify the output format and `--model` to specify the model. There
 are many other supported command line options. To see them all, run
 `mlx_whisper -h`.
 
-Alternatively, you can pipe in the audio content of other programs via stdin,
-useful when `mlx_whisper` acts as a composable command line utility.
+You can also pipe the audio content of other programs via stdin:
 
 ```sh
-# hypothetical demo of audio content via stdin
-# default output file name will be content.*
-some-process | mlx_whisper
-
-# hypothetical demo of media content via stdin
-# use --output-name to name your output artifacts
-some-downloader https://some.url/media?id=lecture42 | mlx_whisper --output-name mlx-demo
+some-process | mlx_whisper -
 ```
 
+When reading from stdin, the default output file name is `content.*`. You can
+set a different name with the `--output-name` flag.
 
 #### API
 
diff --git a/whisper/mlx_whisper/audio.py b/whisper/mlx_whisper/audio.py
index f8bd8e69..c8cca07c 100644
--- a/whisper/mlx_whisper/audio.py
+++ b/whisper/mlx_whisper/audio.py
@@ -39,7 +39,7 @@ def load_audio(file: str = Optional[str], sr: int = SAMPLE_RATE, from_stdin=Fals
     """
 
     # This launches a subprocess to decode audio while down-mixing
-    # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
+    # and resampling as necessary. Requires the ffmpeg CLI in PATH.
     if from_stdin:
         cmd = ["ffmpeg", "-i", "pipe:0"]
     else:
diff --git a/whisper/mlx_whisper/cli.py b/whisper/mlx_whisper/cli.py
index 956d83a0..7d08a043 100644
--- a/whisper/mlx_whisper/cli.py
+++ b/whisper/mlx_whisper/cli.py
@@ -2,6 +2,7 @@
 
 import argparse
 import os
+import pathlib
 import traceback
 import warnings
 
@@ -40,8 +41,11 @@ def build_parser():
     parser.add_argument(
         "--output-name",
         type=str,
-        default="{basename}",
-        help="logical name of transcription/translation output files, before --output-format extensions",
+        default=None,
+        help=(
+            "The name of transcription/translation output files before "
+            "--output-format extensions"
+        ),
     )
     parser.add_argument(
         "--output-dir",
@@ -207,10 +211,10 @@ def main():
     path_or_hf_repo: str = args.pop("model")
     output_dir: str = args.pop("output_dir")
     output_format: str = args.pop("output_format")
-    output_name_template: str = args.pop("output_name")
+    output_name: str = args.pop("output_name")
     os.makedirs(output_dir, exist_ok=True)
 
-    writer = get_writer(output_format, output_dir, output_name_template)
+    writer = get_writer(output_format, output_dir)
     word_options = [
         "highlight_words",
         "max_line_count",
@@ -233,13 +237,16 @@ def main():
             # receive the contents from stdin rather than read a file
             audio_obj = audio.load_audio(from_stdin=True)
 
+            output_name = output_name or "content"
+        else:
+            output_name = output_name or pathlib.Path(audio_obj).stem
         try:
             result = transcribe(
                 audio_obj,
                 path_or_hf_repo=path_or_hf_repo,
                 **args,
             )
-            writer(result, audio_obj, **writer_args)
+            writer(result, output_name, **writer_args)
         except Exception as e:
             traceback.print_exc()
             print(f"Skipping {audio_obj} due to {type(e).__name__}: {str(e)}")
diff --git a/whisper/mlx_whisper/writers.py b/whisper/mlx_whisper/writers.py
index cbfe1f66..cdb35063 100644
--- a/whisper/mlx_whisper/writers.py
+++ b/whisper/mlx_whisper/writers.py
@@ -37,22 +37,13 @@ def get_start(segments: List[dict]) -> Optional[float]:
 class ResultWriter:
     extension: str
 
-    def __init__(self, output_dir: str, output_name_template: str):
+    def __init__(self, output_dir: str):
         self.output_dir = output_dir
-        self.output_name_template = output_name_template
 
     def __call__(
-        self, result: dict, audio_obj: str, options: Optional[dict] = None, **kwargs
+        self, result: dict, output_name: str, options: Optional[dict] = None, **kwargs
     ):
-        if isinstance(audio_obj, (str, pathlib.Path)):
-            basename = pathlib.Path(audio_obj).stem
-        else:
-            # mx.array, np.ndarray, etc
-            basename = "content"
-
-        output_basename = self.output_name_template.format(basename=basename)
-
-        output_path = (pathlib.Path(self.output_dir) / output_basename).with_suffix(
+        output_path = (pathlib.Path(self.output_dir) / output_name).with_suffix(
             f".{self.extension}"
         )
 
@@ -253,7 +244,7 @@ class WriteJSON(ResultWriter):
 
 
 def get_writer(
-    output_format: str, output_dir: str, output_name_template: str
+    output_format: str, output_dir: str
 ) -> Callable[[dict, TextIO, dict], None]:
     writers = {
         "txt": WriteTXT,
@@ -264,9 +255,7 @@ def get_writer(
     }
 
     if output_format == "all":
-        all_writers = [
-            writer(output_dir, output_name_template) for writer in writers.values()
-        ]
+        all_writers = [writer(output_dir) for writer in writers.values()]
 
         def write_all(
             result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
@@ -276,4 +265,4 @@ def get_writer(
 
         return write_all
 
-    return writers[output_format](output_dir, output_name_template)
+    return writers[output_format](output_dir)
diff --git a/whisper/test_cli.sh b/whisper/test_cli.sh
deleted file mode 100755
index 4e34a217..00000000
--- a/whisper/test_cli.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/zsh -e
-
-set -o err_exit
-
-TEST_AUDIO="mlx_whisper/assets/ls_test.flac"
-TEST_OUTPUT_DIR=$(mktemp -d -t mlx_whisper_cli_test)
-
-# the control output - cli called with audio position arg
-# expected output file name is ls_test.json
-TEST_OUTPUT_NAME_FOR_ALL="--output-name arg is used for all output formats"
-mlx_whisper "$TEST_AUDIO" \
-    --output-dir "$TEST_OUTPUT_DIR" \
-    --output-format all \
-    --output-name '{basename}_transcribed' \
-    --temperature 0 \
-    --verbose=False
-if /bin/ls ${TEST_OUTPUT_DIR}/ls_test_transcribed.{json,srt,tsv,txt,vtt} > /dev/null; then
-    echo "[PASS] $TEST_OUTPUT_NAME_FOR_ALL"
-else
-    echo "[FAIL] $TEST_OUTPUT_NAME_FOR_ALL"
-fi
-
-
-TEST_OUTPUT_NAME_TEMPLATE="testing the output name template usage scenario"
-for test_val in $(seq 10 10 60); do
-    mlx_whisper "$TEST_AUDIO" \
-        --output-name "{basename}_mwpl_${test_val}" \
-        --output-dir "$TEST_OUTPUT_DIR" \
-        --output-format srt \
-        --max-words-per-line $test_val \
-        --word-timestamps True \
-        --verbose=False
-    TEST_DESC="testing output name template while varying --max-words-per-line=${test_val}"
-    if /bin/ls $TEST_OUTPUT_DIR/ls_test_mwpl_${test_val}.srt > /dev/null; then
-        echo "[PASS] $TEST_DESC"
-    else
-        echo "[FAIL] $TEST_DESC"
-    fi
-done
-
-
-TEST_STDIN_1="mlx_whisper produces identical output whether provided audio arg or stdin of same content"
-/bin/cat "$TEST_AUDIO" | mlx_whisper - \
-    --output-dir "$TEST_OUTPUT_DIR" \
-    --output-format json \
-    --temperature 0 \
-    --verbose=False
-if diff "${TEST_OUTPUT_DIR}/content.json" "${TEST_OUTPUT_DIR}/ls_test_transcribed.json"; then
-    echo "[PASS] $TEST_STDIN_1"
-else
-    echo "[FAIL] $TEST_STDIN_1"
-    echo "Check unexpected output in ${TEST_OUTPUT_DIR}"
-fi
-
-TEST_STDIN_2="mlx_whisper produces identical output when stdin comes via: cmd < file"、
-mlx_whisper - \
-    --output-name '{basename}_transcribed' \
-    --output-dir "$TEST_OUTPUT_DIR" \
-    --output-format tsv \
-    --temperature 0 \
-    --verbose=False < "$TEST_AUDIO"
-if diff "${TEST_OUTPUT_DIR}/content_transcribed.tsv" "${TEST_OUTPUT_DIR}/ls_test_transcribed.tsv"; then
-    echo "[PASS] $TEST_STDIN_2"
-else
-    echo "[FAIL] $TEST_STDIN_2"
-    echo "Check unexpected output in ${TEST_OUTPUT_DIR}"
-fi
-
-echo "Outputs can be verified in ${TEST_OUTPUT_DIR}"