From 7b818c0d39e09294301991a4f11f60dd3f1f560c Mon Sep 17 00:00:00 2001 From: Anthony Wu <462072+anthonywu@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:45:48 -1000 Subject: [PATCH] fix bugs, add test coverage --- whisper/mlx_whisper/cli.py | 1 - whisper/test_cli.sh | 39 +++++++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/whisper/mlx_whisper/cli.py b/whisper/mlx_whisper/cli.py index f5304e5d..956d83a0 100644 --- a/whisper/mlx_whisper/cli.py +++ b/whisper/mlx_whisper/cli.py @@ -232,7 +232,6 @@ def main(): if audio_obj == "-": # receive the contents from stdin rather than read a file audio_obj = audio.load_audio(from_stdin=True) - output_name_template = "content" try: result = transcribe( diff --git a/whisper/test_cli.sh b/whisper/test_cli.sh index f3b9b7c9..4e34a217 100755 --- a/whisper/test_cli.sh +++ b/whisper/test_cli.sh @@ -7,16 +7,44 @@ TEST_OUTPUT_DIR=$(mktemp -d -t mlx_whisper_cli_test) # the control output - cli called with audio position arg # expected output file name is ls_test.json +TEST_OUTPUT_NAME_FOR_ALL="--output-name arg is used for all output formats" mlx_whisper "$TEST_AUDIO" \ --output-dir "$TEST_OUTPUT_DIR" \ --output-format all \ --output-name '{basename}_transcribed' \ --temperature 0 \ --verbose=False -/bin/ls ${TEST_OUTPUT_DIR}/ls_test_transcribed.{json,srt,tsv,txt,vtt} | sort +if /bin/ls ${TEST_OUTPUT_DIR}/ls_test_transcribed.{json,srt,tsv,txt,vtt} > /dev/null; then + echo "[PASS] $TEST_OUTPUT_NAME_FOR_ALL" +else + echo "[FAIL] $TEST_OUTPUT_NAME_FOR_ALL" +fi + + +TEST_OUTPUT_NAME_TEMPLATE="testing the output name template usage scenario" +for test_val in $(seq 10 10 60); do + mlx_whisper "$TEST_AUDIO" \ + --output-name "{basename}_mwpl_${test_val}" \ + --output-dir "$TEST_OUTPUT_DIR" \ + --output-format srt \ + --max-words-per-line $test_val \ + --word-timestamps True \ + --verbose=False + TEST_DESC="testing output name template while varying --max-words-per-line=${test_val}" + if /bin/ls $TEST_OUTPUT_DIR/ls_test_mwpl_${test_val}.srt > /dev/null; then + echo "[PASS] $TEST_DESC" + else + echo "[FAIL] $TEST_DESC" + fi +done + TEST_STDIN_1="mlx_whisper produces identical output whether provided audio arg or stdin of same content" -/bin/cat "$TEST_AUDIO" | mlx_whisper - --output-dir "$TEST_OUTPUT_DIR" --output-format json --temperature 0 --verbose=False +/bin/cat "$TEST_AUDIO" | mlx_whisper - \ + --output-dir "$TEST_OUTPUT_DIR" \ + --output-format json \ + --temperature 0 \ + --verbose=False if diff "${TEST_OUTPUT_DIR}/content.json" "${TEST_OUTPUT_DIR}/ls_test_transcribed.json"; then echo "[PASS] $TEST_STDIN_1" else @@ -25,7 +53,12 @@ else fi TEST_STDIN_2="mlx_whisper produces identical output when stdin comes via: cmd < file"、 -mlx_whisper - --output-name '{basename}_transcribed' --output-dir "$TEST_OUTPUT_DIR" --output-format tsv --temperature 0 --verbose=False < "$TEST_AUDIO" +mlx_whisper - \ + --output-name '{basename}_transcribed' \ + --output-dir "$TEST_OUTPUT_DIR" \ + --output-format tsv \ + --temperature 0 \ + --verbose=False < "$TEST_AUDIO" if diff "${TEST_OUTPUT_DIR}/content_transcribed.tsv" "${TEST_OUTPUT_DIR}/ls_test_transcribed.tsv"; then echo "[PASS] $TEST_STDIN_2" else