Spaces:

distil-whisper
/

hallucination-analysis

Running

sanchit-gandhi HF staff committed on Oct 6, 2023

Commit

eed20cf

•

1 Parent(s): 0a74dbb

repeated n-grams

Files changed (2) hide show

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import gradio as gr
 from datasets import load_dataset
 import pandas as pd
 from jiwer import process_words, wer_default
 class Action(Enum):
@@ -63,7 +64,7 @@ target_dtype = np.int16
 max_range = np.iinfo(target_dtype).max
-def get_visualisation(idx, model="large-v2", round_dp=2):
     idx -= 1
     audio = dataset[idx]["audio"]
     array = (audio["array"] * max_range).astype(np.int16)
@@ -83,12 +84,19 @@ def get_visualisation(idx, model="large-v2", round_dp=2):
         100 * wer_output.insertions / len(wer_output.references[0]), round_dp
     )
-    rel_length = round(len(text2.split()) / len(text1.split()), round_dp)
     diff = compare_string(text1, text2)
     full_text = style_text(diff)
-    return (sampling_rate, array), wer_percentage, ier_percentage, rel_length, full_text
 def get_side_by_side_visualisation(idx):
@@ -136,7 +144,7 @@ if __name__ == "__main__":
                     "Model",
                     "Word Error Rate (WER)",
                     "Insertion Error Rate (IER)",
-                    "Rel length (ref length / tgt length)",
                 ],
                 height=1000,
             )

 from datasets import load_dataset
 import pandas as pd
 from jiwer import process_words, wer_default
+from nltk import ngrams
 class Action(Enum):
 max_range = np.iinfo(target_dtype).max
+def get_visualisation(idx, model="large-v2", round_dp=2, ngram_degree=5):
     idx -= 1
     audio = dataset[idx]["audio"]
     array = (audio["array"] * max_range).astype(np.int16)
         100 * wer_output.insertions / len(wer_output.references[0]), round_dp
     )
+    all_ngrams = list(ngrams(text2.split(), ngram_degree))
+    unique_ngrams = []
+    for ngram in all_ngrams:
+        if ngram not in unique_ngrams:
+            unique_ngrams.append(ngram)
+    repeated_ngrams = len(all_ngrams) - len(unique_ngrams)
     diff = compare_string(text1, text2)
     full_text = style_text(diff)
+    return (sampling_rate, array), wer_percentage, ier_percentage, repeated_ngrams, full_text
 def get_side_by_side_visualisation(idx):
                     "Model",
                     "Word Error Rate (WER)",
                     "Insertion Error Rate (IER)",
+                    "Repeated 5-grams",
                 ],
                 height=1000,
             )

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 pandas
 datasets[audio]
-jiwer

 pandas
 datasets[audio]
+jiwer
+nltk