"""Unit tests for the character error rate (CER) metric, using Hungarian sample strings."""

import unittest

from cer import CER


cer = CER()


class TestCER(unittest.TestCase):
    """Compare ``CER.compute`` against hand-computed expected values.

    Each expected value is written as ``edits / N`` where ``edits`` is the
    number of character substitutions, deletions and insertions separating the
    prediction from the reference, and ``N`` is the number of characters in the
    reference. The whitespace tests additionally expect the metric to collapse
    consecutive spaces and to strip leading/trailing spaces before scoring.
    """

    # Absolute tolerance used when comparing floating-point CER values.
    TOLERANCE = 1e-6

    def _assert_cer(self, preds, refs, expected, **compute_kwargs):
        """Compute the CER of ``preds`` against ``refs`` and check it equals ``expected``.

        Extra keyword arguments are forwarded unchanged to ``cer.compute``.
        """
        char_error_rate = cer.compute(predictions=preds, references=refs, **compute_kwargs)
        self.assertAlmostEqual(char_error_rate, expected, delta=self.TOLERANCE)

    def test_cer_case_sensitive(self):
        """Characters that differ only in case count as substitutions."""
        refs = ["Magyar Országgyűlés"]
        preds = ["magyar országgyűlés"]
        # 2 substitutions ("M" -> "m", "O" -> "o"), N = 19, CER = 2 / 19
        self._assert_cer(preds, refs, 2 / 19)

    def test_cer_whitespace(self):
        """A single space is a scored character; runs of spaces are collapsed."""
        refs = ["Farkasok voltak"]
        preds = ["Farkasokvoltak"]
        # the prediction lacks the space: 1 edit, N = 15, CER = 1 / 15
        self._assert_cer(preds, refs, 1 / 15)

        refs = ["Farkasokvoltak"]
        preds = ["Ferkasok voltak"]
        # 1 substitution ("a" -> "e") plus 1 extra space: 2 edits, N = 14, CER = 2 / 14
        self._assert_cer(preds, refs, 2 / 14)

        # consecutive whitespaces case 1: extra spaces in the prediction
        refs = ["Farkasok voltak"]
        preds = ["Farkasok  voltak"]
        # the doubled space is expected to collapse to one: 0 edits, N = 15, CER = 0
        self._assert_cer(preds, refs, 0.0)

        # consecutive whitespaces case 2: extra spaces in the reference
        refs = ["Farkasok   voltak"]
        preds = ["Farkasok voltak"]
        # the tripled space is expected to collapse to one: 0 edits, N = 15, CER = 0
        self._assert_cer(preds, refs, 0.0)

    def test_cer_sub(self):
        """One substituted character."""
        refs = ["Magyar"]
        preds = ["Megyar"]
        # 1 substitution ("a" -> "e"), N = 6, CER = 1 / 6
        self._assert_cer(preds, refs, 1 / 6)

    def test_cer_del(self):
        """The prediction contains one character that must be deleted."""
        refs = ["Farkasokvoltak"]
        preds = ["Farkasokavoltak"]
        # 1 extra character ("a") in the prediction, N = 14, CER = 1 / 14
        self._assert_cer(preds, refs, 1 / 14)

    def test_cer_insert(self):
        """The prediction lacks one character that must be inserted."""
        refs = ["Farkasokvoltak"]
        preds = ["Farkasokoltak"]
        # 1 missing character ("v") in the prediction, N = 14, CER = 1 / 14
        self._assert_cer(preds, refs, 1 / 14)

    def test_cer_equal(self):
        """Identical prediction and reference give exactly zero."""
        refs = ["Magyar"]
        char_error_rate = cer.compute(predictions=refs, references=refs)
        self.assertEqual(char_error_rate, 0.0)

    def test_cer_list_of_seqs(self):
        """Edits and reference lengths are accumulated over all sequence pairs."""
        # ['Eötvös Loránd University', 'I love my daughter']
        refs = ["Eötvös Loránd Tudományegyetem", "szeretem a lányom"]
        char_error_rate = cer.compute(predictions=refs, references=refs)
        self.assertEqual(char_error_rate, 0.0)

        refs = ["diák", "Az arab nyelvet könnyű megtanulni!", "autó"]
        preds = ["dxák", "Az arab nyelvet könnyű megtanulni!", "autó"]
        # 1 substitution ("i" -> "x"), N = 4 + 34 + 4 = 42, CER = 1 / 42
        self._assert_cer(preds, refs, 1 / 42)

    def test_correlated_sentences(self):
        """With ``concatenate_texts=True`` the sentences are scored as one text."""
        # Learn artificial intelligence to secure your future
        # Tanuljon mesterséges intelligenciát, hogy biztosítsa jövőjét
        refs = ["Tanuljon mesterséges intelligenciát,", " hogy biztosítsa jövőjét"]
        preds = ["Tanuljon mesterséges intelligenciát, hogy", " biztosítsa jövőjét"]
        # The leading whitespace of the second sentence is stripped during
        # preprocessing, so the joined texts differ by two spaces: one extra
        # after the comma and one missing after "hogy" -> 2 edits.
        # N = 36 + 23 = 59 (the stripped leading space of the second reference
        # is not counted), CER = 2 / 59
        self._assert_cer(preds, refs, 2 / 59, concatenate_texts=True)

    def test_cer_empty(self):
        """An empty reference is rejected with ``ValueError``."""
        refs = [""]
        preds = ["tök mindegy"]
        with self.assertRaises(ValueError):
            cer.compute(predictions=preds, references=refs)


if __name__ == "__main__":
    unittest.main()