File size: 1,086 Bytes
fda57dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
from unittest import TestCase
from tokenizers.pre_tokenizers import Whitespace
from recognizers.utils import DifferenceSample
class DifferenceSampleTestCase(TestCase):
    """Tests for ``DifferenceSample`` built from whitespace pre-tokenized text."""

    def setUp(self):
        """Pre-tokenize two sentences and build the sample under test."""
        self.text_a = "Chinese shares close higher Friday."
        self.text_b = "Les actions chinoises clôturent en baisse mercredi."
        self.tokenizer = Whitespace()
        # The pre-tokenizer output is kept on the instance because the test
        # later hands it to ``add_whitespace``.
        self.encoding_a = self.tokenizer.pre_tokenize_str(self.text_a)
        self.encoding_b = self.tokenizer.pre_tokenize_str(self.text_b)

        # Element 0 of every pre-tokenizer entry is used as the token.
        words_a = tuple(piece[0] for piece in self.encoding_a)
        words_b = tuple(piece[0] for piece in self.encoding_b)
        self.result = DifferenceSample(
            tokens_a=words_a,
            tokens_b=words_b,
            # One constant 0.1 label per entry on each side.
            labels_a=(0.1,) * len(self.encoding_a),
            labels_b=(0.1,) * len(self.encoding_b),
        )

    def test_add_whitespace(self):
        """Joining the tokens after ``add_whitespace`` yields the source text."""
        self.result.add_whitespace(self.encoding_a, self.encoding_b)

        rebuilt_a = "".join(self.result.tokens_a)
        self.assertEqual(rebuilt_a, self.text_a)

        rebuilt_b = "".join(self.result.tokens_b)
        self.assertEqual(rebuilt_b, self.text_b)
|