# Basic implementation (commit fda57dd, author: jvamvas)
from unittest import TestCase
from tokenizers.pre_tokenizers import Whitespace
from recognizers.utils import DifferenceSample
class DifferenceSampleTestCase(TestCase):
    """Checks ``DifferenceSample`` against a whitespace-pre-tokenized sentence pair."""

    def setUp(self):
        """Pre-tokenize two sentences and build a sample with a constant label per token."""
        self.text_a = "Chinese shares close higher Friday."
        self.text_b = "Les actions chinoises clôturent en baisse mercredi."
        self.tokenizer = Whitespace()
        self.encoding_a = self.tokenizer.pre_tokenize_str(self.text_a)
        self.encoding_b = self.tokenizer.pre_tokenize_str(self.text_b)

        # The first element of each pre-tokenization entry is used as the token text.
        words_a = tuple(entry[0] for entry in self.encoding_a)
        words_b = tuple(entry[0] for entry in self.encoding_b)

        # Every token receives the same placeholder label of 0.1.
        scores_a = (0.1,) * len(self.encoding_a)
        scores_b = (0.1,) * len(self.encoding_b)

        self.result = DifferenceSample(
            tokens_a=words_a,
            tokens_b=words_b,
            labels_a=scores_a,
            labels_b=scores_b,
        )

    def test_add_whitespace(self):
        """After ``add_whitespace``, concatenating each side's tokens must equal its text."""
        self.result.add_whitespace(self.encoding_a, self.encoding_b)

        rebuilt_a = "".join(self.result.tokens_a)
        self.assertEqual(rebuilt_a, self.text_a)

        rebuilt_b = "".join(self.result.tokens_b)
        self.assertEqual(rebuilt_b, self.text_b)