|
from unittest import TestCase |
|
|
|
from tokenizers.pre_tokenizers import Whitespace |
|
|
|
from recognizers.utils import DifferenceSample |
|
|
|
|
|
class DifferenceSampleTestCase(TestCase):
    """Tests for ``DifferenceSample.add_whitespace``.

    The fixture pre-tokenizes an English and a French sentence with the
    ``Whitespace`` pre-tokenizer and wraps the resulting token strings in a
    ``DifferenceSample``, giving every token a label of ``0.1``.
    """

    def setUp(self):
        # Two source sentences; the tests compare the rebuilt strings
        # against these exact values.
        self.text_a = "Chinese shares close higher Friday."
        self.text_b = "Les actions chinoises clôturent en baisse mercredi."

        self.tokenizer = Whitespace()
        pre_tokenize = self.tokenizer.pre_tokenize_str
        self.encoding_a = pre_tokenize(self.text_a)
        self.encoding_b = pre_tokenize(self.text_b)

        # Only the first element of each pre-tokenized item is kept as the
        # token; labels are a constant 0.1, one per token.
        self.result = DifferenceSample(
            tokens_a=tuple(piece[0] for piece in self.encoding_a),
            tokens_b=tuple(piece[0] for piece in self.encoding_b),
            labels_a=(0.1,) * len(self.encoding_a),
            labels_b=(0.1,) * len(self.encoding_b),
        )

    def test_add_whitespace(self):
        # After add_whitespace, concatenating the tokens with no separator
        # must reproduce each original sentence exactly.
        self.result.add_whitespace(self.encoding_a, self.encoding_b)

        rebuilt_a = "".join(self.result.tokens_a)
        rebuilt_b = "".join(self.result.tokens_b)

        self.assertEqual(rebuilt_a, self.text_a)
        self.assertEqual(rebuilt_b, self.text_b)
|
|