LPN64 committed on
Commit
d8e9d34
1 Parent(s): 23a8be0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -30,7 +30,7 @@ class LongCiteModel:
30
  def text_split_by_punctuation(original_text, return_dict=False):
31
  # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
32
  text = original_text
33
- custom_sent_tokenizer = PunktSentenceTokenizer(text)
34
  punctuations = r"([。;!?])" # For Chinese support
35
 
36
  separated = custom_sent_tokenizer.tokenize(text)
 
30
  def text_split_by_punctuation(original_text, return_dict=False):
31
  # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
32
  text = original_text
33
+ custom_sent_tokenizer = PunktSentenceTokenizer()
34
  punctuations = r"([。;!?])" # For Chinese support
35
 
36
  separated = custom_sent_tokenizer.tokenize(text)