nam194 committed on
Commit
0df41e3
1 Parent(s): 4d38a62

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +2 -2
utils.py CHANGED
@@ -57,7 +57,7 @@ def normalize(text, segment=True):
57
  text = replace_all(text, dict_map)
58
  if segment:
59
  text = text.split(".")
60
- text = ". ".join([underthesea.word_tokenize(i, format="text") for i in text)])
61
  return text
62
  def text_preprocess(document):
63
  punc = [i for i in ["\"", "-", ".", ":"]]#string.punctuation.replace(",","")]
@@ -84,7 +84,7 @@ def text_preprocess(document):
84
  document = re.sub(" ", " ", document)
85
  try:
86
  document = document.split(".")
87
- document = ". ".join([underthesea.word_tokenize(i, format="text") for i in document)])
88
  except:
89
  pass
90
  return document.lower()
 
57
  text = replace_all(text, dict_map)
58
  if segment:
59
  text = text.split(".")
60
+ text = ". ".join([underthesea.word_tokenize(i, format="text") for i in text])
61
  return text
62
  def text_preprocess(document):
63
  punc = [i for i in ["\"", "-", ".", ":"]]#string.punctuation.replace(",","")]
 
84
  document = re.sub(" ", " ", document)
85
  try:
86
  document = document.split(".")
87
+ document = ". ".join([underthesea.word_tokenize(i, format="text") for i in document])
88
  except:
89
  pass
90
  return document.lower()