zaanind committed on
Commit
0e54b5e
1 Parent(s): db1cb23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -10
app.py CHANGED
@@ -1,23 +1,19 @@
1
  import gradio
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
- import ctranslate2
4
 
5
 
6
  model_name = "zaanind/nllb-ensi-v1-tuning-subs"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- model = ctranslate2.Translator(model_name)
9
-
10
- #model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
 
12
  tokenizer.src_lang = "eng_Latn"
13
 
14
  def translate(text):
15
- source = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
16
- target_prefix = ["sin_Sinh"]
17
- results = translator.translate_batch([source], target_prefix=[target_prefix])
18
- target = results[0].hypotheses[0][1:]
19
- translation = tokenizer.decode(tokenizer.convert_tokens_to_ids(target))
20
-
21
  return translation
22
 
23
 
 
1
  import gradio
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
3
 
4
 
5
  model_name = "zaanind/nllb-ensi-v1-tuning-subs"
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
 
8
 
9
  tokenizer.src_lang = "eng_Latn"
10
 
11
  def translate(text):
12
+ inputs = tokenizer(text=text, return_tensors="pt")
13
+ translated_tokens = model.generate(
14
+ **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["sin_Sinh"]
15
+ )
16
+ translation = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
 
17
  return translation
18
 
19