|
import gradio as gr |
|
from transformers import pipeline, AutoModelForSeq2SeqLM, MBart50Tokenizer, AutoTokenizer |
|
|
|
# Hub checkpoint id shared by the tokenizer and the model so the two loads
# cannot drift apart.
_CHECKPOINT = "jafrilalam/bangla_sentence_correction_01"

# Source and target language codes are both "bn_IN": the input and the
# generated text are configured as the same language.
tokenizer = AutoTokenizer.from_pretrained(
    _CHECKPOINT,
    src_lang="bn_IN",
    tgt_lang="bn_IN",
    use_fast=True,
)

# Weights are requested in safetensors format.
model = AutoModelForSeq2SeqLM.from_pretrained(
    _CHECKPOINT,
    use_safetensors=True,
)
|
|
|
def correct_text(given_sentence):
    """Run the sentence-correction model on one input and return its output.

    Args:
        given_sentence: Raw text entered by the user. ``None``, empty and
            whitespace-only values are accepted.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed. Blank input yields an empty string without the
        tokenizer or the model being invoked.
    """
    # Blank input has nothing to correct; returning early also avoids asking
    # generate() for zero new tokens.
    if not given_sentence or not given_sentence.strip():
        return ""

    # No explicit max_length: with truncation=True the tokenizer falls back
    # to the model's own input limit. The previous cap, len(given_sentence),
    # measured characters rather than tokens, so a short input could have
    # real tokens truncated once any special tokens were added.
    encoded = tokenizer(
        given_sentence,
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoded["input_ids"]

    # Output budget: at least as generous as the old character-count limit,
    # never smaller than the tokenized input, plus a little headroom so very
    # short sentences are not cut off before any text is produced.
    headroom = 8
    token_budget = max(len(given_sentence), input_ids.shape[-1]) + headroom

    output_ids = model.generate(
        input_ids,
        # Passing the mask explicitly avoids generate() having to infer it.
        attention_mask=encoded["attention_mask"],
        max_new_tokens=token_budget,
        # NOTE(review): early_stopping only matters under beam search; kept
        # in case the checkpoint's generation config enables beams.
        early_stopping=True,
    )

    # Only the first sequence is decoded; a single input is passed in.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
# Four-line text area for the sentence to be corrected.
_sentence_input = gr.Textbox(lines=4, label="Incorrect Bangla Sentence")

# Text field that shows whatever correct_text returns.
_sentence_output = gr.Textbox(label="Corrected Bangla Sentence")

# One input, one output, wired straight to correct_text.
iface = gr.Interface(
    fn=correct_text,
    inputs=_sentence_input,
    outputs=_sentence_output,
)

# share=True asks Gradio for a shareable link in addition to the local
# server; the app starts as soon as this module is executed.
iface.launch(share=True)