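"""Farmers-Helper-Bot.

A Gradio app that transcribes a spoken query in English, Hindi, Telugu,
Tamil, or Kannada, answers it with a fine-tuned Gemma model, and translates
the answer back into the language of the query.
"""
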
import torch
import numpy as np
import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    pipeline,
)
# Fine-tuned Gemma model that answers the (English) query.
new_model = "tensorgirl/finetuned-gemma"
gen_model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)
gen_tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
gen_tokenizer.pad_token = gen_tokenizer.eos_token

generator = pipeline(
    "text-generation",
    model=gen_model,
    tokenizer=gen_tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
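
# Environment assumptions: device_map="auto" requires the `accelerate`
# package, and bfloat16 assumes hardware that supports it; on older GPUs or
# CPU-only machines, float32 is the safer default.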

# NLLB-200 translates between English and the regional languages.
# (Named nllb_* so it does not shadow the Gemma model and tokenizer above.)
nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
nllb_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
device = 0 if torch.cuda.is_available() else -1  # pipeline convention: 0 = first GPU, -1 = CPU

def translate(text, src_lang, tgt_lang):
    """Translate text with NLLB-200."""
    translation_pipeline = pipeline(
        "translation", model=nllb_model, tokenizer=nllb_tokenizer,
        src_lang=src_lang, tgt_lang=tgt_lang, max_length=400, device=device,
    )
    return translation_pipeline(text)[0]["translation_text"]
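
# The language codes follow the FLORES-200 convention used by NLLB-200, e.g.
# translate(text, "hin_Deva", "eng_Latn") turns Hindi (Devanagari script)
# into English.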

def transcribe(audio, model_name):
    """Transcribe Gradio audio input (sample_rate, waveform) with an ASR model."""
    transcriber = pipeline("automatic-speech-recognition", model=model_name)
    sr, y = audio
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # guard against division by zero on silent input
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def English(audio):
    return transcribe(audio, "openai/whisper-base.en")

def Hindi(audio):
    text = transcribe(audio, "theainerd/Wav2Vec2-large-xlsr-hindi")
    return translate(text, "hin_Deva", "eng_Latn")

def Telugu(audio):
    text = transcribe(audio, "anuragshas/wav2vec2-large-xlsr-53-telugu")
    return translate(text, "tel_Telu", "eng_Latn")

def Tamil(audio):
    text = transcribe(audio, "Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    return translate(text, "tam_Taml", "eng_Latn")

def Kannada(audio):
    text = transcribe(audio, "vasista22/whisper-kannada-medium")
    return translate(text, "kan_Knda", "eng_Latn")

def predict(audio, language):
    # Transcribe the query and, for regional languages, translate it to English.
    if language == "English":
        message = English(audio)
    elif language == "Hindi":
        message = Hindi(audio)
    elif language == "Telugu":
        message = Telugu(audio)
    elif language == "Tamil":
        message = Tamil(audio)
    elif language == "Kannada":
        message = Kannada(audio)
    else:
        return "Please select a language."
    print(message)

    # Answer with the fine-tuned Gemma model. Decoding is greedy
    # (do_sample=False), so sampling options such as top_k would be ignored.
    sequences = generator(
        message,
        max_length=200,
        do_sample=False,
        num_return_sequences=1,
        eos_token_id=gen_tokenizer.eos_token_id,
    )
    answer = " ".join(seq["generated_text"] for seq in sequences)
    print(answer)

    # Translate the answer back into the language of the query.
    if language == "Hindi":
        return translate(answer, "eng_Latn", "hin_Deva")
    if language == "Telugu":
        return translate(answer, "eng_Latn", "tel_Telu")
    if language == "Tamil":
        return translate(answer, "eng_Latn", "tam_Taml")
    if language == "Kannada":
        return translate(answer, "eng_Latn", "kan_Knda")
    return answer

demo = gr.Interface(
    predict,
    [
        gr.Audio(),
        gr.Dropdown(
            ["Hindi", "Telugu", "Tamil", "Kannada", "English"],
            label="Language",
            info="Please select the language of your choice",
        ),
    ],
    "text",
    title="Farmers-Helper-Bot",
    description="Ask your queries in your regional language",
)
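
# share=True requests a temporary public gradio.live link when running locally;
# on Hugging Face Spaces the app is already served publicly, so the flag is
# typically unnecessary there.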
demo.launch(share=True)