import numpy as np
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, pipeline
import gradio as gr

# Fine-tuned Gemma model that answers the farmers' queries (in English).
new_model = "tensorgirl/finetuned-gemma"
model = AutoModelForCausalLM.from_pretrained(new_model, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(new_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

generator = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# NLLB model used to translate between English and the regional languages.
# Bound to separate names so they do not shadow the Gemma model/tokenizer above
# (the generator's eos_token_id below relies on the Gemma tokenizer).
translation_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translation_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
device = 0 if torch.cuda.is_available() else -1


def translate(text, src_lang, tgt_lang):
    translation_pipeline = pipeline(
        "translation",
        model=translation_model,
        tokenizer=translation_tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,
        device=device,
    )
    result = translation_pipeline(text)
    return result[0]["translation_text"]


def normalize_audio(audio):
    # Gradio delivers audio as (sample_rate, int16 ndarray); scale to float32 in [-1, 1].
    sr, y = audio
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # guard against all-silence input
        y /= peak
    return sr, y


def English(audio):
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
    sr, y = normalize_audio(audio)
    return transcriber({"sampling_rate": sr, "raw": y})["text"]


def Hindi(audio):
    transcriber = pipeline("automatic-speech-recognition", model="theainerd/Wav2Vec2-large-xlsr-hindi")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "hin_Deva", "eng_Latn")


def Telugu(audio):
    transcriber = pipeline("automatic-speech-recognition", model="anuragshas/wav2vec2-large-xlsr-53-telugu")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "tel_Telu", "eng_Latn")


def Tamil(audio):
    transcriber = pipeline("automatic-speech-recognition", model="Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "tam_Taml", "eng_Latn")


def Kannada(audio):
    transcriber = pipeline("automatic-speech-recognition", model="vasista22/whisper-kannada-medium")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "kan_Knda", "eng_Latn")


def predict(audio, language):
    # Transcribe the audio, translating the transcript to English where needed.
    if language == "English":
        message = English(audio)
    elif language == "Hindi":
        message = Hindi(audio)
    elif language == "Telugu":
        message = Telugu(audio)
    elif language == "Tamil":
        message = Tamil(audio)
    elif language == "Kannada":
        message = Kannada(audio)
    else:
        return "Unsupported language."
    print(message)

    sequences = generator(
        message,
        max_length=200,
        do_sample=False,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,  # Gemma tokenizer's EOS, not the NLLB one
    )
    answer = " ".join(seq["generated_text"] for seq in sequences)
    print(answer)

    # Translate the English answer back into the user's language.
    if language == "Hindi":
        return translate(answer, "eng_Latn", "hin_Deva")
    if language == "Telugu":
        return translate(answer, "eng_Latn", "tel_Telu")
    if language == "Tamil":
        return translate(answer, "eng_Latn", "tam_Taml")
    if language == "Kannada":
        return translate(answer, "eng_Latn", "kan_Knda")
    return answer


demo = gr.Interface(
    predict,
    [
        gr.Audio(),
        gr.Dropdown(
            ["Hindi", "Telugu", "Tamil", "Kannada", "English"],
            label="Language",
            info="Please select language of your choice",
        ),
    ],
    "text",
    title="Farmers-Helper-Bot",
    description="Ask your queries in your regional Language",
)
demo.launch(share=True)
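# A minimal sketch of how one might exercise predict() outside the Gradio UI,
# assuming the soundfile package is installed and a local mono recording exists
# ("sample_query.wav" is a hypothetical path):
#
#   import soundfile as sf
#   data, sr = sf.read("sample_query.wav", dtype="int16")
#   print(predict((sr, data), "Hindi"))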