shah1zil committed on
Commit
e5b3380
•
1 Parent(s): ff93138

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ from gtts import gTTS
5
+ import io
6
+ from groq import Groq
7
+ from PyPDF2 import PdfReader
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ import torch
10
+
# Set up environment variables.
# SECURITY: the Groq API key must be provided through the environment (for
# example as a Space/CI secret) and never be hard-coded in source control.
# Any key that was previously committed to this file should be treated as
# compromised and rotated.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; provide it as an environment variable or secret."
    )

# Initialize the Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper model
whisper_model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"

# Initialize the tokenizer and model from the saved checkpoint for RAG
rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
rag_model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# device_map="auto" already places the weights on the available device(s),
# so no explicit rag_model.to("cuda") is performed here; moving a model that
# was dispatched this way is unnecessary and can fail if parts are offloaded.
31
+
# Load PDF content
def load_pdf(pdf_path):
    """Return the extracted text of every page in the PDF at ``pdf_path``.

    Pages are read in order and each page's text is followed by a newline.
    """
    with open(pdf_path, "rb") as pdf_file:
        # Extraction must happen while the file handle is still open.
        page_chunks = [
            page.extract_text() + "\n" for page in PdfReader(pdf_file).pages
        ]
    return "".join(page_chunks)
42
+
# Prompt format for the RAG model.
# The three positional "{}" placeholders are filled, in order, with:
#   1. the context/document text,
#   2. the question to answer,
#   3. the response slot (process_audio_rag passes " " so the model
#      continues from the "### Response:" header).
prompt_template = """
### Instruction and Input:
Based on the following context/document:
{}
Please answer the question: {}

### Response:
{}
"""
53
+
# Function to process audio and generate a response using RAG and Groq
def process_audio_rag(file_path):
    """Answer a spoken question about the PDF and return text plus speech.

    Steps: transcribe the audio with Whisper, insert the PDF text and the
    transcription into ``prompt_template``, generate with the RAG model, send
    the decoded output to Groq, and synthesize the Groq reply with gTTS.

    Args:
        file_path: Path to the input audio file. May be ``None`` or empty,
            e.g. when the UI invokes the callback without a recording.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``audio_path`` is
        ``"response.mp3"``. If no audio was supplied or any step raises,
        ``audio_path`` is ``None`` and ``text`` describes the problem.
    """
    # Guard against an empty input instead of surfacing an opaque library
    # error from the transcription step.
    if not file_path:
        return "No audio input received.", None

    try:
        # Load and transcribe the audio using Whisper
        audio = whisper.load_audio(file_path)
        query = whisper_model.transcribe(audio)["text"]

        # Load the PDF content (update with your PDF path or pass it as an argument)
        pdf_path = "/content/BN_Cotton.pdf"
        pdf_text = load_pdf(pdf_path)

        # Prepare the input data for the RAG model
        input_text = prompt_template.format(pdf_text, query, " ")

        # Tokenize and place the tensors on the same device as the model,
        # rather than assuming "cuda" whenever a GPU is visible.
        model_inputs = rag_tokenizer(input_text, return_tensors="pt")
        model_inputs = model_inputs.to(rag_model.device)

        # Generate text using the RAG model
        outputs = rag_model.generate(
            **model_inputs,
            max_new_tokens=500,
            no_repeat_ngram_size=5,
        )
        # NOTE(review): the full generated sequence is decoded, which for a
        # causal LM presumably includes the prompt (PDF text + question) as
        # well as the answer -- confirm that forwarding all of it to Groq is
        # intended and fits the model's context window.
        rag_response = rag_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Generate a response using Groq if needed
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": rag_response}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech and write it straight to disk
        # (no intermediate in-memory copy is needed).
        tts = gTTS(response_message)
        with open("response.mp3", "wb") as audio_file:
            tts.write_to_fp(audio_file)

        # Return the response text and the path to the saved audio file
        return response_message, "response.mp3"

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the Gradio callback.
        return f"An error occurred: {e}", None
105
+
# Build the Gradio UI: one audio input (delivered to the callback as a file
# path) and two outputs (the reply text and the synthesized reply audio).
mic_input = gr.Audio(type="filepath")
reply_outputs = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]

iface = gr.Interface(
    fn=process_audio_rag,
    inputs=mic_input,
    outputs=reply_outputs,
    live=True,
    title="Agriculture Assistant",
)

# Start the Gradio server for the interface
iface.launch()