import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
from datasets import load_dataset
from pdfminer.high_level import extract_text
from llama_cpp import Llama

# Pick the best available device: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"MPS not available, using {device}")


def toText(audio):
    """Transcribe the recorded question to text with Whisper."""
    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        chunk_length_s=30,
        device=device,
    )
    question = asr(audio, batch_size=8)["text"]
    return question


# Global variable storing the chat history as (question, answer) pairs.
chat_history = []


def extract_answer(question, text):
    """Answer a question about the resume text with a local LLaMA model."""
    global chat_history

    # Path to the local GGUF model file; adjust to your own setup.
    model_path = "/Users/chandima/.cache/lm-studio/models/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q3_K_L.gguf"

    # Load the LLaMA model with GPU acceleration. Note: the model is reloaded on
    # every call; caching it at module level would avoid the repeated load time.
    llm = Llama(
        model_path=model_path,
        n_gpu_layers=-1,   # Offload all available layers to the GPU
        n_ctx=2048,        # Adjust context size as needed
        verbose=True,      # Optional: for debugging
        use_mlock=True,    # Optional: for better memory management
        n_threads=6,       # Adjust based on your CPU
        use_mmap=True,     # Optional: for faster loading
    )

    # Reconstruct the conversation so far.
    conversation = "\n".join([f"Human: {q}\nAI: {a}" for q, a in chat_history])

    # Ask LLaMA to answer the question using the resume text as context.
    prompt = f"""
    You are an AI assistant answering questions based on a resume.
    Here's the conversation so far:
    {conversation}

    Human: {question}
    Resume: {text}
    AI: """
    response = llm(prompt, max_tokens=800, stop=["Human:", "\n\n"])
    answer = response["choices"][0]["text"].strip()

    # Append the new question and answer to the chat history.
    chat_history.append((question, answer))
    print(answer)
    return answer


def toAudio(text):
    """Synthesize speech for the answer with SpeechT5 and a fixed speaker embedding."""
    synthesiser = pipeline("text-to-speech", "microsoft/speecht5_tts", device=device)
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    return speech


def clone(audio, file):
    """Full pipeline: spoken question -> text -> answer from resume -> spoken reply."""
    if audio is None or file is None:
        return None
    question = toText(audio=audio)
    text = extract_text(file.name)
    res = extract_answer(question, text)
    print(res)
    speech = toAudio(res)
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    return "./speech.wav"


def start_recording():
    # Returning None clears the microphone input so a new question can be recorded.
    return None


def reset_conversation():
    global chat_history
    chat_history = []
    return None


with gr.Blocks() as iface:
    with gr.Row():
        audio_input = gr.Audio(sources="microphone", type="filepath", label="Question from Resume")
        file_input = gr.File(label="Resume")
    output = gr.Audio(label="Says", autoplay=True)
    inputs = [audio_input, file_input]

    btn = gr.Button("Submit")
    btn.click(fn=clone, inputs=inputs, outputs=output)
    audio_input.stop_recording(fn=clone, inputs=inputs, outputs=output)

    # Clear the microphone input when the answer starts playing,
    # so the next question can be recorded.
    output.play(fn=start_recording, outputs=audio_input)

    # Button to reset the conversation history.
    reset_btn = gr.Button("Reset Conversation")
    reset_btn.click(fn=reset_conversation, inputs=None, outputs=None)

iface.launch()
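
# Rough install sketch (an assumption, not part of the original script): the imports
# above roughly correspond to these PyPI packages, plus a local GGUF model file at the
# path referenced in extract_answer():
#   pip install gradio torch transformers datasets soundfile pdfminer.six llama-cpp-python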