import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
from datasets import load_dataset
from pdfminer.high_level import extract_text
from llama_cpp import Llama

# Pick the best available device: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"MPS not available, using {device}")


def toText(audio):
    """Transcribe the recorded question to text with Whisper."""
    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        chunk_length_s=30,
        device=device,
    )
    question = asr(audio, batch_size=8)["text"]
    return question


# Global variable storing the chat history as (question, answer) pairs.
chat_history = []


def extract_answer(question, text):
    """Answer a question about the resume text with a local LLaMA model."""
    global chat_history

    # Path to the local GGUF model file; adjust to your own setup.
    model_path = "/Users/chandima/.cache/lm-studio/models/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q3_K_L.gguf"

    # Load the LLaMA model with GPU acceleration. Note: the model is reloaded on
    # every call; caching it at module level would avoid the repeated load time.
    llm = Llama(
        model_path=model_path,
        n_gpu_layers=-1,   # Offload all available layers to the GPU
        n_ctx=2048,        # Adjust context size as needed
        verbose=True,      # Optional: for debugging
        use_mlock=True,    # Optional: for better memory management
        n_threads=6,       # Adjust based on your CPU
        use_mmap=True,     # Optional: for faster loading
    )

    # Reconstruct the conversation so far.
    conversation = "\n".join([f"Human: {q}\nAI: {a}" for q, a in chat_history])

    # Ask LLaMA to answer the question using the resume text as context.
    prompt = f"""
    You are an AI assistant answering questions based on a resume.
    Here's the conversation so far:
    {conversation}

    Human: {question}
    Resume: {text}
    AI: """
    response = llm(prompt, max_tokens=800, stop=["Human:", "\n\n"])
    answer = response["choices"][0]["text"].strip()

    # Append the new question and answer to the chat history.
    chat_history.append((question, answer))
    print(answer)
    return answer


def toAudio(text):
    """Synthesize speech for the answer with SpeechT5 and a fixed speaker embedding."""
    synthesiser = pipeline("text-to-speech", "microsoft/speecht5_tts", device=device)
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    return speech


def clone(audio, file):
    """Full pipeline: spoken question -> text -> answer from resume -> spoken reply."""
    if audio is None or file is None:
        return None
    question = toText(audio=audio)
    text = extract_text(file.name)
    res = extract_answer(question, text)
    print(res)
    speech = toAudio(res)
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    return "./speech.wav"


def start_recording():
    # Returning None clears the microphone input so a new question can be recorded.
    return None


def reset_conversation():
    global chat_history
    chat_history = []
    return None


with gr.Blocks() as iface:
    with gr.Row():
        audio_input = gr.Audio(sources="microphone", type="filepath", label="Question from Resume")
        file_input = gr.File(label="Resume")
    output = gr.Audio(label="Says", autoplay=True)
    inputs = [audio_input, file_input]

    btn = gr.Button("Submit")
    btn.click(fn=clone, inputs=inputs, outputs=output)
    audio_input.stop_recording(fn=clone, inputs=inputs, outputs=output)

    # Clear the microphone input when the answer starts playing,
    # so the next question can be recorded.
    output.play(fn=start_recording, outputs=audio_input)

    # Button to reset the conversation history.
    reset_btn = gr.Button("Reset Conversation")
    reset_btn.click(fn=reset_conversation, inputs=None, outputs=None)

iface.launch()
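
# Rough install sketch (an assumption, not part of the original script): the imports
# above roughly correspond to these PyPI packages, plus a local GGUF model file at the
# path referenced in extract_answer():
#   pip install gradio torch transformers datasets soundfile pdfminer.six llama-cpp-python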