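"""Voice-driven resume Q&A.

A Gradio app that records a spoken question, transcribes it with Whisper,
answers it from an uploaded resume PDF using a local LLaMA model via
llama-cpp-python, and reads the answer back with SpeechT5 text-to-speech.
"""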
import gradio as gr
from transformers import pipeline
from datasets import load_dataset
import torch
import soundfile as sf
from pdfminer.high_level import extract_text
from llama_cpp import Llama

# Check if MPS is available and set the device
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using MPS device")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"MPS not available, using {device}")


def toText(audio):
    """Transcribe the spoken question with a small English Whisper model."""
    # Note: the ASR pipeline is rebuilt on every call; cache it if latency matters
    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        chunk_length_s=30,
        device=device,
    )
    question = asr(audio, batch_size=8)["text"]
    return question


# Global conversation history shared across calls (fine for a single-user demo,
# not safe for concurrent sessions)
chat_history = []

# Cache the LLaMA instance so the large GGUF file is loaded from disk only once
llm = None


def extract_answer(question, text):
    """Answer a question about the resume text, keeping multi-turn context."""
    global chat_history, llm

    if llm is None:
        # Path to a local GGUF model; adjust this for your own machine
        model_path = "/Users/chandima/.cache/lm-studio/models/lmstudio-community/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q3_K_L.gguf"
        llm = Llama(
            model_path=model_path,
            n_gpu_layers=-1,  # offload all layers to the GPU (Metal on Apple silicon)
            n_ctx=2048,       # context window; raise it for long resumes
            verbose=True,     # log model load and inference details
            use_mlock=True,   # pin model memory to avoid swapping
            n_threads=6,      # CPU threads; tune to your core count
            use_mmap=True,    # memory-map the model file for faster loading
        )

    # Construct the conversation history
    conversation = "\n".join([f"Human: {q}\nAI: {a}" for q, a in chat_history])

    # Ask LLaMA to answer the question from the resume, conditioned on the history
    prompt = f"""You are an AI assistant answering questions based on a resume. Here's the conversation so far:

{conversation}

Human: {question}

Resume:
{text}

AI: """
    
    response = llm(prompt, max_tokens=800, stop=["Human:", "\n\n"])
    answer = response['choices'][0]['text'].strip()
    
    # Append the new question and answer to the chat history
    chat_history.append((question, answer))
    
    print(answer)
    return answer

def toAudio(text):
    """Synthesize the answer as speech with SpeechT5 and a fixed speaker voice."""
    synthesiser = pipeline("text-to-speech", "microsoft/speecht5_tts", device=device)
    # SpeechT5 needs a speaker x-vector; index 7306 is the voice used in the
    # Hugging Face SpeechT5 examples
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    return speech

def clone(audio, file):
    """Run the full pipeline: spoken question + resume PDF -> spoken answer."""
    if audio is None or file is None:
        return None
    question = toText(audio=audio)
    text = extract_text(file.name)
    res = extract_answer(question, text)
    print(res)
    speech = toAudio(res)
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    return "./speech.wav"

def start_recording():
    # Returning None clears the microphone component so it is ready to record again
    return None

def reset_conversation():
    global chat_history
    chat_history = []
    return None

with gr.Blocks() as iface:
    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Question from Resume")
        file_input = gr.File(label="Resume")
    
    output = gr.Audio(label="Answer", autoplay=True)
    
    inputs = [audio_input, file_input]
    
    btn = gr.Button("Submit")
    btn.click(fn=clone, inputs=inputs, outputs=output)
    
    audio_input.stop_recording(fn=clone, inputs=inputs, outputs=output)
    
    # When the answer starts playing, clear the mic input for the next question
    output.play(fn=start_recording, outputs=audio_input)

    # Add a button to reset the conversation
    reset_btn = gr.Button("Reset Conversation")
    reset_btn.click(fn=reset_conversation, inputs=None, outputs=None)

iface.launch()
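
# Usage (a sketch; assumes this file is saved as app.py and the packages below
# are installed, unpinned):
#   pip install gradio transformers datasets torch soundfile pdfminer.six llama-cpp-python
#   python app.py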