tmartinez committed on
Commit
b23c951
1 Parent(s): c95b274

Upload app.py

Files changed (1)
  1. app.py +106 -0
app.py ADDED
@@ -0,0 +1,106 @@
+ # Imports
+ import gradio as gr
+ import transformers
+ import torch
+ from transformers import pipeline, AutoTokenizer
+
+ from huggingface_hub import login
+
+ # Authenticate with the Hugging Face Hub (the Llama 2 checkpoint is gated)
+ login('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')
+
+ # Model name on the Hugging Face Hub
+ model = "meta-llama/Llama-2-7b-chat-hf"
+
+ tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)
+
+ # Text-generation pipeline running the chat model in half precision
+ llama_pipeline = pipeline(
+     "text-generation",  # LLM task
+     model=model,
+     torch_dtype=torch.float16,
+     device_map="auto",
+ )
+
+ # Llama 2 system prompt wrapped in the expected chat-template tokens
+ SYSTEM_PROMPT = """<s>[INST] <<SYS>>
+ You are a helpful bot. Your answers are clear and concise.
+ <</SYS>>
+
+ """
+
+ # Formatting function for message and history
+ def format_message(message: str, history: list, memory_limit: int = 3) -> str:
+     """
+     Formats the message and history for the Llama model.
+
+     Parameters:
+         message (str): Current message to send.
+         history (list): Past conversation history.
+         memory_limit (int): Limit on how many past interactions to consider.
+
+     Returns:
+         str: Formatted message string
+     """
+     # Always keep len(history) <= memory_limit
+     if len(history) > memory_limit:
+         history = history[-memory_limit:]
+
+     if len(history) == 0:
+         return SYSTEM_PROMPT + f"{message} [/INST]"
+
+     formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
+
+     # Handle conversation history
+     for user_msg, model_answer in history[1:]:
+         formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
+
+     # Handle the current message
+     formatted_message += f"<s>[INST] {message} [/INST]"
+
+     return formatted_message
+
+
+ # Generate a response from the Llama model
+ def get_llama_response(message: str, history: list) -> str:
+     """
+     Generates a conversational response from the Llama model.
+
+     Parameters:
+         message (str): User's input message.
+         history (list): Past conversation history.
+
+     Returns:
+         str: Generated response from the Llama model.
+     """
+     query = format_message(message, history)
+     response = ""
+
+     sequences = llama_pipeline(
+         query,
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         max_length=1024,
+     )
+
+     generated_text = sequences[0]['generated_text']
+     response = generated_text[len(query):]  # Remove the prompt from the output
+
+     print("Chatbot:", response.strip())
+     return response.strip()
+
+
+ gr.ChatInterface(get_llama_response).launch(debug=True)
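
For reference, a minimal sketch of the prompt string that format_message assembles for a single prior exchange plus a new user message. The sample conversation below is illustrative only and not part of the commit; it just reproduces the Llama 2 chat template used above without loading the model.

# Illustrative sketch: what format_message builds for one prior exchange
SYSTEM_PROMPT = """<s>[INST] <<SYS>>
You are a helpful bot. Your answers are clear and concise.
<</SYS>>

"""

history = [("What is Gradio?", "Gradio is a Python library for building ML demos.")]
message = "How do I install it?"

# First exchange is appended to the system prompt, then the new message
prompt = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
prompt += f"<s>[INST] {message} [/INST]"

print(prompt)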