tmartinez committed
Commit abf42db
1 Parent(s): b475b4e

Upload 2 files

Files changed (2)
  1. app.py +108 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,108 @@
+ # Imports
+ import gradio as gr
+ import transformers
+ import torch
+ from transformers import pipeline, AutoTokenizer
+
+ from huggingface_hub import login
+
+ login('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')
+
+ # Model name in Hugging Face docs
+ model = 'klyang/MentaLLaMA-chat-7B'
+
+ # "meta-llama/Llama-2-7b-chat-hf"
+
+ tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)
+
+ llama_pipeline = pipeline(
+     "text-generation",  # LLM task
+     model=model,
+     torch_dtype=torch.float16,
+     device_map="auto",
+ )
+
+
+ SYSTEM_PROMPT = """<s>[INST] <<SYS>>
+ You are a helpful bot. Your answers are clear and concise.
+ <</SYS>>
+
+ """
+
+ # Formatting function for message and history
+ def format_message(message: str, history: list, memory_limit: int = 3) -> str:
+     """
+     Formats the message and history for the Llama model.
+
+     Parameters:
+         message (str): Current message to send.
+         history (list): Past conversation history.
+         memory_limit (int): Limit on how many past interactions to consider.
+
+     Returns:
+         str: Formatted message string
+     """
+     # Always keep len(history) <= memory_limit
+     if len(history) > memory_limit:
+         history = history[-memory_limit:]
+
+     if len(history) == 0:
+         return SYSTEM_PROMPT + f"{message} [/INST]"
+
+     formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
+
+     # Handle conversation history
+     for user_msg, model_answer in history[1:]:
+         formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
+
+     # Handle the current message
+     formatted_message += f"<s>[INST] {message} [/INST]"
+
+     return formatted_message
+
+
+ # Generate a response from the Llama model
+ def get_llama_response(message: str, history: list) -> str:
+     """
+     Generates a conversational response from the Llama model.
+
+     Parameters:
+         message (str): User's input message.
+         history (list): Past conversation history.
+
+     Returns:
+         str: Generated response from the Llama model.
+     """
+     query = format_message(message, history)
+     response = ""
+
+     sequences = llama_pipeline(
+         query,
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         max_length=1024,
+     )
+
+     generated_text = sequences[0]['generated_text']
+     response = generated_text[len(query):]  # Remove the prompt from the output
+
+     print("Chatbot:", response.strip())
+     return response.strip()
+
+
+ gr.ChatInterface(get_llama_response).launch(debug=True)
+
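For context, the Llama-2 chat template that format_message targets can be traced with a minimal standalone sketch (not part of the commit; the example messages are invented for illustration):

    # Standalone sketch (not part of the commit): reproduces, by hand, the prompt
    # string that format_message in app.py assembles for a short conversation.
    SYSTEM_PROMPT = """<s>[INST] <<SYS>>
    You are a helpful bot. Your answers are clear and concise.
    <</SYS>>

    """

    history = [("Hi there!", "Hello! How can I help you today?")]  # made-up past exchange
    message = "Please keep your answers short."                    # made-up current message

    # The first past exchange is appended directly after the system prompt...
    prompt = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"
    # ...later exchanges and the current message each open a new <s>[INST] block.
    for user_msg, model_answer in history[1:]:
        prompt += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
    prompt += f"<s>[INST] {message} [/INST]"

    print(prompt)

The assembled prompt ends with an open [/INST] tag, which is why get_llama_response slices the prompt prefix off generated_text to recover only the model's reply.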
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ accelerate
+ gradio
+ torch
+ transformers
+ sentencepiece
+
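For local testing, the usual workflow would presumably be pip install -r requirements.txt followed by python app.py; accelerate backs the device_map="auto" loading in app.py, and sentencepiece is required by the Llama tokenizer.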