Prep context cutoff
app.py CHANGED
@@ -4,10 +4,11 @@ import os
 import requests
 from transformers import GPT2TokenizerFast
 
-
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
 
+openai.api_key = OPENAI_API_KEY
+
 tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 
 default_system_message = {"role": "system", "content": "You are a brilliant, helpful assistant, always providing answers to the best of your knowledge. If you are unsure of the answer, you indicate it to the user. Currently, you don't have access to the internet."}
@@ -21,7 +22,7 @@ personalities = {
     "Cartman": {"role": "system", "content": "You are Eric Cartman from South Park. You are a self-centered, fat, rude kid obsessed with your animal comforts."},
 }
 
-def get_completion(model, personality, user_message, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
+def get_completion(model, personality, user_message, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty, context_cutoff):
     # set personality
     system_message = personalities[personality]
     updated_message_history = message_history
@@ -54,11 +55,18 @@ def get_completion(model, personality, user_message, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
         updated_chatlog_history[-1][1] = assistant_message
         full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
         token_count = len(tokenizer(full_prompt)["input_ids"])#completion["usage"]["total_tokens"]
+        # if token_count > context_cutoff:
+        #     # delete second row of updated_message_history
+        #     updated_message_history.pop(1)
+        #     print("cutoff exceeded", updated_message_history)
+        #     # recalculate token count
+        #     full_prompt = "".join([row["content"] for row in updated_message_history])
+        #     token_count = len(tokenizer(full_prompt)["input_ids"])
         yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
     # assistant_message = completion["choices"][0]["message"]["content"]
     # return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
-def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
+def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty, context_cutoff):
     # set personality
     system_message = personalities[personality]
     updated_message_history = message_history
@@ -88,13 +96,13 @@ def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
     # iterate through the stream of events
     for chunk in response:
         collected_chunks.append(chunk) # save the event response
-        chunk_message = chunk[
+        chunk_message = chunk["choices"][0]["delta"] # extract the message
         collected_messages.append(chunk_message) # save the message
-        assistant_message =
+        assistant_message = "".join([m.get("content", "") for m in collected_messages])
         updated_message_history[-1]["content"] = assistant_message
         updated_chatlog_history[-1][1] = assistant_message
-        full_prompt =
-        token_count = len(tokenizer(full_prompt)["input_ids"])
+        full_prompt = "".join([row["content"] for row in updated_message_history])
+        token_count = len(tokenizer(full_prompt)["input_ids"])
         yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 def reset_chat():
@@ -118,6 +126,7 @@ with gr.Blocks(theme=theme) as app:
             presence_penalty = gr.Slider(minimum=0, maximum=2, step=0.01, value=0, interactive=True, label="Presence penalty")
         # with gr.Tab("Model Settings"):
         token_count = gr.Number(info="GPT-3 limit is 4096 tokens. GPT-4 limit is 8192 tokens.",interactive=False, label="Token count")
+        # context_cutoff = gr.Slider(minimum=256, maximum=8192, step=256, value=2048, interactive=True, label="Context cutoff")
     with gr.Row():
         user_message = gr.Textbox(label="Message")
     with gr.Row():
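
Note on the change: the new context_cutoff parameter is threaded into both get_completion and retry_completion, but the trimming logic itself lands commented out (new lines 58-64), as does the UI slider (new line 129). Below is a minimal standalone sketch of what that block would do once enabled, assuming row 0 of the message history is the system message so row 1 is the oldest chat turn; the while-loop is an extrapolation, since the commented-out code pops only a single row per completion:

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

def trim_history(messages, context_cutoff):
    # Drop the oldest non-system turns until the prompt fits under the cutoff.
    full_prompt = "".join(row["content"] for row in messages)
    token_count = len(tokenizer(full_prompt)["input_ids"])
    # A while-loop is an assumption: the commented-out code pops only once.
    while token_count > context_cutoff and len(messages) > 2:
        messages.pop(1)  # delete second row: the oldest turn after the system message
        full_prompt = "".join(row["content"] for row in messages)
        token_count = len(tokenizer(full_prompt)["input_ids"])
    return messages, token_count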
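
For reference, the diff consumes a streaming `response` without showing where it is created. Given the pre-1.0 openai-python idioms in this file (openai.api_key, chunk["choices"][0]["delta"]), the call would look roughly like the sketch below; the literal model, messages, and streaming setup are placeholders for the values the app actually passes in:

import os
import openai  # legacy pre-1.0 openai-python, matching the delta-chunk access above

openai.api_key = os.getenv("OPENAI_API_KEY")

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # the app receives this as the `model` argument
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    stream=True,  # yield incremental delta chunks instead of one full reply
)

collected_messages = []
for chunk in response:
    chunk_message = chunk["choices"][0]["delta"]  # final chunk may carry no "content"
    collected_messages.append(chunk_message)
assistant_message = "".join(m.get("content", "") for m in collected_messages)
print(assistant_message)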