import time
import gradio as gr
from os import getenv
from openai import OpenAI

# OpenRouter exposes an OpenAI-compatible API; expects OPENROUTER_API_KEY in the environment.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)
css = """
body.show-thoughts .thought {
display: block !important;
}
.thought {
opacity: 0.8;
font-family: "Courier New", monospace;
border: 1px gray solid;
padding: 10px;
border-radius: 5px;
display: none;
}
.thought-prompt {
opacity: 0.8;
font-family: "Courier New", monospace;
}
"""
with open("contemplator.txt", "r") as f:
system_msg = f.read()

def make_thinking_prompt(elapsed):
    """Render an animated 'Thinking' bar from the elapsed seconds."""
    # Renamed the parameter from `time` so it no longer shadows the time module.
    i = int(elapsed * 4) % 40
    if i > 20:
        i = 40 - i  # sweep back so the animation bounces
    return "🤔 [" + "." * i + "Thinking" + "." * (20 - i) + "]"

def streaming(message, history, system_msg, model):
    # Rebuild the OpenAI-style message list from the Gradio (user, assistant) history.
    messages = [{"role": "system", "content": system_msg}]
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
thinking_prompt = "<p class='thought-prompt'>" + "π€¨ Understanding..." + "</p>"
yield thinking_prompt
completion = client.chat.completions.create(
model=model,
messages=messages,
max_completion_tokens=8000,
temperature=0.0,
stream=True,
)
reply = ""
start_time = time.time()
try:
for i, chunk in enumerate(completion):
reply += chunk.choices[0].delta.content
answer = ""
if not "</inner_thoughts>" in reply:
thought_text = f'<div class="thought">{reply.replace("<inner_thoughts>", "").strip()}</div>'
thinking_prompt = "<p class='thought-prompt'>" + make_thinking_prompt(time.time() - start_time) + "</p>"
else:
thought_text = f'<div class="thought">{reply.replace("<inner_thoughts>", "").split("</inner_thoughts>")[0].strip()}</div>'
answer = reply.split("</inner_thoughts>")[1].replace("<final_answer>", "").replace("</final_answer>", "").strip()
thinking_prompt = f"<p class='thought-prompt'>β Thought for {time.time() - start_time:.2f} seconds</p>"
yield thinking_prompt + thought_text + "<br>" + answer
yield thinking_prompt + thought_text + "<br>" + answer
except Exception as e:
print(e)
yield f"An error occurred. {e}"
markdown = """
## π« Overthink 1(o1)
Insprired by how o1 works, this LLM is instructed to generate very long and detailed chain-of-thoughts. It will think extra hard before providing an answer.
Actually this does help with reasoning, compared to normal step-by-step reasoning. I wrote a blog post about this [here](https://huggingface.co/blog/wenbopan/recreating-o1).
Sometimes this LLM overthinks for super simple questions, but it's fun to watch. Hope you enjoy it!
### System Message
This is done by instructing the model with a large system message, which you can check on the top tab.
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css, fill_height=True) as demo:
    with gr.Row(equal_height=True):
        with gr.Column(scale=1, min_width=300):
            with gr.Tab("Settings"):
                gr.Markdown(markdown)
                model = gr.Dropdown(
                    [
                        "nousresearch/hermes-3-llama-3.1-405b:free",
                        "nousresearch/hermes-3-llama-3.1-70b",
                        "meta-llama/llama-3.1-405b-instruct",
                        "google/gemini-pro-1.5-exp",
                        "meta-llama/llama-3.1-8b-instruct:free",
                    ],
                    value="nousresearch/hermes-3-llama-3.1-405b:free",
                    label="Model",
                )
                show_thoughts = gr.Checkbox(False, label="Show Thoughts", interactive=True, elem_id="show_thoughts")
                # Mirror the checkbox state onto <body> so the CSS above can reveal thoughts.
                show_thoughts.change(None, js="""function run(){ checked = document.querySelector('#show_thoughts input[type="checkbox"]').checked; document.querySelector('body').classList.toggle('show-thoughts', checked); } """)
            with gr.Tab("System Message"):
                system_msg = gr.TextArea(system_msg, label="System Message")
        with gr.Column(scale=3, min_width=300):
            gr.ChatInterface(
                streaming,
                additional_inputs=[system_msg, model],
                examples=[
                    ["How do you do? ", None, None, None],
                    ["How many R's in strawberry?", None, None, None],
                    ["Solve the puzzle of 24 points: 1 2 3 4", None, None, None],
                    ["Find x such that ⌈x⌉ + x = 23/7. Express x as a common fraction.", None, None, None],
                ],
                cache_examples=False,
            )

if __name__ == "__main__":
    demo.launch()