# Page residue captured along with the source (not program text):
# "Spaces: Runtime error / Runtime error"
import gradio as gr | |
import os | |
from typing import Iterator | |
import sambanova | |
def _build_conversation(message, chat_history, system_message):
    """Return the role/content message list for one model request.

    The list starts with the system message, replays the prior turns and
    ends with the new user ``message``.  A history entry may be either a
    ``(user_text, assistant_text)`` pair or a dict carrying ``"role"`` and
    ``"content"`` keys; empty entries are skipped in both cases.
    """
    conversation = [{"role": "system", "content": system_message}]
    for turn in chat_history:
        if isinstance(turn, dict):
            # Dict-style history is already role/content shaped; copy only
            # those two keys so extra fields are not forwarded.
            role, content = turn.get("role"), turn.get("content")
            if role and content:
                conversation.append({"role": role, "content": content})
            continue
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            conversation.append({"role": "user", "content": user_text})
        if assistant_text:
            conversation.append({"role": "assistant", "content": assistant_text})
    conversation.append({"role": "user", "content": message})
    return conversation


def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream the model's reply to ``message``.

    Args:
        message: The new user message.
        chat_history: Earlier turns, as ``(user, assistant)`` pairs.
            Role/content dicts are accepted as well.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``sambanova.Streamer`` as ``new_tokens``.
        temperature: Forwarded to ``sambanova.Streamer``.
        top_p: Forwarded to ``sambanova.Streamer``.
        top_k: Forwarded to ``sambanova.Streamer``.
        repetition_penalty: Accepted for interface compatibility only; it
            is not forwarded to ``sambanova.Streamer``.

    Yields:
        The reply accumulated so far, once per fragment produced by the
        streamer, so each yielded value is the full text received up to
        that point rather than just the newest fragment.
    """
    conversation = _build_conversation(message, chat_history, system_message)

    response = ""
    for text in sambanova.Streamer(
        conversation,
        new_tokens=max_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    ):
        # The whole reply is needed at every step, so keep a running string.
        response += text
        yield response
# Upper bound and initial value of the "Max tokens" slider in the chat UI.
MAX_MAX_TOKENS = 2048
DEFAULT_MAX_TOKENS = 1024

# Input-length limit, overridable through the MAX_INPUT_TOKEN_LENGTH
# environment variable (falls back to 4096 when unset).
# NOTE(review): not referenced in the visible part of this file; confirm
# whether it is still needed.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
# NOTE: earlier ChatInterface configuration, kept commented out for reference.
# It refers to MAX_MAX_NEW_TOKENS / DEFAULT_MAX_NEW_TOKENS, which are not
# defined in the visible part of this file.
#
# chat_interface = gr.ChatInterface(
#     fn=generate,
#     additional_inputs=[
#         gr.Slider(
#             label="Max new tokens",
#             minimum=1,
#             maximum=MAX_MAX_NEW_TOKENS,
#             step=1,
#             value=DEFAULT_MAX_NEW_TOKENS,
#         ),
#         gr.Slider(
#             label="Temperature",
#             minimum=0.1,
#             maximum=4.0,
#             step=0.1,
#             value=0.6,
#         ),
#         gr.Slider(
#             label="Top-p (nucleus sampling)",
#             minimum=0.05,
#             maximum=1.0,
#             step=0.05,
#             value=0.9,
#         ),
#         gr.Slider(
#             label="Top-k",
#             minimum=1,
#             maximum=1000,
#             step=1,
#             value=50,
#         ),
#         gr.Slider(
#             label="Repetition penalty",
#             minimum=1.0,
#             maximum=2.0,
#             step=0.05,
#             value=1.2,
#         ),
#     ],
#     stop_btn=None,
#     fill_height=True,
#     examples=[
#         ["Which one is bigger? 4.9 or 4.11"],
#         [
#             "Can you explain briefly to me what is the Python programming language?"
#         ],
#         ["Explain the plot of Cinderella in a sentence."],
#         ["How many hours does it take a man to eat a Helicopter?"],
#         [
#             "Write a 100-word article on 'Benefits of Open-Source in AI research'"
#         ],
#     ],
#     cache_examples=False,
# )
# Chat UI backed by `generate`.  The components in `additional_inputs` are
# handed to `generate` after the message and the history, so their order
# has to match its parameters: system_message, max_tokens, temperature,
# top_p, top_k.  No component is supplied for `repetition_penalty`, so that
# parameter keeps its default.
chat_interface = gr.ChatInterface(
    generate,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly Chatbot.",
            label="System message",
        ),
        gr.Slider(
            label="Max tokens",
            minimum=1,
            maximum=MAX_MAX_TOKENS,
            step=1,
            value=DEFAULT_MAX_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.6,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        ),
    ],
    # Each example supplies only the message text.
    examples=[
        ["Which one is bigger? 4.9 or 4.11"],
        [
            "Can you explain briefly to me what is the Python programming language?"
        ],
        ["Explain the plot of Cinderella in a sentence."],
        ["How many hours does it take a man to eat a Helicopter?"],
        [
            "Write a 100-word article on 'Benefits of Open-Source in AI research'"
        ],
    ],
    cache_examples=False,
)
# Page layout: a Markdown title with the chat interface rendered below it.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown('# Sambanova model inference LLAMA 405B')
    chat_interface.render()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # Enable request queuing with a queue size of 20, then start the server.
    demo.queue(max_size=20).launch()