Spaces:
Running
Running
File size: 6,441 Bytes
e0e93c4 50f19fa 044dd38 ea19e17 044dd38 ea19e17 044dd38 ea19e17 e0e93c4 044dd38 ea19e17 044dd38 ea19e17 044dd38 50f19fa ea19e17 044dd38 2b2948e 044dd38 73d3fc4 2d9906b ea19e17 50f19fa 3f6473e 50f19fa b3b6d77 2d9906b ea19e17 50f19fa 3f6473e 50f19fa 73d3fc4 044dd38 ea19e17 2b2948e ea19e17 044dd38 ea19e17 044dd38 ea19e17 044dd38 ea19e17 044dd38 50f19fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import gradio as gr
import models
import pandas as pd
# Shared markup for every centred heading on the page; only the font size and
# the title text differ between headings.
# NOTE(review): the <h1> element is never closed, so rendering relies on the
# browser's leniency -- consider appending </h1>.
_HEADING_TEMPLATE = "<h1 style='text-align: center; color: midnightblue; font-size: {size}px;'>{title}"

# Page title.
text = _HEADING_TEMPLATE.format(size=40, title="TCO Comparison Calculator")
# Section headings, one per step of the page.
text0 = _HEADING_TEMPLATE.format(size=30, title="Describe your use case")
text1 = _HEADING_TEMPLATE.format(size=30, title="First solution")
text2 = _HEADING_TEMPLATE.format(size=30, title="Second solution")
text3 = _HEADING_TEMPLATE.format(size=30, title="Compute and compare TCOs")

# Introductory HTML paragraphs. A plain string is enough here: the text
# contains no interpolated values.
description = """
<p>In this demo application, we help you compare different AI model services, such as Open source or SaaS solutions.</p>
<p>First, you'll have to choose how you want to use the AI model service based on your use case. Then, select the two model service solutions you'd like to compare. Depending on the solution you chose, you could be able to modify some parameters of the set-up. Eventually, we will provide you with the cost of deployment for the selected model services, as a function of the number of requests. You can compare both solutions to evaluate which one best suits your needs.</p>
"""
def on_use_case_change(use_case):
    """Return preset token counts for the selected use case.

    Args:
        use_case: The use-case label chosen in the dropdown.

    Returns:
        A pair of ``gr.update`` objects. The first carries the preset number
        of input tokens and the second the preset number of output tokens
        (the file wires them to the input and output token sliders).
    """
    if use_case == "Summarize":
        n_input, n_output = 500, 200
    elif use_case == "Question-Answering":
        n_input, n_output = 300, 300
    else:
        # Any other selection falls back to the smallest preset.
        n_input, n_output = 50, 10
    return gr.update(value=n_input), gr.update(value=n_output)
def compare(tco1, tco2):
    """Describe how the cost of the second solution relates to the first.

    Args:
        tco1: Cost figure of the first solution, or ``None`` if it has not
            been computed.
        tco2: Cost figure of the second solution, or ``None`` if it has not
            been computed.

    Returns:
        A sentence stating how many times more expensive or cheaper the
        second solution is, or that both cost the same. When a cost is
        missing or exactly zero (so no finite ratio exists) an explanatory
        sentence is returned instead of raising.
    """
    # A missing value means an earlier computation step did not produce a
    # cost; report that instead of failing on the arithmetic below.
    if tco1 is None or tco2 is None:
        return "Comparison unavailable: compute the cost of both solutions first."

    # Zero costs would lead to a division by zero, so handle them up front.
    if tco1 == 0 and tco2 == 0:
        return "Both solutions will cost the same."
    if tco2 == 0:
        return "Second solution is free while the first is not; no finite ratio exists."
    if tco1 == 0:
        return "First solution is free while the second is not; no finite ratio exists."

    r = tco1 / tco2
    if r < 1:
        # Divide directly rather than inverting r, which could have
        # underflowed to zero for extreme inputs.
        return f"Second solution is {tco2 / tco1:.5f} times more expensive than the first"
    if r > 1:
        return f"Second solution is {r:.5f} times cheaper than the first"
    return "Both solutions will cost the same."
# Request volumes at which the cost of each solution is sampled for the plot.
_REQUEST_COUNTS = (100, 200, 300, 400, 500, 1000, 10000)
# Dropdown labels that carry the flat surcharge below.
_SURCHARGED_LABELS = ("(Open source) Llama 2", "(Open source) DIY")
# Flat amount added to every data point of a surcharged solution.
# NOTE(review): presumably a monthly labour cost -- confirm the intended meaning.
_FLAT_SURCHARGE = 1000


def _flat_cost(label):
    """Return the flat amount added to every data point for *label*."""
    return _FLAT_SURCHARGE if label in _SURCHARGED_LABELS else 0


def _build_cost_table(tco1, tco2, dropdown, dropdown2):
    """Build the long-format cost table with one row per (solution, volume)."""
    # Each solution gets its own surcharge, so two surcharged solutions are
    # both charged rather than only the first one.
    flat1 = _flat_cost(dropdown)
    flat2 = _flat_cost(dropdown2)
    n_points = len(_REQUEST_COUNTS)
    return pd.DataFrame(
        {
            "Number of requests": list(_REQUEST_COUNTS) * 2,
            "Cost ($)": (
                [tco1 * req + flat1 for req in _REQUEST_COUNTS]
                + [tco2 * req + flat2 for req in _REQUEST_COUNTS]
            ),
            "AI model service": [dropdown] * n_points + [dropdown2] * n_points,
        }
    )


def update_plot(tco1, tco2, dropdown, dropdown2):
    """Return a line-plot update comparing the cost of both solutions.

    Args:
        tco1: Cost per request of the first solution.
        tco2: Cost per request of the second solution.
        dropdown: Label of the first solution as selected in its dropdown.
        dropdown2: Label of the second solution as selected in its dropdown.

    Returns:
        The result of ``gr.LinePlot.update`` for a plot of cost against
        number of requests, with one line per solution.
    """
    data = _build_cost_table(tco1, tco2, dropdown, dropdown2)
    return gr.LinePlot.update(
        data,
        x="Number of requests",
        y="Cost ($)",
        color="AI model service",
        color_legend_position="bottom",
        title="Total Cost of Model Serving for one month",
        height=300,
        width=500,
        tooltip=["Number of requests", "Cost ($)", "AI model service"],
    )
# Build the page: use-case inputs, two side-by-side solution pickers, and the
# results area (cost per request, formula, ratio and cost plot).
# NOTE(review): the nesting of the layout containers was reconstructed from
# the statement order -- confirm that the rendered layout is as intended.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Model classes offered in both dropdowns. They are classes, not
    # instances: each one is instantiated below only to read its name.
    Models: list[type[models.BaseTCOModel]] = [
        models.OpenAIModel,
        models.CohereModel,
        models.OpenSourceLlama2Model,
        models.OpenSourceDIY,
    ]
    model_names = [Model().get_name() for Model in Models]

    gr.Markdown(value=text)
    gr.Markdown(value=description)

    # --- Step 1: describe the use case -----------------------------------
    with gr.Row():
        with gr.Column():
            with gr.Row():
                gr.Markdown(value=text0)
            with gr.Row():
                use_case = gr.Dropdown(["Summarize", "Question-Answering", "Classification"], value="Question-Answering", label="AI model service type")
            with gr.Row():
                input_tokens = gr.Slider(minimum=1, maximum=1000, value=300, step=1, label="Number of input token", info="We put a value that we find best suit your use case choice but feel free to adjust", interactive=True)
                output_tokens = gr.Slider(minimum=1, maximum=1000, value=300, step=1, label="Number of output token", info="We put a value that we find best suit your use case choice but feel free to adjust", interactive=True)
            # The row is hidden, but num_users is still passed as an input to
            # the dropdown change handlers below.
            with gr.Row(visible=False):
                num_users = gr.Number(value="1000", interactive = True, label="Number of users for your service")

            # Changing the use case resets both token sliders to its presets.
            use_case.change(on_use_case_change, inputs=use_case, outputs=[input_tokens, output_tokens])

    # --- Step 2: pick the two solutions to compare -----------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown(value=text1)
            page1 = models.ModelPage(Models)
            dropdown = gr.Dropdown(model_names, interactive=True, label="AI service options")
            page1.render()

        with gr.Column():
            gr.Markdown(value=text2)
            page2 = models.ModelPage(Models)
            dropdown2 = gr.Dropdown(model_names, interactive=True, label="AI service options")
            page2.render()

    dropdown.change(page1.make_model_visible, inputs=[dropdown, use_case, num_users, input_tokens, output_tokens], outputs=page1.get_all_components())
    dropdown2.change(page2.make_model_visible, inputs=[dropdown2, use_case, num_users, input_tokens, output_tokens], outputs=page2.get_all_components())

    # --- Step 3: compute and compare -------------------------------------
    gr.Markdown(value=text3)
    compute_tco_btn = gr.Button("Compute cost/request and TCOs", size="lg")
    # Hold the computed cost of each solution between the chained steps below.
    tco1 = gr.State()
    tco2 = gr.State()

    with gr.Row():
        with gr.Column():
            tco_output = gr.Text("Output 1: ", label="Cost/request for the first solution")
            latency_info = gr.Markdown()
            with gr.Accordion("Open to see the formula", open=False):
                tco_formula = gr.Markdown()

        with gr.Column():
            tco_output2 = gr.Text("Output 2: ", label="Cost/request for the second solution")
            latency_info2 = gr.Markdown()
            with gr.Accordion("Open to see the formula", open=False):
                tco_formula2 = gr.Markdown()

    with gr.Row():
        with gr.Column(scale=1):
            ratio = gr.Text("Ratio: ", label="Ratio of cost/request for both solutions")
        with gr.Column(scale=3):
            plot = gr.LinePlot()

    # One click runs four steps in sequence: cost of solution 1, cost of
    # solution 2, the textual ratio, and finally the plot.
    compute_tco_btn.click(
        page1.compute_cost_per_token,
        inputs=page1.get_all_components_for_cost_computing() + [dropdown],
        outputs=[tco_output, tco1, tco_formula, latency_info],
    ).then(
        page2.compute_cost_per_token,
        inputs=page2.get_all_components_for_cost_computing() + [dropdown2],
        outputs=[tco_output2, tco2, tco_formula2, latency_info2],
    ).then(
        compare,
        inputs=[tco1, tco2],
        outputs=ratio,
    ).then(
        update_plot,
        inputs=[tco1, tco2, dropdown, dropdown2],
        outputs=plot,
    )

# Start the web server once the whole page has been defined.
demo.launch(debug=True)