rlasseri committed on
Commit
98eb32d
1 Parent(s): 2804169

Global additions

Files changed (1)
  1. models.py +96 -30
models.py CHANGED
@@ -49,27 +49,27 @@ class OpenAIModelGPT4(BaseTCOModel):
 
     def __init__(self):
         self.set_name("(SaaS) OpenAI GPT4")
-        self.set_latency("15s") # Default value for GPT4
+        self.set_latency("10s") # Default value for GPT4
         super().__init__()
 
     def render(self):
         def define_cost_per_token(context_length):
-            if context_length == "8K":
-                cost_per_1k_input_tokens = 0.03
-                cost_per_1k_output_tokens = 0.06
+            if context_length == "128K":
+                cost_per_1k_input_tokens = 0.01
+                cost_per_1k_output_tokens = 0.03
             else:
                 cost_per_1k_input_tokens = 0.06
                 cost_per_1k_output_tokens = 0.12
             return cost_per_1k_input_tokens, cost_per_1k_output_tokens
 
-        self.context_length = gr.Dropdown(["8K", "32K"], value="8K", interactive=True,
+        self.context_length = gr.Dropdown(["128K"], value="128K", interactive=True,
                                           label="Context size",
                                           visible=False, info="Number of tokens the model considers when processing text")
-        self.input_tokens_cost_per_token = gr.Number(0.03, visible=False,
+        self.input_tokens_cost_per_token = gr.Number(0.01, visible=False,
                                                      label="($) Price/1K input prompt tokens",
                                                      interactive=False
                                                      )
-        self.output_tokens_cost_per_token = gr.Number(0.06, visible=False,
+        self.output_tokens_cost_per_token = gr.Number(0.03, visible=False,
                                                       label="($) Price/1K output prompt tokens",
                                                       interactive=False
                                                       )
@@ -88,6 +88,54 @@ class OpenAIModelGPT4(BaseTCOModel):
 
         return cost_per_input_token, cost_per_output_token, labor
 
+
+
+
+class MistralO(BaseTCOModel):
+
+    def __init__(self):
+        self.set_name("(SaaS) Mistral API")
+        self.set_latency("5s") # Placeholder: latency and the prices below mirror GPT-3.5 Turbo
+        super().__init__()
+
+    def render(self):
+        def define_cost_per_token(context_length):
+            if context_length == "4K":
+                cost_per_1k_input_tokens = 0.0015
+                cost_per_1k_output_tokens = 0.002
+            else:
+                cost_per_1k_input_tokens = 0.003
+                cost_per_1k_output_tokens = 0.004
+            return cost_per_1k_input_tokens, cost_per_1k_output_tokens
+
+        self.context_length = gr.Dropdown(choices=["4K", "16K"], value="4K", interactive=True,
+                                          label="Context size",
+                                          visible=False, info="Number of tokens the model considers when processing text")
+        self.input_tokens_cost_per_token = gr.Number(0.0015, visible=False,
+                                                     label="($) Price/1K input prompt tokens",
+                                                     interactive=False
+                                                     )
+        self.output_tokens_cost_per_token = gr.Number(0.002, visible=False,
+                                                      label="($) Price/1K output prompt tokens",
+                                                      interactive=False
+                                                      )
+        self.info = gr.Markdown("The cost per input and output token values are from OpenAI's [pricing web page](https://openai.com/pricing)", interactive=False, visible=False)
+        self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
+
+        self.labor = gr.Number(0, visible=False,
+                               label="($) Labor cost per month",
+                               info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
+                               interactive=True
+                               )
+
+    def compute_cost_per_token(self, input_tokens_cost_per_token, output_tokens_cost_per_token, labor):
+        cost_per_input_token = (input_tokens_cost_per_token / 1000)
+        cost_per_output_token = (output_tokens_cost_per_token / 1000)
+
+        return cost_per_input_token, cost_per_output_token, labor
+
+
+
 class OpenAIModelGPT3_5(BaseTCOModel):
 
     def __init__(self):
@@ -187,46 +235,64 @@ class DIYLlama2Model(BaseTCOModel):
         cost_per_output_token = (output_tokens_cost_per_token / 1000)
         return cost_per_input_token, cost_per_output_token, labor
 
-class CohereModel(BaseTCOModel):
+
+class DIYLlama2Model(BaseTCOModel):
 
     def __init__(self):
-        self.set_name("(SaaS) Cohere")
-        self.set_latency("Not available")
+        self.set_name("(Deploy yourself) Llama 2/Mistral (and 7B variants)")
+        self.set_latency("6s")
         super().__init__()
 
     def render(self):
-        def on_model_change(model):
-            if model == "Default":
-                cost_per_1M_tokens = 15
-            else:
-                cost_per_1M_tokens = 30
-            cost_per_1K_tokens = cost_per_1M_tokens / 1000
-            return gr.update(value=cost_per_1K_tokens), gr.update(value=cost_per_1K_tokens)
+        def on_maxed_out_change(maxed_out, input_tokens_cost_per_token, output_tokens_cost_per_token):
+            # Benchmark base costs (valid at the 65% default), rescaled by the slider value
+            output_tokens_cost_per_token = 0.06656
+            input_tokens_cost_per_token = 0.00052
+            r = maxed_out / 100
+            return input_tokens_cost_per_token * 0.65 / r, output_tokens_cost_per_token * 0.65 / r
 
-        self.model = gr.Dropdown(["Default", "Custom"], value="Default",
-                                 label="Model",
-                                 interactive=True, visible=False)
-        self.input_tokens_cost_per_token = gr.Number(0.015, visible=False,
+        self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""", visible=False)
+        self.info = gr.Markdown("The cost per input and output token values below are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper) that were obtained using the following initial configurations.",
+                                interactive=False,
+                                visible=False)
+        self.vm = gr.Textbox(value="2x A100 80GB NVLINK",
+                             visible=False,
+                             label="Instance of VM with GPU",
+                             )
+        self.vm_cost_per_hour = gr.Number(4.42, label="Instance cost ($) per hour",
+                                          interactive=False, visible=False)
+        self.info_vm = gr.Markdown("The price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
+        self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.", visible=False)
+        self.info_maxed_out = gr.Markdown(r"""This percentage influences the input and output cost/token values, and more precisely the number of tokens/s. Here is the formula used:<br>
+        $CT = \frac{VM_C}{TS}$ where $TS = TS_{max} \times \frac{MO}{100}$ <br>
+        with: <br>
+        $CT$ = Cost per Token (input or output), <br>
+        $VM_C$ = VM Cost per second, <br>
+        $TS$ = Tokens per Second (input or output), <br>
+        $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
+        $MO$ = Maxed Out percentage.
+        """, interactive=False, visible=False)
+        self.input_tokens_cost_per_token = gr.Number(0.00052, visible=False,
                                                      label="($) Price/1K input prompt tokens",
                                                      interactive=False
                                                      )
-        self.output_tokens_cost_per_token = gr.Number(0.015, visible=False,
+        self.output_tokens_cost_per_token = gr.Number(0.06656, visible=False,
                                                       label="($) Price/1K output prompt tokens",
                                                       interactive=False
                                                       )
-        self.info = gr.Markdown("The cost per input and output tokens value is from Cohere's [pricing web page](https://cohere.com/pricing?utm_term=&utm_campaign=Cohere+Brand+%26+Industry+Terms&utm_source=adwords&utm_medium=ppc&hsa_acc=4946693046&hsa_cam=20368816223&hsa_grp=154209120409&hsa_ad=666081801359&hsa_src=g&hsa_tgt=dsa-19959388920&hsa_kw=&hsa_mt=&hsa_net=adwords&hsa_ver=3&gad=1&gclid=CjwKCAjww7KmBhAyEiwA5-PUSlyO7pq0zxeVrhViXMd8WuILW6uY-cfP1-SVuUfs-leUAz14xHlOHxoCmfkQAvD_BwE)", interactive=False, visible=False)
-        self.model.change(on_model_change, inputs=self.model, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
-        self.labor = gr.Number(0, visible=False,
-                               label="($) Labor cost per month",
+        self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
+
+        self.labor = gr.Number(5000, visible=False,
+                               label="($) Labor cost per month",
                                info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
                                interactive=True
                                )
 
     def compute_cost_per_token(self, input_tokens_cost_per_token, output_tokens_cost_per_token, labor):
-
-        cost_per_input_token = input_tokens_cost_per_token / 1000
-        cost_per_output_token = output_tokens_cost_per_token / 1000
-
-        return cost_per_input_token, cost_per_output_token, labor
+        cost_per_input_token = (input_tokens_cost_per_token / 1000)
+        cost_per_output_token = (output_tokens_cost_per_token / 1000)
+        return cost_per_input_token, cost_per_output_token, labor
+
 
 
 class ModelPage:
     def __init__(self, Models: BaseTCOModel):
 
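All of the SaaS classes in this commit share the same Gradio wiring: a gr.Dropdown change event calls define_cost_per_token, and the returned tuple is written, in order, into the gr.Number components listed as outputs. A minimal self-contained sketch of that pattern (illustrative names, assuming Gradio Blocks; not part of the commit):

    import gradio as gr

    # Mirrors define_cost_per_token above: one returned value per output component.
    def define_cost_per_token(context_length):
        if context_length == "4K":
            return 0.0015, 0.002  # ($) price/1K input tokens, ($) price/1K output tokens
        return 0.003, 0.004

    with gr.Blocks() as demo:
        context_length = gr.Dropdown(choices=["4K", "16K"], value="4K", label="Context size")
        input_cost = gr.Number(0.0015, label="($) Price/1K input prompt tokens")
        output_cost = gr.Number(0.002, label="($) Price/1K output prompt tokens")
        # When the dropdown changes, Gradio calls the handler with the dropdown's
        # value and pushes the two returned numbers into the Number fields.
        context_length.change(define_cost_per_token,
                              inputs=context_length,
                              outputs=[input_cost, output_cost])

    demo.launch()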
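The info_maxed_out formula ties the DIYLlama2Model numbers together: the gr.Number defaults (0.00052 and 0.06656 $/1K tokens) are the costs at the 65% slider default, and on_maxed_out_change rescales that base by 0.65 / r. A worked check of CT = VM_C / (TS_max * MO/100) as a sketch; the implied throughput figures are derived here, not stated in the commit:

    VM_COST_PER_HOUR = 4.42          # 2x A100 80GB NVLINK, CoreWeave pricing
    VM_C = VM_COST_PER_HOUR / 3600   # VM cost per second

    # Costs ($ per 1K tokens) at the 65% default, hard-coded in on_maxed_out_change
    INPUT_COST_1K_AT_65 = 0.00052
    OUTPUT_COST_1K_AT_65 = 0.06656

    def cost_per_1k_tokens(maxed_out_percent, cost_1k_at_65_percent):
        # CT = VM_C / TS with TS = TS_max * MO/100, so cost scales as 1/MO
        r = maxed_out_percent / 100
        return cost_1k_at_65_percent * 0.65 / r

    print(cost_per_1k_tokens(65, OUTPUT_COST_1K_AT_65))   # ≈ 0.06656  (the UI default)
    print(cost_per_1k_tokens(100, OUTPUT_COST_1K_AT_65))  # ≈ 0.043264 (GPU fully maxed out)
    print(cost_per_1k_tokens(10, OUTPUT_COST_1K_AT_65))   # ≈ 0.43264  (mostly idle GPU)

    # Implied benchmark throughput at 100% utilisation: TS_max = VM_C / (cost per token)
    ts_max_output = VM_C / (cost_per_1k_tokens(100, OUTPUT_COST_1K_AT_65) / 1000)
    ts_max_input = VM_C / (cost_per_1k_tokens(100, INPUT_COST_1K_AT_65) / 1000)
    print(round(ts_max_output, 1), round(ts_max_input))   # ≈ 28.4 and ≈ 3632 tokens/s

Note that on_maxed_out_change immediately overwrites the two cost parameters it receives, so each slider move rescales from this fixed benchmark base rather than compounding on the previous value.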