jadehardouin committed
Commit 2db3504 · Parent: 3899805

Update models.py

Files changed (1)
  1. models.py +25 -10
models.py CHANGED
@@ -96,6 +96,15 @@ class OpenAIModelGPT3_5(BaseTCOModel):
         super().__init__()
 
     def render(self):
+        def define_cost_per_token(context_length):
+            if context_length == "4K":
+                cost_per_1k_input_tokens = 0.0015
+                cost_per_1k_output_tokens = 0.002
+            else:
+                cost_per_1k_input_tokens = 0.003
+                cost_per_1k_output_tokens = 0.004
+            return cost_per_1k_input_tokens, cost_per_1k_output_tokens
+
         self.context_length = gr.Dropdown(choices=["4K", "16K"], value="4K", interactive=True,
                                           label="Context size",
                                           visible=False, info="Number of tokens the model considers when processing text")
@@ -108,16 +117,6 @@
                                                       interactive=False
                                                       )
         self.info = gr.Markdown("The cost per input and output tokens values are from OpenAI's [pricing web page](https://openai.com/pricing)", interactive=False, visible=False)
-
-        def define_cost_per_token(context_length):
-            if context_length == "4K":
-                cost_per_1k_input_tokens = 0.0015
-                cost_per_1k_output_tokens = 0.002
-            else:
-                cost_per_1k_input_tokens = 0.003
-                cost_per_1k_output_tokens = 0.004
-            return cost_per_1k_input_tokens, cost_per_1k_output_tokens
-
         self.context_length.change(define_cost_per_token, inputs=self.context_length, outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
 
         self.labor = gr.Number(0, visible=False,
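
Note on the two hunks above: `define_cost_per_token` is moved so that it is already defined when `self.context_length.change(...)` registers it as the dropdown's callback. A minimal, self-contained sketch of the same dropdown-to-price wiring, assuming Gradio Blocks; the component names below are illustrative, not taken from this repo:

import gradio as gr

def define_cost_per_token(context_length):
    # Mirrors the commit's pricing: 4K context is priced lower than 16K.
    if context_length == "4K":
        return 0.0015, 0.002
    return 0.003, 0.004

with gr.Blocks() as demo:
    context_length = gr.Dropdown(choices=["4K", "16K"], value="4K", label="Context size")
    input_cost = gr.Number(0.0015, label="($) Price/1K input prompt tokens")
    output_cost = gr.Number(0.002, label="($) Price/1K output prompt tokens")
    # The callback's returned tuple is mapped positionally onto the outputs list,
    # so both price fields refresh together when the dropdown changes.
    context_length.change(define_cost_per_token, inputs=context_length,
                          outputs=[input_cost, output_cost])

demo.launch()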
@@ -140,6 +139,11 @@ class OpenSourceLlama2Model(BaseTCOModel):
         super().__init__()
 
     def render(self):
+        def on_maxed_out_change(maxed_out, input_tokens_cost_per_token, output_tokens_cost_per_token):
+            output_tokens_cost_per_token = 0.06656
+            input_tokens_cost_per_token = 0.00052
+            r = maxed_out / 100
+            return input_tokens_cost_per_token * 0.65 / r, output_tokens_cost_per_token * 0.65 / r
 
         self.vm = gr.Textbox(value="2x A100 80GB NVLINK",
                              visible=False,
@@ -148,6 +152,16 @@
         self.vm_cost_per_hour = gr.Number(4.42, label="Instance cost ($) per hour",
                                           interactive=False, visible=False)
         self.info_vm = gr.Markdown("This price above is from [CoreWeave's pricing web page](https://www.coreweave.com/gpu-cloud-pricing)", interactive=False, visible=False)
+        self.maxed_out = gr.Slider(minimum=1, maximum=100, value=65, step=1, label="Maxed out", info="Estimated average percentage of total GPU memory that is used. The instantaneous value can go from very high when many users are using the service to very low when no one does.")
+        self.info_maxed_out = gr.Markdown(r"""This percentage influences the input and output cost/token values, and more precisely the number of tokens/s. Here is the formula used:<br>
+        $CT = \frac{VM_C}{TS}$ where $TS = TS_{max} \times \frac{MO}{100}$ <br>
+        with: <br>
+        $CT$ = Cost per Token (input or output), <br>
+        $VM_C$ = VM Cost per second, <br>
+        $TS$ = Tokens per second (input or output), <br>
+        $TS_{max}$ = Tokens per second when the GPU is maxed out at 100%, <br>
+        $MO$ = Maxed Out percentage.
+        """, interactive=False, visible=False)
         self.input_tokens_cost_per_token = gr.Number(0.00052, visible=False,
                                                      label="($) Price/1K input prompt tokens",
                                                      interactive=False
@@ -156,6 +170,7 @@
                                                       label="($) Price/1K output prompt tokens",
                                                       interactive=False
                                                       )
+        self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
         self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""")
         self.info = gr.Markdown("The cost per input and output tokens values above are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper)",
                                 label="Source",
 