Upload app.py
app.py
ADDED
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
"""Untitled2.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1iZpCUgC5T_ASnlDgMYm1n4RH8BZsm7sx
"""

# !pip install gradio

def estimate_training_cost(number_of_parameters, number_of_tokens, gpu_throughput=312e12, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):
    """
    Estimates the training cost of a large language model.

    Args:
    - number_of_parameters (int): The number of parameters in the model.
    - number_of_tokens (int): The number of tokens to train on.
    - gpu_throughput (float, optional): The peak throughput of the GPU in FLOPs/sec. Default is 312 TFLOPs/sec for A100 GPUs.
    - utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
    - overhead (float, optional): Multiplier accounting for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
    - cost_per_gpu_hour (float, optional): The cost per GPU-hour. Default is $1.85/hour.

    Returns:
    - float: The estimated total cost of training the model.
    """
    # Total number of FLOPs required for training (6 * parameters * tokens rule of thumb)
    total_flops = 6 * number_of_parameters * number_of_tokens

    # Number of GPU-hours required at peak throughput (A100 by default)
    gpu_hours = total_flops / (gpu_throughput * 3600)

    # Adjust for the actual utilization of the GPUs
    adjusted_gpu_hours = gpu_hours / utilization_rate

    # Account for the overhead
    actual_gpu_hours = adjusted_gpu_hours * overhead

    # Calculate the total cost
    total_cost = actual_gpu_hours * cost_per_gpu_hour

    return total_cost

# Example usage: a model with 70 billion parameters trained on 2 trillion tokens,
# using the default values for the remaining parameters.
total_cost = estimate_training_cost(number_of_parameters=70e9, number_of_tokens=2e12)
print(total_cost)
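# For reference, plugging the example numbers into the formula above:
#   total_flops        = 6 * 70e9 * 2e12          = 8.4e23 FLOPs
#   gpu_hours          = 8.4e23 / (312e12 * 3600) ≈ 747,863
#   adjusted_gpu_hours = 747,863 / 0.5            ≈ 1,495,726
#   actual_gpu_hours   = 1,495,726 * 1.10         ≈ 1,645,299
#   total_cost         = 1,645,299 * 1.85         ≈ $3,043,803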

import gradio as gr

# The estimate_training_cost function defined above is reused by the Gradio wrapper below.

def gradio_interface(number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
    # Convert string inputs to the correct numeric types
    number_of_parameters = float(number_of_parameters) * 1e9   # billions -> absolute count
    number_of_tokens = float(number_of_tokens) * 1e12          # trillions -> absolute count
    utilization_rate = float(utilization_rate)
    overhead = float(overhead)
    cost_per_gpu_hour = float(cost_per_gpu_hour)

    # Estimate the cost
    cost = estimate_training_cost(number_of_parameters, number_of_tokens, utilization_rate=utilization_rate, overhead=overhead, cost_per_gpu_hour=cost_per_gpu_hour)

    # Return the result as a formatted string
    return f"The estimated training cost is ${cost:,.2f}"

# Define the title and description for the Gradio app
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
description = "<p style='text-align: center;'>Estimate the cost of training large language models (LLMs). This tool calculates the cost from the model's parameter count and the number of training tokens. We plan to extend the calculator to cover the cost of fine-tuning with strategies such as LoRA or QLoRA. Stay tuned for updates where you'll be able to enter a model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>"

# Create the Gradio interface with title and description
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Number of Parameters (in billions)", value="70"),
        gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
        gr.Slider(minimum=1.0, maximum=2.0, step=0.01, value=1.10, label="Overhead (1 + overhead percentage)"),
        gr.Textbox(label="Cost per GPU Hour ($)", value="1.85")
    ],
    outputs=[gr.Textbox(label="Estimated Training Cost")],
    title=title,
    description=description,
    article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
)

# Run the interface
iface.launch()
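As a usage note, estimate_training_cost can also be called directly in Python, without the Gradio UI, to compare hardware assumptions. The sketch below is illustrative only: the 1e15 FLOP/s throughput and $3.00 hourly rate are hypothetical placeholder figures, not values defined by the app.

# Hypothetical comparison for the same 70B-parameter / 2T-token run
# (assumes the functions from app.py above are available in the session).
default_cost = estimate_training_cost(70e9, 2e12)  # app defaults: 312 TFLOP/s peak, $1.85/hour
faster_cost = estimate_training_cost(70e9, 2e12, gpu_throughput=1e15, cost_per_gpu_hour=3.00)  # placeholder figures
print(f"Default A100 assumptions: ${default_cost:,.2f}")
print(f"Placeholder faster GPU:   ${faster_cost:,.2f}")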