|
import glob
import sys
from typing import Optional

import pandas as pd

from huggingface_hub import hf_hub_download, upload_file
from huggingface_hub.utils._errors import EntryNotFoundError

sys.path.append(".")
from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv
|
|
|
|
|
def has_previous_benchmark() -> Optional[str]:
    """Return the local path of the previously uploaded benchmark CSV, if any.

    Downloads ``FINAL_CSV_FILE`` from the ``REPO_ID`` dataset repo on the
    Hugging Face Hub.

    Returns:
        Local filesystem path to the cached download, or ``None`` when the
        repo does not yet contain the file (i.e. this is the first run).
    """
    try:
        return hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
    except EntryNotFoundError:
        # No previous benchmark has ever been uploaded.
        return None
|
|
|
|
|
def filter_float(value):
    """Reduce a benchmark cell to its leading numeric value.

    Cells annotated on a previous run look like ``"7.56 (+0.12%)"``; for
    such strings, only the first whitespace-separated token is kept and
    converted to ``float``. Non-string values are returned untouched.
    """
    if not isinstance(value, str):
        return value
    leading_token = value.split()[0]
    return float(leading_token)
|
|
|
|
|
# Configuration columns, not measurements — never annotated with a delta.
_NON_METRIC_COLUMNS = ("batch_size", "num_inference_steps", "actual_gpu_memory (gbs)")


def _annotate_percent_changes(current_results, previous_results):
    """Append ``" (+x.xx%)"`` change-vs-previous suffixes to metric columns.

    Mutates both frames in place: metric columns of ``current_results``
    become strings like ``"7.56 (+0.12%)"``; ``previous_results`` metric
    columns are normalized to floats as a side effect of the comparison.
    Assumes both frames are row-aligned (same benchmark order) — comparison
    is positional, not keyed. TODO(review): confirm collate_csv guarantees
    a stable row order across runs.
    """
    numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
    numeric_columns = [c for c in numeric_columns if c not in _NON_METRIC_COLUMNS]

    for column in numeric_columns:
        # A metric introduced since the last run has no baseline to compare to.
        if column not in previous_results.columns:
            continue

        # Previous values may already carry a "(+x.xx%)" suffix; strip it.
        previous_results[column] = previous_results[column].map(filter_float)

        current_results[column] = current_results[column].astype(float)
        previous_results[column] = previous_results[column].astype(float)
        percent_change = ((current_results[column] - previous_results[column]) / previous_results[column]) * 100

        # Prefix positive deltas with "+" (zero and negative keep their sign as-is).
        current_results[column] = current_results[column].map(str) + percent_change.map(
            lambda x: f" ({'+' if x > 0 else ''}{x:.2f}%)"
        )

        # Rows without a comparable previous value yield NaN — drop the suffix.
        current_results[column] = current_results[column].map(lambda x: x.replace(" (nan%)", ""))


def push_to_hf_dataset():
    """Collate per-run benchmark CSVs, annotate deltas, and upload to the Hub.

    Merges every CSV under ``BASE_PATH`` into ``FINAL_CSV_FILE``. When a
    previous benchmark CSV exists on the Hub, each metric column is
    annotated with its percent change relative to that run. The collated
    CSV is then uploaded to the ``REPO_ID`` dataset repo.
    """
    all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
    collate_csv(all_csvs, FINAL_CSV_FILE)

    csv_path = has_previous_benchmark()
    if csv_path is not None:
        current_results = pd.read_csv(FINAL_CSV_FILE)
        previous_results = pd.read_csv(csv_path)
        _annotate_percent_changes(current_results, previous_results)
        current_results.to_csv(FINAL_CSV_FILE, index=False)

    # Tie the upload back to the CI commit when running under GitHub Actions.
    commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
    upload_file(
        repo_id=REPO_ID,
        path_in_repo=FINAL_CSV_FILE,
        path_or_fileobj=FINAL_CSV_FILE,
        repo_type="dataset",
        commit_message=commit_message,
    )
|
|
|
|
|
# Script entry point: collate benchmark CSVs and upload them to the Hub.
if __name__ == "__main__":

    push_to_hf_dataset()
|
|