import json
import math

import pandas as pd

from src.assets.symbols import UP_ARROW, DOWN_ARROW
from src.tasks import TASKS

# Rows whose "Organization" value contains any of these substrings are
# dropped from the table returned by load_from_hub.
_EXCLUDED_ORGANIZATIONS = ("vietgpt", "vinai")


def load_from_hub(fs, repo_path, is_private: bool = False) -> pd.DataFrame:
    """Build a results table from the JSON result files under ``repo_path``.

    Every ``*.json`` file found below ``repo_path`` is interpreted through the
    last three components of its path: ``<organization>/<model>/<task>.json``.
    A file is used only when ``<task>`` matches the ``code`` of an entry in
    ``TASKS`` and that entry's ``private_test`` flag equals ``is_private``.
    The score is read from ``data["results"][<task>][<metric>]`` (where
    ``<metric>`` is the task's ``metric`` attribute) and rounded to 4 decimals.

    Args:
        fs: Filesystem object providing ``glob(pattern)`` and
            ``open(path, mode)``; the opened file is used as a context
            manager (presumably an fsspec-style filesystem -- confirm with
            the caller).
        repo_path: Root path that is searched recursively for result files.
        is_private: Select tasks whose ``private_test`` flag equals this value.

    Returns:
        A DataFrame indexed by ``"<organization>/<model>"`` with the columns
        ``"Organization"``, ``"Model"`` and one column per task seen, renamed
        to ``"<task name> <arrow>"`` (``UP_ARROW`` when the task's
        ``higher_is_better`` is true, otherwise ``DOWN_ARROW``). Rows whose
        organization contains one of ``_EXCLUDED_ORGANIZATIONS`` are removed.

    Raises:
        KeyError: If a selected file lacks ``results/<task>/<metric>``.
        ValueError: If a matched path has fewer than three components.
    """
    # Build the code -> task lookup once instead of rescanning TASKS for every
    # file. setdefault keeps the first task for a duplicated code, which is
    # what a first-match scan over TASKS would select.
    tasks_by_code = {}
    for task in TASKS:
        tasks_by_code.setdefault(task.code, task)

    # Single pass over the files: (organization, model, task_code, score).
    records = []
    for file in fs.glob(f"{repo_path}/**/*.json"):
        organization, model, task_file = file.split("/")[-3:]
        task_code = task_file.replace(".json", "")
        task = tasks_by_code.get(task_code)
        if task is None or is_private != task.private_test:
            continue
        # Context manager guarantees the remote handle is closed.
        with fs.open(file, "r") as handle:
            data = json.load(handle)
        score = round(data["results"][task_code][task.metric], 4)
        records.append((organization, model, task_code, score))

    # dict.fromkeys de-duplicates while keeping first-seen order, so rows and
    # task columns appear in the order the files were listed.
    row_labels = list(
        dict.fromkeys(f"{org}/{model}" for org, model, _, _ in records)
    )
    task_columns = list(dict.fromkeys(code for _, _, code, _ in records))

    table = pd.DataFrame(
        index=row_labels,
        columns=["Organization", "Model"] + task_columns,
        data=None,
    )
    for organization, model, task_code, score in records:
        row_label = f"{organization}/{model}"
        table.loc[row_label, task_code] = score
        table.loc[row_label, "Organization"] = organization
        table.loc[row_label, "Model"] = model

    table.rename(
        columns={
            task.code: (
                f"{task.name} "
                f"{UP_ARROW if task.higher_is_better else DOWN_ARROW}"
            )
            for task in TASKS
        },
        inplace=True,
    )

    # Plain substring match; regex=False makes the literal intent explicit.
    for excluded in _EXCLUDED_ORGANIZATIONS:
        table = table[
            ~table["Organization"].str.contains(excluded, regex=False)
        ]
    return table