import json
import math

import pandas as pd

from src.assets.symbols import UP_ARROW, DOWN_ARROW
from src.tasks import TASKS


def _select_result_files(fs, repo_path, is_private):
    """Return ``(path, organization, model, task)`` for every relevant result file.

    A file is relevant when its name (minus ``.json``) matches the ``code`` of
    an entry in ``TASKS`` and that entry's ``private_test`` flag equals
    *is_private*. The organization and model are the two path components
    preceding the file name.
    """
    # Build the code -> task lookup once instead of scanning TASKS per file.
    # setdefault keeps the first entry for a code, like a front-to-back scan.
    tasks_by_code = {}
    for task in TASKS:
        tasks_by_code.setdefault(task.code, task)

    selected = []
    for path in fs.glob(f"{repo_path}/**/*.json"):
        parts = path.split("/")
        if len(parts) < 3:
            # Not laid out as <organization>/<model>/<task>.json; skip it
            # rather than fail on the unpacking below.
            continue
        organization, model, filename = parts[-3:]
        task = tasks_by_code.get(filename.replace(".json", ""))
        if task is None or task.private_test != is_private:
            continue
        selected.append((path, organization, model, task))
    return selected


def load_from_hub(fs, repo_path, is_private=False):
    """Build a results table from the JSON result files under *repo_path*.

    Args:
        fs: Filesystem-like object providing ``glob(pattern)`` and
            ``open(path, mode)``. The object returned by ``open`` is used as a
            context manager (presumably an fsspec-style filesystem -- confirm
            against the caller).
        repo_path: Root path searched recursively for ``*.json`` files. The
            last three components of each path are read as
            ``<organization>/<model>/<task code>.json``.
        is_private: Only tasks whose ``private_test`` flag equals this value
            are included.

    Returns:
        A ``pandas.DataFrame`` indexed by ``"<organization>/<model>"`` with the
        columns ``Organization``, ``Model`` and one column per task found.
        Task columns are renamed to ``"<task name> <arrow>"``, where the arrow
        is ``UP_ARROW`` if the task's ``higher_is_better`` is true and
        ``DOWN_ARROW`` otherwise. Each task cell holds the task's metric
        rounded to 4 decimal places. Rows whose organization contains
        ``"vietgpt"`` or ``"vinai"`` are removed.

    Raises:
        KeyError: If a result file has no ``["results"][<task code>][<metric>]``
            entry.
    """
    selected = _select_result_files(fs, repo_path, is_private)

    # dict.fromkeys acts as an insertion-ordered set, so rows and task columns
    # keep the order in which they were first encountered.
    row_labels = dict.fromkeys(
        f"{organization}/{model}" for _, organization, model, _ in selected
    )
    task_codes = dict.fromkeys(task.code for _, _, _, task in selected)

    table = pd.DataFrame(
        index=list(row_labels),
        columns=["Organization", "Model", *task_codes],
        data=None,
    )

    for path, organization, model, task in selected:
        # Close each handle deterministically instead of leaking one per file.
        with fs.open(path, "r") as handle:
            data = json.load(handle)

        row = f"{organization}/{model}"
        table.loc[row, task.code] = round(data["results"][task.code][task.metric], 4)
        table.loc[row, "Organization"] = organization
        table.loc[row, "Model"] = model

    table = table.rename(
        columns={
            task.code: f"{task.name} {UP_ARROW if task.higher_is_better else DOWN_ARROW}"
            for task in TASKS
        }
    )

    # One regex pass replaces the two sequential substring filters.
    hidden = table["Organization"].str.contains("vietgpt|vinai")
    return table[~hidden]