|
import asyncio |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
|
|
import src.constants as constants |
|
from src.hub import glob, load_jsonlines_file |
|
|
|
|
|
def update_task_description_component(task):
    """Build the "Task Description" textbox for the selected task.

    The text is the entry for ``task`` in ``constants.TASK_DESCRIPTIONS``
    (empty string when the task has no entry) followed by a fixed note on
    how to read the score. When there is no description, only the note is
    shown.

    Args:
        task: Key used to look up the task description.

    Returns:
        A visible, six-line ``gr.Textbox`` holding the composed text.
    """
    score_hint = "A higher score is a better score."
    task_text = constants.TASK_DESCRIPTIONS.get(task, "")
    if task_text:
        text = f"{task_text}\n\n{score_hint}"
    else:
        # No description available: show the score note on its own.
        text = score_hint
    return gr.Textbox(text, label="Task Description", lines=6, visible=True)
|
|
|
|
|
def update_subtasks_component(task, profile: gr.OAuthProfile | None):
    """Build the login button and subtask selector for the selected task.

    The login button is only visible for the ``"leaderboard_gpqa"`` task.
    For that task the subtask choices are withheld (``None``) until a
    profile is present; for every other task the choices come from
    ``constants.SUBTASKS``.
    NOTE(review): presumably the GPQA details require authentication --
    confirm against the dataset access rules.

    Args:
        task: Selected task key.
        profile: OAuth profile of the logged-in user, or ``None``.

    Returns:
        A ``(gr.LoginButton, gr.Radio)`` tuple; the radio has no value
        selected.
    """
    is_gpqa = task == "leaderboard_gpqa"
    if is_gpqa and not profile:
        choices = None
    else:
        choices = constants.SUBTASKS.get(task)

    login_button = gr.LoginButton(size="sm", visible=is_gpqa)
    subtask_radio = gr.Radio(
        choices=choices,
        info="Evaluation subtasks to be loaded",
        value=None,
    )
    return login_button, subtask_radio
|
|
|
|
|
def update_load_details_component(model_id, subtask):
    """Return the "Load Details" button, enabled only when both inputs are set.

    Args:
        model_id: Current model selection; any falsy value disables the button.
        subtask: Current subtask selection; any falsy value disables the button.

    Returns:
        A ``gr.Button`` labelled "Load Details".
    """
    ready = bool(model_id and subtask)
    return gr.Button("Load Details", interactive=ready)
|
|
|
|
|
def fetch_details_paths(model_id, subtask):
    """List the details files matching a model and subtask.

    The glob pattern is ``<dataset_id>/**/<filename>``, where both parts are
    rendered from the ``constants.DETAILS_DATASET_ID`` and
    ``constants.DETAILS_FILENAME`` templates.

    Args:
        model_id: Model identifier; ``"/"`` is replaced by ``"__"`` before it
            is substituted into the dataset id template.
        subtask: Subtask name substituted into the filename template.

    Returns:
        Whatever ``glob`` (imported from ``src.hub``) returns for the pattern.
    """
    sanitized = model_id.replace("/", "__")
    dataset = constants.DETAILS_DATASET_ID.format(model_name_sanitized=sanitized)
    details_file = constants.DETAILS_FILENAME.format(subtask=subtask)
    # "**" matches any intermediate directory levels between dataset and file.
    return glob(f"{dataset}/**/{details_file}")
|
|
|
|
|
async def load_details_dataframe(model_id, subtask):
    """Load one model's details for a subtask as an indexed DataFrame.

    Args:
        model_id: Model identifier; falsy values short-circuit to ``None``.
        subtask: Subtask name; falsy values short-circuit to ``None``.

    Returns:
        ``None`` when an input is missing, no file matches, or the file
        yields no data. Otherwise a DataFrame sorted by ``doc_id`` and indexed
        by ``(doc_id, model_name)``, with ``doc_id`` also kept as a column.
    """
    if not (model_id and subtask):
        return None

    # The path lookup is a blocking call, so run it off the event loop.
    candidate_paths = await asyncio.to_thread(fetch_details_paths, model_id, subtask)
    if not candidate_paths:
        return None

    # Take the greatest path in sort order.
    # NOTE(review): presumably the most recent run's file -- confirm naming scheme.
    records = await load_jsonlines_file(max(candidate_paths))
    if not records:
        return None

    frame = pd.json_normalize(records)
    frame["model_name"] = model_id
    frame = frame.sort_values("doc_id")
    frame = frame.set_index("doc_id", drop=False)
    return frame.set_index("model_name", append=True)
|
|
|
|
|
async def load_details(subtask, *model_ids_lists):
    """Concurrently load and concatenate details for several models.

    Args:
        subtask: Subtask whose details are loaded for every model.
        *model_ids_lists: Any number of iterables of model ids; falsy entries
            (``None``, empty lists) are skipped.

    Returns:
        A two-item tuple. The first item is the row-wise concatenation of all
        frames that loaded, or ``None`` when nothing loaded. The second item
        is always ``None``.
    """
    pending = []
    for model_ids in model_ids_lists:
        if not model_ids:
            continue
        pending.extend(load_details_dataframe(model_id, subtask) for model_id in model_ids)

    results = await asyncio.gather(*pending)
    # Models whose details could not be loaded come back as None; drop them.
    loaded = [frame for frame in results if frame is not None]
    combined = pd.concat(loaded) if loaded else None
    return combined, None
|
|
|
|
|
def display_details(df, sample_idx, show_only_differences):
    """Render one sample's details as an HTML comparison table.

    The sample is chosen by position among the first-level index values of
    ``df``. Its rows are transposed so that each original column becomes a
    table row and each remaining index entry becomes a table column.

    Args:
        df: DataFrame with a MultiIndex whose first level identifies the
            sample, or ``None``.
        sample_idx: Position of the sample within ``df.index.levels[0]``.
        show_only_differences: When truthy, hide every row whose values all
            equal the value in the first column.

    Returns:
        The styled table as an HTML string, or ``None`` when ``df`` is ``None``.
    """
    if df is None:
        return None

    sample_id = df.index.levels[0][sample_idx]
    table = df.loc[sample_id].T.rename_axis(columns=None)

    if show_only_differences:
        # A row "differs" when any column disagrees with the first column.
        differs = table.ne(table.iloc[:, 0], axis=0).any(axis=1)
        hidden_rows = [field for field in table.index if not differs[field]]
    else:
        hidden_rows = []

    styler = table.style.format(escape="html", na_rep="")
    styler = styler.hide(hidden_rows)
    # Wrap long cell content and give every column an equal share of the width.
    cell_style = {
        "selector": "td",
        "props": [("overflow-wrap", "break-word"), ("max-width", "1px")],
    }
    heading_style = {
        "selector": ".col_heading",
        "props": [("width", f"{100 / len(table.columns)}%")],
    }
    styler = styler.set_table_styles([cell_style, heading_style])
    return styler.to_html()
|
|
|
|
|
def update_sample_idx_component(df):
    """Build the "Sample Index" number input bounded by the available samples.

    ``display_details`` selects a sample with ``df.index.levels[0][sample_idx]``,
    so the highest valid index is the number of first-level index entries
    minus one. Counting rows instead would over-count whenever several models
    are loaded (one row per sample per model) and let the user pick an index
    that raises ``IndexError`` on display.

    Args:
        df: Details DataFrame, or ``None`` when nothing is loaded.

    Returns:
        A visible ``gr.Number`` reset to 0 and limited to the valid sample
        range, or ``None`` when ``df`` is ``None``.
    """
    if df is None:
        return None

    index = df.index
    if isinstance(index, pd.MultiIndex):
        # One entry per distinct sample, however many models are loaded.
        sample_count = len(index.levels[0])
    else:
        # Flat index: keep the original row-count behaviour.
        sample_count = len(df)

    return gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        maximum=sample_count - 1,
        visible=True,
    )
|
|
|
|
|
def clear_details():
    """Return the reset values for the details view.

    Returns:
        A six-item tuple, in order: a ``gr.Dropdown`` with an empty selection,
        three ``None`` values, a disabled "Load Details" ``gr.Button``, and a
        hidden "Sample Index" ``gr.Number`` reset to 0. How these map onto
        outputs is set by the caller's event wiring, which is not visible here.
    """
    cleared_models = gr.Dropdown(value=[])
    disabled_load_button = gr.Button("Load Details", interactive=False)
    hidden_sample_idx = gr.Number(
        label="Sample Index",
        info="Index of the sample to be displayed",
        value=0,
        minimum=0,
        visible=False,
    )
    return (
        cleared_models,
        None,
        None,
        None,
        disabled_load_button,
        hidden_sample_idx,
    )
|
|
|
|
|
def display_loading_message_for_details():
    """Return the centered HTML placeholder shown while details are loading."""
    message = "Loading..."
    return f"<h3 style='text-align: center;'>{message}</h3>"
|
|