|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
import string |
|
import re |
|
import json |
|
import random |
|
import torch |
|
import hashlib, base64 |
|
from tqdm import tqdm |
|
from gradio.themes.base import Base |
|
import openai |
|
|
|
|
|
from error_messages import * |
|
|
|
tqdm().pandas()  # register tqdm's progress_apply/progress_map on pandas DataFrames
|
|
|
|
|
import mgr_bias_scoring as bt_mgr |
|
|
|
|
|
import mgr_requests as rq_mgr |
|
import mgr_biases as bmgr |
|
|
|
|
|
import mgr_cookies as cookie_mgr |
|
|
|
# Passed to rq_mgr._getSavedSentences to restrict retrieval to the paper's
# sentence set; never toggled in this file, so it stays False.
use_paper_sentences = False

# Sentence count from the most recent retrieval/generation step (module state).
G_NUM_SENTENCES = 0
|
|
|
def getTermsFromGUI(group1, group2, att1, att2):
    """Parse the four comma-separated textbox values into a bias-spec dict.

    Args:
        group1, group2: raw comma-separated social group terms.
        att1, att2: raw comma-separated attribute terms.

    Returns a dict with "social_groups" and "attributes" term lists;
    surrounding spaces are trimmed and empty entries dropped.
    """
    def split_terms(raw):
        # Split on commas, trim surrounding spaces, discard empty entries.
        return [term.strip(" ") for term in raw.split(",") if len(term.strip(" ")) > 0]

    return {
        "social_groups": {
            "group 1": split_terms(group1),
            "group 2": split_terms(group2),
        },
        "attributes": {
            "attribute 1": split_terms(att1),
            "attribute 2": split_terms(att2),
        },
    }
|
|
|
|
|
def prefillBiasSpec(evt: gr.SelectData):
    """Fill the four specification textboxes from a predefined bias JSON file.

    Triggered by selecting an entry in the example-biases dropdown; the
    selected label is mapped to a file via bmgr.bias2tag.

    Returns a 4-tuple of comma-joined term strings (group1, group2, att1, att2).
    """
    global use_paper_sentences

    print(f"Selected {evt.value} at {evt.index} from {evt.target}")

    bias_filename = f"{bmgr.bias2tag[evt.value]}.json"
    print(f"Filename: {bias_filename}")

    bias_spec = bmgr.loadPredefinedBiasSpec(bias_filename)

    grp1_terms, grp2_terms = bmgr.getSocialGroupTerms(bias_spec)
    att1_terms, att2_terms = bmgr.getAttributeTerms(bias_spec)

    for label, terms in (("Grp 1", grp1_terms), ("Grp 2", grp2_terms),
                         ("Att 1", att1_terms), ("Att 2", att2_terms)):
        print(f"{label}: {terms}")

    # Cap each textbox at the first 50 terms.
    return tuple(', '.join(terms[:50])
                 for terms in (grp1_terms, grp2_terms, att1_terms, att2_terms))
|
|
|
def updateErrorMsg(isError, text):
    """Build a Markdown update showing *text* when *isError* is truthy."""
    return gr.Markdown.update(value=text, visible=isError)
|
|
|
def generateSentences(gr1, gr2, att1, att2, openai_key, num_sent2gen, progress=gr.Progress()):
    """Generate new test sentences via the OpenAI API for the given bias spec.

    Args:
        gr1, gr2: comma-separated social group terms (raw textbox values).
        att1, att2: comma-separated attribute terms (raw textbox values).
        openai_key: OpenAI API key entered by the user.
        num_sent2gen: number of sentences to generate per attribute.
        progress: Gradio progress tracker.

    Returns a 9-tuple of Gradio updates: error message, info message,
    online-generation row, model dropdown, sentence accordion, sentence
    row, sentence dataframe, generate button, bias-test button.
    """
    global use_paper_sentences, G_NUM_SENTENCES
    print(f"GENERATE SENTENCES CLICKED!, requested sentence per attribute number: {num_sent2gen}")

    # Defaults: no error, generation UI visible, "Generate" button active.
    err_update = updateErrorMsg(False, "")
    bias_gen_states = [True, False]
    online_gen_visible = True
    info_msg_update = gr.Markdown.update(visible=False, value="")

    test_sentences = []
    bias_spec = getTermsFromGUI(gr1, gr2, att1, att2)
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    total_att_terms = len(a1)+len(a2)
    all_terms_len = len(g1)+len(g2)+len(a1)+len(a2)
    print(f"Length of all the terms: {all_terms_len}")

    # Validate inputs before calling the API. BUGFIX: previously the
    # "no sentences" check ran even when validation failed, so the generic
    # NO_SENTENCES_ERROR clobbered the specific validation error message.
    if all_terms_len == 0:
        print("No terms entered!")
        err_update = updateErrorMsg(True, NO_TERMS_ENTERED_ERROR)
    elif len(openai_key) == 0:
        print("Empty OpenAI key!!!")
        err_update = updateErrorMsg(True, OPENAI_KEY_EMPTY)
    elif len(openai_key) < 10:
        print("Wrong length OpenAI key!!!")
        err_update = updateErrorMsg(True, OPENAI_KEY_WRONG)
    else:
        progress(0, desc="ChatGPT generation...")
        print(f"Using Online Generator LLM...")

        test_sentences = rq_mgr._generateOnline(bias_spec, progress, openai_key, num_sent2gen, False)

        print(f"Returned num sentences: {len(test_sentences)}")
        if len(test_sentences) == 0:
            print("Test sentences empty!")
            err_update = updateErrorMsg(True, NO_SENTENCES_ERROR)
        else:
            # Success: hide the generation UI and enable the bias-test button.
            bias_gen_states = [False, True]
            online_gen_visible = False
            info_msg = _genSentenceCoverMsg(test_sentences, total_att_terms, isGen=True)
            info_msg_update = gr.Markdown.update(visible=True, value=info_msg)

        # BUGFIX: only persist the key after it passed validation; previously
        # even an empty/malformed key was saved to the cookie store.
        cookie_mgr.saveOpenAIKey(openai_key)

    G_NUM_SENTENCES = len(test_sentences)

    print(f"Online gen visible: {not err_update['visible']}")
    return (err_update,
            info_msg_update,
            gr.Row.update(visible=online_gen_visible),
            gr.Dropdown.update(visible=not online_gen_visible),
            gr.Accordion.update(visible=not online_gen_visible, label=f"Test sentences ({len(test_sentences)})"),
            gr.update(visible=True),
            gr.DataFrame.update(value=test_sentences),
            gr.update(visible=bias_gen_states[0]),
            gr.update(visible=bias_gen_states[1])
            )
|
|
|
def useOnlineGen(value):
    """Toggle the online-generation UI; relabel the action button to match."""
    btn_label = "Generate New Sentences" if value == True else "Use Saved Sentences"

    return (gr.update(visible=value),
            gr.update(value=btn_label),
            gr.update(visible=value))
|
|
|
|
|
def moveStep1():
    """Highlight the Step 1 button and show only the first wizard tab."""
    button_styles = ("primary", "secondary", "secondary")
    tab_visibility = (True, False, False)

    return tuple([gr.update(variant=style) for style in button_styles] +
                 [gr.update(visible=shown) for shown in tab_visibility])
|
|
|
def moveStep2():
    """Highlight the Step 2 button and show only the second wizard tab."""
    button_styles = ("secondary", "primary", "secondary")
    tab_visibility = (False, True, False)

    return tuple([gr.update(variant=style) for style in button_styles] +
                 [gr.update(visible=shown) for shown in tab_visibility])
|
|
|
def moveStep3():
    """Highlight the Step 3 button and show only the third wizard tab."""
    button_styles = ("secondary", "secondary", "primary")
    tab_visibility = (False, False, True)

    return tuple([gr.update(variant=style) for style in button_styles] +
                 [gr.update(visible=shown) for shown in tab_visibility])
|
|
|
def _genSentenceCoverMsg(test_sentences, total_att_terms, isGen=False): |
|
att_cover_dict = {} |
|
for att, grp, sent in test_sentences: |
|
num = att_cover_dict.get(att, 0) |
|
att_cover_dict[att] = num+1 |
|
att_by_count = dict(sorted(att_cover_dict.items(), key=lambda item: item[1])) |
|
num_covered_atts = len(list(att_by_count.keys())) |
|
lest_covered_att = list(att_by_count.keys())[0] |
|
least_covered_count = att_by_count[lest_covered_att] |
|
|
|
source_msg = "Found" if isGen==False else "Generated" |
|
if num_covered_atts >= total_att_terms: |
|
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes. Please select model to test.**" |
|
else: |
|
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering {num_covered_atts} of {total_att_terms} attributes. Please select model to test.**" |
|
|
|
return info_msg |
|
|
|
def retrieveSentences(gr1, gr2, att1, att2, progress=gr.Progress()):
    """Fetch pre-generated test sentences for the bias specification.

    Args:
        gr1, gr2: comma-separated social group terms (raw textbox values).
        att1, att2: comma-separated attribute terms (raw textbox values).
        progress: Gradio progress tracker.

    Returns a 19-tuple of Gradio updates: error message, generation row,
    model dropdown, info message, progress bar, three step buttons, two
    tabs, sentence accordion/row/dataframe, the two action buttons, and
    the four read-only spec textboxes on tab 2.
    """
    global use_paper_sentences, G_NUM_SENTENCES

    print("RETRIEVE SENTENCES CLICKED!")
    # Defaults assume success: advance to step 2, show the generation row.
    variants = ["secondary","primary","secondary"]
    inter = [True, True, False]
    tabs = [True, False]
    bias_gen_states = [True, False]
    prog_vis = [True]
    err_update = updateErrorMsg(False, "")
    info_msg_update = gr.Markdown.update(visible=False, value="")
    openai_gen_row_update = gr.Row.update(visible=True)
    tested_model_dropdown_update = gr.Dropdown.update(visible=False)

    test_sentences = []
    bias_spec = getTermsFromGUI(gr1, gr2, att1, att2)
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    total_att_terms = len(a1)+len(a2)
    all_terms_len = len(g1)+len(g2)+len(a1)+len(a2)
    print(f"Length of all the terms: {all_terms_len}")

    if all_terms_len == 0:
        print("No terms entered!")
        # BUGFIX: previously this specific error was overwritten further
        # down by a blanket reset and the generic NO_SENTENCES_ERROR.
        err_update = updateErrorMsg(True, NO_TERMS_ENTERED_ERROR)
        variants = ["primary","secondary","secondary"]
        inter = [True, False, False]
        tabs = [True, False]
        prog_vis = [False]
    else:
        tabs = [False, True]
        progress(0, desc="Fetching saved sentences...")
        test_sentences = rq_mgr._getSavedSentences(bias_spec, progress, use_paper_sentences)

        print(f"Type: {type(test_sentences)}")
        num_sentences = len(test_sentences)
        print(f"Returned num sentences: {num_sentences}")

        if num_sentences == 0:
            print("Test sentences empty!")
            err_update = updateErrorMsg(True, NO_SENTENCES_ERROR)
        else:
            info_msg = _genSentenceCoverMsg(test_sentences, total_att_terms)
            info_msg_update = gr.Markdown.update(visible=True, value=info_msg)
            print(f"Got {len(test_sentences)}, allowing bias test...")
            print(test_sentences)
            # Sentences found: skip generation, show the model dropdown.
            bias_gen_states = [False, True]
            openai_gen_row_update = gr.Row.update(visible=False)
            tested_model_dropdown_update = gr.Dropdown.update(visible=True)

    G_NUM_SENTENCES = len(test_sentences)

    return (err_update,
            openai_gen_row_update,
            tested_model_dropdown_update,
            info_msg_update,
            # BUGFIX: was visible=prog_vis (a list, always truthy).
            gr.update(visible=prog_vis[0]),
            gr.update(variant=variants[0], interactive=inter[0]),
            gr.update(variant=variants[1], interactive=inter[1]),
            gr.update(variant=variants[2], interactive=inter[2]),
            gr.update(visible=tabs[0]),
            gr.update(visible=tabs[1]),
            gr.Accordion.update(visible=bias_gen_states[1], label=f"Test sentences ({len(test_sentences)})"),
            gr.update(visible=True),
            gr.DataFrame.update(value=test_sentences),
            gr.update(visible=bias_gen_states[0]),
            gr.update(visible=bias_gen_states[1]),
            gr.update(value=', '.join(g1)),
            gr.update(value=', '.join(g2)),
            gr.update(value=', '.join(a1)),
            gr.update(value=', '.join(a2))
            )
|
|
|
def startBiasTest(test_sentences_df, gr1, gr2, att1, att2, model_name, progress=gr.Progress()):
    """Score the selected model for social bias on the test sentences.

    Args:
        test_sentences_df: dataframe of (sentence, group term, attribute term).
        gr1, gr2, att1, att2: raw textbox values of the bias specification.
        model_name: HuggingFace model identifier chosen in the dropdown.
        progress: Gradio progress tracker.

    Returns a 16-tuple of Gradio updates: error message, progress bar,
    three step buttons, three tabs, overall bias score, per-attribute
    scores, per-sentence results dataframe, interpretation HTML, and the
    four read-only spec textboxes on tab 3.
    """
    global G_NUM_SENTENCES

    variants = ["secondary","secondary","primary"]
    inter = [True, True, True]
    tabs = [False, False, True]
    err_update = updateErrorMsg(False, "")

    def _abort(error_update):
        # Show the error and leave all 15 other output components unchanged.
        return (error_update,) + tuple(gr.update() for _ in range(15))

    if test_sentences_df.shape[0] == 0:
        G_NUM_SENTENCES = 0
        print("Test sentences empty!")
        # BUGFIX: previously execution continued with an empty dataframe
        # and crashed in the pair-conversion/scoring code below.
        return _abort(updateErrorMsg(True, NO_SENTENCES_ERROR))

    progress(0, desc="Starting social bias testing...")
    print(f"Type: {type(test_sentences_df)}")
    print(f"Data: {test_sentences_df}")

    # Bias specification from the GUI text boxes.
    bias_spec = getTermsFromGUI(gr1, gr2, att1, att2)
    print(f"Bias spec dict: {bias_spec}")
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)

    # Turn each sentence into a masked template, then into
    # stereotype/anti-stereotype sentence pairs.
    test_sentences_df['Template'] = test_sentences_df.apply(bt_mgr.sentence_to_template, axis=1)
    print(f"Data with template: {test_sentences_df}")

    test_pairs_df = bt_mgr.convert2pairs(bias_spec, test_sentences_df)
    print(f"Test pairs: {test_pairs_df.head(3)}")

    progress(0.05, desc=f"Loading model {model_name}...")
    print(f"Test model name: {model_name}")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    tested_model, tested_tokenizer = bt_mgr._getModelSafe(model_name, device)
    if tested_model is None:
        print("Tested model is empty!!!!")
        # BUGFIX: previously execution continued with model=None and
        # crashed in testModelProbability below.
        return _abort(updateErrorMsg(True, MODEL_NOT_LOADED_ERROR))

    # Sanity-check the loaded model, then score all sentence pairs.
    bt_mgr.testModelProbability(model_name, tested_model, tested_tokenizer, device)
    test_score_df, bias_stats_dict = bt_mgr.testBiasOnPairs(test_pairs_df, bias_spec, model_name, tested_model, tested_tokenizer, device, progress)
    print(f"Test scores: {test_score_df.head(3)}")

    # Overall model bias keyed by the tested model name (for gr.Label).
    model_bias_dict = {}
    model_bias_dict[bias_stats_dict['tested_model']] = bias_stats_dict['model_bias']

    per_attrib_bias = bias_stats_dict['per_attribute']

    # Signed bias score: positive when the stereotyped completion won.
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'bias_score'] = test_pairs_df['top_logit']-test_pairs_df['bottom_logit']
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 0, 'bias_score'] = test_pairs_df['bottom_logit']-test_pairs_df['top_logit']

    test_pairs_df['groups_rel'] = test_pairs_df['att_term_1']+"/"+test_pairs_df['att_term_2']

    # Human-readable yes/no flag for the results table.
    test_pairs_df['stereotyped_b'] = "Unknown"
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 1, 'stereotyped_b'] = "yes"
    test_pairs_df.loc[test_pairs_df['stereotyped'] == 0, 'stereotyped_b'] = "no"

    # Per-sentence results table shown in the accordion.
    score_templates_df = test_pairs_df[['group_term','template']].copy()
    score_templates_df['Groups'] = test_pairs_df['groups_rel']
    score_templates_df['Stereotyped'] = test_pairs_df['stereotyped_b']
    score_templates_df = score_templates_df.rename(columns = {'group_term': "Attribute",
                                                              "template": "Template"})
    score_templates_df = score_templates_df[['Stereotyped','Attribute','Groups','Template']]
    num_sentences = score_templates_df.shape[0]

    interpret_msg = bt_mgr._constructInterpretationMsg(bias_spec, num_sentences,
                                                       model_name, bias_stats_dict, per_attrib_bias,
                                                       score_templates_df)

    return (err_update,
            gr.Markdown.update(visible=True),
            gr.Button.update(variant=variants[0], interactive=inter[0]),
            gr.Button.update(variant=variants[1], interactive=inter[1]),
            gr.Button.update(variant=variants[2], interactive=inter[2]),
            gr.update(visible=tabs[0]),
            gr.update(visible=tabs[1]),
            gr.update(visible=tabs[2]),
            model_bias_dict,
            per_attrib_bias,
            gr.update(value=score_templates_df, visible=True),
            gr.update(value=interpret_msg, visible=True),
            gr.update(value=', '.join(g1)),
            gr.update(value=', '.join(g2)),
            gr.update(value=', '.join(a1)),
            gr.update(value=', '.join(a2))
            )
|
|
|
|
|
def loadInterface():
    """On page load, restore the saved OpenAI key into its textbox."""
    print("Loading the interface...")
    saved_key = cookie_mgr.loadOpenAIKey()
    return gr.Textbox.update(value=saved_key)
|
|
|
|
|
def selectAttributeLabel(evt: gr.SelectData):
    """Debug handler: dump the selected label event's attributes and methods."""
    print(f"Selected {evt.value} at {evt.index} from {evt.target}")
    callable_names = [name for name in dir(evt) if callable(getattr(evt, name))]

    print("Attributes:")
    for name in dir(evt):
        print(name, getattr(evt, name))

    print(f"Methods: {callable_names}")

    return ()
|
|
|
|
|
def editSentence(test_sentences, evt: gr.EventData):
    """Debug handler: log the first rows before and after a dataframe edit."""
    rows_before = test_sentences[0:10]
    rows_after = evt._data['data'][0:10]

    print(f"Edit Sentence: {evt}")
    print("--BEFORE---")
    print(rows_before)
    print("--AFTER--")
    print(f"Data: {rows_after}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Custom theme experiment. NOTE(review): `theme` is defined but the Blocks UI
# below is constructed with `soft`; `theme` appears unused in this file.
theme = gr.themes.Soft().set(
    button_small_radius='*radius_xxs',
    background_fill_primary='*neutral_50',
    border_color_primary='*primary_50'
)

# Theme actually passed to gr.Blocks(...) below.
soft = gr.themes.Soft(
    primary_hue="slate",
    spacing_size="sm",
    radius_size="md"
).set(
    button_primary_background_fill='*primary_400'
)
|
|
|
# Element-level CSS overrides keyed by the elem_id values used in the UI below
# (white borders/backgrounds, hidden example labels, red clickable error text).
css_adds = "#group_row {background: white; border-color: white;} \
            #attribute_row {background: white; border-color: white;} \
            #tested_model_row {background: white; border-color: white;} \
            #button_row {background: white; border-color: white;} \
            #examples_elem .label {display: none}\
            #att1_words {border-color: white;} \
            #att2_words {border-color: white;} \
            #group1_words {border-color: white;} \
            #group2_words {border-color: white;} \
            #tested_model_drop {border-color: white;} \
            #gen_model_check {border-color: white;} \
            #gen_model_check .wrap {border-color: white;} \
            #gen_model_check .form {border-color: white;} \
            #open_ai_key_box {border-color: white;} \
            #gen_col {border-color: white;} \
            #gen_col .form {border-color: white;} \
            #res_label {background-color: #F8FAFC;} \
            #per_attrib_label_elem {background-color: #F8FAFC;} \
            #accordion {border-color: #E5E7EB} \
            #err_msg_elem p {color: #FF0000; cursor: pointer} "
|
|
|
|
|
# Main Gradio app: a three-step wizard (bias spec -> test sentences -> results).
with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
               css=css_adds) as iface:
    # Step navigation bar plus the shared error and progress markdown areas.
    with gr.Row():
        with gr.Group():
            s1_btn = gr.Button(value="Step 1: Bias Specification", variant="primary", visible=True, interactive=True, size='sm')
            s2_btn = gr.Button(value="Step 2: Test Sentences", variant="secondary", visible=True, interactive=False, size='sm')
            s3_btn = gr.Button(value="Step 3: Bias Testing", variant="secondary", visible=True, interactive=False, size='sm')
            err_message = gr.Markdown("", visible=False, elem_id="err_msg_elem")
            bar_progress = gr.Markdown(" ")

    # Step 1: enter or prefill the bias specification.
    with gr.Column(visible=True) as tab1:
        with gr.Column():
            gr.Markdown("### Social Bias Specification")
            gr.Markdown("Use one of the predefined specifications or enter own terms for social groups and attributes")
            with gr.Row():
                # Selecting an entry triggers prefillBiasSpec() to fill
                # the four term boxes below.
                example_biases = gr.Dropdown(
                    value="Select a predefined bias to test",
                    allow_custom_value=False,
                    interactive=True,
                    choices=[
                        "Flowers/Insects <> Pleasant/Unpleasant",
                        "Instruments/Weapons <> Pleasant/Unpleasant",
                        "Male/Female <> Professions",
                        "Male/Female <> Science/Art",
                        "Male/Female <> Career/Family",
                        "Male/Female <> Math/Art",
                        "Eur.-American/Afr.-American <> Pleasant/Unpleasant #1",
                        "Eur.-American/Afr.-American <> Pleasant/Unpleasant #2",
                        "Eur.-American/Afr.-American <> Pleasant/Unpleasant #3",
                        "African-Female/European-Male <> Intersectional",
                        "African-Female/European-Male <> Emergent",
                        "Mexican-Female/European-Male <> Intersectional",
                        "Mexican-Female/European-Male <> Emergent",
                        "Young/Old Name <> Pleasant/Unpleasant",
                        "Mental/Physical Disease <> Temporary/Permanent",
                    ], label="Example Biases",
                )

            # Free-text entry of the bias specification terms.
            with gr.Row(elem_id="group_row"):
                group1 = gr.Textbox(label="Social Group 1", max_lines=1, elem_id="group1_words", elem_classes="input_words", placeholder="brother, father")
                group2 = gr.Textbox(label='Social Group 2', max_lines=1, elem_id="group2_words", elem_classes="input_words", placeholder="sister, mother")
            with gr.Row(elem_id="attribute_row"):
                att1 = gr.Textbox(label='Stereotype for Group 1', max_lines=1, elem_id="att1_words", elem_classes="input_words", placeholder="science, technology")
                att2 = gr.Textbox(label='Anti-stereotype for Group 1', max_lines=1, elem_id="att2_words", elem_classes="input_words", placeholder="poetry, art")
            with gr.Row():
                gr.Markdown(" ")
                get_sent_btn = gr.Button(value="Get Sentences", variant="primary", visible=True)
                gr.Markdown(" ")
|
|
|
|
|
    # Step 2: review/generate test sentences for the chosen specification.
    with gr.Column(visible=False) as tab2:
        info_sentences_found = gr.Markdown(value="", visible=False)

        # Read-only copy of the specification being tested.
        gr.Markdown("### Tested Social Bias Specification", visible=True)
        with gr.Row():
            group1_fixed = gr.Textbox(label="Social Group 1", max_lines=1, elem_id="group1_words", elem_classes="input_words", interactive=False, visible=True)
            group2_fixed = gr.Textbox(label='Social Group 2', max_lines=1, elem_id="group2_words", elem_classes="input_words", interactive=False, visible=True)
        with gr.Row():
            att1_fixed = gr.Textbox(label='Stereotype for Group 1', max_lines=1, elem_id="att1_words", elem_classes="input_words", interactive=False, visible=True)
            att2_fixed = gr.Textbox(label='Anti-stereotype for Group 1', max_lines=1, elem_id="att2_words", elem_classes="input_words", interactive=False, visible=True)

        with gr.Row():
            with gr.Column():
                # OpenAI generation controls; shown when no saved
                # sentences cover the specification.
                with gr.Row(visible=False) as online_gen_row:
                    openai_key = gr.Textbox(lines=1, label="OpenAI API Key", value=None,
                                            placeholder="starts with sk-",
                                            info="Please provide the key for an Open AI account to generate new test sentences",
                                            visible=True,
                                            interactive=True,
                                            elem_id="open_ai_key_box")
                    num_sentences2gen = gr.Slider(2, 20, value=2, step=1,
                                                  interactive=True,
                                                  visible=True,
                                                  info="Two or more per attribute are recommended for a good bias estimate.",
                                                  label="Number of test sentences to generate per attribute", container=True)

                # Model to score; hidden until sentences are available.
                tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b"], value="bert-base-uncased",
                                                multiselect=None,
                                                interactive=True,
                                                label="Tested Language Model",
                                                elem_id="tested_model_drop",
                                                visible=True
                                               )

        # Action buttons: generate sentences, or run the bias test.
        with gr.Row():
            gr.Markdown(" ")
            gen_btn = gr.Button(value="Generate New Sentences", variant="primary", visible=True)
            bias_btn = gr.Button(value="Test Model for Social Bias", variant="primary", visible=False)
            gr.Markdown(" ")

        # Editable dataframe of the current test sentences.
        with gr.Row(visible=False) as row_sentences:
            with gr.Accordion(label="Test Sentences", open=False, visible=False) as acc_test_sentences:
                test_sentences = gr.DataFrame(
                    headers=["Test sentence", "Group term", "Attribute term"],
                    datatype=["str", "str", "str"],
                    row_count=(1, 'dynamic'),
                    col_count=(3, 'fixed'),
                    interactive=True,
                    visible=True,
                    max_rows=2,
                    overflow_row_behaviour="paginate")
|
|
|
    # Step 3: bias test results and interpretation.
    with gr.Column(visible=False) as tab3:
        gr.Markdown("### Tested Social Bias Specification")
        with gr.Row():
            group1_fixed2 = gr.Textbox(label="Social Group 1", max_lines=1, elem_id="group1_words", elem_classes="input_words", interactive=False)
            group2_fixed2 = gr.Textbox(label='Social Group 2', max_lines=1, elem_id="group2_words", elem_classes="input_words", interactive=False)
        with gr.Row():
            att1_fixed2 = gr.Textbox(label='Stereotype for Group 1', max_lines=1, elem_id="att1_words", elem_classes="input_words", interactive=False)
            att2_fixed2 = gr.Textbox(label='Anti-stereotype for Group 1', max_lines=1, elem_id="att2_words", elem_classes="input_words", interactive=False)

        # Two-column results area: scores on the left, interpretation right.
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### Bias Test Results")
            with gr.Column(scale=1):
                gr.Markdown("### Interpretation")
        with gr.Row():
            with gr.Column(scale=2):
                lbl_model_bias = gr.Markdown("**Model Bias** - % stereotyped choices (↑ more bias)")
                model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
                                            elem_id="res_label",
                                            show_label=False)
                lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
                attribute_bias_labels = gr.Label(num_top_classes=8, label="Per attribute: % stereotyped choices (↑ more bias)",
                                                 elem_id="per_attrib_label_elem",
                                                 show_label=False)
            with gr.Column(scale=1):
                interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem'20<a>", visible=False)
                save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
                                   visible=False)

        # Per-sentence scoring breakdown.
        with gr.Row():
            with gr.Accordion("Per Sentence Bias Results", open=False, visible=True):
                test_pairs = gr.DataFrame(
                    headers=["group_term", "template", "att_term_1", "att_term_2","label_1","label_2"],
                    datatype=["str", "str", "str", "str", "str", "str"],
                    row_count=(1, 'dynamic'),
                    max_rows=2,
                    overflow_row_behaviour="paginate"
                )
|
|
|
|
|
    # ---- Event wiring ----

    # Restore the saved OpenAI key when the page loads.
    iface.load(fn=loadInterface,
               inputs=[],
               outputs=[openai_key])

    # Prefill the four term boxes from a predefined bias selection.
    example_biases.select(fn=prefillBiasSpec,
                          inputs=None,
                          outputs=[group1, group2, att1, att2])

    # Step 1 -> Step 2: fetch saved sentences for the specification.
    get_sent_btn.click(fn=retrieveSentences,
                       inputs=[group1, group2, att1, att2],
                       outputs=[err_message, online_gen_row, tested_model_name, info_sentences_found, bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn,
                                group1_fixed, group2_fixed, att1_fixed, att2_fixed ])

    # Generate fresh test sentences with the OpenAI API.
    gen_btn.click(fn=generateSentences,
                  inputs=[group1, group2, att1, att2, openai_key, num_sentences2gen],
                  outputs=[err_message, info_sentences_found, online_gen_row,
                           tested_model_name, acc_test_sentences, row_sentences, test_sentences, gen_btn, bias_btn ])

    # Step 2 -> Step 3: run the bias test on the selected model.
    bias_btn.click(fn=startBiasTest,
                   inputs=[test_sentences,group1,group2,att1,att2,tested_model_name],
                   outputs=[err_message, bar_progress, s1_btn, s2_btn, s3_btn, tab1, tab2, tab3, model_bias_label, attribute_bias_labels, test_pairs, interpretation_msg,
                            group1_fixed2, group2_fixed2, att1_fixed2, att2_fixed2]
                   )

    # Manual wizard navigation via the step buttons.
    s1_btn.click(fn=moveStep1,
                 inputs=[],
                 outputs=[s1_btn, s2_btn, s3_btn, tab1, tab2, tab3])

    s2_btn.click(fn=moveStep2,
                 inputs=[],
                 outputs=[s1_btn, s2_btn, s3_btn, tab1, tab2, tab3])

    s3_btn.click(fn=moveStep3,
                 inputs=[],
                 outputs=[s1_btn, s2_btn, s3_btn, tab1, tab2, tab3])

    # Debug hooks: inspect label selections and dataframe edits.
    attribute_bias_labels.select(fn=selectAttributeLabel,
                                 inputs=[],
                                 outputs=[])

    test_sentences.change(fn=editSentence,
                          inputs=[test_sentences],
                          outputs=[]
                          )

# Serve the app; queue with two concurrent workers (blocks until exit).
iface.queue(concurrency_count=2).launch()