import pandas as pd
import gradio as gr
import hashlib, base64
import openai

from openAI_manager import initOpenAI, examples_to_prompt, genChatGPT, generateTestSentences

import mgr_bias_scoring as bt_mgr
import mgr_sentences as smgr

from error_messages import *
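
# Helpers for generating bias test sentences with OpenAI chat models
# (_generateOnline) and for retrieving previously saved generations (_getSavedSentences).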


def getHashForString(text):
    # MD5-hash the string and return a URL-safe base64 encoding of the digest
    d = hashlib.md5(bytes(text, encoding='utf-8')).digest()
    d = base64.urlsafe_b64encode(d)

    return d.decode('utf-8')


def getBiasName(gr1_lst, gr2_lst, att1_lst, att2_lst):
    # Compose a unique bias name from the first group/attribute terms
    # plus a hash of the full term specification
    full_spec = ''.join(gr1_lst) + ''.join(gr2_lst) + ''.join(att1_lst) + ''.join(att2_lst)
    spec_hash = getHashForString(full_spec)
    bias_name = f"{gr1_lst[0].replace(' ','-')}_{gr2_lst[0].replace(' ','-')}__{att1_lst[0].replace(' ','-')}_{att2_lst[0].replace(' ','-')}_{spec_hash}"

    return bias_name
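
# Example of the resulting name format (hypothetical terms; the trailing hash
# is derived from the full specification):
#   getBiasName(["man"], ["woman"], ["math"], ["art"]) -> "man_woman__math_art_<hash>"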


def _generateOnline(bias_spec, progress, key, num2gen, isSaving=False):
    test_sentences = []

    # Initialize the OpenAI client and list the models available to this key
    try:
        models = initOpenAI(key)
        model_names = [m['id'] for m in models['data']]
        print(f"Model names: {model_names}")
    except openai.error.AuthenticationError as err:
        raise gr.Error(OPENAI_INIT_ERROR.replace("<ERR>", str(err)))

    if "gpt-3.5-turbo" in model_names:
        print("Access to ChatGPT")
    if "gpt-4" in model_names:
        print("Access to GPT-4")

    model_name = "gpt-3.5-turbo"

    # Quick smoke test of the generation endpoint before the full run
    gen = genChatGPT(model_name, ["man", "math"], 2, 5,
                     [{"Keywords": ["sky", "blue"], "Sentence": "the sky is blue"}],
                     temperature=0.8)
    print(f"Test gen: {gen}")

    print(f"Bias spec dict: {bias_spec}")

    # Generate test sentences for all group and attribute terms
    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    gens = generateTestSentences(model_name, g1+g2, a1+a2, num2gen, progress)
    print("--GENS--")
    print(gens)

    for gt, at, s in gens:
        test_sentences.append([s, gt, at])

    print("Saving generations to HF DF...")
    save_df = pd.DataFrame(test_sentences, columns=["Test sentence", "Group term", "Attribute term"])

    print(f"Bias spec dict: {bias_spec}")

    # Derive a sentence template for each generated sentence
    save_df['Template'] = save_df.apply(bt_mgr.sentence_to_template, axis=1)
    print(f"Data with template: {save_df}")

    # Expand the sentences into test pairs (two term variants with labels per row)
    test_pairs_df = bt_mgr.convert2pairs(bias_spec, save_df)
    print(f"Test pairs cols: {list(test_pairs_df.columns)}")

    bias_name = getBiasName(g1, g2, a1, a2)

    save_df = save_df.rename(columns={'Group term': 'org_grp_term',
                                      "Attribute term": 'att_term',
                                      "Test sentence": 'sentence',
                                      "Template": "template"})

    save_df['grp_term1'] = test_pairs_df['att_term_1']
    save_df['grp_term2'] = test_pairs_df['att_term_2']
    save_df['label_1'] = test_pairs_df['label_1']
    save_df['label_2'] = test_pairs_df['label_2']
    save_df['bias_spec'] = bias_name
    save_df['type'] = 'tool'
    save_df['gen_model'] = model_name

    if isSaving:
        print(f"Save cols: {list(save_df.columns)}")
        print(f"Save: {save_df.head(1)}")

    num_sentences = len(test_sentences)
    print(f"Returned num sentences: {num_sentences}")

    return test_sentences
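
# Usage sketch (hypothetical values): `progress` is typically the gr.Progress
# callback supplied by the Gradio UI and `key` an OpenAI API key.
#   sentences = _generateOnline(bias_spec, gr.Progress(), key, num2gen=2)
#   # -> [[test_sentence, group_term, attribute_term], ...]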


def _getSavedSentences(bias_spec, progress, use_paper_sentences):
    test_sentences = []

    print(f"Bias spec dict: {bias_spec}")

    g1, g2, a1, a2 = bt_mgr.get_words(bias_spec)
    for gi, g_term in enumerate(g1+g2):
        # Match attribute terms in their original, dash-joined, and no-space variants
        att_list = a1+a2
        att_list_dash = [t.replace(' ', '-') for t in att_list]
        att_list.extend(att_list_dash)
        att_list_nospace = [t.replace(' ', '') for t in att_list]
        att_list.extend(att_list_nospace)
        att_list = list(set(att_list))

        progress(gi/len(g1+g2), desc=f"{g_term}")

        # Fetch all saved sentences for this group term
        _, sentence_df, _ = smgr.getSavedSentences(g_term)

        # Keep only sentences produced by the supported generation models,
        # optionally restricted to the paper-sourced set
        flt_gen_models = ["gpt-3.5", "gpt-3.5-turbo"]
        print(f"Before filter: {sentence_df.shape[0]}")
        if use_paper_sentences:
            if 'type' in list(sentence_df.columns):
                sentence_df = sentence_df.query("type=='paper' and gen_model in @flt_gen_models")
                print(f"After filter: {sentence_df.shape[0]}")
        else:
            if 'type' in list(sentence_df.columns):
                sentence_df = sentence_df.query("gen_model in @flt_gen_models")
                print(f"After filter: {sentence_df.shape[0]}")

        if sentence_df.shape[0] > 0:
            sentence_df = sentence_df[['org_grp_term', 'att_term', 'sentence']]
            sentence_df = sentence_df.rename(columns={'org_grp_term': "Group term",
                                                      "att_term": "Attribute term",
                                                      "sentence": "Test sentence"})

            # Keep only rows whose attribute term belongs to this bias specification
            sel = sentence_df[sentence_df['Attribute term'].isin(att_list)].values
            if len(sel) > 0:
                for gt, at, s in sel:
                    test_sentences.append([s, gt, at])
        else:
            print("Test sentences empty!")

    return test_sentences
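
# Usage sketch (hypothetical values): retrieve previously saved test sentences
# for the bias specification, optionally limited to the paper-sourced set:
#   saved = _getSavedSentences(bias_spec, gr.Progress(), use_paper_sentences=True)
#   # -> [[test_sentence, group_term, attribute_term], ...]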