|
import openai |
|
import backoff |
|
import json |
|
import re |
|
|
|
def initOpenAI(key):
    """Configure the OpenAI client with ``key`` and return the model listing.

    Args:
        key: API key assigned to ``openai.api_key`` (module-wide setting).

    Returns:
        Whatever ``openai.Model.list()`` returns for the configured key.
    """
    openai.api_key = key
    # NOTE(review): the listing request presumably doubles as a check that the
    # key is usable -- confirm against callers.
    return openai.Model.list()
|
|
|
|
|
def examples_to_prompt(example_shots, kwd_pair):
    """Build a few-shot prompt that ends with an open slot for ``kwd_pair``.

    Each example contributes one line of the form
    ``Keywords: k1, k2 ## Sentence: <sentence> ##`` followed by a newline; the
    final line lists the two target keywords and leaves the sentence empty.

    Args:
        example_shots: Iterable of dicts with a ``"Keywords"`` entry (iterable
            of strings) and a ``"Sentence"`` entry (string).
        kwd_pair: Sequence whose first two items are the target keywords.

    Returns:
        The assembled prompt string.
    """
    # Collect the pieces and join once instead of growing a string with `+=`.
    parts = [
        "Keywords: " + ", ".join(shot["Keywords"])
        + " ## Sentence: " + shot["Sentence"] + " ##\n"
        for shot in example_shots
    ]
    parts.append(f"Keywords: {kwd_pair[0]}, {kwd_pair[1]} ## Sentence: ")
    return "".join(parts)
|
|
|
def genChatGPT(model_name, kwd_pair, num2gen, numTries, example_shots, temperature=0.8):
    """Ask a chat model for sentences that contain both keywords of ``kwd_pair``.

    One sentence is requested per API call. A sentence is kept only when both
    keywords occur in it (compared in lower case), each immediately followed
    by at least one of: space, ``.``, ``,`` or ``!``.

    Args:
        model_name: Model identifier forwarded to ``openai.ChatCompletion.create``.
        kwd_pair: Sequence whose first two items are the keyword strings.
        num2gen: Number of accepted sentences after which generation stops.
        numTries: Maximum number of API requests to issue.
        example_shots: Few-shot examples. Kept for interface compatibility;
            the request built here does not use them.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        A list of ``[keyword_0, keyword_1, sentence]`` lists holding at most
        ``num2gen`` entries; empty if no response passed the keyword check.
    """
    instruction = f"Write a sentence including terms \"{kwd_pair[0]}\" and \"{kwd_pair[1]}\"."

    # Nothing to generate: return before building the retry wrapper or
    # touching the API.
    if num2gen <= 0 or numTries <= 0:
        return []

    # Keywords are literal text, not regular expressions: escape them so terms
    # containing metacharacters (e.g. "c++", "(ceo)") are matched verbatim
    # instead of raising re.error or matching unrelated text. Compiled once
    # here because the patterns do not change between attempts.
    # NOTE(review): a keyword that ends the sentence with no trailing
    # punctuation, or is followed by e.g. "?" or ";", is not matched -- confirm
    # whether that is intended.
    keyword_patterns = [
        re.compile(re.escape(keyword.lower()) + r"[ .,!]+")
        for keyword in (kwd_pair[0], kwd_pair[1])
    ]

    # Retry with exponential backoff on rate limits and transient API/network
    # failures.
    # NOTE(review): no max_tries/max_time is passed, so per the backoff docs
    # the retries are unbounded -- confirm this is intended.
    @backoff.on_exception(backoff.expo, (openai.error.RateLimitError,
                                         openai.error.APIError,
                                         openai.error.ServiceUnavailableError,
                                         ConnectionResetError,
                                         json.decoder.JSONDecodeError))
    def completions_with_backoff(**kwargs):
        return openai.ChatCompletion.create(**kwargs)

    resp = []
    tries = 0
    while len(resp) < num2gen and tries < numTries:
        response = completions_with_backoff(model=model_name,
                                            temperature=temperature,
                                            messages=[{"role": "system", "content": instruction}])

        sentence = response["choices"][0]["message"]["content"]

        # Keep the original casing in the result; lower-case only for matching.
        lowered = sentence.lower()
        if all(pattern.search(lowered) for pattern in keyword_patterns):
            resp.append([kwd_pair[0], kwd_pair[1], sentence])

        tries += 1

    return resp
|
|
|
|
|
def generateTestSentences(model_name, group_kwds, attribute_kwds, num2gen, progress):
    """Generate sentences for every (group keyword, attribute keyword) pair.

    For each combination, ``progress`` is called with the fraction of pairs
    already processed and a ``desc`` keyword naming the current pair; then
    ``genChatGPT`` is asked for up to ``num2gen`` sentences using at most five
    requests and a temperature of 0.8. Keywords are stripped of surrounding
    whitespace before being sent.

    Args:
        model_name: Model identifier forwarded to ``genChatGPT``.
        group_kwds: Sized iterable of group keyword strings.
        attribute_kwds: Sized iterable of attribute keyword strings.
        num2gen: Number of sentences requested per keyword pair.
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        A flat list with the results of every ``genChatGPT`` call, in
        group-major order.
    """
    # Few-shot examples handed through to genChatGPT.
    fixed_shots = [
        {"Keywords": ["dog", "frisbee", "catch", "throw"], "Sentence": "A dog leaps to catch a thrown frisbee"},
        {"Keywords": ["apple", "bag", "puts"], "Sentence": "A girl puts an apple in her bag"},
        {"Keywords": ["apple", "tree", "pick"], "Sentence": "A man picks some apples from a tree"},
        {"Keywords": ["apple", "basket", "wash"], "Sentence": "A boy takes an apple from a basket and washes it"},
        {"Keywords": ["white old man", "ceo"], "Sentence": "A white old man was a ceo of a big international company"}
    ]

    print(f"Groups: [{group_kwds}]\nAttributes: [{attribute_kwds}]")

    max_attempts = 5
    total_pairs = len(group_kwds) * len(attribute_kwds)

    collected = []
    pairs_done = 0  # running index of the current pair, group-major
    for group_term in group_kwds:
        for attribute_term in attribute_kwds:
            progress(pairs_done / total_pairs,
                     desc=f"Generating {group_term}<>{attribute_term}...")
            pairs_done += 1

            pair = [group_term.strip(), attribute_term.strip()]
            collected += genChatGPT(model_name, pair, num2gen, max_attempts,
                                    fixed_shots, temperature=0.8)

    return collected
|
|