import re

# Patterns are compiled once at import time; raw strings avoid the invalid
# escape sequences ("\s", "\.", ...) that non-raw literals would trigger.
_CHOICE_A_TO_E_RE = re.compile(r"A|B|C|D|E")
_CHOICE_A_TO_D_RE = re.compile(r"A|B|C|D")
_NUMBER_RE = re.compile(r"-?\d+\.?\d*")
_YES_NO_SEPARATOR_RE = re.compile(r"[\"'\n.\s:,]")
_LETTER_NOISE_RE = re.compile(r"[\"'\n.\s]")
_DATASET_TYPE_RE = re.compile(
    r"arithmetic|commonsense-mc|commonsense-verify|symbolic-coin|symbolic-letter"
)
_ENTITY_SEPARATOR_RE = re.compile(r"\n|\s*-\s*|\.")
_WHITESPACE_CHAR_RE = re.compile(r"\s")

_NO_PREFIX = "No, "


def answer_cleansing_zero_shot(dataset, pred, must_choice=False):
    """Extract the final answer token from a raw prediction string.

    Args:
        dataset: Task type. One of "commonsense-mc", "arithmetic",
            "commonsense-verify", "symbolic-coin", "symbolic-letter" or
            "UNDEFINED". The name must match exactly.
        pred: Raw prediction text.
        must_choice: For "arithmetic" only; when true the answer is an
            option letter A-D instead of a number.

    Returns:
        The first candidate found for the task type with one trailing
        period removed, or "" when no candidate is found.

    Raises:
        ValueError: If ``dataset`` is not one of the names listed above.
    """
    pred = pred.strip()

    if dataset == "commonsense-mc":
        candidates = _CHOICE_A_TO_E_RE.findall(pred)
    elif dataset == "arithmetic":
        if must_choice:
            candidates = _CHOICE_A_TO_D_RE.findall(pred)
        else:
            # Drop thousands separators so "1,234" is read as one number.
            candidates = _NUMBER_RE.findall(pred.replace(",", ""))
    elif dataset in ("commonsense-verify", "symbolic-coin"):
        spaced = _YES_NO_SEPARATOR_RE.sub(" ", pred.lower())
        candidates = [word for word in spaced.split(" ") if word in ("yes", "no")]
    elif dataset == "symbolic-letter":
        candidates = [_LETTER_NOISE_RE.sub("", pred)]
    elif dataset == "UNDEFINED":
        # NOTE(review): the prediction is kept as a plain string here, so the
        # "first candidate" taken below is its first character. Preserved
        # as-is; confirm with callers whether the whole string was intended.
        candidates = pred
    else:
        raise ValueError("dataset is not properly defined ...")

    # If there is no candidate, the answer is empty; otherwise take the first.
    answer = candidates[0] if len(candidates) > 0 else ""

    # (For arithmetic tasks) a number matched as "42." loses its final period.
    if answer != "" and answer[-1] == ".":
        answer = answer[:-1]

    return answer


def type_cleasing(type):
    """Return the first known task-type name found in ``type``.

    Returns "UNDEFINED" when none of the known names occurs in the text.
    """
    # The parameter name shadows the builtin but is kept for callers that
    # pass it by keyword; it is not rebound inside the function.
    found = _DATASET_TYPE_RE.search(type)
    if found is None:
        return "UNDEFINED"
    return found.group(0)


def entity_cleansing(ent):
    """Split an entity listing into a list of trimmed, non-empty names.

    Newlines, hyphens (with surrounding whitespace), periods and commas all
    act as separators.
    """
    pieces = _ENTITY_SEPARATOR_RE.sub(",", ent).split(",")
    # Strip before filtering so whitespace-only pieces are dropped instead of
    # ending up as empty strings in the result.
    trimmed = (piece.strip() for piece in pieces)
    return [piece for piece in trimmed if piece]


def knowledge_cleansing(knowledge):
    """Normalise a knowledge statement.

    Strips surrounding whitespace, removes a leading "No, " and replaces
    every remaining whitespace character with a single space character.
    """
    knowledge = knowledge.strip()
    if knowledge.startswith(_NO_PREFIX):
        # Remove only the leading prefix; later "No, " occurrences are content.
        knowledge = knowledge[len(_NO_PREFIX):]
    return _WHITESPACE_CHAR_RE.sub(" ", knowledge)