Spaces:
Running
Running
update math
Browse files
tasks.py
CHANGED
@@ -269,7 +269,9 @@ class Metrics:
|
|
269 |
else:
|
270 |
ans = response[indices[-2] + 1 : indices[-1]]
|
271 |
extract_responses.append(strip_string(ans))
|
272 |
-
extract_answers
|
|
|
|
|
273 |
return extract_responses, extract_answers
|
274 |
|
275 |
|
|
|
269 |
else:
|
270 |
ans = response[indices[-2] + 1 : indices[-1]]
|
271 |
extract_responses.append(strip_string(ans))
|
272 |
+
extract_answers=[]
|
273 |
+
for answer in answers:
|
274 |
+
extract_answers.append(strip_string(get_answer(answer)))
|
275 |
return extract_responses, extract_answers
|
276 |
|
277 |
|
utils.py
CHANGED
@@ -9,6 +9,16 @@ NUMERIC_IN_ZH = (
|
|
9 |
)
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def extract_choice_zh(gen):
|
13 |
# 答案是A | 选项是A | 应该选A选项
|
14 |
res = re.search(
|
@@ -164,7 +174,7 @@ def remove_right_units(string):
|
|
164 |
# "\\text{ " only ever occurs (at least in the val set) when describing units
|
165 |
if "\\text{ " in string:
|
166 |
splits = string.split("\\text{ ")
|
167 |
-
assert len(splits) == 2
|
168 |
return splits[0]
|
169 |
else:
|
170 |
return string
|
@@ -278,16 +288,14 @@ def strip_string(string):
|
|
278 |
|
279 |
# NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y
|
280 |
# string = fix_a_slash_b(string)
|
281 |
-
|
|
|
|
|
282 |
return string
|
283 |
|
284 |
|
285 |
def get_answer(string):
|
286 |
-
|
287 |
-
answer = remove_boxed(last_boxed_only_string(string))
|
288 |
-
# answer = strip_string(answer)
|
289 |
-
except Exception:
|
290 |
-
answer = string
|
291 |
return answer
|
292 |
|
293 |
|
@@ -307,21 +315,22 @@ def is_equiv(str1, str2, verbose=False):
|
|
307 |
except Exception:
|
308 |
return str1 == str2
|
309 |
|
|
|
310 |
def first_option_postprocess(text: str, options: str) -> str:
|
311 |
"""Find first valid option for text."""
|
312 |
|
313 |
patterns = [
|
314 |
-
f
|
315 |
-
f
|
316 |
-
f
|
317 |
-
f
|
318 |
-
f
|
319 |
-
f
|
320 |
-
f
|
321 |
-
f
|
322 |
-
f
|
323 |
-
f
|
324 |
-
f
|
325 |
]
|
326 |
|
327 |
regexes = [re.compile(pattern) for pattern in patterns]
|
@@ -332,44 +341,41 @@ def first_option_postprocess(text: str, options: str) -> str:
|
|
332 |
for i in options:
|
333 |
if i in outputs:
|
334 |
return i
|
335 |
-
return
|
336 |
|
337 |
|
338 |
def first_capital_postprocess(text: str) -> str:
|
339 |
for t in text:
|
340 |
if t.isupper():
|
341 |
return t
|
342 |
-
return
|
343 |
|
344 |
|
345 |
def general_postprocess(text: str) -> str:
|
346 |
# Cut off the first newline, period, or comma
|
347 |
-
truncated_text = re.split(r
|
348 |
|
349 |
# Remove punctuation
|
350 |
-
no_punctuation = re.sub(r
|
351 |
|
352 |
# Remove article
|
353 |
-
no_articles = re.sub(r
|
354 |
-
'',
|
355 |
-
no_punctuation,
|
356 |
-
flags=re.IGNORECASE)
|
357 |
|
358 |
# Remove duplicated blank spaces
|
359 |
-
cleaned_text = re.sub(r
|
360 |
|
361 |
return cleaned_text
|
362 |
|
363 |
-
|
364 |
def bbh_mcq_postprocess(text: str) -> str:
|
365 |
ans = text
|
366 |
-
ans_line = ans.split(
|
367 |
if len(ans_line) != 1:
|
368 |
ans = ans_line[-1].strip()
|
369 |
-
match = re.search(r
|
370 |
if match:
|
371 |
return f"({match.group(1)})"
|
372 |
-
match = re.search(r
|
373 |
if match:
|
374 |
return f"({match.group(1)})"
|
375 |
return f"({ans})"
|
@@ -377,18 +383,17 @@ def bbh_mcq_postprocess(text: str) -> str:
|
|
377 |
|
378 |
def bbh_freeform_postprocess(text: str) -> str:
|
379 |
ans = text
|
380 |
-
ans_line = ans.split(
|
381 |
if len(ans_line) != 1:
|
382 |
ans = ans_line[-1].strip()
|
383 |
-
ans = ans.split(
|
384 |
-
if ans.endswith(
|
385 |
ans = ans[:-1]
|
386 |
return ans
|
387 |
|
388 |
|
389 |
-
|
390 |
icl_prompts = {
|
391 |
-
|
392 |
|
393 |
Q: Today, Emily went to the museum. Between what times could they have gone?
|
394 |
We know that:
|
@@ -465,9 +470,7 @@ Wake-up time: 5am.
|
|
465 |
3pm-4pm: free.
|
466 |
The beach closure time: 4pm.
|
467 |
The only time when Tiffany could have gone to the beach was 3pm to 4pm. So the answer is (D).""",
|
468 |
-
|
469 |
-
|
470 |
-
'disambiguation_qa': """Clarify the meaning of sentences with ambiguous pronouns.
|
471 |
|
472 |
Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous.
|
473 |
Sentence: The chief told the counselor that they took the day off.
|
@@ -504,9 +507,7 @@ Here we need to determine who the pronoun "his" might be referring to. There are
|
|
504 |
Let's consider Y first: "X will plan to meet Y at Y's office." This case makes sense, because X might want to meet up with Y at Y's office.
|
505 |
Now, consider X: "X will plan to meet Y at X's office." This case also makes sense, because X might want to meet up with Y at X's own office.
|
506 |
Because both X and Y are possible at the same time, we conclude that the antecedent of the pronoun is ambiguous. So the answer is (C).""",
|
507 |
-
|
508 |
-
|
509 |
-
'date_understanding': """Infer the date from context.
|
510 |
|
511 |
Q: Today is Christmas Eve of 1937. What is the date 10 days ago in MM/DD/YYYY?
|
512 |
Options:
|
@@ -539,9 +540,7 @@ Options:
|
|
539 |
(F) 12/03/1960
|
540 |
A: Let's think step by step.
|
541 |
If Jane and John married on Jan 2, 1958, then and if it is their 5-year anniversary today, then today's date is Jan 2, 1963. The date tomorrow is Jan 3, 1963, that is 01/03/1963. So the answer is (B).""",
|
542 |
-
|
543 |
-
|
544 |
-
'tracking_shuffled_objects_three_objects': """A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.
|
545 |
|
546 |
Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball.
|
547 |
As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the
|
@@ -581,9 +580,7 @@ A: Let's think step by step.
|
|
581 |
(2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola.
|
582 |
(3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola.
|
583 |
At the end of the dance, Alice is dancing with Patrick. So the answer is (C).""",
|
584 |
-
|
585 |
-
|
586 |
-
'penguins_in_a_table': """Answer questions about a table of penguins and their attributes.
|
587 |
|
588 |
Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. We now add a penguin to the table:
|
589 |
James, 12, 90, 12
|
@@ -624,9 +621,7 @@ This question focuses on the name. We know the following: The names of the pengu
|
|
624 |
When we sort their names alphabetically, we get Bernard, Gwen, Louis, Vincent.
|
625 |
The name of the second penguin sorted by alphabetical order is Gwen.
|
626 |
The name of the second penguin sorted by alphabetic order is Gwen. So the answer is (D).""",
|
627 |
-
|
628 |
-
|
629 |
-
'geometric_shapes': """Name geometric shapes from their SVG paths.
|
630 |
|
631 |
Q: This SVG path element <path d="M 31.00,73.00 L 32.00,59.00 L 44.00,50.00 L 49.00,41.00 L 64.00,37.00 L 71.00,55.00 L 64.00,76.00 L 52.00,61.00 L 31.00,73.00"/> draws a
|
632 |
Options:
|
@@ -704,9 +699,7 @@ Length of side B: |B| = sqrt((37.00-41.00)^2 + (34.00-33.00)^2)) = sqrt((4)^2 +
|
|
704 |
Length of side C: |C| = sqrt((41.00-45.00)^2 + (33.00-34.00)^2)) = sqrt((-4)^2 + (-1)^2) = sqrt(16 + 1) = sqrt(17).
|
705 |
Length of side D: |D| = sqrt((45.00-41.00)^2 + (34.00-43.00)^2)) = sqrt((4)^2 + (-9)^2) = sqrt(16 + 81) = sqrt(97).
|
706 |
Note that |A| = |D| and |B| = |C|. Furthermore, A and D are adjacent and B and C are adjacent. Thus, this polygon has two pairs of equal-length adjacent sides and is "kite". So the answer is (D).""",
|
707 |
-
|
708 |
-
|
709 |
-
'snarks': """Determine which of two sentences is sarcastic.
|
710 |
|
711 |
According to Cambridge University Dictionary, sarcasm is "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way." Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.
|
712 |
|
@@ -736,9 +729,7 @@ A: Let's think step by step.
|
|
736 |
If we look at (A), it likens the consistency in the league's punishments with that in politics. Because politics or political affairs are often not considered to be consistent or dependable, this sentence appears to be satirical.
|
737 |
If we look at (B), it likens the consistency in the league's punishments with that in morality. Discussing the consistency of the league's punishments in the context of morality, ethics, or law makes sense and does not appear to make a satirical point about anything.
|
738 |
Above the above, the sarcastic option is (A). So the answer is (A).""",
|
739 |
-
|
740 |
-
|
741 |
-
'ruin_names': """Select the humorous edit that 'ruins' the input movie or musical artist name.
|
742 |
|
743 |
Q: Which of the following is a humorous edit of this artist or movie name: 'whitesnake'?
|
744 |
Options:
|
@@ -782,9 +773,7 @@ The original name is "counting crows". This is the name of an American rock band
|
|
782 |
(C) "courting crows": Here the word "counting" is changed to "courting", and "courting" is an actual word; however, "courting crows" does not sound as humorous as "counting cows".
|
783 |
(D) "coutnting crows": Here the word "counting" is changed to "coutnting", but the word "coutnting" is not an actual word.
|
784 |
Above the above, the only humorous edit is (B). So the answer is (B).""",
|
785 |
-
|
786 |
-
|
787 |
-
'tracking_shuffled_objects_seven_objects': """A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.
|
788 |
|
789 |
Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball.
|
790 |
As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the
|
@@ -824,9 +813,7 @@ A: Let's think step by step.
|
|
824 |
(2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola.
|
825 |
(3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola.
|
826 |
At the end of the dance, Alice is dancing with Patrick. So the answer is (C).""",
|
827 |
-
|
828 |
-
|
829 |
-
'tracking_shuffled_objects_five_objects': """A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.
|
830 |
|
831 |
Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball.
|
832 |
As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the
|
@@ -866,9 +853,7 @@ A: Let's think step by step.
|
|
866 |
(2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola.
|
867 |
(3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola.
|
868 |
At the end of the dance, Alice is dancing with Patrick. So the answer is (C).""",
|
869 |
-
|
870 |
-
|
871 |
-
'logical_deduction_three_objects': """A logical deduction task which requires deducing the order of a sequence of objects.
|
872 |
|
873 |
Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy.
|
874 |
Options:
|
@@ -905,9 +890,7 @@ A: Let's think step by step.
|
|
905 |
(3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)".
|
906 |
According to this ordering, the leftmost book is the white book.
|
907 |
The white book is the leftmost. So the answer is (C).""",
|
908 |
-
|
909 |
-
|
910 |
-
'hyperbaton': """Order adjectives correctly in English sentences.
|
911 |
|
912 |
Q: Which sentence has the correct adjective order:
|
913 |
Options:
|
@@ -935,9 +918,7 @@ A: Let's think step by step.
|
|
935 |
When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. purpose] noun".
|
936 |
Option (A): "blue gold wonderful square shoe". (1) "blue" falls into the color category. (2) "gold" falls into the material category. (3) "wonderful" falls into the opinion category. (4) "square" falls into the shape category. The adjective order that Option (A) has is [5. color] [7. material] [1. opinion] [4. shape] (or, in numeric terms, 5 7 1 4). Because 5 < 7 < 1 < 4 is not correct, (A) does not have the correct ordering.
|
937 |
Option (B): "wonderful square blue gold shoe". Option (B) has the following adjective order: [1. opinion] [4. shape] [5. color] [7. material] (or, in numeric terms, 1 4 5 7 ). Because 1 < 4 < 5 < 7 is correct, (B) has the correct ordering. So the answer is (B).""",
|
938 |
-
|
939 |
-
|
940 |
-
'logical_deduction_five_objects': """A logical deduction task which requires deducing the order of a sequence of objects.
|
941 |
|
942 |
Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy.
|
943 |
Options:
|
@@ -974,9 +955,7 @@ A: Let's think step by step.
|
|
974 |
(3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)".
|
975 |
According to this ordering, the leftmost book is the white book.
|
976 |
The white book is the leftmost. So the answer is (C).""",
|
977 |
-
|
978 |
-
|
979 |
-
'logical_deduction_seven_objects': """A logical deduction task which requires deducing the order of a sequence of objects.
|
980 |
|
981 |
Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy.
|
982 |
Options:
|
@@ -1013,9 +992,7 @@ A: Let's think step by step.
|
|
1013 |
(3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)".
|
1014 |
According to this ordering, the leftmost book is the white book.
|
1015 |
The white book is the leftmost. So the answer is (C).""",
|
1016 |
-
|
1017 |
-
|
1018 |
-
'movie_recommendation': """Recommend movies similar to the given list of movies.
|
1019 |
|
1020 |
Q: Find a movie similar to Star Wars Episode IV - A New Hope, Indiana Jones and the Last Crusade, Star Wars Episode V - The Empire Strikes Back, The Big Lebowski:
|
1021 |
Options:
|
@@ -1057,9 +1034,7 @@ A: Let's think step by step.
|
|
1057 |
- Inside Out (animation, family, comedy; 2015)
|
1058 |
- Forrest Gump (comedy, drama, romance; 1994)
|
1059 |
These are all famous movies produced in the past few decades.Amongst all the options, the only movie similar to these ones seems to be Edge of Tomorrow (action, adventure, crime, mystery; 2014), as it is also a science-fiction movie and features Tom Cruise. So the answer is (D).""",
|
1060 |
-
|
1061 |
-
|
1062 |
-
'salient_translation_error_detection': """Detect the type of error in an English translation of a German source sentence.
|
1063 |
|
1064 |
Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: In der Liste der Baudenkmale in Lenzen (Elbe) sind alle Baudenkmale der brandenburgischen Stadt Lenzen (Elbe) und ihrer Ortsteile aufgelistet.
|
1065 |
Translation: In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed.
|
@@ -1099,9 +1074,7 @@ Options:
|
|
1099 |
(F) Facts
|
1100 |
A: Let's think step by step.
|
1101 |
We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "Łeba is a small town and seaside resort in the Powiat Lęborski of the Polish Pomeranian Voivodeship." On the other hand, the provided translation is "Łeba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland." Note that the provided sentence says, "Łeba is not a small town ..." However, the translation should have been "Łeba is a small town ..." Because a negation is introduced at the beginning of the sentence and has fundamentally changed the meaning of the original source, the translation contains an error pertaining to Negation or Antonyms. So the answer is (C).""",
|
1102 |
-
|
1103 |
-
|
1104 |
-
'reasoning_about_colored_objects': """Answer extremely simple questions about the colors of objects on a surface.
|
1105 |
|
1106 |
Q: On the nightstand, there is a red pencil, a purple mug, a burgundy keychain, a fuchsia teddy bear, a black plate, and a blue stress ball. What color is the stress ball?
|
1107 |
Options:
|
@@ -1164,8 +1137,7 @@ A: Let's think step by step.
|
|
1164 |
According to this question, the objects are arranged in a row, from left to right, as follows: (1) a teal plate, (2) a burgundy keychain, (3) a yellow scrunchiephone charger, (4) an orange mug, (5) a pink notebook, (6) a grey cup.
|
1165 |
The teal plate is the first item, namely (1). There is no item to the left of the teal item.
|
1166 |
The number of non-orange items to the left of the teal item is zero. So the answer is (A).""",
|
1167 |
-
|
1168 |
-
'multistep_arithmetic_two': """Solve multi-step arithmetic problems.
|
1169 |
|
1170 |
Q: ((-5 + 9 * -4 - 0) * (4 + -7 + 0 * -5)) =
|
1171 |
A: Let's think step by step.
|
@@ -1190,9 +1162,7 @@ This equation can be written as "A - B", where A = (-3 + 5 * 8 * -4) and B = (9
|
|
1190 |
Let's calculate A = (-3 + 5 * 8 * -4) = (-3 + (5 * 8) * -4) = (-3 + (40) * -4) = (-3 + (40 * -4)) = (-3 + -160) = -163.
|
1191 |
Let's calculate B = (9 - 8 * -7 + -9) = (9 - (8 * -7) + -9) = (9 - (-56) + -9) = ((9 - (-56)) + -9) = ((65) + -9)= (65 - 9) = 56.
|
1192 |
Then, the final equation is A - B = -163 - 56 = -219. So the answer is -219.""",
|
1193 |
-
|
1194 |
-
|
1195 |
-
'navigate': """Given a series of navigation instructions, determine whether one would end up back at the starting point.
|
1196 |
|
1197 |
Q: If you follow these instructions, do you return to the starting point? Turn left. Turn around. Turn left. Take 7 steps. Take 2 steps. Take 4 steps. Take 8 steps.
|
1198 |
Options:
|
@@ -1235,9 +1205,7 @@ We start at the origin (0, 0), facing the positive y-axis.
|
|
1235 |
(3) Take 9 steps left: (0, -7), facing the positive y-axis.
|
1236 |
(4) Take 7 steps right: (0, 7), facing the positive y-axis.
|
1237 |
Since (0, 0) is (0, 0), we are indeed where we started. So the answer is Yes.""",
|
1238 |
-
|
1239 |
-
|
1240 |
-
'dyck_languages': """Correctly close a Dyck-n word.
|
1241 |
|
1242 |
Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: [ { [
|
1243 |
A: Let's think step by step.
|
@@ -1309,9 +1277,7 @@ We should process each input one by one and keep track of the stack configuratio
|
|
1309 |
Now, we have reached the end. The final stack is "< [ {".
|
1310 |
We will need to pop out "{", "[", "<" one by one in that order.
|
1311 |
So, we need "}", "]", ">". So the answer is } ] >.""",
|
1312 |
-
|
1313 |
-
|
1314 |
-
'word_sorting': """Sort a list of words.
|
1315 |
|
1316 |
Q: Sort the following words alphabetically: List: oven costume counterpart
|
1317 |
A: Let's think step by step.
|
@@ -1328,9 +1294,7 @@ Q: Sort the following words alphabetically: List: newt arson parthia seismograph
|
|
1328 |
A: Let's think step by step.
|
1329 |
The first letter: "newt": "n" (14). "arson": "a" (1). "parthia": "p" (16). "seismography": "s" (19). "mugho": "m" (13). "aspect": "a" (1). "census": "c" (3). We now have: (1) ["arson" ? "aspect"] < (3) "census" < (13) "mugho" < (14) "newt" < (16) "parthia" < (19) "seismography". Now let's sort this subpart ["arson" ? "aspect"] by looking at their second letters.
|
1330 |
The second letter: "arson": "r" (18). "aspect": "s" (19). We now have: (18) "arson" < (19) "aspect". Hence, we have ["arson" < "aspect"] < "census" < "mugho" < "newt" < "parthia" < "seismography". So the answer is arson aspect census mugho newt parthia seismography.""",
|
1331 |
-
|
1332 |
-
|
1333 |
-
'sports_understanding': """Determine whether an artificially constructed sentence relating to sports is plausible or not.
|
1334 |
|
1335 |
Q: Is the following sentence plausible? "Bam Adebayo scored a reverse layup in the Western Conference Finals."
|
1336 |
A: Let's think step by step. Bam Adebayo is an American basketball player. Scoring a reverse layup in the Western Conference Finals is part of the NBA Finals. So the answer is yes.
|
@@ -1340,9 +1304,7 @@ A: Let's think step by step. Santi Cazorla is a soccer player. Touchdown is part
|
|
1340 |
|
1341 |
Q: Is the following sentence plausible? "DeMar DeRozan was called for the goal tend."
|
1342 |
A: Let's think step by step. DeMar DeRozan is an American basketball player. Goal tending is part of basketball. So the answer is yes.""",
|
1343 |
-
|
1344 |
-
|
1345 |
-
'boolean_expressions': """Evaluate the result of a random Boolean expression.
|
1346 |
|
1347 |
Q: not ( ( not not True ) ) is
|
1348 |
A: Let's think step by step.
|
@@ -1365,9 +1327,7 @@ Remember that (i) expressions inside brackets are always evaluated first and tha
|
|
1365 |
We first simplify this expression "Z" as follows: "Z = not not ( not ( False ) ) = not not ( A )" where "A = not ( False )".
|
1366 |
Let's evaluate A: A = not ( False ) = not False = True.
|
1367 |
Plugging in A, we get: Z = not not ( A ) = not not (True) = not not False = True. So the answer is True.""",
|
1368 |
-
|
1369 |
-
|
1370 |
-
'object_counting': """Questions that involve enumerating objects and asking the model to count them.
|
1371 |
|
1372 |
Q: I have a blackberry, a clarinet, a nectarine, a plum, a strawberry, a banana, a flute, an orange, and a violin. How many fruits do I have?
|
1373 |
A: Let's think step by step.
|
@@ -1404,9 +1364,7 @@ We first identify the vegetables on the list and include their quantity in paren
|
|
1404 |
- garlic (1)
|
1405 |
- yam (1)
|
1406 |
Now, let's add the numbers in parentheses: 1 + 1 + 1 + 1 + 2 + 1 + 1 = 8. So the answer is 8.""",
|
1407 |
-
|
1408 |
-
|
1409 |
-
'formal_fallacies': """Distinguish deductively valid arguments from formal fallacies.
|
1410 |
|
1411 |
Q: "It is not always easy to see who is related to whom -- and in which ways. The following argument pertains to this question: To begin with, Lesley is a close friend of Fernando. Moreover, being a close friend of Fernando or a schoolmate of Lowell is sufficient for being a great-grandfather of Leroy. It follows that Lesley is a great-grandfather of Leroy."
|
1412 |
Is the argument, given the explicitly stated premises, deductively valid or invalid?
|
@@ -1450,9 +1408,7 @@ By (1), we have if X = infrequent-user(Paul Mitchell), then X = rare-consumer(Ni
|
|
1450 |
The case X = rare-consumer(Nioxin) does not appear in (2).
|
1451 |
The case X = loyal-buyer(Caress) does not appear in (2), either.
|
1452 |
So, from (1) and (2), we cannot necessarily deduce the Hypothesis. So the answer is invalid.""",
|
1453 |
-
|
1454 |
-
|
1455 |
-
'causal_judgement': """Answer questions about causal attribution.
|
1456 |
|
1457 |
Q: How would a typical person answer each of the following questions about causation?
|
1458 |
Frank T., had an ongoing dispute with his neighbor over a stretch of land and one day decided to shoot his neighbor in the body. Frank T. had no experience with guns, his hand slipped on the barrel of the gun, and the shot went wild. Nonetheless, the bullet bounced off a large boulder several feet away and hit the neighbor's body, causing significant injury. Did Frank T. intentionally shoot his neighbor in the body?
|
@@ -1477,7 +1433,7 @@ Options:
|
|
1477 |
- No
|
1478 |
A: Let's think step by step.
|
1479 |
Here in this question, we are told that "He aims the dart at the low point region." A typical person might therefore think George did intentionally hit the low point region, because he wanted to lift up the spirit of his sister Lena. So the answer is Yes.""",
|
1480 |
-
|
1481 |
|
1482 |
Q: Question: Fidel tells the truth. Jerry says Fidel tells the truth. Vina says Jerry tells the truth. Millicent says Vina lies. Raymond says Millicent lies. Does Raymond tell the truth?
|
1483 |
A: Let's think step by step.
|
|
|
9 |
)
|
10 |
|
11 |
|
12 |
+
def return_blank_if_exception(func):
    """Decorate ``func`` so that any ``Exception`` it raises yields ``""``.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result, or the empty string if ``func``
        raises an ``Exception`` subclass. Exceptions that do not derive from
        ``Exception`` (e.g. ``KeyboardInterrupt``) still propagate.
    """
    # Imported locally so this block does not rely on the module's import list.
    import functools

    # functools.wraps keeps the wrapped function's __name__/__doc__ so that
    # decorated helpers remain identifiable in tracebacks and introspection.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            # Deliberate best-effort: a failure is reported as a blank result.
            return ""

    return wrapper
|
20 |
+
|
21 |
+
|
22 |
def extract_choice_zh(gen):
|
23 |
# 答案是A | 选项是A | 应该选A选项
|
24 |
res = re.search(
|
|
|
174 |
# "\\text{ " only ever occurs (at least in the val set) when describing units
|
175 |
if "\\text{ " in string:
|
176 |
splits = string.split("\\text{ ")
|
177 |
+
# assert len(splits) == 2
|
178 |
return splits[0]
|
179 |
else:
|
180 |
return string
|
|
|
288 |
|
289 |
# NOTE: X/Y changed to \frac{X}{Y} in dataset, but in simple cases fix in case the model output is X/Y
|
290 |
# string = fix_a_slash_b(string)
|
291 |
+
if string.startswith("\\boxed{") and string.endswith("}"):
|
292 |
+
string = string[7:-1]
|
293 |
+
string = string.split("=")[-1]
|
294 |
return string
|
295 |
|
296 |
|
297 |
def get_answer(string):
    """Return the content of the last boxed expression found in ``string``.

    The input is first passed to ``last_boxed_only_string`` and the result is
    then passed to ``remove_boxed``; whatever ``remove_boxed`` returns is
    returned unchanged. Any exception raised by either helper propagates to
    the caller.
    """
    boxed = last_boxed_only_string(string)
    return remove_boxed(boxed)
|
300 |
|
301 |
|
|
|
315 |
except Exception:
|
316 |
return str1 == str2
|
317 |
|
318 |
+
|
319 |
def first_option_postprocess(text: str, options: str) -> str:
|
320 |
"""Find first valid option for text."""
|
321 |
|
322 |
patterns = [
|
323 |
+
f"[Tt]he answer is [{options}]",
|
324 |
+
f"[Tt]he correct answer\s?(?:option)?\s?is [{options}]", # noqa
|
325 |
+
f"答案(?:选项)?是(.*?)[{options}]",
|
326 |
+
f"答案(?:选项)?为(.*?)[{options}]",
|
327 |
+
f"答案(?:选项)?选(.*?)[{options}]",
|
328 |
+
f"选项[{options}]是?正确",
|
329 |
+
f"选项[{options}]为?正确",
|
330 |
+
f"固选(.*?)[{options}]",
|
331 |
+
f"答案应该是(.*?)[{options}]",
|
332 |
+
f"(\s|^)[{options}][\s。,,\.$]", # noqa
|
333 |
+
f"[{options}]",
|
334 |
]
|
335 |
|
336 |
regexes = [re.compile(pattern) for pattern in patterns]
|
|
|
341 |
for i in options:
|
342 |
if i in outputs:
|
343 |
return i
|
344 |
+
return ""
|
345 |
|
346 |
|
347 |
def first_capital_postprocess(text: str) -> str:
    """Return the first uppercase character of ``text``.

    Characters are scanned left to right using ``str.isupper``. If no
    character is uppercase (including when ``text`` is empty), the empty
    string is returned.
    """
    return next((char for char in text if char.isupper()), "")
|
352 |
|
353 |
|
354 |
def general_postprocess(text: str) -> str:
    """Normalize a free-form answer for comparison.

    Steps, applied in order:
      1. Keep only the text before the first newline, period, or comma.
      2. Drop every character that is neither a word character nor whitespace.
      3. Remove the standalone words "a", "an" and "the" (case-insensitive).
      4. Collapse runs of whitespace to a single space and strip both ends.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; may be the empty string.
    """
    # Cut off at the first newline, period, or comma. ``maxsplit`` is passed
    # by keyword: the positional form is deprecated and is easily confused
    # with ``flags``.
    truncated_text = re.split(r"[\n.,]", text, maxsplit=1)[0]

    # Remove punctuation
    no_punctuation = re.sub(r"[^\w\s]", "", truncated_text)

    # Remove article
    no_articles = re.sub(r"\b(a|an|the)\b", "", no_punctuation, flags=re.IGNORECASE)

    # Remove duplicated blank spaces
    cleaned_text = re.sub(r"\s+", " ", no_articles).strip()

    return cleaned_text
|
368 |
|
369 |
+
|
370 |
def bbh_mcq_postprocess(text: str) -> str:
    """Extract a multiple-choice letter from ``text`` and return it as ``"(X)"``.

    If ``text`` contains ``"answer is "``, only the stripped text after its
    last occurrence is examined; otherwise the whole ``text`` is examined
    as-is. The first capital letter preceded by ``"("`` wins; failing that,
    the first capital letter anywhere; failing that, the examined text itself
    is wrapped in parentheses.
    """
    _, marker, tail = text.rpartition("answer is ")
    candidate = tail.strip() if marker else text

    # Try the parenthesized form first, then any bare capital letter.
    for pattern in (r"\(([A-Z])\)*", r"([A-Z])"):
        found = re.search(pattern, candidate)
        if found:
            return f"({found.group(1)})"
    return f"({candidate})"
|
|
|
383 |
|
384 |
def bbh_freeform_postprocess(text: str) -> str:
    """Extract a free-form answer from model output.

    If ``text`` contains ``"answer is "``, the stripped text after its last
    occurrence is used; otherwise ``text`` is used unchanged.  The result is
    then cut at the first newline, and one trailing period is removed.

    Args:
        text: Model output to parse.

    Returns:
        The extracted answer; may be empty.
    """
    _, marker, tail = text.rpartition("answer is ")
    answer = tail.strip() if marker else text

    # Keep only the first line of the answer.
    answer = answer.partition("\n")[0]

    # Drop a single sentence-ending period, if present.
    if answer.endswith("."):
        answer = answer[:-1]
    return answer
|
393 |
|
394 |
|
|
|
395 |
icl_prompts = {
|
396 |
+
"temporal_sequences": """Task description: Answer questions about which times certain events could have occurred.
|
397 |
|
398 |
Q: Today, Emily went to the museum. Between what times could they have gone?
|
399 |
We know that:
|
|
|
470 |
3pm-4pm: free.
|
471 |
The beach closure time: 4pm.
|
472 |
The only time when Tiffany could have gone to the beach was 3pm to 4pm. So the answer is (D).""",
|
473 |
+
"disambiguation_qa": """Clarify the meaning of sentences with ambiguous pronouns.
|
|
|
|
|
474 |
|
475 |
Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous.
|
476 |
Sentence: The chief told the counselor that they took the day off.
|
|
|
507 |
Let's consider Y first: "X will plan to meet Y at Y's office." This case makes sense, because X might want to meet up with Y at Y's office.
|
508 |
Now, consider X: "X will plan to meet Y at X's office." This case also makes sense, because X might want to meet up with Y at X's own office.
|
509 |
Because both X and Y are possible at the same time, we conclude that the antecedent of the pronoun is ambiguous. So the answer is (C).""",
|
510 |
+
"date_understanding": """Infer the date from context.
|
|
|
|
|
511 |
|
512 |
Q: Today is Christmas Eve of 1937. What is the date 10 days ago in MM/DD/YYYY?
|
513 |
Options:
|
|
|
540 |
(F) 12/03/1960
|
541 |
A: Let's think step by step.
|
542 |
If Jane and John married on Jan 2, 1958, then and if it is their 5-year anniversary today, then today's date is Jan 2, 1963. The date tomorrow is Jan 3, 1963, that is 01/03/1963. So the answer is (B).""",
|
543 |
+
"tracking_shuffled_objects_three_objects": """A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.
|
|
|
|
|
544 |
|
545 |
Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball.
|
546 |
As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the
|
|
|
580 |
(2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola.
|
581 |
(3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola.
|
582 |
At the end of the dance, Alice is dancing with Patrick. So the answer is (C).""",
|
583 |
+
"penguins_in_a_table": """Answer questions about a table of penguins and their attributes.
|
|
|
|
|
584 |
|
585 |
Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. We now add a penguin to the table:
|
586 |
James, 12, 90, 12
|
|
|
621 |
When we sort their names alphabetically, we get Bernard, Gwen, Louis, Vincent.
|
622 |
The name of the second penguin sorted by alphabetical order is Gwen.
|
623 |
The name of the second penguin sorted by alphabetic order is Gwen. So the answer is (D).""",
|
624 |
+
"geometric_shapes": """Name geometric shapes from their SVG paths.
|
|
|
|
|
625 |
|
626 |
Q: This SVG path element <path d="M 31.00,73.00 L 32.00,59.00 L 44.00,50.00 L 49.00,41.00 L 64.00,37.00 L 71.00,55.00 L 64.00,76.00 L 52.00,61.00 L 31.00,73.00"/> draws a
|
627 |
Options:
|
|
|
699 |
Length of side C: |C| = sqrt((41.00-45.00)^2 + (33.00-34.00)^2)) = sqrt((-4)^2 + (-1)^2) = sqrt(16 + 1) = sqrt(17).
|
700 |
Length of side D: |D| = sqrt((45.00-41.00)^2 + (34.00-43.00)^2)) = sqrt((4)^2 + (-9)^2) = sqrt(16 + 81) = sqrt(97).
|
701 |
Note that |A| = |D| and |B| = |C|. Furthermore, A and D are adjacent and B and C are adjacent. Thus, this polygon has two pairs of equal-length adjacent sides and is "kite". So the answer is (D).""",
|
702 |
+
"snarks": """Determine which of two sentences is sarcastic.
|
|
|
|
|
703 |
|
704 |
According to Cambridge University Dictionary, sarcasm is "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way." Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.
|
705 |
|
|
|
729 |
If we look at (A), it likens the consistency in the league's punishments with that in politics. Because politics or political affairs are often not considered to be consistent or dependable, this sentence appears to be satirical.
|
730 |
If we look at (B), it likens the consistency in the league's punishments with that in morality. Discussing the consistency of the league's punishments in the context of morality, ethics, or law makes sense and does not appear to make a satirical point about anything.
|
731 |
Above the above, the sarcastic option is (A). So the answer is (A).""",
|
732 |
+
"ruin_names": """Select the humorous edit that 'ruins' the input movie or musical artist name.
|
|
|
|
|
733 |
|
734 |
Q: Which of the following is a humorous edit of this artist or movie name: 'whitesnake'?
|
735 |
Options:
|
|
|
773 |
(C) "courting crows": Here the word "counting" is changed to "courting", and "courting" is an actual word; however, "courting crows" does not sound as humorous as "counting cows".
|
774 |
(D) "coutnting crows": Here the word "counting" is changed to "coutnting", but the word "coutnting" is not an actual word.
|
775 |
Above the above, the only humorous edit is (B). So the answer is (B).""",
|
776 |
+
"tracking_shuffled_objects_seven_objects": """A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.
|
|
|
|
|
777 |
|
778 |
Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball.
|
779 |
As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the
|
|
|
813 |
(2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola.
|
814 |
(3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola.
|
815 |
At the end of the dance, Alice is dancing with Patrick. So the answer is (C).""",
|
816 |
+
"tracking_shuffled_objects_five_objects": """A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps.
|
|
|
|
|
817 |
|
818 |
Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball.
|
819 |
As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the
|
|
|
853 |
(2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola.
|
854 |
(3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola.
|
855 |
At the end of the dance, Alice is dancing with Patrick. So the answer is (C).""",
|
856 |
+
"logical_deduction_three_objects": """A logical deduction task which requires deducing the order of a sequence of objects.
|
|
|
|
|
857 |
|
858 |
Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy.
|
859 |
Options:
|
|
|
890 |
(3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)".
|
891 |
According to this ordering, the leftmost book is the white book.
|
892 |
The white book is the leftmost. So the answer is (C).""",
|
893 |
+
"hyperbaton": """Order adjectives correctly in English sentences.
|
|
|
|
|
894 |
|
895 |
Q: Which sentence has the correct adjective order:
|
896 |
Options:
|
|
|
918 |
When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. purpose] noun".
|
919 |
Option (A): "blue gold wonderful square shoe". (1) "blue" falls into the color category. (2) "gold" falls into the material category. (3) "wonderful" falls into the opinion category. (4) "square" falls into the shape category. The adjective order that Option (A) has is [5. color] [7. material] [1. opinion] [4. shape] (or, in numeric terms, 5 7 1 4). Because 5 < 7 < 1 < 4 is not correct, (A) does not have the correct ordering.
|
920 |
Option (B): "wonderful square blue gold shoe". Option (B) has the following adjective order: [1. opinion] [4. shape] [5. color] [7. material] (or, in numeric terms, 1 4 5 7 ). Because 1 < 4 < 5 < 7 is correct, (B) has the correct ordering. So the answer is (B).""",
|
921 |
+
"logical_deduction_five_objects": """A logical deduction task which requires deducing the order of a sequence of objects.
|
|
|
|
|
922 |
|
923 |
Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy.
|
924 |
Options:
|
|
|
955 |
(3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)".
|
956 |
According to this ordering, the leftmost book is the white book.
|
957 |
The white book is the leftmost. So the answer is (C).""",
|
958 |
+
"logical_deduction_seven_objects": """A logical deduction task which requires deducing the order of a sequence of objects.
|
|
|
|
|
959 |
|
960 |
Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy.
|
961 |
Options:
|
|
|
992 |
(3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)".
|
993 |
According to this ordering, the leftmost book is the white book.
|
994 |
The white book is the leftmost. So the answer is (C).""",
|
995 |
+
"movie_recommendation": """Recommend movies similar to the given list of movies.
|
|
|
|
|
996 |
|
997 |
Q: Find a movie similar to Star Wars Episode IV - A New Hope, Indiana Jones and the Last Crusade, Star Wars Episode V - The Empire Strikes Back, The Big Lebowski:
|
998 |
Options:
|
|
|
1034 |
- Inside Out (animation, family, comedy; 2015)
|
1035 |
- Forrest Gump (comedy, drama, romance; 1994)
|
1036 |
These are all famous movies produced in the past few decades.Amongst all the options, the only movie similar to these ones seems to be Edge of Tomorrow (action, adventure, crime, mystery; 2014), as it is also a science-fiction movie and features Tom Cruise. So the answer is (D).""",
|
1037 |
+
"salient_translation_error_detection": """Detect the type of error in an English translation of a German source sentence.
|
|
|
|
|
1038 |
|
1039 |
Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: In der Liste der Baudenkmale in Lenzen (Elbe) sind alle Baudenkmale der brandenburgischen Stadt Lenzen (Elbe) und ihrer Ortsteile aufgelistet.
|
1040 |
Translation: In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed.
|
|
|
1074 |
(F) Facts
|
1075 |
A: Let's think step by step.
|
1076 |
We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "Łeba is a small town and seaside resort in the Powiat Lęborski of the Polish Pomeranian Voivodeship." On the other hand, the provided translation is "Łeba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland." Note that the provided sentence says, "Łeba is not a small town ..." However, the translation should have been "Łeba is a small town ..." Because a negation is introduced at the beginning of the sentence and has fundamentally changed the meaning of the original source, the translation contains an error pertaining to Negation or Antonyms. So the answer is (C).""",
|
1077 |
+
"reasoning_about_colored_objects": """Answer extremely simple questions about the colors of objects on a surface.
|
|
|
|
|
1078 |
|
1079 |
Q: On the nightstand, there is a red pencil, a purple mug, a burgundy keychain, a fuchsia teddy bear, a black plate, and a blue stress ball. What color is the stress ball?
|
1080 |
Options:
|
|
|
1137 |
According to this question, the objects are arranged in a row, from left to right, as follows: (1) a teal plate, (2) a burgundy keychain, (3) a yellow scrunchiephone charger, (4) an orange mug, (5) a pink notebook, (6) a grey cup.
|
1138 |
The teal plate is the first item, namely (1). There is no item to the left of the teal item.
|
1139 |
The number of non-orange items to the left of the teal item is zero. So the answer is (A).""",
|
1140 |
+
"multistep_arithmetic_two": """Solve multi-step arithmetic problems.
|
|
|
1141 |
|
1142 |
Q: ((-5 + 9 * -4 - 0) * (4 + -7 + 0 * -5)) =
|
1143 |
A: Let's think step by step.
|
|
|
1162 |
Let's calculate A = (-3 + 5 * 8 * -4) = (-3 + (5 * 8) * -4) = (-3 + (40) * -4) = (-3 + (40 * -4)) = (-3 + -160) = -163.
|
1163 |
Let's calculate B = (9 - 8 * -7 + -9) = (9 - (8 * -7) + -9) = (9 - (-56) + -9) = ((9 - (-56)) + -9) = ((65) + -9)= (65 - 9) = 56.
|
1164 |
Then, the final equation is A - B = -163 - 56 = -219. So the answer is -219.""",
|
1165 |
+
"navigate": """Given a series of navigation instructions, determine whether one would end up back at the starting point.
|
|
|
|
|
1166 |
|
1167 |
Q: If you follow these instructions, do you return to the starting point? Turn left. Turn around. Turn left. Take 7 steps. Take 2 steps. Take 4 steps. Take 8 steps.
|
1168 |
Options:
|
|
|
1205 |
(3) Take 9 steps left: (0, -7), facing the positive y-axis.
|
1206 |
(4) Take 7 steps right: (0, 7), facing the positive y-axis.
|
1207 |
Since (0, 0) is (0, 0), we are indeed where we started. So the answer is Yes.""",
|
1208 |
+
"dyck_languages": """Correctly close a Dyck-n word.
|
|
|
|
|
1209 |
|
1210 |
Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: [ { [
|
1211 |
A: Let's think step by step.
|
|
|
1277 |
Now, we have reached the end. The final stack is "< [ {".
|
1278 |
We will need to pop out "{", "[", "<" one by one in that order.
|
1279 |
So, we need "}", "]", ">". So the answer is } ] >.""",
|
1280 |
+
"word_sorting": """Sort a list of words.
|
|
|
|
|
1281 |
|
1282 |
Q: Sort the following words alphabetically: List: oven costume counterpart
|
1283 |
A: Let's think step by step.
|
|
|
1294 |
A: Let's think step by step.
|
1295 |
The first letter: "newt": "n" (14). "arson": "a" (1). "parthia": "p" (16). "seismography": "s" (19). "mugho": "m" (13). "aspect": "a" (1). "census": "c" (3). We now have: (1) ["arson" ? "aspect"] < (3) "census" < (13) "mugho" < (14) "newt" < (16) "parthia" < (19) "seismography". Now let's sort this subpart ["arson" ? "aspect"] by looking at their second letters.
|
1296 |
The second letter: "arson": "r" (18). "aspect": "s" (19). We now have: (18) "arson" < (19) "aspect". Hence, we have ["arson" < "aspect"] < "census" < "mugho" < "newt" < "parthia" < "seismography". So the answer is arson aspect census mugho newt parthia seismography.""",
|
1297 |
+
"sports_understanding": """Determine whether an artificially constructed sentence relating to sports is plausible or not.
|
|
|
|
|
1298 |
|
1299 |
Q: Is the following sentence plausible? "Bam Adebayo scored a reverse layup in the Western Conference Finals."
|
1300 |
A: Let's think step by step. Bam Adebayo is an American basketball player. Scoring a reverse layup in the Western Conference Finals is part of the NBA Finals. So the answer is yes.
|
|
|
1304 |
|
1305 |
Q: Is the following sentence plausible? "DeMar DeRozan was called for the goal tend."
|
1306 |
A: Let's think step by step. DeMar DeRozan is an American basketball player. Goal tending is part of basketball. So the answer is yes.""",
|
1307 |
+
"boolean_expressions": """Evaluate the result of a random Boolean expression.
|
|
|
|
|
1308 |
|
1309 |
Q: not ( ( not not True ) ) is
|
1310 |
A: Let's think step by step.
|
|
|
1327 |
We first simplify this expression "Z" as follows: "Z = not not ( not ( False ) ) = not not ( A )" where "A = not ( False )".
|
1328 |
Let's evaluate A: A = not ( False ) = not False = True.
|
1329 |
Plugging in A, we get: Z = not not ( A ) = not not (True) = not not False = True. So the answer is True.""",
|
1330 |
+
"object_counting": """Questions that involve enumerating objects and asking the model to count them.
|
|
|
|
|
1331 |
|
1332 |
Q: I have a blackberry, a clarinet, a nectarine, a plum, a strawberry, a banana, a flute, an orange, and a violin. How many fruits do I have?
|
1333 |
A: Let's think step by step.
|
|
|
1364 |
- garlic (1)
|
1365 |
- yam (1)
|
1366 |
Now, let's add the numbers in parentheses: 1 + 1 + 1 + 1 + 2 + 1 + 1 = 8. So the answer is 8.""",
|
1367 |
+
"formal_fallacies": """Distinguish deductively valid arguments from formal fallacies.
|
|
|
|
|
1368 |
|
1369 |
Q: "It is not always easy to see who is related to whom -- and in which ways. The following argument pertains to this question: To begin with, Lesley is a close friend of Fernando. Moreover, being a close friend of Fernando or a schoolmate of Lowell is sufficient for being a great-grandfather of Leroy. It follows that Lesley is a great-grandfather of Leroy."
|
1370 |
Is the argument, given the explicitly stated premises, deductively valid or invalid?
|
|
|
1408 |
The case X = rare-consumer(Nioxin) does not appear in (2).
|
1409 |
The case X = loyal-buyer(Caress) does not appear in (2), either.
|
1410 |
So, from (1) and (2), we cannot necessarily deduce the Hypothesis. So the answer is invalid.""",
|
1411 |
+
"causal_judgement": """Answer questions about causal attribution.
|
|
|
|
|
1412 |
|
1413 |
Q: How would a typical person answer each of the following questions about causation?
|
1414 |
Frank T., had an ongoing dispute with his neighbor over a stretch of land and one day decided to shoot his neighbor in the body. Frank T. had no experience with guns, his hand slipped on the barrel of the gun, and the shot went wild. Nonetheless, the bullet bounced off a large boulder several feet away and hit the neighbor's body, causing significant injury. Did Frank T. intentionally shoot his neighbor in the body?
|
|
|
1433 |
- No
|
1434 |
A: Let's think step by step.
|
1435 |
Here in this question, we are told that "He aims the dart at the low point region." A typical person might therefore think George did intentionally hit the low point region, because he wanted to lift up the spirit of his sister Lena. So the answer is Yes.""",
|
1436 |
+
"web_of_lies": """Evaluate a random boolean function expressed as a word problem.
|
1437 |
|
1438 |
Q: Question: Fidel tells the truth. Jerry says Fidel tells the truth. Vina says Jerry tells the truth. Millicent says Vina lies. Raymond says Millicent lies. Does Raymond tell the truth?
|
1439 |
A: Let's think step by step.
|