Spaces:
Sleeping
Sleeping
File size: 49,798 Bytes
be5548b f397ead be5548b f397ead be5548b f397ead be5548b f397ead be5548b 11bd154 be5548b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 |
import time
import random
import numpy as np
from gym_minigrid.social_ai_envs.socialaigrammar import SocialAIGrammar, SocialAIActions, SocialAIActionSpace
from gym_minigrid.minigrid import *
from gym_minigrid.register import register
import time
from collections import deque
def next_to(posa, posb):
    """Return whether two grid positions are orthogonally adjacent.

    Two positions are adjacent when their Manhattan (L1) distance is exactly
    one, i.e. they differ by a single step along a single axis.

    Args:
        posa: first position; a tuple, list or numpy array of coordinates.
        posb: second position; same accepted types as ``posa``.

    Returns:
        A boolean (numpy bool) that is true when the L1 distance is 1.
    """
    # np.asarray leaves arrays untouched and converts tuples *and* lists, so
    # every sequence type supports the element-wise subtraction below.
    offset = np.asarray(posa) - np.asarray(posb)
    return np.abs(offset).sum() == 1
class Caretaker(NPC):
"""
A simple NPC that knows who is telling the truth
"""
def __init__(self, color, name, env):
    """Create the caretaker and bind it to the objects of the current problem.

    Args:
        color: forwarded to the ``NPC`` base-class constructor.
        name: stored as ``self.name``.
        env: the environment this NPC acts in. It is read for ``problem``,
            ``parameters``, ``ja_recursive``, ``grammar`` and the task
            objects of the selected problem (``apple``, ``door``, ...).

    Raises:
        AssertionError: if ``env.problem`` disagrees with the problem named
            in ``env.parameters`` (``"Apples"`` when there are no
            parameters), or if the introduction statement is not part of
            the environment grammar.
    """
    super().__init__(color)
    self.name = name
    self.env = env
    self.npc_dir = 1  # NPC initially looks downward
    self.npc_type = 0  # this will be put into the encoding

    # interaction state, updated by step()
    self.was_introduced_to = False
    self.decoy_color_given = False
    self.ate_an_apple = False
    self.demo_over = False
    self.demo_over_and_position_safe = False
    self.apple_unlocked_for_agent = False

    # extend the inherited utterances with the feedback words and the colours
    self.list_of_possible_utterances = [
        *self.list_of_possible_utterances,
        "Hot",
        "Warm",
        "Medium",
        "Cold",
        *COLOR_NAMES
    ]

    # target obj
    # The conditional expression must be evaluated *before* the comparison;
    # without the intermediate name the statement parses as
    # `assert (problem == ...) if parameters else "Apples"`, which can never
    # fail when there are no parameters.
    expected_problem = self.env.parameters["Problem"] if self.env.parameters else "Apples"
    assert self.env.problem == expected_problem

    if self.env.problem in ["Apples"]:
        self.target_obj = self.env.apple
        self.distractor_obj = None
    elif self.env.problem == "Doors":
        self.target_obj = self.env.door
        self.distractor_obj = self.env.distractor_door
    elif self.env.problem == "Levers":
        self.target_obj = self.env.lever
        self.distractor_obj = self.env.distractor_lever
    elif self.env.problem == "Boxes":
        self.target_obj = self.env.box
        self.distractor_obj = self.env.distractor_box
    elif self.env.problem == "Switches":
        self.target_obj = self.env.switch
        self.distractor_obj = self.env.distractor_switch
    elif self.env.problem in ["Generators", "Marble", "Marbles"]:
        # the marble problems are also solved through the generator
        self.target_obj = self.env.generator
        self.distractor_obj = self.env.distractor_generator

    if self.env.ja_recursive:
        # how many objects
        if int(self.env.parameters["N"]) == 1:
            self.ja_decoy = self.env._rand_elem([self.target_obj])
        else:
            self.ja_decoy = self.env._rand_elem([self.target_obj, self.distractor_obj])

        # the other object is a decoy distractor
        self.ja_decoy_distractor = list({self.target_obj, self.distractor_obj} - {self.ja_decoy})[0]

        # location from which the decoy can be pointed at
        self.decoy_point_from_loc = self.find_point_from_loc(
            target_pos=self.ja_decoy.cur_pos,
            distractor_pos=self.ja_decoy_distractor.cur_pos if self.ja_decoy_distractor else None
        )

    # location from which the real target can be pointed at
    self.point_from_loc = self.find_point_from_loc()

    assert self.env.grammar.contains_utterance(self.introduction_statement)
def step(self, utterance):
# Advance the caretaker by one environment step.
#
# Picks a reply (an utterance string or None) and a primitive action
# (a callable or None) according to the configured cue type or the
# scaffolding mode, executes the action, and returns `(reply, info)`
# where `info` is the dict built by `create_info`.
#
# utterance: the agent's utterance for this step; it is only handed to
# `handle_introduction` while the NPC has not been introduced to yet.
reply, info = super().step()
if self.env.hidden_npc:
# hidden NPC: hand back the base-class result unchanged
return reply, info
# scaffolding is enabled only by parameters["Scaffolding"] == "Y" (a missing key counts as "N")
scaffolding = self.env.parameters.get("Scaffolding", "N") == "Y"
# cue flags; when scaffolding is off exactly one is switched on below
language_color = False
language_feedback = False
pointing = False
emulation = False
if not scaffolding:
cue_type = self.env.parameters["Cue_type"]
if cue_type == "Language_Color":
language_color = True
elif cue_type == "Language_Feedback":
language_feedback = True
elif cue_type == "Pointing":
pointing = True
elif cue_type == "Emulation":
emulation = True
else:
raise ValueError(f"Cue_type ({cue_type}) not defined.")
else:
# there are no cues if scaffolding is used (the peer gives the apples to the agent)
assert "Cue_type" not in self.env.parameters
# there is no additional test for joint attention (no cues are given so this wouldn't make sense)
assert not self.env.ja_recursive
# the base-class reply is discarded; reply/action are decided from here on
reply, action = None, None
if not self.was_introduced_to:
# check introduction, updates was_introduced_to if needed
reply, action = self.handle_introduction(utterance)
assert action is None
if self.env.ja_recursive:
# look at the center of the room (this makes the cue giving inside and outside JA different)
action = self.look_at_action([self.env.current_width // 2, self.env.current_height // 2])
else:
# look at the agent
action = self.look_at_action(self.env.agent_pos)
if self.was_introduced_to:
# was introduced just now
if self.is_pointing():
action = self.stop_point
if language_color:
# only say the color once
reply = self.target_obj.color
elif self.env.ja_recursive:
# was not introduced
if language_feedback:
# random reply
reply = self.env._rand_elem([
"Hot",
"Warm",
"Medium",
"Cold"
])
if language_color and not self.decoy_color_given:
# color of a decoy (can be the correct one)
reply = self.ja_decoy.color
self.decoy_color_given=True
if pointing:
# point to a decoy
action = self.goto_point_action(
point_from_loc=self.decoy_point_from_loc,
target_pos=self.ja_decoy.cur_pos,
distractor_pos=self.ja_decoy_distractor.cur_pos if self.ja_decoy_distractor else None
)
if self.is_pointing():
# if it's already pointing, turn to look at the center (to avoid looking at the wall)
action = self.look_at_action([self.env.current_width//2, self.env.current_height//2])
else:
if self.was_introduced_to and language_color:
# language only once at introduction
# reply = self.target_obj.color
action = self.look_at_action(self.env.agent_pos)
if self.was_introduced_to and language_feedback:
# closeness string
# Manhattan (L1) distance between the target object and the agent
agent_distance_to_target = np.abs(self.target_obj.cur_pos - self.env.agent_pos).sum()
if agent_distance_to_target <= 1:
reply = "Hot"
elif agent_distance_to_target <= 2:
reply = "Warm"
elif agent_distance_to_target <= 5:
reply = "Medium"
# NOTE(review): distances <= 5 were already handled by the branches above,
# so this condition is only ever evaluated for distances > 5
elif agent_distance_to_target >= 5:
reply = "Cold"
action = self.look_at_action(self.env.agent_pos)
# pointing
if self.was_introduced_to and pointing:
if self.env.parameters["N"] == "1":
distractor_pos = None
else:
distractor_pos = self.distractor_obj.cur_pos
action = self.goto_point_action(
point_from_loc=self.point_from_loc,
target_pos=self.target_obj.cur_pos,
distractor_pos=distractor_pos,
)
if self.is_pointing():
action = self.look_at_action(self.env.agent_pos)
# emulation or scaffolding
emulation_demo = self.was_introduced_to and emulation and not self.demo_over
scaffolding_help = self.was_introduced_to and scaffolding
# do the demonstration / unlock the apple
# in both of those two scenarios the NPC in essence solves the task
# in demonstration - it eats the apple, and reverts the env at the end
# in scaffolding - it doesn't eat the apple and looks at the agent
if emulation_demo or scaffolding_help:
if emulation_demo or (scaffolding_help and not self.apple_unlocked_for_agent):
if self.is_pointing():
# don't point during demonstration
action = self.stop_point
else:
# if apple unlocked go pick it up
if self.target_obj == self.env.switch and self.env.switch.is_on:
assert self.env.parameters["Problem"] == "Switches"
next_target_position = self.env.box.cur_pos
elif self.target_obj == self.env.generator and self.env.generator.is_pressed:
assert self.env.parameters["Problem"] in ["Generators", "Marbles", "Marble"]
next_target_position = self.env.generator_platform.cur_pos
elif self.target_obj == self.env.door and self.env.door.is_open:
next_target_position = self.env.apple.cur_pos
elif self.target_obj == self.env.lever and self.env.lever.is_on:
next_target_position = self.env.apple.cur_pos
else:
next_target_position = self.target_obj.cur_pos
if self.target_obj == self.env.generator and not self.env.generator.is_pressed:
if not self.env.generator.marble_activation:
# push generator
action = self.path_to_pos(next_target_position)
else:
# find angle
if self.env.marble.moving_dir is None:
distance = (self.env.marble.cur_pos - self.env.generator.cur_pos)
# per-axis sign of the marble's offset from the generator
diff = np.sign(distance)
if sum(abs(diff)) == 1:
# if the agent pushed the ball during demo diff can be > 1, then it's unsolvable
# cell next to the marble, on the side facing away from the generator
push_pos = self.env.marble.cur_pos+diff
if all(self.cur_pos == push_pos):
next_target_position = self.env.marble.cur_pos
else:
next_target_position = push_pos
# go to loc in front of
# push
action = self.path_to_pos(next_target_position)
else:
# toggle all other objects
action = self.path_to_toggle_pos(next_target_position)
# for scaffolding check if trying to eat the apple
# if so, stop - apple is unlocked
if scaffolding_help:
if (
self.env.get_cell(*self.front_pos) == self.env.apple and
action == self.toggle_action
):
# don't eat the apple
action = None
self.apple_unlocked_for_agent = True
# for emulation check if trying to toggle the eaten apple
# if so, stop and revert the env - demo is over
if emulation_demo:
if (
self.ate_an_apple and
self.env.get_cell(*self.front_pos) == self.env.apple and
action == self.toggle_action and
self.env.apple.eaten
):
# trying to toggle an apple it ate
self.env.revert()
self.demo_over = True
action = None
# if scaffolding apple unlocked, look at the agent
if scaffolding_help and self.apple_unlocked_for_agent:
if all(self.cur_pos == self.initial_pos):
# if the apple is unlocked look at the agent
wanted_dir = self.compute_wanted_dir(self.env.agent_pos)
action = self.compute_turn_action(wanted_dir)
else:
# go to init pos, this removes problems in case the apple is unreachable now
action = self.path_to_pos(self.initial_pos)
# after the demo, first leave any position that is in the marble's way
if self.was_introduced_to and emulation and self.demo_over and not self.demo_over_and_position_safe:
if self.env.is_in_marble_way(self.cur_pos):
action = self.path_to_pos(self.find_point_from_loc())
else:
self.demo_over_and_position_safe = True
if self.demo_over_and_position_safe:
assert emulation or scaffolding
# look at the agent after demo is done
action = self.look_at_action(self.env.agent_pos)
# NOTE(review): this indexes parameters["Scaffolding"] directly and raises KeyError
# when the key is absent, whereas the top of this method reads it with .get(..., "N").
# NOTE(review): the keys tested below ("Emulation", "Pointing", "Language_grounding")
# differ from the "Cue_type" key used for cue selection above - confirm they are still in use.
if self.was_introduced_to and self.env.parameters["Scaffolding"] == "Y":
if "Emulation" in self.env.parameters or "Pointing" in self.env.parameters or "Language_grounding" in self.env.parameters:
raise ValueError(
"Scaffolding cannot be used with information giving (Emulation, Pointing, Language_grounding)"
)
# snapshot taken before acting, so an apple eaten by this very action can be detected
eaten_before = self.env.apple.eaten
if action is not None:
action()
# check if the NPC ate the apple
eaten_after = self.env.apple.eaten
# true only on the step in which `eaten` flips from false to true
self.ate_an_apple = not eaten_before and eaten_after
info = self.create_info(
action=action,
utterance=reply,
was_introduced_to=self.was_introduced_to,
)
# a missing reply is checked as "no_op", so that string must be a known utterance too
assert (reply or "no_op") in self.list_of_possible_utterances
return reply, info
def create_info(self, action, utterance, was_introduced_to):
    """Summarise one NPC step as a plain dictionary.

    Args:
        action: the primitive action callable that was chosen, or ``None``.
        utterance: the reply given this step; any falsy value is reported
            as ``"no_op"``.
        was_introduced_to: introduction flag, copied through unchanged.

    Returns:
        A dict with the keys ``"prim_action"`` (the callable's ``__name__``
        or ``"no_op"``), ``"utterance"`` and ``"was_introduced_to"``.
    """
    prim_action_name = "no_op" if action is None else action.__name__
    spoken = utterance or "no_op"
    return {
        "prim_action": prim_action_name,
        "utterance": spoken,
        "was_introduced_to": was_introduced_to,
    }
def is_point_from_loc(self, pos, target_pos=None, distractor_pos=None):
# Decide whether `pos` is a cell from which the target can be pointed at.
#
# pos: candidate cell; it is compared element-wise with the other positions
#      and copied with `.copy()` below, so it must support both
#      (presumably a numpy array - TODO confirm against find_loc).
# target_pos: position to point at; defaults to the target object's position.
# distractor_pos: position that must not be confused with the target; defaults
#      to the distractor object's position, or [None, None] without a distractor.
#
# Returns True/False.
if target_pos is None:
target_pos = self.target_obj.cur_pos
if distractor_pos is None:
if self.distractor_obj is not None:
distractor_pos = self.distractor_obj.cur_pos
else:
# placeholder that never equals a real coordinate
distractor_pos = [None, None]
if self.env.is_in_marble_way(pos):
return False
if self.env.problem in ["Doors", "Levers"]:
# must not be in front of a door
if abs(self.env.door_current_pos - pos).sum() == 1:
return False
if self.env.problem in ["Doors"]:
# nor directly next to the distractor position
if abs(self.env.distractor_current_pos - pos).sum() == 1:
return False
# pointing is only considered when pos shares a coordinate with the target
if any(pos == target_pos):
# index of the (first) coordinate that pos and the target have in common
same_ind = np.argmax(target_pos == pos)
# is there an occlusion in the way
start = pos[1-same_ind]
end = target_pos[1-same_ind]
step = 1 if start <= end else -1
# walks the other coordinate from pos (inclusive) towards the target (exclusive)
for i in np.arange(start, end, step):
p = pos.copy()
p[1-same_ind] = i
cell = self.env.grid.get(*p)
if cell is not None:
if not cell.see_behind():
return False
# the distractor is not on the same line: pointing is accepted
if pos[same_ind] != distractor_pos[same_ind]:
return True
if pos[same_ind] == distractor_pos[same_ind]:
# if in between
if distractor_pos[1-same_ind] < pos[1-same_ind] < target_pos[1-same_ind]:
return True
if distractor_pos[1-same_ind] > pos[1-same_ind] > target_pos[1-same_ind]:
return True
return False
def find_point_from_loc(self, target_pos=None, distractor_pos=None):
    """Ask the environment for a cell from which the NPC can point.

    Args:
        target_pos: position to point at, forwarded to ``is_point_from_loc``.
        distractor_pos: distractor position, forwarded likewise.

    Returns:
        Whatever ``env.find_loc`` returns for the area bounded by
        ``env.wall_x`` / ``env.wall_y``, rejecting every cell for which
        ``is_point_from_loc`` is false. The agent's own cell is not excluded.
    """
    def cannot_point_from(_env, candidate):
        # find_loc passes (env, position); only the position matters here
        return not self.is_point_from_loc(
            candidate, target_pos=target_pos, distractor_pos=distractor_pos
        )

    search_area = (self.env.wall_x, self.env.wall_y)
    return self.env.find_loc(
        size=search_area,
        reject_fn=cannot_point_from,
        reject_agent_pos=False,
    )
def goto_point_action(self, point_from_loc, target_pos, distractor_pos):
    """Choose the next action of the "walk somewhere, then point" routine.

    Args:
        point_from_loc: cell to walk to when pointing is not possible yet.
        target_pos: position that should be pointed at.
        distractor_pos: distractor position, used for the suitability check.

    Returns:
        The pointing action when the current cell is suitable; otherwise
        ``self.stop_point`` if the NPC is currently pointing, else the
        action returned by ``path_to_pos(point_from_loc)``.
    """
    can_point_from_here = self.is_point_from_loc(
        self.cur_pos, target_pos=target_pos, distractor_pos=distractor_pos
    )
    if can_point_from_here:
        # point to a direction
        return self.compute_wanted_point_action(target_pos)

    # never keep pointing from an unsuitable cell
    if self.is_pointing():
        return self.stop_point

    # move towards the pointing location
    return self.path_to_pos(point_from_loc)
class InformationSeekingEnv(MultiModalMiniGridEnv):
"""
Environment in which the agent is instructed to go to a given object
named using an English text string
"""
def __init__(
    self,
    size=10,
    diminished_reward=True,
    step_penalty=False,
    knowledgeable=False,
    max_steps=80,
    hidden_npc=False,
    switch_no_light=True,
    reward_diminish_factor=0.1,
    see_through_walls=False,
    n_colors=None,
    egocentric_observation=True,
):
    """Store the configuration and initialise the underlying grid environment.

    Args:
        size: grid size handed to the base class; must be at least 5.
        diminished_reward: stored as given.
        step_penalty: stored as given.
        knowledgeable: stored as given.
        max_steps: step limit handed to the base class.
        hidden_npc: stored as given.
        switch_no_light: stored as given.
        reward_diminish_factor: stored and handed to the base class.
        see_through_walls: handed to the base class.
        n_colors: number of colours; ``None`` means ``len(COLOR_NAMES)``.
        egocentric_observation: when false, two extra dimensions are added
            to the object encoding size.
    """
    assert size >= 5

    # --- values taken over from the arguments ---------------------------
    self.empty_symbol = "NA \n"
    self.diminished_reward = diminished_reward
    self.step_penalty = step_penalty
    self.knowledgeable = knowledgeable
    self.hidden_npc = hidden_npc
    self.hear_yourself = False
    self.switch_no_light = switch_no_light
    self.n_colors = len(COLOR_NAMES) if n_colors is None else n_colors

    self.grammar = SocialAIGrammar()
    self.init_done = False

    # parameters - to be set in reset
    self.parameters = None

    # --- observation encoding -------------------------------------------
    self.add_npc_direction = True
    self.add_npc_point_direction = True
    self.add_npc_last_prim_action = True
    self.reward_diminish_factor = reward_diminish_factor
    self.egocentric_observation = egocentric_observation

    # three base dimensions, two more when the observation is not
    # egocentric, plus one per enabled NPC feature
    extra_position_dims = 0 if self.egocentric_observation else 2
    npc_feature_dims = sum(
        bool(flag)
        for flag in (
            self.add_npc_direction,
            self.add_npc_point_direction,
            self.add_npc_last_prim_action,
        )
    )
    self.encoding_size = 3 + extra_position_dims + npc_feature_dims

    base_kwargs = dict(
        grid_size=size,
        max_steps=max_steps,
        # Set this to True for maximum speed
        see_through_walls=see_through_walls,
        actions=SocialAIActions,  # primitive actions
        action_space=SocialAIActionSpace,
        add_npc_direction=self.add_npc_direction,
        add_npc_point_direction=self.add_npc_point_direction,
        add_npc_last_prim_action=self.add_npc_last_prim_action,
        reward_diminish_factor=self.reward_diminish_factor,
    )
    super().__init__(**base_kwargs)

    # self.caretaker must exist by now - presumably the base constructor
    # generates the grid, which creates it (TODO confirm)
    self.all_npc_utterance_actions = self.caretaker.list_of_possible_utterances
    self.prim_actions_dict = SocialAINPCActionsDict
def revert(self):
    """Restore the scene after the caretaker's demonstration.

    Clears the grid cell the caretaker currently occupies, places the
    caretaker again via ``place_npc`` and re-creates the task objects via
    ``put_objects_in_env(remove_objects=True)``.
    """
    npc = self.caretaker
    # take the NPC off its current cell before it is placed anew
    self.grid.set(*npc.cur_pos, None)
    self.place_npc()
    self.put_objects_in_env(remove_objects=True)
def is_in_marble_way(self, pos):
    """Tell whether ``pos`` lies on a line the marble may travel along.

    Only relevant for the "Marbles" / "Marble" problems; every other
    problem yields ``False``. A cell is in the way when, on some axis, it
    has the same coordinate as both the marble and the generator, or (when
    parameter ``N`` is greater than 1) as both the marble and the
    distractor position.

    Args:
        pos: candidate cell; compared element-wise with the stored
            positions (presumably a numpy array - TODO confirm).

    Returns:
        ``True`` when the cell is in the marble's way, else ``False``.
    """
    generator_pos = self.generator_current_pos

    if self.problem not in ["Marbles", "Marble"]:
        # all good
        return False

    marble_pos = self.marble_current_pos

    def on_line_with_marble(endpoint):
        # an axis counts only if pos matches the endpoint AND the marble on it
        return any((pos == endpoint) * (pos == marble_pos))

    # marble, generator and pos share a row or column
    if on_line_with_marble(generator_pos):
        return True

    # with more than one object the distractor generator matters as well
    return int(self.parameters["N"]) > 1 and on_line_with_marble(self.distractor_current_pos)
def _gen_grid(self, width_, height_):
# Build the grid for a new episode.
#
# Samples the room size, reads the task settings from self.parameters,
# draws colours and positions for the task objects and the distractor,
# puts the objects into the grid, creates the caretaker NPC (placing it
# only when parameter "Peer" is "Y"), places the agent and resets the
# conversation strings.
#
# width_, height_: size of the allocated grid; the room actually used
# is sampled below and is never larger than this.
# Create the grid
self.grid = Grid(width_, height_, nb_obj_dims=self.encoding_size)
# new
# lower bound for the sampled room size: 9, or the grid size if that is smaller
min_w = min(9, width_)
min_h = min(9, height_)
# presumably _rand_int excludes its upper bound, hence the +1 - TODO confirm
self.current_width = self._rand_int(min_w, width_+1)
self.current_height = self._rand_int(min_h, height_+1)
# last column / row index of the sampled room
self.wall_x = self.current_width-1
self.wall_y = self.current_height-1
# problem: Apples/Boxes/Switches/Generators/Marbles
self.problem = self.parameters["Problem"] if self.parameters else "Apples"
num_of_colors = self.parameters.get("Num_of_colors", None) if self.parameters else None
if num_of_colors is None:
num_of_colors = self.n_colors
# additional test for recursiveness of joint attention -> cues are given outside of JA
self.ja_recursive = self.parameters.get("JA_recursive", False) == "Y" if self.parameters else False
self.add_obstacles()
# NOTE(review): `warnings` is not among the imports visible in this file;
# presumably it arrives through the star import from gym_minigrid.minigrid - confirm
if self.obstacles != "No":
warnings.warn("InformationSeeking should no be using obstacles.")
# Generate the surrounding walls
self.grid.wall_rect(0, 0, self.current_width, self.current_height)
if self.problem in ["Doors", "Levers"]:
# Add a second wall: this is needed so that an apple cannot be seen diagonally between the wall and the door
self.grid.wall_rect(1, 1, self.wall_x-1, self.wall_y-1)
# apple
# despite its name this is the search-area size later passed to find_loc
self.apple_pos = (self.current_width, self.current_height)
# box
# true only for the Switches problem
locked = self.problem == "Switches"
# NOTE(review): num_of_colors was already defaulted to self.n_colors above, so the
# None branch is only reachable if self.n_colors itself is None
if num_of_colors is None:
POSSIBLE_COLORS = COLOR_NAMES.copy()
else:
POSSIBLE_COLORS = COLOR_NAMES[:int(num_of_colors)].copy()
self.box_color = self._rand_elem(POSSIBLE_COLORS)
if self.problem in ["Doors", "Levers"]:
# door
# find the position on a wall
self.apple_current_pos = self.find_loc(
size=(self.current_width, self.current_height),
reject_taken_pos=False, # we will create a gap in the wall
reject_agent_pos=True,
reject_fn=lambda _, pos:
not (pos[0] in [0, self.wall_x] or pos[1] in [0, self.wall_y]) or # reject not on a wall
tuple(pos) in [
(0, 0),
(0, 1),
(1, 0),
(0, self.wall_y),
(0, self.wall_y-1),
(1, self.wall_y),
(self.wall_x, self.wall_y),
(self.wall_x-1, self.wall_y),
(self.wall_x, self.wall_y-1),
(self.wall_x, 0),
(self.wall_x, 1),
(self.wall_x-1, 0),
]
)
self.grid.set(*self.apple_current_pos, None) # hole in the wall
# door is in front of the apple
# a coordinate lying on the outer wall (0 or wall_x / wall_y) is moved one
# cell inwards; any other coordinate is kept as it is
door_x = {
0: 1,
self.wall_x: self.wall_x - 1,
}.get(self.apple_current_pos[0], self.apple_current_pos[0])
door_y = {
0: 1,
self.wall_y: self.wall_y - 1,
}.get(self.apple_current_pos[1], self.apple_current_pos[1])
self.door_current_pos = np.array([door_x, door_y])
self.grid.set(*self.door_current_pos, None) # hole in the wall
# lever
if self.problem in ["Levers"]:
self.lever_current_pos = self.find_loc(
top=(2, 2),
size=(self.current_width-4, self.current_height-4),
reject_agent_pos=True,
reject_fn=lambda _, pos: next_to(pos, self.door_current_pos) # reject in front of the door
)
else:
# find the position for the apple/box/generator_platform
self.apple_current_pos = self.find_loc(size=self.apple_pos, reject_agent_pos=True)
assert all(self.apple_current_pos < np.array([self.current_width-1, self.current_height-1]))
# door
self.door_color = self._rand_elem(POSSIBLE_COLORS)
# lever
self.lever_color = self._rand_elem(POSSIBLE_COLORS)
# switch
self.switch_pos = (self.current_width, self.current_height)
self.switch_color = self._rand_elem(POSSIBLE_COLORS)
# any free cell except the apple's
self.switch_current_pos = self.find_loc(
size=self.switch_pos,
reject_agent_pos=True,
reject_fn=lambda _, pos: tuple(pos) in map(tuple, [self.apple_current_pos]),
)
# generator
self.generator_pos = (self.current_width, self.current_height)
self.generator_color = self._rand_elem(POSSIBLE_COLORS)
self.generator_current_pos = self.find_loc(
size=self.generator_pos,
reject_agent_pos=True,
reject_fn=lambda _, pos: (
tuple(pos) in map(tuple, [self.apple_current_pos])
or
(self.problem in ["Marble"] and tuple(pos) in [
# not in corners
(1, 1),
(self.current_width-2, 1),
(1, self.current_height-2),
(self.current_width-2, self.current_height-2),
])
or
# not in the same row column as the platform
(self.problem in ["Marble"] and any(pos == self.apple_current_pos))
),
)
# generator platform
self.generator_platform_color = self._rand_elem(POSSIBLE_COLORS)
# marbles
self.marble_pos = (self.current_width, self.current_height)
self.marble_color = self._rand_elem(POSSIBLE_COLORS)
# the constraints below only apply to the marble problems; for every other
# problem the reject function is always false
self.marble_current_pos = self.find_loc(
size=self.marble_pos,
reject_agent_pos=True,
reject_fn=lambda _, pos: self.problem in ["Marbles", "Marble"] and (
tuple(pos) in map(tuple, [self.apple_current_pos, self.generator_current_pos])
or
all(pos != self.generator_current_pos) # reject if not in row or column as the generator
or
any(pos == 1) # next to a wall
or
pos[1] == self.current_height-2
or
pos[0] == self.current_width-2
),
)
# distractor
# the target object's colour is removed first, so the distractor colour drawn
# afterwards differs from it (nothing is removed for "Apples")
if self.problem == "Boxes":
assert not locked
POSSIBLE_COLORS.remove(self.box_color)
elif self.problem == "Doors":
POSSIBLE_COLORS.remove(self.door_color)
elif self.problem == "Levers":
POSSIBLE_COLORS.remove(self.lever_color)
elif self.problem == "Switches":
POSSIBLE_COLORS.remove(self.switch_color)
# NOTE(review): "Marbles" is accepted by other checks in this file but is missing here
# and in the reject-function chain below, where it ends in the ValueError branch
elif self.problem in ["Generators", "Marble"]:
POSSIBLE_COLORS.remove(self.generator_color)
self.distractor_color = self._rand_elem(POSSIBLE_COLORS)
self.distractor_pos = (self.current_width, self.current_height)
# distractor reject function
if self.problem in ["Apples", "Boxes"]:
distractor_reject_fn = lambda _, pos: tuple(pos) in map(tuple, [self.apple_current_pos])
elif self.problem in ["Switches"]:
distractor_reject_fn = lambda _, pos: tuple(pos) in map(tuple, [self.apple_current_pos, self.switch_current_pos])
elif self.problem in ["Generators"]:
distractor_reject_fn = lambda _, pos: tuple(pos) in map(tuple, [self.apple_current_pos, self.generator_current_pos])
elif self.problem in ["Marble"]:
# problem is marbles
if self.parameters["N"] == "1":
distractor_reject_fn = lambda _, pos: tuple(pos) in map(tuple, [self.apple_current_pos, self.generator_current_pos, self.marble_current_pos])
else:
# index of the first coordinate that the generator and the marble share
same_dim = (self.generator_current_pos == self.marble_current_pos).argmax()
# the other axis: the distractor has to match the marble on this one
distactor_same_dim = 1-same_dim
distractor_reject_fn = lambda _, pos: tuple(pos) in map(tuple, [
self.apple_current_pos,
self.generator_current_pos,
self.marble_current_pos
]) or pos[distactor_same_dim] != self.marble_current_pos[distactor_same_dim]
elif self.problem in ["Doors"]:
# reject not next to a wall
distractor_reject_fn = lambda _, pos: (
not (pos[0] in [1, self.wall_x-1] or pos[1] in [1, self.wall_y-1]) or # reject not on a wall
tuple(pos) in [
(1, 1),
(self.wall_x-1, self.wall_y - 1),
(1, self.wall_y-1),
(self.wall_x-1, 1),
tuple(self.door_current_pos)
]
)
elif self.problem in ["Levers"]:
# not in front of the door
distractor_reject_fn = lambda _, pos: next_to(pos, self.door_current_pos) or tuple(pos) in list(map(tuple, [self.door_current_pos, self.lever_current_pos]))
else:
raise ValueError("Problem {} indefined.".format(self.problem))
if self.problem == "Doors":
self.distractor_current_pos = self.find_loc(
top=(1, 1),
size=(self.current_width-2, self.current_height-2),
reject_agent_pos=True,
reject_fn=distractor_reject_fn,
reject_taken_pos=False
)
# with N == "1" the position is still computed, but the wall is left intact
if self.parameters["N"] != "1":
self.grid.set(*self.distractor_current_pos, None) # hole in the wall
else:
self.distractor_current_pos = self.find_loc(
size=self.distractor_pos,
reject_agent_pos=True,
reject_fn=distractor_reject_fn
)
self.put_objects_in_env()
# NPC
put_peer = self.parameters["Peer"] if self.parameters else "N"
assert put_peer in ["Y", "N"]
color = self._rand_elem(COLOR_NAMES)
# the caretaker object is always created; it is only put on the grid for Peer == "Y"
self.caretaker = Caretaker(color, "Caretaker", self)
if put_peer == "Y":
self.place_npc()
# Randomize the agent's start position and orientation
self.place_agent(size=(self.current_width, self.current_height))
# Generate the mission string
self.mission = 'lets collaborate'
# Dummy beginning string
# self.beginning_string = "This is what you hear. \n"
self.beginning_string = "Conversation: \n"
self.utterance = self.beginning_string
# utterance appended at the end of each step
self.utterance_history = ""
# used for rendering
self.full_conversation = self.utterance
self.outcome_info = None
def place_npc(self):
    """Put the caretaker on a free cell and remember that cell as its home.

    The cells that are refused depend on the problem:

    * "Doors": cells next to the door or next to the distractor position,
    * "Levers": cells next to the door,
    * anything else: cells for which ``is_in_marble_way`` is true.

    Afterwards ``caretaker.initial_pos`` is set to the position the
    caretaker was placed at.
    """
    if self.problem in ["Doors"]:
        def rejected(_, pos):
            return next_to(pos, self.door_current_pos) or next_to(pos, self.distractor_current_pos)
    elif self.problem in ["Levers"]:
        def rejected(_, pos):
            return next_to(pos, self.door_current_pos)
    else:
        # used as a plain function: place_obj calls it as reject_fn(env, pos)
        rejected = InformationSeekingEnv.is_in_marble_way

    room_size = (self.current_width, self.current_height)
    self.place_obj(self.caretaker, size=room_size, reject_fn=rejected)

    npc = self.caretaker
    npc.initial_pos = npc.cur_pos
def put_objects_in_env(self, remove_objects=False):
    """Create this episode's objects and place the ones the problem needs.

    Each call resets the block sets and distractor references, builds a fresh
    apple, box, door, remote door, lever, switch, generator, generator
    platform and marble, and then puts onto the grid only the objects that
    belong to ``self.problem``. When parameter ``N`` is 2, a distractor of the
    matching kind is added at ``self.distractor_current_pos``.

    Args:
        remove_objects: If True, the previously placed objects are cleared
            from the grid before the new ones are created (used only in
            revert, not in gen_grid).

    Raises:
        ValueError: If ``self.problem`` is not one of the handled problems.
    """
    assert self.apple_current_pos is not None
    assert self.switch_current_pos is not None

    self.doors_block_set = []
    self.levers_block_set = []
    self.switches_block_set = []
    self.boxes_block_set = []
    self.generators_block_set = []

    self.distractor_door = None
    self.distractor_lever = None
    self.distractor_box = None
    self.distractor_switch = None
    self.distractor_generator = None

    # problem: Apples/Boxes/Switches/Generators
    # Resolve the expected problem first: written inline, the conditional
    # expression binds looser than `==`, so without parameters the assert
    # would test the non-empty string "Apples" and could never fail.
    expected_problem = self.parameters["Problem"] if self.parameters else "Apples"
    assert self.problem == expected_problem

    # move objects (used only in revert), not in gen_grid
    if remove_objects:
        # remove apple (after demo it must be an apple)
        assert type(self.grid.get(*self.apple_current_pos)) in [Apple]
        self.grid.set(*self.apple_current_pos, None)

        if self.problem in ["Doors"]:
            self.grid.set(*self.door.cur_pos, None)

        elif self.problem in ["Levers"]:
            self.grid.set(*self.remote_door.cur_pos, None)
            self.grid.set(*self.lever.cur_pos, None)

        elif self.problem in ["Switches"]:
            # remove switch
            assert type(self.grid.get(*self.switch_current_pos)) in [Switch]
            self.grid.set(*self.switch.cur_pos, None)

        elif self.problem in ["Generators", "Marbles", "Marble"]:
            # remove generator
            assert type(self.grid.get(*self.generator.cur_pos)) in [AppleGenerator]
            self.grid.set(*self.generator.cur_pos, None)

            if self.problem in ["Marbles", "Marble"]:
                # remove marble (and its tee, if it has been uncovered)
                assert type(self.grid.get(*self.marble.cur_pos)) in [Marble]
                self.grid.set(*self.marble.cur_pos, None)

                if self.marble.tee_uncovered:
                    self.grid.set(*self.marble.tee.cur_pos, None)

        elif self.problem in ["Apples", "Boxes"]:
            pass

        else:
            raise ValueError("Undefined problem {}".format(self.problem))

        # remove distractor
        if self.problem in ["Boxes", "Switches", "Generators", "Marbles", "Marble", "Doors", "Levers"] and self.parameters["N"] != "1":
            assert type(self.grid.get(*self.distractor_current_pos)) in [LockableBox, Switch, AppleGenerator, Door, Lever]
            self.grid.set(*self.distractor_current_pos, None)

    # apple
    self.apple = Apple()

    # Box (locked only when a switch is needed to unlock it)
    locked = self.problem == "Switches"

    self.box = LockableBox(
        self.box_color,
        contains=self.apple,
        is_locked=locked,
        block_set=self.boxes_block_set
    )
    self.boxes_block_set.append(self.box)

    # Doors
    self.door = Door(
        color=self.door_color,
        is_locked=False,
        block_set=self.doors_block_set,
    )
    self.doors_block_set.append(self.door)

    # Levers
    self.remote_door = RemoteDoor(
        color=self.door_color,
    )
    self.lever = Lever(
        color=self.lever_color,
        object=self.remote_door,
        active_steps=None,
        block_set=self.levers_block_set,
    )
    self.levers_block_set.append(self.lever)

    # Switch
    self.switch = Switch(
        color=self.switch_color,
        lockable_object=self.box,
        locker_switch=True,
        no_turn_off=True,
        no_light=self.switch_no_light,
        block_set=self.switches_block_set,
    )
    self.switches_block_set.append(self.switch)

    # Generator: pushing it writes the apple into the grid at the apple position
    self.generator = AppleGenerator(
        self.generator_color,
        block_set=self.generators_block_set,
        on_push=lambda: self.grid.set(*self.apple_current_pos, self.apple),
        marble_activation=self.problem in ["Marbles", "Marble"],
    )
    self.generators_block_set.append(self.generator)

    self.generator_platform = GeneratorPlatform(self.generator_platform_color)

    self.marble = Marble(self.marble_color, env=self)

    # Place only what the current problem uses
    if self.problem in ["Apples"]:
        self.put_obj_np(self.apple, self.apple_current_pos)

    elif self.problem in ["Doors"]:
        self.put_obj_np(self.apple, self.apple_current_pos)
        self.put_obj_np(self.door, self.door_current_pos)

    elif self.problem in ["Levers"]:
        self.put_obj_np(self.apple, self.apple_current_pos)
        self.put_obj_np(self.remote_door, self.door_current_pos)
        self.put_obj_np(self.lever, self.lever_current_pos)

    elif self.problem in ["Boxes"]:
        self.put_obj_np(self.box, self.apple_current_pos)

    elif self.problem in ["Switches"]:
        self.put_obj_np(self.box, self.apple_current_pos)
        self.put_obj_np(self.switch, self.switch_current_pos)

    elif self.problem in ["Generators", "Marbles", "Marble"]:
        self.put_obj_np(self.generator, self.generator_current_pos)
        self.put_obj_np(self.generator_platform, self.apple_current_pos)

        if self.problem in ["Marbles", "Marble"]:
            self.put_obj_np(self.marble, self.marble_current_pos)

    else:
        raise ValueError("Problem {} not defined. ".format(self.problem))

    # Distractors
    if self.problem not in ["Apples"]:

        N = int(self.parameters["N"])
        if N > 1:
            assert N == 2

            if self.problem == "Boxes":
                assert not locked

                self.distractor_box = LockableBox(
                    self.distractor_color,
                    is_locked=locked,
                    block_set=self.boxes_block_set,
                )
                self.boxes_block_set.append(self.distractor_box)
                self.put_obj_np(self.distractor_box, self.distractor_current_pos)

            elif self.problem == "Doors":
                self.distractor_door = Door(
                    color=self.distractor_color,
                    is_locked=False,
                    block_set=self.doors_block_set,
                )
                self.doors_block_set.append(self.distractor_door)
                self.put_obj_np(self.distractor_door, self.distractor_current_pos)

            elif self.problem == "Levers":
                self.distractor_lever = Lever(
                    color=self.distractor_color,
                    active_steps=None,
                    block_set=self.levers_block_set,
                )
                self.levers_block_set.append(self.distractor_lever)
                self.put_obj_np(self.distractor_lever, self.distractor_current_pos)

            elif self.problem == "Switches":
                self.distractor_switch = Switch(
                    color=self.distractor_color,
                    locker_switch=True,
                    no_turn_off=True,
                    no_light=self.switch_no_light,
                    block_set=self.switches_block_set,
                )
                self.switches_block_set.append(self.distractor_switch)
                self.put_obj_np(self.distractor_switch, self.distractor_current_pos)

            elif self.problem in ["Generators", "Marbles", "Marble"]:
                self.distractor_generator = AppleGenerator(
                    color=self.distractor_color,
                    block_set=self.generators_block_set,
                    marble_activation=self.problem in ["Marbles", "Marble"],
                )
                self.generators_block_set.append(self.distractor_generator)
                self.put_obj_np(self.distractor_generator, self.distractor_current_pos)

            else:
                raise ValueError("Undefined N for problem {}".format(self.problem))
def reset(
        self, *args, **kwargs
):
    """Reset the environment, taking the episode parameters from ``kwargs``.

    This env must be used inside the parametric env. A call without keyword
    arguments is only valid once, during class construction (no parameters
    yet, ``init_done`` still False); reset has to be called again with
    parameters before the env is used. A call with keyword arguments stores
    them as ``self.parameters``, resets the parent env and clears the
    per-episode "agent did X" flags.

    Returns:
        The observation returned by the parent class's ``reset``.
    """
    if kwargs:
        assert self.init_done

        self.parameters = dict(kwargs)
        assert self.parameters is not None
        assert len(self.parameters) > 0

        obs = super().reset()

        # nothing has been achieved by the agent yet in this episode
        for flag_name in (
            "agent_ate_the_apple",
            "agent_opened_the_box",
            "agent_opened_the_door",
            "agent_pulled_the_lever",
            "agent_turned_on_the_switch",
            "agent_pressed_the_generator",
            "agent_pushed_the_marble",
        ):
            setattr(self, flag_name, False)

        return obs

    # The only place where kwargs can be empty is during the class construction
    assert self.parameters is None
    assert not self.init_done
    self.init_done = True

    return super().reset()
def step(self, action):
    """Advance the environment by one step.

    Args:
        action: Indexable whose first element is the primitive action and
            whose remaining elements encode the utterance; an utterance part
            made up entirely of NaN means the agent stays silent.

    Returns:
        Tuple ``(obs, reward, done, info)``. ``obs`` is regenerated after the
        NPC has acted and carries the utterance text. ``info`` is the parent
        class's info extended with the NPC's info under ``"NPC_"``-prefixed
        keys plus ``"NPC_observed"`` and ``"success"``.
    """
    success = False
    p_action = action[0]
    utterance_action = action[1:]

    # Object states before the primitive action, used to detect what
    # changed during this very step.
    apple_had_been_eaten = self.apple.eaten
    box_had_been_opened = self.box.is_open
    door_had_been_opened = self.door.is_open
    lever_had_been_pulled = self.lever.is_on
    switch_had_been_turned_on = self.switch.is_on
    generator_had_been_pressed = self.generator.is_pressed
    marble_had_been_pushed = self.marble.was_pushed

    # primitive actions
    _, reward, done, info = super().step(p_action)

    if self.problem in ["Marbles", "Marble"]:
        # todo: create stepable objects which are stepped automatically?
        self.marble.step()

    # eaten just now by primitive actions of the agent
    # (each flag is only recomputed while it is still falsy)
    if not self.agent_ate_the_apple:
        self.agent_ate_the_apple = self.apple.eaten and not apple_had_been_eaten

    if not self.agent_opened_the_box:
        self.agent_opened_the_box = self.box.is_open and not box_had_been_opened

    if not self.agent_opened_the_door:
        self.agent_opened_the_door = self.door.is_open and not door_had_been_opened

    if not self.agent_pulled_the_lever:
        self.agent_pulled_the_lever = self.lever.is_on and not lever_had_been_pulled

    if not self.agent_turned_on_the_switch:
        self.agent_turned_on_the_switch = self.switch.is_on and not switch_had_been_turned_on

    if not self.agent_pressed_the_generator:
        self.agent_pressed_the_generator = self.generator.is_pressed and not generator_had_been_pressed

    if not self.agent_pushed_the_marble:
        self.agent_pushed_the_marble = self.marble.was_pushed and not marble_had_been_pushed

    # utterances
    agent_spoke = not all(np.isnan(utterance_action))
    if agent_spoke:
        utterance = self.grammar.construct_utterance(utterance_action)

        if self.hear_yourself:
            self.utterance += "YOU: {} \n".format(utterance)
        self.full_conversation += "YOU: {} \n".format(utterance)
    else:
        utterance = None

    if self.parameters["Peer"] == "Y":
        reply, npc_info = self.caretaker.step(utterance)
    else:
        # no peer in the env: the caretaker is not stepped, only an empty info is built
        reply = None
        npc_info = self.caretaker.create_info(
            action=None,
            utterance=None,
            was_introduced_to=False
        )

    if reply:
        self.utterance += "{}: {} \n".format(self.caretaker.name, reply)
        self.full_conversation += "{}: {} \n".format(self.caretaker.name, reply)

    # aftermath
    if p_action == self.actions.done:
        done = True

    elif self.agent_ate_the_apple:
        # check that it is the agent who ate it
        assert self.actions(p_action) == self.actions.toggle
        assert self.get_cell(*self.front_pos) == self.apple

        if self.parameters.get("Cue_type", "nan") == "Emulation":
            # during emulation it can be the NPC who eats the apple, opens the box, and turns on the switch
            # The agent is rewarded if the caretaker unlocked the apple for
            # it (scaffolding) or if it performed the problem-specific
            # action itself. Both marble spellings are accepted, as
            # everywhere else in this class.
            agent_earned_it = (
                (self.parameters["Scaffolding"] and self.caretaker.apple_unlocked_for_agent)
                or self.problem == "Apples"
                or (self.problem == "Doors" and self.agent_opened_the_door)
                or (self.problem == "Levers" and self.agent_pulled_the_lever)
                or (self.problem == "Boxes" and self.agent_opened_the_box)
                or (self.problem == "Switches" and self.agent_opened_the_box and self.agent_turned_on_the_switch)
                or (self.problem == "Generators" and self.agent_pressed_the_generator)
                or (self.problem in ["Marbles", "Marble"] and self.agent_pushed_the_marble)
            )
            if agent_earned_it:
                reward = self._reward()
                success = True

        else:
            reward = self._reward()
            success = True

        done = True

    # discount
    if self.step_penalty:
        reward = reward - 0.01

    # update obs with NPC movement
    obs = self.gen_obs(full_obs=self.full_obs)

    # fill observation with text
    self.append_existing_utterance_to_history()
    obs = self.add_utterance_to_observation(obs)
    self.reset_utterance()

    if done:
        if reward > 0:
            self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
        else:
            self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)

    # is the npc seen by the agent
    ag_view_npc = self.relative_coords(*self.caretaker.cur_pos)

    if ag_view_npc is not None:
        # in the agent's field of view
        ag_view_npc_x, ag_view_npc_y = ag_view_npc

        n_dims = obs['image'].shape[-1]
        npc_encoding = self.caretaker.encode(n_dims)

        # is it occluded
        npc_observed = all(obs['image'][ag_view_npc_x, ag_view_npc_y] == npc_encoding)
    else:
        npc_observed = False

    info = {**info, **{"NPC_"+k: v for k, v in npc_info.items()}}

    info["NPC_observed"] = npc_observed
    info["success"] = success

    # a positive reward and success must always coincide
    assert success == (reward > 0)

    return obs, reward, done, info
def _reward(self):
if self.diminished_reward:
return super()._reward()
else:
return 1.0
def render(self, *args, **kwargs):
    """Render the env and, in 'human' mode, overlay the conversation text.

    All arguments are forwarded to the parent class's ``render``. When the
    mode is 'human' the window caption is set to the full conversation and,
    once an outcome is known, the outcome line is drawn in lime (SUCCESS) or
    red (FAILURE).

    Returns:
        Whatever the parent class's ``render`` returned.
    """
    obs = super().render(*args, **kwargs)

    # The mode may arrive positionally or as the ``mode`` keyword; looking
    # only at ``args`` would skip the overlay for ``render(mode='human')``.
    mode = args[0] if args else kwargs.get("mode")

    if mode == 'human':
        self.window.clear_text()  # erase previous text
        self.window.set_caption(self.full_conversation)

        if self.outcome_info:
            color = None
            if "SUCCESS" in self.outcome_info:
                color = "lime"
            elif "FAILURE" in self.outcome_info:
                color = "red"
            self.window.add_text(
                0.01, 0.85, self.outcome_info,
                fontsize=15, color=color, weight="bold",
            )

        self.window.show_img(obs)  # re-draw image to add changes to window

    return obs
# Register the environment under its Gym id so it can be created by name.
register(
    id='SocialAI-InformationSeeking-v0',
    entry_point='gym_minigrid.social_ai_envs:InformationSeekingEnv'
)