Skip to content

Commit 9d3f78e

Browse files
committed
Temp push
1 parent e4c6812 commit 9d3f78e

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

textworld/challenges/spaceship/agent_design_a2c.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def _discount_rewards(self, last_values):
265265

266266
def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
267267
"""
268-
This code uses the cooking agent design in the spaceship game.
268+
This code uses the agent design in the spaceship game.
269269
270270
:param agent: the obj of NeuralAgent, a sample object for the agent
271271
:param path: The path to the game (envo model)
@@ -302,9 +302,13 @@ def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
302302
nb_moves = 0
303303
while not done:
304304
command = agent.act(obs, score, done, infos)
305+
print(command, "....", end="")
305306
obs, score, done, infos = env.step(command)
306307
nb_moves += 1
307308
agent.act(obs, score, done, infos) # Let the agent know the game is done.
309+
print(score)
310+
print(obs)
311+
print('-------------------------------------')
308312

309313
if verbose:
310314
print(".", end="")

textworld/challenges/spaceship/build_agent_TW_tutorial.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ class NeuralAgent:
8282
GAMMA = 0.9
8383

8484
def __init__(self) -> None:
85-
self._initialized = False
86-
self._epsiode_has_started = False
85+
# self._initialized = False
86+
# self._epsiode_has_started = False
8787
self.id2word = ["<PAD>", "<UNK>"]
8888
self.word2id = {w: i for i, w in enumerate(self.id2word)}
8989

@@ -138,6 +138,7 @@ def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> Opt
138138
self.transitions[-1][0] = reward # Update reward information.
139139

140140
self.stats["max"]["score"].append(score)
141+
141142
if self.no_train_step % self.UPDATE_FREQUENCY == 0:
142143
# Update model
143144
returns, advantages = self._discount_rewards(values)
@@ -252,10 +253,12 @@ def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
252253
print(os.path.basename(path), end="")
253254

254255
# Collect some statistics: nb_steps, final reward.
255-
avg_moves, avg_scores, avg_norm_scores, seed_h = [], [], [], None
256+
avg_moves, avg_scores, avg_norm_scores, seed_h = [], [], [], 4567
256257
for no_episode in range(nb_episodes):
257258
obs, infos = env.reset() # Start new episode.
258-
seed_h = env.env.textworld_env._wrapped_env.seed(init_seed=seed_h)
259+
260+
env.env.textworld_env._wrapped_env.seed(seed=seed_h)
261+
seed_h += 1
259262

260263
score = 0
261264
done = False
@@ -288,7 +291,8 @@ def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
288291
agent.train() # Tell the agent it should update its parameters.
289292

290293
starttime = time()
291-
play(agent, "./games/levelMedium.ulx", nb_episodes=500, verbose=False) # Medium level game.
294+
print(os.path.realpath("./games/levelMedium.ulx"))
295+
play(agent, "./games/levelMedium.ulx", nb_episodes=25, verbose=False) # Medium level game.
292296
print("Trained in {:.2f} secs".format(time() - starttime))
293297

294298
print('============== Time To Test ============== ')

0 commit comments

Comments (0)