@@ -82,8 +82,8 @@ class NeuralAgent:
     GAMMA = 0.9
 
     def __init__(self) -> None:
-        self._initialized = False
-        self._epsiode_has_started = False
+        # self._initialized = False
+        # self._epsiode_has_started = False
         self.id2word = ["<PAD>", "<UNK>"]
         self.word2id = {w: i for i, w in enumerate(self.id2word)}
 
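The paired id2word list and word2id dict keep string tokens and integer ids in sync. As a minimal standalone sketch (the get_word_id helper below is an illustrative assumption, not part of this commit), new tokens would typically be interned like this:

id2word = ["<PAD>", "<UNK>"]
word2id = {w: i for i, w in enumerate(id2word)}

def get_word_id(word: str) -> int:
    # Add unseen words to both mappings so that
    # id2word[word2id[w]] == w always holds, then return the id.
    if word not in word2id:
        word2id[word] = len(id2word)
        id2word.append(word)
    return word2id[word]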
@@ -138,6 +138,7 @@ def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> Opt
         self.transitions[-1][0] = reward  # Update reward information.
 
         self.stats["max"]["score"].append(score)
+
         if self.no_train_step % self.UPDATE_FREQUENCY == 0:
             # Update model
             returns, advantages = self._discount_rewards(values)
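Every UPDATE_FREQUENCY steps, the agent turns its stored transitions into discounted returns and advantages for the actor-critic update. A minimal sketch of that computation, assuming per-step reward and value lists and the class's GAMMA = 0.9 (the repo's actual _discount_rewards may differ in detail):

GAMMA = 0.9  # same discount factor as the class constant

def discount_rewards(rewards, values, gamma=GAMMA):
    # Walk the episode backwards, accumulating R_t = r_t + gamma * R_{t+1};
    # the advantage is the return minus the critic's value estimate.
    returns, advantages = [], []
    R = 0.0
    for r, v in zip(reversed(rewards), reversed(values)):
        R = r + gamma * R
        returns.append(R)
        advantages.append(R - v)
    return returns[::-1], advantages[::-1]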
@@ -252,10 +253,12 @@ def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
         print(os.path.basename(path), end="")
 
     # Collect some statistics: nb_steps, final reward.
-    avg_moves, avg_scores, avg_norm_scores, seed_h = [], [], [], None
+    avg_moves, avg_scores, avg_norm_scores, seed_h = [], [], [], 4567
    for no_episode in range(nb_episodes):
         obs, infos = env.reset()  # Start new episode.
-        seed_h = env.env.textworld_env._wrapped_env.seed(init_seed=seed_h)
+
+        env.env.textworld_env._wrapped_env.seed(seed=seed_h)
+        seed_h += 1
 
         score = 0
         done = False
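This change replaces seed chaining (feeding back whatever the previous seed() call returned) with a fixed base seed bumped by one per episode, so each episode's RNG state is predictable across runs. The same pattern in a self-contained form, using Python's random module as a stand-in for the wrapped TextWorld env:

import random

def run_episodes(nb_episodes: int, base_seed: int = 4567) -> None:
    # Seed each episode with base_seed, base_seed + 1, ... so the whole
    # run is reproducible without relying on a returned chained seed.
    for no_episode in range(nb_episodes):
        random.seed(base_seed + no_episode)
        print(no_episode, random.random())  # identical on every run

run_episodes(3)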
@@ -288,7 +291,8 @@ def play(agent, path, max_step=50, nb_episodes=10, verbose=True):
 agent.train()  # Tell the agent it should update its parameters.
 
 starttime = time()
-play(agent, "./games/levelMedium.ulx", nb_episodes=500, verbose=False)  # Medium level game.
+print(os.path.realpath("./games/levelMedium.ulx"))
+play(agent, "./games/levelMedium.ulx", nb_episodes=25, verbose=False)  # Medium level game.
 print("Trained in {:.2f} secs".format(time() - starttime))
 
 print('============== Time To Test ==============')