!pip install gymnasium pygame
import numpy as np
import gymnasium as gym
import random
# Q-learning hyperparameters.
alpha = 0.8  # learning rate
gamma = 0.95  # discount factor
epsilon = 1.0  # initial exploration rate
epsilon_decay = 0.995  # multiplicative decay applied to epsilon after each episode
epsilon_min = 0.01  # lower bound for epsilon
episodes = 2000  # number of training episodes
test_episodes = 100  # number of greedy evaluation episodes


def train_q_learning(
    env,
    q_table: np.ndarray,
    episodes: int,
    alpha: float,
    gamma: float,
    epsilon: float,
    epsilon_decay: float,
    epsilon_min: float,
) -> float:
    """Train ``q_table`` in place with tabular epsilon-greedy Q-learning.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` (returning
            ``(next_state, reward, terminated, truncated, info)``) and
            ``action_space.sample()``.
        q_table: Array indexed as ``q_table[state, action]``; updated in place.
        episodes: Number of training episodes to run.
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Initial probability of taking a random action.
        epsilon_decay: Factor applied to epsilon after each episode.
        epsilon_min: Floor below which epsilon is never decayed.

    Returns:
        The value of epsilon after the last episode.
    """
    for _ in range(episodes):
        state, _info = env.reset()
        finished = False
        while not finished:
            # Epsilon-greedy policy.
            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # explore
            else:
                action = int(np.argmax(q_table[state]))  # exploit

            next_state, reward, terminated, truncated, _info = env.step(action)

            # Only bootstrap from the next state when the episode did not
            # terminate on this step; a terminal state has no future return.
            next_max = 0.0 if terminated else np.max(q_table[next_state])
            old_value = q_table[state, action]
            q_table[state, action] = (1 - alpha) * old_value + alpha * (
                reward + gamma * next_max
            )

            state = next_state
            # A truncated episode (e.g. a step limit) must end too; otherwise
            # the loop keeps stepping an environment that has already ended.
            finished = terminated or truncated

        # Exploration decay, once per episode, clamped at epsilon_min.
        if epsilon > epsilon_min:
            epsilon = max(epsilon_min, epsilon * epsilon_decay)
    return epsilon


def evaluate_policy(env, q_table: np.ndarray, test_episodes: int) -> int:
    """Run the greedy policy from ``q_table`` and count successful episodes.

    An episode counts as a success when it terminates with a reward of 1.
    Episodes also stop when the environment reports truncation, so a policy
    that never reaches a terminal state cannot hang the evaluation.

    Args:
        env: Environment with the same interface as in ``train_q_learning``.
        q_table: Array indexed as ``q_table[state, action]``.
        test_episodes: Number of evaluation episodes.

    Returns:
        The number of successful episodes.
    """
    successes = 0
    for _ in range(test_episodes):
        state, _info = env.reset()
        finished = False
        while not finished:
            action = int(np.argmax(q_table[state]))  # exploitation only
            state, reward, terminated, truncated, _info = env.step(action)
            finished = terminated or truncated
            if terminated and reward == 1:
                successes += 1
    return successes


if __name__ == "__main__":
    env = gym.make("FrozenLake-v1", is_slippery=True, render_mode=None)
    q_table = np.zeros([env.observation_space.n, env.action_space.n])

    epsilon = train_q_learning(
        env, q_table, episodes, alpha, gamma, epsilon, epsilon_decay, epsilon_min
    )
    print("Treinamento concluído!")

    successes = evaluate_policy(env, q_table, test_episodes)
    print(f"Taxa de sucesso: {successes}/{test_episodes} episódios")