MiRobot / MiRobotEnv.py
Clemylia's picture
Upload 4 files
5026d89 verified
raw
history blame
3.6 kB
import gymnasium as gym
from gymnasium import spaces
import numpy as np
# Indices into the 5-element observation vector: the two owner-command
# flags come first, followed by the three internal-state values
# (hunger, sleepiness, mood).
(
    CMD_AVANCER,
    CMD_TOURNER,
    ETAT_FAIM,
    ETAT_SOMMEIL,
    ETAT_HUMEUR,
) = range(5)

# Identifiers of the four discrete actions: stop, move forward,
# turn left, turn right.
(
    ACTION_ARRETER,
    ACTION_AVANCER,
    ACTION_TOURNE_G,
    ACTION_TOURNE_D,
) = range(4)
class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy that learns to react
    to its owner's commands and to its own internal state.

    Observation: a ``Box`` of 5 ``float32`` values, indexed by the
    module-level constants:

    * ``CMD_AVANCER``  -- "move forward" command flag, in [0, 1]
    * ``CMD_TOURNER``  -- "turn" command flag, in [0, 1]
    * ``ETAT_FAIM``    -- hunger, in [0, 1]
    * ``ETAT_SOMMEIL`` -- sleepiness, in [0, 1]
    * ``ETAT_HUMEUR``  -- mood, in [-1, 1]

    Actions: ``Discrete(4)`` -- ``ACTION_ARRETER``, ``ACTION_AVANCER``,
    ``ACTION_TOURNE_G`` and ``ACTION_TOURNE_D``.

    Episodes never terminate or truncate on their own (``step`` always
    returns ``terminated=False, truncated=False``); callers that need finite
    episodes must bound them externally.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, render_mode=None):
        """Build the action and observation spaces.

        Args:
            render_mode: ``None`` or one of ``metadata["render_modes"]``.
                The value is only stored on the instance; output is still
                produced by explicit ``render()`` calls.

        Raises:
            ValueError: If ``render_mode`` is not a supported mode.
        """
        super().__init__()

        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected one of {self.metadata['render_modes']}"
            )
        self.render_mode = render_mode

        self.action_space = spaces.Discrete(4)

        # Bounds follow the observation layout described in the class
        # docstring; only the mood component may be negative.
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Populated by reset(); stays None until then.
        self.state = None

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Both command flags start at 0. Hunger and sleepiness are drawn
        uniformly from [0.0, 0.2) and mood from [-0.1, 0.1).

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            options: Accepted for API compatibility; not used.

        Returns:
            A ``(observation, info)`` tuple. ``observation`` is a copy of the
            internal state and ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        # The three draws are kept in this order so that a given seed keeps
        # producing the same initial state.
        self.state = np.array(
            [
                0.0,
                0.0,
                self.np_random.uniform(low=0.0, high=0.2),
                self.np_random.uniform(low=0.0, high=0.2),
                self.np_random.uniform(low=-0.1, high=0.1),
            ],
            dtype=np.float32,
        )

        # Return a copy: step() mutates self.state in place, and callers must
        # be able to keep earlier observations unchanged.
        return self.state.copy(), {}

    def _update_internal_states(self):
        """Advance hunger, sleepiness and mood by one time step.

        Hunger grows by 0.005 and sleepiness by 0.003, both clipped to
        [0, 1]. Mood drops by 0.005 when hunger or sleepiness exceeds 0.5 and
        otherwise rises by 0.001, clipped to [-1, 1].
        """
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Evaluated after the increments above, so the freshly updated
        # hunger/sleepiness values decide the direction of the mood change.
        needs_unmet = self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5
        humeur_decay = -0.005 if needs_unmet else 0.001
        self.state[ETAT_HUMEUR] = np.clip(
            self.state[ETAT_HUMEUR] + humeur_decay, -1.0, 1.0
        )

    def _calculate_reward(self, action):
        """Score ``action`` against the active command and internal state.

        * "Move forward" command active: +1.0 for ``ACTION_AVANCER``,
          -0.5 for ``ACTION_ARRETER``.
        * "Turn" command active: +1.0 for either turn action,
          -0.5 for ``ACTION_AVANCER`` (stopping is not penalized here).
        * Always: -0.1 * hunger, -0.1 * sleepiness, +0.1 * mood.

        Args:
            action: One of the ``ACTION_*`` identifiers.

        Returns:
            The reward as a plain Python ``float``.
        """
        reward = 0.0

        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        if self.state[CMD_TOURNER] > 0.5:
            if action in (ACTION_TOURNE_G, ACTION_TOURNE_D):
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        # The state terms are NumPy scalars; convert so callers receive a
        # built-in float rather than a NumPy scalar.
        return float(reward)

    def _simulate_user_command(self):
        """Randomly replace the owner's command.

        With probability 0.2 both command flags are cleared and one of three
        equally likely outcomes is chosen: no command, "move forward", or
        "turn". Otherwise the current command is left unchanged.
        """
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            choice = self.np_random.integers(0, 3)  # 0, 1 or 2
            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        """Advance the simulation by one step.

        The internal state is updated first, the reward is then computed
        against the command that was active when the action was chosen, and
        only afterwards may a new command be drawn.

        Args:
            action: One of the ``ACTION_*`` identifiers.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``observation`` is a copy of the internal state, ``terminated``
            and ``truncated`` are always ``False`` and ``info`` is an empty
            dict.
        """
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        terminated = False
        truncated = False
        info = {}

        # Copy so that the next in-place update does not rewrite an
        # observation the caller is still holding.
        return self.state.copy(), reward, terminated, truncated, info

    def render(self, mode='human'):
        """Print a text summary of the current state to stdout.

        Args:
            mode: Kept for backward compatibility; the value is not used.
        """
        if self.state is None:
            # Nothing to display before the first reset(); report it instead
            # of failing on a None subscript.
            print("--- État MiRobot --- (non initialisé : appelez reset())")
            return

        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "OUI" if self.state[CMD_AVANCER] > 0.5 else "NON"
        cmd_t = "OUI" if self.state[CMD_TOURNER] > 0.5 else "NON"

        print("--- État MiRobot ---")
        print(f" > Commande Avancer: {cmd_a}, Commande Tourner: {cmd_t}")
        print(f" > Faim: {faim:.0f}%, Sommeil: {sommeil:.0f}%")
        print(f" > Humeur: {humeur:.2f} (entre -1.0 et 1.0)")

    def close(self):
        """Release resources; this environment holds none, so it is a no-op."""
        pass