import gymnasium as gym
from gymnasium import spaces
import numpy as np

# Indices into the 5-dimensional observation vector.
CMD_AVANCER = 0      # owner command: move forward (0.0 or 1.0)
CMD_TOURNER = 1      # owner command: turn (0.0 or 1.0)
ETAT_FAIM = 2        # internal state: hunger, in [0, 1]
ETAT_SOMMEIL = 3     # internal state: sleepiness, in [0, 1]
ETAT_HUMEUR = 4      # internal state: mood, in [-1, 1]

# Discrete actions available to the robot.
ACTION_ARRETER = 0   # stop
ACTION_AVANCER = 1   # move forward
ACTION_TOURNE_G = 2  # turn left
ACTION_TOURNE_D = 3  # turn right


class MiRobotEnv(gym.Env):
    """Simulation environment for MiRobot, a robot puppy learning to
    react to its owner's commands and to its own internal state.
    """

    metadata = {"render_modes": ["human"], "render_fps": 30}

    def __init__(self, render_mode=None):
        super().__init__()
        self.render_mode = render_mode

        # Four discrete actions: stop, move forward, turn left, turn right.
        self.action_space = spaces.Discrete(4)

        # Observation: [cmd_forward, cmd_turn, hunger, sleepiness, mood].
        low = np.array([0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)
        high = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        self.state = None

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)

        # No command active at the start; hunger and sleepiness are low,
        # and the mood is close to neutral.
        initial_state = np.array([
            0.0,
            0.0,
            self.np_random.uniform(low=0.0, high=0.2),
            self.np_random.uniform(low=0.0, high=0.2),
            self.np_random.uniform(low=-0.1, high=0.1)
        ], dtype=np.float32)

        self.state = initial_state

        info = {}
        return self.state, info

    def _update_internal_states(self):
        # Hunger and sleepiness grow slowly at every step.
        self.state[ETAT_FAIM] = np.clip(self.state[ETAT_FAIM] + 0.005, 0.0, 1.0)
        self.state[ETAT_SOMMEIL] = np.clip(self.state[ETAT_SOMMEIL] + 0.003, 0.0, 1.0)

        # Mood degrades when the robot is hungry or sleepy, and slowly recovers otherwise.
        humeur_decay = -0.005 if (self.state[ETAT_FAIM] > 0.5 or self.state[ETAT_SOMMEIL] > 0.5) else 0.001
        self.state[ETAT_HUMEUR] = np.clip(self.state[ETAT_HUMEUR] + humeur_decay, -1.0, 1.0)

    def _calculate_reward(self, action):
        reward = 0.0

        # Obeying the "move forward" command is rewarded; stopping instead is penalized.
        if self.state[CMD_AVANCER] > 0.5:
            if action == ACTION_AVANCER:
                reward += 1.0
            elif action == ACTION_ARRETER:
                reward -= 0.5

        # Obeying the "turn" command (either direction) is rewarded;
        # moving forward instead is penalized.
        if self.state[CMD_TOURNER] > 0.5:
            if action in (ACTION_TOURNE_G, ACTION_TOURNE_D):
                reward += 1.0
            elif action == ACTION_AVANCER:
                reward -= 0.5

        # Unmet needs weigh on the reward; a good mood improves it.
        reward -= self.state[ETAT_FAIM] * 0.1
        reward -= self.state[ETAT_SOMMEIL] * 0.1
        reward += self.state[ETAT_HUMEUR] * 0.1

        return reward

    def _simulate_user_command(self):
        # With 20% probability per step, the owner gives a new instruction:
        # both command flags are cleared, then one of "none", "move forward"
        # or "turn" is drawn. Otherwise the current command persists.
        if self.np_random.random() < 0.2:
            self.state[CMD_AVANCER] = 0.0
            self.state[CMD_TOURNER] = 0.0

            choice = self.np_random.integers(0, 3)
            if choice == 1:
                self.state[CMD_AVANCER] = 1.0
            elif choice == 2:
                self.state[CMD_TOURNER] = 1.0

    def step(self, action):
        self._update_internal_states()
        reward = self._calculate_reward(action)
        self._simulate_user_command()

        # Continuing task: the episode never ends on its own; a time limit
        # is expected to come from a wrapper such as gymnasium's TimeLimit.
        terminated = False
        truncated = False
        info = {}

        return self.state, reward, terminated, truncated, info

    def render(self):
        faim = self.state[ETAT_FAIM] * 100
        sommeil = self.state[ETAT_SOMMEIL] * 100
        humeur = self.state[ETAT_HUMEUR]
        cmd_a = "YES" if self.state[CMD_AVANCER] > 0.5 else "NO"
        cmd_t = "YES" if self.state[CMD_TOURNER] > 0.5 else "NO"

        print("--- MiRobot state ---")
        print(f" > Forward command: {cmd_a}, Turn command: {cmd_t}")
        print(f" > Hunger: {faim:.0f}%, Sleepiness: {sommeil:.0f}%")
        print(f" > Mood: {humeur:.2f} (between -1.0 and 1.0)")

    def close(self):
        pass
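

# A minimal usage sketch, not part of the original module: roll the environment
# out for a few random actions to sanity-check it. Since episodes never end on
# their own, gymnasium's TimeLimit wrapper bounds them; the step count and seed
# below are arbitrary.
if __name__ == "__main__":
    from gymnasium.wrappers import TimeLimit

    env = TimeLimit(MiRobotEnv(), max_episode_steps=50)
    obs, info = env.reset(seed=42)

    for _ in range(5):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()
        print(f"   reward: {reward:.3f}")

    env.close()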