Multi Player Environment

This example focuses on:

  • Two-player mode environment usage (Competitive Multi-Agent, Self-Play, Competitive Human-Agent)
  • Environment settings configuration
  • Agent-environment interaction loop
  • Gym observation visualization

A dedicated section describing environment settings is presented here.
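For instance, besides the per-player fields used in the example below, shared fields can be set on the same settings object. The following is a minimal sketch and assumes the step_ratio and frame_shape fields documented for the base settings class are also exposed by EnvironmentSettingsMultiAgent:

from diambra.arena import EnvironmentSettingsMultiAgent

settings = EnvironmentSettingsMultiAgent()
settings.step_ratio = 6               # Emulator frames per environment step (assumed shared field)
settings.frame_shape = (128, 128, 1)  # Grayscale frame resized to 128x128 (assumed shared field)

The complete two-player example follows.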

#!/usr/bin/env python3
import diambra.arena
from diambra.arena import SpaceTypes, EnvironmentSettingsMultiAgent

def main():
    # Environment Settings
    settings = EnvironmentSettingsMultiAgent() # Multi-agent environment

    # --- Environment settings ---

    # Whether to use a discrete or multi_discrete action space
    settings.action_space = (SpaceTypes.DISCRETE, SpaceTypes.DISCRETE)

    # --- Episode settings ---

    # Characters to be used, automatically extended with None for games
    # that require selecting more than one character (e.g. Tekken Tag Tournament)
    settings.characters = ("Ryu", "Ken")

    # Characters' outfits
    settings.outfits = (2, 2)

    env = diambra.arena.make("sfiii3n", settings, render_mode="human")

    observation, info = env.reset(seed=42)
    env.show_obs(observation)

    while True:
        actions = env.action_space.sample()
        print("Actions: {}".format(actions))

        observation, reward, terminated, truncated, info = env.step(actions)
        done = terminated or truncated
        env.show_obs(observation)
        print("Reward: {}".format(reward))
        print("Done: {}".format(done))
        print("Info: {}".format(info))

        if done:
            # Optionally, change episode settings here
            options = {}
            options["characters"] = (None, None)
            options["char_outfits"] = (5, 5)
            observation, info = env.reset(options=options)
            env.show_obs(observation)
            break

    env.close()

    # Return success
    return 0

if __name__ == '__main__':
    main()
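The script has to be launched through the DIAMBRA command line interface, which starts the emulation engine alongside it. For instance, assuming the script is saved as multi_player_env.py (a placeholder name) and the ROMs folder is passed via the -r flag:

diambra run -r /path/to/roms python multi_player_env.py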

Trained Agent Self-Play

You can employ a single trained agent to play against itself in a two-player game. To do so, you must split the observation into a per-player view for each side, formatted exactly as the model expects from its training configuration. The actions the model produces for each player must then be combined into the single joint action the environment expects.

The following example demonstrates how to use a single model for self-play within a two-player environment, highlighting how observations and actions for the two players are kept separate.

This illustration assumes that the model was trained with both flatten and role_relative options set to True. Should your model’s training settings differ, adjustments to the handling of observations and actions will be necessary to align with those specific configurations.
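For reference, the sketch below shows how these options are typically enabled when building the training environment. It assumes the flatten and role_relative fields of the WrappersSettings class; if your training pipeline configures wrappers differently, treat it only as an illustration.

import diambra.arena
from diambra.arena import EnvironmentSettings, WrappersSettings

# Single-agent settings used at training time (placeholder values)
settings = EnvironmentSettings()

# Wrapper options the self-play example relies on
wrappers_settings = WrappersSettings()
wrappers_settings.flatten = True        # Flatten the nested observation dictionary
wrappers_settings.role_relative = True  # Make observation keys relative to the agent's role

env = diambra.arena.make("sfiii3n", settings, wrappers_settings=wrappers_settings)

The complete self-play evaluation script is shown below.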

#!/usr/bin/env python3
import diambra.arena
from diambra.arena import SpaceTypes, EnvironmentSettingsMultiAgent
from stable_baselines3 import PPO
import numpy as np

def main():
    # Environment Settings
    settings = EnvironmentSettingsMultiAgent() # Multi-agent environment

    # --- Environment settings ---

    # Whether to use a discrete or multi_discrete action space
    settings.action_space = (SpaceTypes.DISCRETE, SpaceTypes.DISCRETE)

    # --- Episode settings ---

    # Characters to be used, automatically extended with None for games
    # that require selecting more than one character (e.g. Tekken Tag Tournament)
    settings.characters = ("Ken", "Ken")

    # Characters' outfits
    settings.outfits = (2, 2)

    # Create environment
    env = diambra.arena.make("sfiii3n", settings, render_mode="human")

    # Load model
    model_path = "/path/to/model"

    # Load agent without passing the environment
    agent = PPO.load(model_path)
    
    # Begin evaluation
    observation, info = env.reset(seed=42)
    env.show_obs(observation)

    while True:
        env.render()

        # Extract observations for player 1 (P1), including shared environment information
        observation_p1 = {
            key: value for key, value in observation.items()
            if key.startswith('P1_') or key in ['frame', 'stage']
        }

        # Initialize player 2 (P2) observation with shared environment information
        observation_p2 = {'frame': observation['frame'], 'stage': observation['stage']}
        
        # Swap P1 and P2 keys for P2 observation
        # Modify P2 keys to match P1 format for the model, as it was trained with P1 observations
        observation_p2.update({
            key.replace('P2_', 'P1_'): value for key, value in observation.items()
            if key.startswith('P2_')
        })

        # Model prediction for P1 actions based on P1 observation
        action_p1, _ = agent.predict(observation_p1, deterministic=True)
        # Model prediction for P2 actions, using modified P2 observation
        action_p2, _ = agent.predict(observation_p2, deterministic=True)

        # Combine actions for both players
        actions = np.append(action_p1, action_p2)
        print("Actions: {}".format(actions))
        
        observation, reward, terminated, truncated, info = env.step(actions)
        done = terminated or truncated
        env.show_obs(observation)
        print("Reward: {}".format(reward))
        print("Done: {}".format(done))
        print("Info: {}".format(info))

        if done:
            # Optionally, change episode settings here
            options = {}
            options["characters"] = (None, None)
            options["char_outfits"] = (5, 5)
            observation, info = env.reset(options=options)
            env.show_obs(observation)
            break

    env.close()

    # Return success
    return 0

if __name__ == '__main__':
    main()