import gym
from metadrive import MultiAgentRoundaboutEnv
from metadrive.examples.ppo_expert.torch_expert import torch_expert as expert

# Settings handed to the MetaDrive multi-agent environment constructor.
# NOTE(review): the environment class below is roundabout-specific, so the
# "map" entry may have no effect on the generated layout — confirm.
env_config = dict(
    num_agents=3,  # number of agents to simulate
    map="T",  # intended as a simple map for demonstration
    use_render=False,
    manual_control=False,
)

# Build the environment from the settings above.
env = MultiAgentRoundaboutEnv(env_config)

# The expert PPO policy imported above is a plain function that is called on
# each agent object, so nothing has to be initialised before the loop.


def _reset_observations(environment):
    """Reset *environment* and return only the observations.

    ``reset()`` may return the observations alone or a tuple whose first
    element is the observations; both shapes are normalised here so callers
    always receive the observations themselves.
    """
    result = environment.reset()
    return result[0] if isinstance(result, tuple) else result


def _episode_over(flag):
    """Return True when an end-of-episode flag from ``step()`` is set.

    A dict flag is judged by its ``"__all__"`` entry (missing means False);
    any other flag is judged by its own truthiness.
    """
    if isinstance(flag, dict):
        return bool(flag.get("__all__", False))
    return bool(flag)


try:
    # Reset the environment to start a new episode.
    obs = _reset_observations(env)

    # Main simulation loop
    for _ in range(100):
        # Ask the expert policy for one action per currently active agent.
        actions = {
            agent_id: expert(agent) for agent_id, agent in env.agents.items()
        }

        # Step the environment
        obs, reward, terminated, truncated, info = env.step(actions)

        # Render the environment (frames are recorded for the GIF below)
        env.render(mode="top_down", screen_record=True, screen_size=(500, 500))

        # Start a fresh episode once every agent is done, whether the episode
        # ended by termination or by truncation.
        if _episode_over(terminated) or _episode_over(truncated):
            obs = _reset_observations(env)

    # NOTE(review): if a reset happened during the run, the renderer may have
    # been cleared — confirm the GIF still contains the expected frames.
    env.top_down_renderer.generate_gif()
finally:
    # Clean up even when the simulation or rendering raised.
    env.close()

from IPython.display import Image

# Read the recorded GIF through a context manager so the file handle is
# closed promptly instead of being left to the garbage collector.
with open("demo.gif", "rb") as gif_file:
    gif_bytes = gif_file.read()

# Kept as the final top-level expression so a notebook cell displays it.
Image(data=gif_bytes)