from neurenix.agent import Agent, Environment, MultiAgent
from neurenix.nn import Sequential, Linear, ReLU
from neurenix.tensor import Tensor
import numpy as np
class CompetitiveAgent(Agent):
    """Agent that chooses discrete actions epsilon-greedily from a small MLP policy.

    Experiences passed to :meth:`learn` are buffered and handed to
    :meth:`_update_policy` in batches; the update itself is still a stub.
    """

    def __init__(self, name: str, state_dim: int, action_dim: int, *,
                 hidden_dim: int = 64, epsilon: float = 0.1,
                 batch_size: int = 32) -> None:
        """Build the policy network and an empty experience buffer.

        Args:
            name: Agent name; also stored as ``self.id``.
            state_dim: Number of input features of the policy network.
            action_dim: Number of discrete actions (policy network outputs).
            hidden_dim: Width of the single hidden layer.
            epsilon: Probability of taking a uniformly random action in
                :meth:`act`.
            batch_size: Number of buffered experiences that triggers a
                policy update in :meth:`learn`.
        """
        super().__init__(name)
        self.id = name
        self.action_dim = action_dim
        self.epsilon = epsilon
        self.batch_size = batch_size
        # Policy network
        self.policy = Sequential(
            Linear(state_dim, hidden_dim),
            ReLU(),
            Linear(hidden_dim, action_dim),
        )
        self.experience_buffer = []

    def act(self, observation: dict) -> int:
        """Return an action index for ``observation``.

        Args:
            observation: Mapping whose ``"state"`` entry is fed to the policy
                network.

        Returns:
            A plain ``int`` action index: uniformly random over
            ``range(action_dim)`` with probability ``epsilon``, otherwise the
            argmax of the policy output.
        """
        # Epsilon-greedy exploration. Decide first so the forward pass is
        # skipped when its result would be thrown away, and sample from the
        # configured action count rather than from the shape of the logits.
        if np.random.random() < self.epsilon:
            return int(np.random.randint(0, self.action_dim))
        state = Tensor(observation["state"])
        action_logits = self.policy.forward(state)
        # np.argmax yields a NumPy integer scalar; convert so both branches
        # return the same type.
        return int(np.argmax(action_logits.data))

    def learn(self, experience) -> None:
        """Buffer ``experience`` and run a policy update once a batch is full.

        Args:
            experience: One transition record; stored as-is.
        """
        self.experience_buffer.append(experience)
        # Batch learning every `batch_size` experiences
        if len(self.experience_buffer) >= self.batch_size:
            self._update_policy()
            # Rebind instead of clearing in place, so a reference to the old
            # batch kept by _update_policy would stay intact.
            self.experience_buffer = []

    def _update_policy(self) -> None:
        """Update the policy from ``self.experience_buffer``.

        Placeholder: no learning rule is implemented yet, so calling this
        leaves the policy unchanged.
        """
        # TODO: implement policy update (e.g., PPO, DQN)
class CompetitiveEnvironment(Environment):
    """Grid world in which agents compete to collect randomly placed resources.

    The state is a dict with ``"positions"`` (agent id -> ``[x, y]``),
    ``"resources"`` (list of ``[x, y]`` cells) and ``"scores"`` (agent id ->
    number of resources collected). An agent with no recorded position is
    treated as standing at ``[0, 0]``.

    NOTE(review): relies on ``self._agents`` being a mapping keyed by agent id;
    presumably maintained by the ``Environment`` base class -- confirm.
    """

    # Position deltas for actions 0..3: up, down, left, right.
    _MOVES = ([0, 1], [0, -1], [-1, 0], [1, 0])
    # Reward for stepping onto a resource cell.
    RESOURCE_REWARD = 10
    # Amount subtracted per other agent occupying the destination cell.
    COLLISION_PENALTY = 5

    def __init__(self, num_agents, *, grid_size=10, num_resources=20, obs_dim=10):
        """Store the environment configuration.

        Args:
            num_agents: Number of agents expected to take part.
            grid_size: Side length of the square grid; valid coordinates are
                ``0 .. grid_size - 1``.
            num_resources: Number of resources generated on each reset.
            obs_dim: Length of the observation vector; the first two entries
                hold the agent position.

        Raises:
            ValueError: If ``grid_size < 1`` or ``obs_dim < 2``.
        """
        if grid_size < 1:
            raise ValueError("grid_size must be at least 1")
        if obs_dim < 2:
            raise ValueError("obs_dim must be at least 2 to hold the position")
        super().__init__()
        self.num_agents = num_agents
        self.grid_size = grid_size
        self.num_resources = num_resources
        self.obs_dim = obs_dim
        self.agent_positions = {}
        self.resources = []

    def reset(self):
        """Start a new episode and return the fresh state dict.

        Positions start empty, resources are regenerated, and every id in
        ``self._agents`` gets a score of 0.
        """
        self._state = {
            "positions": {},
            "resources": self._generate_resources(),
            "scores": {agent_id: 0 for agent_id in self._agents.keys()},
        }
        return self._state

    def step(self, actions):
        """Apply one action per agent and compute rewards.

        Actions are processed in the iteration order of ``actions``. The
        collision check compares against the positions recorded at that
        moment, so it sees moves already applied in this step and
        previous-step positions for agents processed later.

        Args:
            actions: Mapping of agent id -> integer action.

        Returns:
            Dict with ``"rewards"`` (agent id -> reward), ``"done"`` (True
            once no resources remain) and ``"info"`` holding a snapshot of
            the scores.
        """
        positions = self._state["positions"]
        resources = self._state["resources"]
        scores = self._state["scores"]
        rewards = {}
        # Process each agent's action
        for agent_id, action in actions.items():
            old_pos = positions.get(agent_id, [0, 0])
            new_pos = self._compute_new_position(old_pos, action)
            positions[agent_id] = new_pos
            # Check for resource collection
            reward = 0
            if new_pos in resources:
                reward = self.RESOURCE_REWARD
                resources.remove(new_pos)
                # .get() mirrors observe(): an agent that was not present at
                # reset() starts from a score of 0 instead of raising KeyError.
                scores[agent_id] = scores.get(agent_id, 0) + 1
            # Penalty for collision with other agents
            for other_id, other_pos in positions.items():
                if other_id != agent_id and new_pos == other_pos:
                    reward -= self.COLLISION_PENALTY
            rewards[agent_id] = reward
        # Episode ends when all resources collected
        done = not resources
        return {
            "rewards": rewards,
            "done": done,
            # Copy so callers hold a snapshot rather than the live score table.
            "info": {"scores": dict(scores)},
        }

    def observe(self, agent):
        """Return the observation dict for ``agent``.

        Args:
            agent: Object whose ``id`` attribute keys into the state.

        Returns:
            Dict with ``"state"`` (observation vector), ``"position"`` and
            ``"score"``; position defaults to ``[0, 0]`` and score to 0 when
            the agent has no entry yet.
        """
        pos = self._state["positions"].get(agent.id, [0, 0])
        return {
            "state": self._create_observation_vector(agent.id, pos),
            "position": pos,
            "score": self._state["scores"].get(agent.id, 0),
        }

    def _generate_resources(self):
        """Return ``num_resources`` random ``[x, y]`` cells inside the grid.

        Cells are drawn independently, so the same cell may appear more than
        once.
        """
        return [
            [np.random.randint(0, self.grid_size), np.random.randint(0, self.grid_size)]
            for _ in range(self.num_resources)
        ]

    def _compute_new_position(self, pos, action):
        """Return the cell reached from ``pos`` by ``action``, kept on the grid.

        Args:
            pos: Current ``[x, y]`` position.
            action: Integer action; taken modulo the number of moves
                (up, down, left, right).

        Returns:
            New ``[x, y]`` list. A move that would leave the grid is clamped
            to the border, because resources only exist inside the grid and
            the episode ends only when all of them are collected.
        """
        dx, dy = self._MOVES[action % len(self._MOVES)]
        limit = self.grid_size - 1
        return [
            min(max(pos[0] + dx, 0), limit),
            min(max(pos[1] + dy, 0), limit),
        ]

    def _create_observation_vector(self, agent_id, pos):
        """Return a zero vector of length ``obs_dim`` with ``pos`` in slots 0-1.

        ``agent_id`` is currently unused; the remaining slots are reserved for
        features that are not implemented yet.
        """
        # Create observation vector (position + nearby resources + other agents)
        obs = np.zeros(self.obs_dim)
        obs[0:2] = pos
        # Add more features...
        return obs
# Build the competitive multi-agent system: a handful of agents in one shared
# environment.
num_agents = 4
agents = [
    CompetitiveAgent(f"agent-{i}", state_dim=10, action_dim=4)
    for i in range(num_agents)
]
env = CompetitiveEnvironment(num_agents)

# Register each agent with the environment before wrapping both in the
# multi-agent driver.
for member in agents:
    env.register_agent(member)

mas = MultiAgent(agents, env)

# Training loop: each episode runs until the step result reports "done".
for episode in range(1000):
    observations = mas.reset()
    # Per-episode reward totals, one entry per agent id, all starting at 0.
    episode_rewards = dict.fromkeys((member.id for member in agents), 0)

    finished = False
    while not finished:
        results = mas.step()

        # Accumulate this step's rewards into the episode totals.
        for agent_id, step_reward in results["rewards"].items():
            episode_rewards[agent_id] += step_reward

        # Hand every agent its own slice of the step result to learn from.
        for member in agents:
            member_id = member.id
            member.learn({
                "observation": results["observations"][member_id],
                "action": results["actions"][member_id],
                "reward": results["rewards"][member_id],
                "done": results["done"],
            })

        finished = bool(results["done"])

    # Report totals on every 100th episode (including episode 0).
    if not episode % 100:
        print(f"Episode {episode}:")
        for agent_id, total_reward in episode_rewards.items():
            print(f" {agent_id}: {total_reward}")