Source code for psychsim.examples.forward_planning_tom

import logging
import random
from psychsim.agent import Agent
from psychsim.probability import Distribution
from psychsim.pwl import makeTree, equalRow, setToConstantMatrix, rewardKey
from psychsim.world import World

__author__ = 'Pedro Sequeira'
__email__ = 'pedrodbs@gmail.com'
__description__ = 'Example of using theory-of-mind in a game-theoretic scenario involving two agents in the iterated ' \
                  'version of the Prisoner\'s dilemma ' \
                  '(https://en.wikipedia.org/wiki/Prisoner%27s_dilemma#The_iterated_prisoner%27s_dilemma). ' \
                  'Both agents follow a strategy inspired by tit-for-tat (https://en.wikipedia.org/wiki/Tit_for_tat). ' \
                  'Namely, the first action is open and depends on the agent\'s beliefs about the other\'s behavior. ' \
                  'From there on, retaliation is applied by always choosing to defect after the first defection ' \
                  'by the other agent (non-forgiving). ' \
                  'Hence the planning horizon influences the agents\' decision to cooperate or defect: ' \
                  '- if the horizon is 0, the first action of each agent will be random (no reward is considered), then tit-for-tat; ' \
                  '- if the horizon is 1, agents will always defect (one-shot decision, the other\'s action does not matter); ' \
                  '- if the horizon is 2, the first action of each agent will be random, because CC followed by CC and ' \
                  'DC followed by DD both yield -2, so C and D have the same value regardless of the other; then tit-for-tat; ' \
                  '- if the horizon is >2, agents will always cooperate, because they can see each other\'s tit-for-tat ' \
                  'strategy using ToM, and hence believe the other will cooperate if they also cooperate, leading to ' \
                  'the highest mutual payoff in the long run. ' \
                  'Note: imperfect models can break this belief and make the agents cynical towards each other.'
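
# Worked example of the horizon-2 case in the description above, using the payoff values defined below:
#   cooperate now, then mutual cooperation: MUTUAL_COOP + MUTUAL_COOP = -1 + -1 = -2
#   defect now, then mutual defection:      TEMPTATION + PUNISHMENT  =  0 + -2 = -2
# Both two-step plans have the same value, so at horizon 2 the first choice reduces to a coin flip.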

# parameters
MAX_HORIZON = 4
NUM_STEPS = 4
TIEBREAK = 'random'  # when values of decisions are the same, choose randomly
SEED = 0

# decision labels
NOT_DECIDED = 'none'
DEFECTED = 'defected'
COOPERATED = 'cooperated'

# payoff parameters (according to PD)
SUCKER = -3  # CD
TEMPTATION = 0  # DC
MUTUAL_COOP = -1  # CC
PUNISHMENT = -2  # DD
INVALID = -10000
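# Note: these values follow the standard Prisoner's dilemma ordering
# TEMPTATION > MUTUAL_COOP > PUNISHMENT > SUCKER (T > R > P > S), i.e. 0 > -1 > -2 > -3,
# and also satisfy the iterated-game condition 2 * MUTUAL_COOP > TEMPTATION + SUCKER (-2 > -3),
# so sustained mutual cooperation is worth more than alternating exploitation.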

DEBUG = False


# defines a payoff matrix tree (0 = didn't decide, 1 = Defected, 2 = Cooperated)
def get_reward_tree(agent, my_dec, other_dec):
    reward_key = rewardKey(agent.name)
    return makeTree({'if': equalRow(my_dec, NOT_DECIDED),  # if I have not decided
                     True: setToConstantMatrix(reward_key, INVALID),
                     False: {'if': equalRow(other_dec, NOT_DECIDED),  # if other has not decided
                             True: setToConstantMatrix(reward_key, INVALID),
                             False: {'if': equalRow(my_dec, COOPERATED),  # if I cooperated
                                     True: {'if': equalRow(other_dec, COOPERATED),  # if other cooperated
                                            True: setToConstantMatrix(reward_key, MUTUAL_COOP),  # both cooperated
                                            False: setToConstantMatrix(reward_key, SUCKER)},
                                     False: {'if': equalRow(other_dec, COOPERATED),  # if I defected and other cooperated
                                             True: setToConstantMatrix(reward_key, TEMPTATION),
                                             False: setToConstantMatrix(reward_key, PUNISHMENT)}}}})
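
# The tree above realizes the payoff matrix from the agent's own perspective:
#   (my_dec, other_dec) = (COOPERATED, COOPERATED) -> MUTUAL_COOP
#   (my_dec, other_dec) = (COOPERATED, DEFECTED)   -> SUCKER
#   (my_dec, other_dec) = (DEFECTED, COOPERATED)   -> TEMPTATION
#   (my_dec, other_dec) = (DEFECTED, DEFECTED)     -> PUNISHMENT
# and yields INVALID whenever either agent has not yet decided.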

if __name__ == '__main__':
    random.seed(SEED)

    # sets up log to screen
    logging.basicConfig(format='%(message)s', level=logging.DEBUG if DEBUG else logging.INFO)

    # create world and add agents
    world = World()
    agent1 = Agent('Agent 1')
    world.addAgent(agent1)
    agent2 = Agent('Agent 2')
    world.addAgent(agent2)

    agents_dec = []
    agents = [agent1, agent2]
    for agent in agents:
        # set agent's params
        agent.setAttribute('discount', 1)
        agent.setAttribute('selection', TIEBREAK)
        # agent.setRecursiveLevel(1)

        # add "decision" variable (0 = didn't decide, 1 = Defected, 2 = Cooperated)
        dec = world.defineState(agent.name, 'decision', list, [NOT_DECIDED, DEFECTED, COOPERATED])
        world.setFeature(dec, NOT_DECIDED)
        agents_dec.append(dec)

    # define agents' actions inspired by TIT-FOR-TAT: the first decision is open, then defection is retaliated;
    # as soon as one agent defects, it will always defect from there on
    for i, agent in enumerate(agents):
        my_dec = agents_dec[i]
        other_dec = agents_dec[0 if i == 1 else 1]

        # defect (always legal, except when the other has cooperated and this agent has not yet defected)
        action = agent.addAction({'verb': '', 'action': 'defect'},
                                 makeTree({'if': equalRow(other_dec, COOPERATED),
                                           True: {'if': equalRow(my_dec, DEFECTED), True: True, False: False},
                                           False: True}))
        tree = makeTree(setToConstantMatrix(my_dec, DEFECTED))
        world.setDynamics(my_dec, action, tree)

        # cooperate (not legal if the other or the agent itself defected before)
        action = agent.addAction({'verb': '', 'action': 'cooperate'},
                                 makeTree({'if': equalRow(other_dec, DEFECTED),
                                           True: False,
                                           False: {'if': equalRow(my_dec, DEFECTED), True: False, False: True}}))
        tree = makeTree(setToConstantMatrix(my_dec, COOPERATED))
        world.setDynamics(my_dec, action, tree)

    # defines payoff matrices (equal for both agents)
    agent1.setReward(get_reward_tree(agent1, agents_dec[0], agents_dec[1]), 1)
    agent2.setReward(get_reward_tree(agent2, agents_dec[1], agents_dec[0]), 1)

    # define turn order (both agents decide simultaneously)
    my_turn_order = [{agent1.name, agent2.name}]
    world.setOrder(my_turn_order)

    # add true mental model of the other to each agent
    world.setMentalModel(agent1.name, agent2.name, Distribution({agent2.get_true_model(): 1}))
    world.setMentalModel(agent2.name, agent1.name, Distribution({agent1.get_true_model(): 1}))

    for h in range(MAX_HORIZON + 1):
        logging.info('====================================')
        logging.info(f'Horizon {h}')

        # set horizon (also for the true model!) and reset decisions
        for i in range(len(agents)):
            agents[i].setHorizon(h)
            agents[i].setHorizon(h, agents[i].get_true_model())
            world.setFeature(agents_dec[i], NOT_DECIDED, recurse=True)

        for t in range(NUM_STEPS):
            # one decision per step (1 per agent): cooperate or defect?
            logging.info('---------------------')
            logging.info(f'Step {t}')
            step = world.step()
            for i in range(len(agents)):
                logging.info(f'{agents[i].name}: {world.getFeature(agents_dec[i], unique=True)}')
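
# Expected behavior when running this script (per the description above): at horizons 0 and 2, each agent's
# first decision should be random and the rest should follow tit-for-tat; at horizon 1 both agents should
# always defect; at horizons 3 and 4 both agents should cooperate on every step. The exact sequence of
# random first choices depends on the seed.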