Source code for gameanalysis.regret

"""A module for computing regret and social welfare of profiles"""
import itertools
import multiprocessing

import numpy as np
from scipy import optimize


[docs]def pure_strategy_deviation_gains(game, prof):
    """Returns the pure strategy deviations gains

    The result is a compact array of deviation gains. Each element corresponds
    to the deviation from strategy i to strategy j ordered by (i, j) for all
    valid deviations."""
    prof = np.asarray(prof, int)
    supp = prof > 0
    num_supp = game.role_reduce(supp)
    from_inds = np.arange(game.num_role_strats)[supp]
    reps = game.num_strategies[game.role_index[from_inds]]
    num_devs = np.sum(num_supp * (game.num_strategies - 1))

    to_inds = np.ones(reps.sum(), int)
    to_inds[0] = 0
    to_inds[reps[:-1].cumsum()] -= reps[:-1]
    role_inds = (num_supp * game.num_strategies)[:-1].cumsum()
    to_inds[role_inds] += game.num_strategies[:-1]
    to_inds = to_inds.cumsum()
    to_inds = to_inds[to_inds != from_inds.repeat(reps)]
    from_inds = from_inds.repeat(reps - 1)

    pays = game.get_payoffs(prof)[from_inds]
    dev_profs = prof[None].repeat(num_devs, 0)
    dev_profs[np.arange(num_devs), from_inds] -= 1
    dev_profs[np.arange(num_devs), to_inds] += 1
    dev_pays = np.array([game.get_payoffs(dprof)[to]
                         for dprof, to in zip(dev_profs, to_inds)])
    return dev_pays - pays


[docs]def pure_strategy_regret(game, prof):
    """Returns the regret of a pure strategy profile

    If prof has more than one dimension, the last dimension is taken as a set
    of profiles and returned as a new array."""
    prof = np.asarray(prof, int)
    return max(pure_strategy_deviation_gains(game, prof).max(), 0)


[docs]def mixture_deviation_gains(game, mix, assume_complete=False):
    """Returns all the gains from deviation from a mixed strategy

    The result is ordered by role, then strategy."""
    mix = np.asarray(mix, float)
    strategy_evs = game.deviation_payoffs(mix, assume_complete=assume_complete)
    # strategy_evs is nan where there's no data, however, if it's not played in
    # the mix, it doesn't effect the role_evs
    masked = strategy_evs.copy()
    masked[mix == 0] = 0
    role_evs = game.role_reduce(masked * mix, keepdims=True)
    return strategy_evs - role_evs


[docs]def mixture_regret(game, mix):
    """Return the regret of a mixture profile"""
    mix = np.asarray(mix, float)
    return mixture_deviation_gains(game, mix).max()


[docs]def pure_social_welfare(game, profile):
    """Returns the social welfare of a pure strategy profile in game"""
    profile = np.asarray(profile, int)
    return game.get_payoffs(profile).dot(profile)


[docs]def mixed_social_welfare(game, mix):
    """Returns the social welfare of a mixed strategy profile"""
    return game.get_expected_payoffs(mix).dot(game.num_players)


[docs]class SocialWelfareOptimizer(object):
    """A pickleable object to find Nash equilibria

    This method uses constrained convex optimization to to attempt to solve a
    proxy for the nonconvex regret minimization."""
    def __init__(self, game, gtol=1e-8):
        self.game = game
        self.scale = game.max_payoffs() - game.min_payoffs()
        self.scale[self.scale == 0] = 1  # In case payoffs are the same
        self.offset = game.min_payoffs()
        self.gtol = gtol

[docs]    def obj_func(self, mix, penalty):
        # We assume that the initial point is in a constant sum subspace, and
        # so project the gradient so that any gradient step maintains that
        # constant step. Thus, sum to 1 is not one of the penalty terms

        # Because deviation payoffs uses log space, we max with 0 just for the
        # payoff calculation
        ep, ep_jac = self.game.get_expected_payoffs(
            np.maximum(0, mix), assume_complete=True, jacobian=True)
        # Normalize so payoffs are effectively in [0, 1]
        ep = (ep - self.offset) / self.scale
        ep_jac /= self.scale[:, None]

        # Compute normalized negative walfare (minimization)
        welfare = -self.game.num_players.dot(ep)
        dwelfare = -self.game.num_players.dot(ep_jac)

        # Add penalty for negative mixtures
        welfare += penalty * np.sum(np.minimum(mix, 0) ** 2) / 2
        dwelfare += penalty * np.minimum(mix, 0)

        # Project grad so steps stay in the simplex (more or less)
        dwelfare -= self.game.role_repeat(self.game.role_reduce(dwelfare) /
                                          self.game.num_strategies)
        return welfare, dwelfare

    def __call__(self, mix):
        # Pass in lambda, and make penalty not a member

        result = None
        penalty = np.sum(self.game.num_players)
        for _ in range(30):
            # First get an unconstrained result from the optimization
            with np.errstate(over='raise', invalid='raise'):
                try:
                    opt = optimize.minimize(
                        lambda m: self.obj_func(m, penalty), mix, method='CG',
                        jac=True, options={'gtol': self.gtol})
                except FloatingPointError:  # pragma: no cover
                    penalty *= 2
                    continue

            mix = opt.x
            # Project it onto the simplex, it might not be due to the penalty
            result = self.game.simplex_project(mix)
            if np.allclose(mix, result):
                break
            # Increase constraint penalty
            penalty *= 2

        return result


[docs]def max_mixed_social_welfare(game, grid_points=2, random_restarts=0,
                             processes=None, **swopt_args):
    """Returns the maximum role symmetric mixed social welfare profile

    Arguments
    ---------
    grid_points : int > 1
        The number of grid points to use for mixture seeds. two implies just
        pure mixtures, more will be denser, but scales exponentially with the
        dimension.
    random_restarts : int
        The number of random initializations.
    processes : int
        Number of processes to use when finding Nash equilibria. If greater
        than one, the game will need to be pickleable.
    """
    # XXX The code for this in game is rather complicated because of its
    # generality. This might be faster if that were not the case.
    assert game.is_complete(), \
        "Max welfare finding only works on complete games"""

    initial_points = itertools.chain(
        [game.uniform_mixture()],
        game.grid_mixtures(grid_points),
        game.biased_mixtures(),
        game.role_biased_mixtures(),
        game.random_mixtures(random_restarts))

    best = [-np.inf, None]  # Need a pointer for closure

    def process(mix):
        welfare = mixed_social_welfare(game, mix)
        if welfare > best[0]:
            best[0] = welfare
            best[1] = mix

    opt = SocialWelfareOptimizer(game, **swopt_args)
    if processes == 1:
        for mix in initial_points:
            process(opt(mix))
    else:
        with multiprocessing.Pool(processes) as pool:
            for mix in pool.imap_unordered(opt, initial_points):
                process(mix)

    return tuple(best)


[docs]def max_pure_social_welfare(game):
    """Get the max social welfare pure profile

    Returns a tuple of the max welfare and the corresponding profile"""
    mask = np.sum(game.profiles > 0, 1) == game.num_roles
    if mask.any():
        profiles = game.profiles[mask]
        welfares = np.sum(profiles * game.payoffs[mask], 1)
        prof_ind = np.nanargmax(welfares)
        return welfares[prof_ind], profiles[prof_ind]
    else:
        return np.nan, None


# def neighbors(game, p, *args, **kwargs):
#     if isinstance(p, Profile):
#         return profile_neighbors(game, p, *args, **kwargs)
#     elif isinstance(p, np.ndarray):
#         return mixture_neighbors(game, p, *args, **kwargs)
#     raise TypeError('unrecognized argument type: ' + type(p).__name__)


# def profile_neighbors(game, profile, role=None, strategy=None,
#                       deviation=None):
#     if role is None:
#         return list(chain(*[profile_neighbors(game, profile, r, strategy, \
#                 deviation) for r in game.roles]))
#     if strategy is None:
#         return list(chain(*[profile_neighbors(game, profile, role, s, \
#                 deviation) for s in profile[role]]))
#     if deviation is None:
#         return list(chain(*[profile_neighbors(game, profile, role, strategy, \ # noqa
#                 d) for d in set(game.strategies[role]) - {strategy}]))
#     return [profile.deviate(role, strategy, deviation)]


# def mixture_neighbors(game, mix, role=None, deviation=None):
#     n = set()
#     for profile in feasible_profiles(game, mix):
#         n.update(profile_neighbors(game, profile, role, deviation=deviation))
#     return n


# def feasible_profiles(game, mix, thresh=1e-3):
#     return [Profile({r:{s:p[game.index(r)].count(s) for s in set(p[ \
#             game.index(r)])} for r in game.roles}) for p in product(*[ \
#             CwR(filter(lambda s: mix[game.index(r), game.index(r,s)] >= \
#             thresh, game.strategies[r]), game.players[r]) for r \
#             in game.roles])]


# def symmetric_profile_regrets(game):
#     assert game.is_symmetric(), 'Game must be symmetric'
#     role = next(iter(game.strategies))
#     return {s: regret(game, rsgame.Profile({role:{s:game.players[role]}})) for s \ # noqa
#             in game.strategies[role]}