Source code for gameanalysis.regret

"""A module for computing regret and social welfare of profiles"""
import itertools
import multiprocessing

import numpy as np
from scipy import optimize


[docs]def pure_strategy_deviation_gains(game, profile):
    """Returns the pure strategy deviations gains

    The result is a compact array of deviation gains. Each element corresponds
    to the deviation from strategy i to strategy j ordered by (i, j) for all
    valid deviations."""
    profile = np.asarray(profile, int)
    dev_profs = profile[None].repeat(game.num_devs, 0)
    dev_profs[np.arange(game.num_devs), game.dev_from_indices] -= 1
    dev_profs[np.arange(game.num_devs), game.dev_to_indices] += 1

    pays = game.get_payoffs(profile)
    return np.fromiter(  # pragma: no branch
        (game.get_payoffs(prof)[t] - pays[f] if np.all(prof >= 0) else 0
         for prof, f, t
         in zip(dev_profs, game.dev_from_indices, game.dev_to_indices)),
        float, game.num_devs)


[docs]def pure_strategy_regret(game, prof):
    """Returns the regret of a pure strategy profile

    If prof has more than one dimension, the last dimension is taken as a set
    of profiles and returned as a new array."""
    return max(pure_strategy_deviation_gains(game, prof).max(), 0)


[docs]def mixture_deviation_gains(game, mix):
    """Returns all the gains from deviation from a mixed strategy

    The result is ordered by role, then strategy."""
    mix = np.asarray(mix, float)
    strategy_evs = game.deviation_payoffs(mix)
    # strategy_evs is nan where there's no data, however, if it's not played in
    # the mix, it doesn't effect the role_evs
    masked = strategy_evs.copy()
    masked[mix == 0] = 0
    role_evs = np.add.reduceat(
        masked * mix, game.role_starts).repeat(game.num_role_strats)
    return strategy_evs - role_evs


[docs]def mixture_regret(game, mix):
    """Return the regret of a mixture profile"""
    mix = np.asarray(mix, float)
    return mixture_deviation_gains(game, mix).max()


[docs]def pure_social_welfare(game, profile):
    """Returns the social welfare of a pure strategy profile in game"""
    profile = np.asarray(profile, int)
    return game.get_payoffs(profile).dot(profile)


[docs]def mixed_social_welfare(game, mix):
    """Returns the social welfare of a mixed strategy profile"""
    return game.expected_payoffs(mix).dot(game.num_role_players)


class _SocialWelfareOptimizer(object):
    """A pickleable object to find maximal social welfare

    This method uses constrained convex optimization to to attempt to maximize
    mixed social welfare."""

    def __init__(self, game, gtol=1e-8):
        self.game = game
        self.scale = game.max_role_payoffs() - game.min_role_payoffs()
        self.scale[self.scale == 0] = 1  # In case payoffs are the same
        self.offset = game.min_role_payoffs()
        self.gtol = gtol

    def obj_func(self, mix, penalty):
        # We assume that the initial point is in a constant sum subspace, and
        # so project the gradient so that any gradient step maintains that
        # constant step. Thus, sum to 1 is not one of the penalty terms

        # Because deviation payoffs uses log space, we max with 0 just for the
        # payoff calculation
        ep, ep_jac = self.game.expected_payoffs(
            np.maximum(0, mix), jacobian=True)
        # Normalize so payoffs are effectively in [0, 1]
        ep = (ep - self.offset) / self.scale
        ep_jac /= self.scale[:, None]

        # Compute normalized negative walfare (minimization)
        welfare = -self.game.num_role_players.dot(ep)
        dwelfare = -self.game.num_role_players.dot(ep_jac)

        # Add penalty for negative mixtures
        neg_mix = np.minimum(mix, 0)
        welfare += penalty * neg_mix.dot(neg_mix) / 2
        dwelfare += penalty * neg_mix

        # Project grad so steps stay in the simplex (more or less)
        dwelfare -= np.repeat(np.add.reduceat(dwelfare, self.game.role_starts)
                              / self.game.num_role_strats,
                              self.game.num_role_strats)
        return welfare, dwelfare

    def __call__(self, mix):
        # Pass in lambda, and make penalty not a member

        result = None
        penalty = np.sum(self.game.num_role_players)
        for _ in range(30):  # pragma: no branch
            # First get an unconstrained result from the optimization
            with np.errstate(over='raise', invalid='raise'):
                try:
                    opt = optimize.minimize(
                        lambda m: self.obj_func(m, penalty), mix, method='CG',
                        jac=True, options={'gtol': self.gtol})
                except FloatingPointError:  # pragma: no cover
                    penalty *= 2
                    continue

            mix = opt.x
            # Project it onto the simplex, it might not be due to the penalty
            result = self.game.mixture_project(mix)
            if np.allclose(mix, result):  # pragma: no cover
                break
            # Increase constraint penalty
            penalty *= 2  # pragma: no cover

        return result


[docs]def max_mixed_social_welfare(game, *, grid_points=2, random_restarts=0,
                             processes=0, **swopt_args):
    """Returns the maximum role symmetric mixed social welfare profile

    Arguments
    ---------
    grid_points : int > 1, optional
        The number of grid points to use for mixture seeds. two implies just
        pure mixtures, more will be denser, but scales exponentially with the
        dimension.
    random_restarts : int, optional
        The number of random initializations.
    processes : int, optional
        Number of processes to use when finding Nash equilibria. The game needs
        to be pickleable.
    """
    assert game.is_complete(), \
        "Max welfare finding only works on complete games"""

    initial_points = list(itertools.chain(
        [game.uniform_mixture()],
        game.grid_mixtures(grid_points),
        game.biased_mixtures(),
        game.role_biased_mixtures(),
        game.random_mixtures(random_restarts)))
    chunksize = len(initial_points) if processes == 1 else 4

    best = [-np.inf, -1, None]

    def process(i, mix):
        welfare = mixed_social_welfare(game, mix)
        best[:] = max(best, [welfare, i, mix])

    opt = _SocialWelfareOptimizer(game, **swopt_args)
    if processes == 0:
        for i, init in enumerate(initial_points):
            process(i, opt(init))
    else:
        with multiprocessing.Pool(processes) as pool:
            for i, mix in enumerate(pool.imap_unordered(
                    opt, initial_points, chunksize=chunksize)):
                process(i, mix)

    return best[0], best[2]


[docs]def max_pure_social_welfare(game, *, by_role=False):
    """Returns the maximum social welfare over the known profiles.

    If by_role is specified, then max social welfare applies to each role
    independently. If there are no profiles with full payoff data for a role,
    an arbitrary profile will be returned."""
    if by_role:
        if game.num_profiles:
            welfares = np.add.reduceat(
                game.profiles() * game.payoffs(), game.role_starts, 1)
            prof_inds = np.nanargmax(welfares, 0)
            return (welfares[prof_inds, np.arange(game.num_roles)],
                    game.profiles()[prof_inds])
        else:
            welfares = np.full(game.num_roles, np.nan)
            profiles = np.full(game.num_roles, None)
            return welfares, profiles

    else:
        if game.num_complete_profiles:
            welfares = np.einsum('ij,ij->i', game.profiles(), game.payoffs())
            prof_ind = np.nanargmax(welfares)
            return welfares[prof_ind], game.profiles()[prof_ind]
        else:
            return np.nan, None