@inproceedings{480,
abstract = {Graph games provide the foundation for modeling and synthesizing reactive processes. In the synthesis of stochastic reactive processes, the traditional model is perfect-information stochastic games, where some transitions of the game graph are controlled by two adversarial players, and the other transitions are executed probabilistically. We consider such games where the objective is the conjunction of several quantitative objectives (specified as mean-payoff conditions), which we refer to as generalized mean-payoff objectives. The basic decision problem asks for the existence of a finite-memory strategy for a player that ensures the generalized mean-payoff objective be satisfied with a desired probability against all strategies of the opponent. A special case of the decision problem is the almost-sure problem where the desired probability is 1. Previous results presented a semi-decision procedure for -approximations of the almost-sure problem. In this work, we show that both the almost-sure problem as well as the general basic decision problem are coNP-complete, significantly improving the previous results. Moreover, we show that in the case of 1-player stochastic games, randomized memoryless strategies are sufficient and the problem can be solved in polynomial time. In contrast, in two-player stochastic games, we show that even with randomized strategies exponential memory is required in general, and present a matching exponential upper bound. We also study the basic decision problem with infinite-memory strategies and present computational complexity results for the problem. Our results are relevant in the synthesis of stochastic reactive systems with multiple quantitative requirements.},
author = {Chatterjee, Krishnendu and Doyen, Laurent},
location = {New York, NY, USA},
pages = {247 -- 256},
publisher = {IEEE},
title = {{Perfect-information stochastic games with generalized mean-payoff objectives}},
doi = {10.1145/2933575.2934513},
volume = {05-08-July-2016},
year = {2016},
}
@misc{5445,
abstract = {We consider the quantitative analysis problem for interprocedural control-flow graphs (ICFGs). The input consists of an ICFG, a positive weight function that assigns every transition a positive integer-valued number, and a labelling of the transitions (events) as good, bad, and neutral events. The weight function assigns to each transition a numerical value that represents ameasure of how good or bad an event is. The quantitative analysis problem asks whether there is a run of the ICFG where the ratio of the sum of the numerical weights of good events versus the sum of weights of bad events in the long-run is at least a given threshold (or equivalently, to compute the maximal ratio among all valid paths in the ICFG). The quantitative analysis problem for ICFGs can be solved in polynomial time, and we present an efficient and practical algorithm for the problem. We show that several problems relevant for static program analysis, such as estimating the worst-case execution time of a program or the average energy consumption of a mobile application, can be modeled in our framework. We have implemented our algorithm as a tool in the Java Soot framework. We demonstrate the effectiveness of our approach with two case studies. First, we show that our framework provides a sound approach (no false positives) for the analysis of inefficiently-used containers. Second, we show that our approach can also be used for static profiling of programs which reasons about methods that are frequently invoked. Our experimental results show that our tool scales to relatively large benchmarks, and discovers relevant and useful information that can be used to optimize performance of the programs. },
author = {Chatterjee, Krishnendu and Pavlogiannis, Andreas and Velner, Yaron},
issn = {2664-1690},
pages = {33},
publisher = {IST Austria},
title = {{Quantitative interprocedural analysis}},
doi = {10.15479/AT:IST-2016-523-v1-1},
year = {2016},
}
@misc{5449,
abstract = {The fixation probability is the probability that a new mutant introduced in a homogeneous population eventually takes over the entire population.
The fixation probability is a fundamental quantity of natural selection, and known to depend on the population structure.
Amplifiers of natural selection are population structures which increase the fixation probability of advantageous mutants, as compared to the baseline case of well-mixed populations. In this work we focus on symmetric population structures represented as undirected graphs. In the regime of undirected graphs, the strongest amplifier known has been the Star graph, and the existence of undirected graphs with stronger amplification properties has remained open for over a decade.
In this work we present the Comet and Comet-swarm families of undirected graphs. We show that for a range of fitness values of the mutants, the Comet and Comet-swarm graphs have fixation probability strictly larger than the fixation probability of the Star graph, for fixed population size and at the limit of large populations, respectively.},
author = {Pavlogiannis, Andreas and Tkadlec, Josef and Chatterjee, Krishnendu and Nowak, Martin},
issn = {2664-1690},
pages = {22},
publisher = {IST Austria},
title = {{Amplification on undirected population structures: Comets beat stars}},
doi = {10.15479/AT:IST-2016-648-v1-1},
year = {2016},
}
@misc{5451,
author = {Pavlogiannis, Andreas and Tkadlec, Josef and Chatterjee, Krishnendu and Nowak, Martin},
issn = {2664-1690},
pages = {34},
publisher = {IST Austria},
title = {{Strong amplifiers of natural selection}},
doi = {10.15479/AT:IST-2016-728-v1-1},
year = {2016},
}
@misc{5452,
author = {Pavlogiannis, Andreas and Tkadlec, Josef and Chatterjee, Krishnendu and Nowak, Martin},
issn = {2664-1690},
pages = {32},
publisher = {IST Austria},
title = {{Arbitrarily strong amplifiers of natural selection}},
doi = {10.15479/AT:IST-2017-728-v2-1},
year = {2016},
}
@misc{5453,
author = {Pavlogiannis, Andreas and Tkadlec, Josef and Chatterjee, Krishnendu and Nowak, Martin},
issn = {2664-1690},
pages = {34},
publisher = {IST Austria},
title = {{Arbitrarily strong amplifiers of natural selection}},
doi = {10.15479/AT:IST-2017-749-v3-1},
year = {2016},
}
@article{1322,
abstract = {Direct reciprocity is a major mechanism for the evolution of cooperation. Several classical studies have suggested that humans should quickly learn to adopt reciprocal strategies to establish mutual cooperation in repeated interactions. On the other hand, the recently discovered theory of ZD strategies has found that subjects who use extortionate strategies are able to exploit and subdue cooperators. Although such extortioners have been predicted to succeed in any population of adaptive opponents, theoretical follow-up studies questioned whether extortion can evolve in reality. However, most of these studies presumed that individuals have similar strategic possibilities and comparable outside options, whereas asymmetries are ubiquitous in real world applications. Here we show with a model and an economic experiment that extortionate strategies readily emerge once subjects differ in their strategic power. Our experiment combines a repeated social dilemma with asymmetric partner choice. In our main treatment there is one randomly chosen group member who is unilaterally allowed to exchange one of the other group members after every ten rounds of the social dilemma. We find that this asymmetric replacement opportunity generally promotes cooperation, but often the resulting payoff distribution reflects the underlying power structure. Almost half of the subjects in a better strategic position turn into extortioners, who quickly proceed to exploit their peers. By adapting their cooperation probabilities consistent with ZD theory, extortioners force their co-players to cooperate without being similarly cooperative themselves. Comparison to non-extortionate players under the same conditions indicates a substantial net gain to extortion. Our results thus highlight how power asymmetries can endanger mutually beneficial interactions, and transform them into exploitative relationships. In particular, our results indicate that the extortionate strategies predicted from ZD theory could play a more prominent role in our daily interactions than previously thought.},
author = {Hilbe, Christian and Hagel, Kristin and Milinski, Manfred},
journal = {PLoS One},
number = {10},
publisher = {Public Library of Science},
title = {{Asymmetric power boosts extortion in an economic experiment}},
doi = {10.1371/journal.pone.0163867},
volume = {11},
year = {2016},
}
@inproceedings{1324,
abstract = {DEC-POMDPs extend POMDPs to a multi-agent setting, where several agents operate in an uncertain environment independently to achieve a joint objective. DEC-POMDPs have been studied with finite-horizon and infinite-horizon discounted-sum objectives, and there exist solvers both for exact and approximate solutions. In this work we consider Goal-DEC-POMDPs, where given a set of target states, the objective is to ensure that the target set is reached with minimal cost. We consider the indefinite-horizon (infinite-horizon with either discounted-sum, or undiscounted-sum, where absorbing goal states have zero-cost) problem. We present a new and novel method to solve the problem that extends methods for finite-horizon DEC-POMDPs and the RTDP-Bel approach for POMDPs. We present experimental results on several examples, and show that our approach presents promising results. Copyright },
author = {Chatterjee, Krishnendu and Chmelik, Martin},
booktitle = {Proceedings of the Twenty-Sixth International Conference on International Conference on Automated Planning and Scheduling},
location = {London, United Kingdom},
pages = {88 -- 96},
publisher = {AAAI Press},
title = {{Indefinite-horizon reachability in Goal-DEC-POMDPs}},
volume = {2016-January},
year = {2016},
}
@inproceedings{1325,
abstract = {We study graphs and two-player games in which rewards are assigned to states, and the goal of the players is to satisfy or dissatisfy certain property of the generated outcome, given as a mean payoff property. Since the notion of mean-payoff does not reflect possible fluctuations from the mean-payoff along a run, we propose definitions and algorithms for capturing the stability of the system, and give algorithms for deciding if a given mean payoff and stability objective can be ensured in the system.},
author = {Brázdil, Tomáš and Forejt, Vojtěch and Kučera, Antonín and Novotny, Petr},
location = {Quebec City, Canada},
publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
title = {{Stability in graphs and games}},
doi = {10.4230/LIPIcs.CONCUR.2016.10},
volume = {59},
year = {2016},
}
@inproceedings{1326,
abstract = {Energy Markov Decision Processes (EMDPs) are finite-state Markov decision processes where each transition is assigned an integer counter update and a rational payoff. An EMDP configuration is a pair s(n), where s is a control state and n is the current counter value. The configurations are changed by performing transitions in the standard way. We consider the problem of computing a safe strategy (i.e., a strategy that keeps the counter non-negative) which maximizes the expected mean payoff. },
author = {Brázdil, Tomáš and Kučera, Antonín and Novotny, Petr},
location = {Chiba, Japan},
pages = {32 -- 49},
publisher = {Springer},
title = {{Optimizing the expected mean payoff in Energy Markov Decision Processes}},
doi = {10.1007/978-3-319-46520-3_3},
volume = {9938},
year = {2016},
}
@inproceedings{1327,
abstract = {We consider partially observable Markov decision processes (POMDPs) with a set of target states and positive integer costs associated with every transition. The traditional optimization objective (stochastic shortest path) asks to minimize the expected total cost until the target set is reached. We extend the traditional framework of POMDPs to model energy consumption, which represents a hard constraint. The energy levels may increase and decrease with transitions, and the hard constraint requires that the energy level must remain positive in all steps till the target is reached. First, we present a novel algorithm for solving POMDPs with energy levels, developing on existing POMDP solvers and using RTDP as its main method. Our second contribution is related to policy representation. For larger POMDP instances the policies computed by existing solvers are too large to be understandable. We present an automated procedure based on machine learning techniques that automatically extracts important decisions of the policy allowing us to compute succinct human readable policies. Finally, we show experimentally that our algorithm performs well and computes succinct policies on a number of POMDP instances from the literature that were naturally enhanced with energy levels. },
author = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Gupta, Anchit and Novotny, Petr},
booktitle = {Proceedings of the 15th International Conference on Autonomous Agents and Multiagent Systems},
location = {Singapore},
pages = {1465 -- 1466},
publisher = {ACM},
title = {{Stochastic shortest path with energy constraints in POMDPs}},
year = {2016},
}
@article{1333,
abstract = {Social dilemmas force players to balance between personal and collective gain. In many dilemmas, such as elected governments negotiating climate-change mitigation measures, the decisions are made not by individual players but by their representatives. However, the behaviour of representatives in social dilemmas has not been investigated experimentally. Here inspired by the negotiations for greenhouse-gas emissions reductions, we experimentally study a collective-risk social dilemma that involves representatives deciding on behalf of their fellow group members. Representatives can be re-elected or voted out after each consecutive collective-risk game. Selfish players are preferentially elected and are hence found most frequently in the "representatives" treatment. Across all treatments, we identify the selfish players as extortioners. As predicted by our mathematical model, their steadfast strategies enforce cooperation from fair players who finally compensate almost completely the deficit caused by the extortionate co-players. Everybody gains, but the extortionate representatives and their groups gain the most.},
author = {Milinski, Manfred and Hilbe, Christian and Semmann, Dirk and Sommerfeld, Ralf and Marotzke, Jochem},
journal = {Nature Communications},
publisher = {Nature Publishing Group},
title = {{Humans choose representatives who enforce cooperation in social dilemmas through extortion}},
doi = {10.1038/ncomms10915},
volume = {7},
year = {2016},
}
@inproceedings{1335,
abstract = {In this paper we review various automata-theoretic formalisms for expressing quantitative properties. We start with finite-state Boolean automata that express the traditional regular properties. We then consider weighted ω-automata that can measure the average density of events, which finite-state Boolean automata cannot. However, even weighted ω-automata cannot express basic performance properties like average response time. We finally consider two formalisms of weighted ω-automata with monitors, where the monitors are either (a) counters or (b) weighted automata themselves. We present a translation result to establish that these two formalisms are equivalent. Weighted ω-automata with monitors generalize weighted ω-automata, and can express average response time property. They present a natural, robust, and expressive framework for quantitative specifications, with important decidable properties.},
author = {Chatterjee, Krishnendu and Henzinger, Thomas A and Otop, Jan},
location = {Edinburgh, United Kingdom},
pages = {23 -- 38},
publisher = {Springer},
title = {{Quantitative monitor automata}},
doi = {10.1007/978-3-662-53413-7_2},
volume = {9837},
year = {2016},
}
@inproceedings{1340,
abstract = {We study repeated games with absorbing states, a type of two-player, zero-sum concurrent mean-payoff games with the prototypical example being the Big Match of Gillete (1957). These games may not allow optimal strategies but they always have ε-optimal strategies. In this paper we design ε-optimal strategies for Player 1 in these games that use only O(log log T) space. Furthermore, we construct strategies for Player 1 that use space s(T), for an arbitrary small unbounded non-decreasing function s, and which guarantee an ε-optimal value for Player 1 in the limit superior sense. The previously known strategies use space Ω(log T) and it was known that no strategy can use constant space if it is ε-optimal even in the limit superior sense. We also give a complementary lower bound. Furthermore, we also show that no Markov strategy, even extended with finite memory, can ensure value greater than 0 in the Big Match, answering a question posed by Neyman [11].},
author = {Hansen, Kristoffer and Ibsen-Jensen, Rasmus and Koucký, Michal},
location = {Liverpool, United Kingdom},
pages = {64 -- 76},
publisher = {Springer},
title = {{The big match in small space}},
doi = {10.1007/978-3-662-53354-3_6},
volume = {9928},
year = {2016},
}
@article{1380,
abstract = {We consider higher-dimensional versions of Kannan and Lipton's Orbit Problem - determining whether a target vector space V may be reached from a starting point x under repeated applications of a linear transformation A. Answering two questions posed by Kannan and Lipton in the 1980s, we show that when V has dimension one, this problem is solvable in polynomial time, and when V has dimension two or three, the problem is in NPRP.},
author = {Chonev, Ventsislav K and Ouaknine, Joël and Worrell, James},
journal = {Journal of the ACM},
number = {3},
publisher = {ACM},
title = {{On the complexity of the orbit problem}},
doi = {10.1145/2857050},
volume = {63},
year = {2016},
}
@inproceedings{1389,
abstract = {The continuous evolution of a wide variety of systems, including continous-time Markov chains and linear hybrid automata, can be
described in terms of linear differential equations. In this paper we study the decision problem of whether the solution x(t) of a system of linear differential equations dx/dt = Ax reaches a target halfspace infinitely often. This recurrent reachability problem can
equivalently be formulated as the following Infinite Zeros Problem: does a real-valued function f:R≥0 --> R satisfying a given linear
differential equation have infinitely many zeros? Our main decidability result is that if the differential equation has order at most 7, then the Infinite Zeros Problem is decidable. On the other hand, we show that a decision procedure for the Infinite Zeros Problem at order 9 (and above) would entail a major breakthrough in Diophantine Approximation, specifically an algorithm for computing the Lagrange constants of arbitrary real algebraic numbers to arbitrary precision.},
author = {Chonev, Ventsislav K and Ouaknine, Joël and Worrell, James},
booktitle = {LICS '16},
location = {New York, NY, USA},
pages = {515 -- 524},
publisher = {IEEE},
title = {{On recurrent reachability for continuous linear dynamical systems}},
doi = {10.1145/2933575.2934548},
year = {2016},
}
@phdthesis{1397,
abstract = {We study partially observable Markov decision processes (POMDPs) with objectives used in verification and artificial intelligence. The qualitative analysis problem given a POMDP and an objective asks whether there is a strategy (policy) to ensure that the objective is satisfied almost surely (with probability 1), resp. with positive probability (with probability greater than 0). For POMDPs with limit-average payoff, where a reward value in the interval [0,1] is associated to every transition, and the payoff of an infinite path is the long-run average of the rewards, we consider two types of path constraints: (i) a quantitative limit-average constraint defines the set of paths where the payoff is at least a given threshold L1 = 1. Our main results for qualitative limit-average constraint under almost-sure winning are as follows: (i) the problem of deciding the existence of a finite-memory controller is EXPTIME-complete; and (ii) the problem of deciding the existence of an infinite-memory controller is undecidable. For quantitative limit-average constraints we show that the problem of deciding the existence of a finite-memory controller is undecidable. We present a prototype implementation of our EXPTIME algorithm. For POMDPs with w-regular conditions specified as parity objectives, while the qualitative analysis problems are known to be undecidable even for very special case of parity objectives, we establish decidability (with optimal complexity) of the qualitative analysis problems for POMDPs with parity objectives under finite-memory strategies. We establish optimal (exponential) memory bounds and EXPTIME-completeness of the qualitative analysis problems under finite-memory strategies for POMDPs with parity objectives. Based on our theoretical algorithms we also present a practical approach, where we design heuristics to deal with the exponential complexity, and have applied our implementation on a number of well-known POMDP examples for robotics applications. For POMDPs with a set of target states and an integer cost associated with every transition, we study the optimization objective that asks to minimize the expected total cost of reaching a state in the target set, while ensuring that the target set is reached almost surely. We show that for general integer costs approximating the optimal cost is undecidable. For positive costs, our results are as follows: (i) we establish matching lower and upper bounds for the optimal cost, both double and exponential in the POMDP state space size; (ii) we show that the problem of approximating the optimal cost is decidable and present approximation algorithms that extend existing algorithms for POMDPs with finite-horizon objectives. We show experimentally that it performs well in many examples of interest. We study more deeply the problem of almost-sure reachability, where given a set of target states, the question is to decide whether there is a strategy to ensure that the target set is reached almost surely. While in general the problem EXPTIME-complete, in many practical cases strategies with a small amount of memory suffice. Moreover, the existing solution to the problem is explicit, which first requires to construct explicitly an exponential reduction to a belief-support MDP. We first study the existence of observation-stationary strategies, which is NP-complete, and then small-memory strategies. We present a symbolic algorithm by an efficient encoding to SAT and using a SAT solver for the problem. We report experimental results demonstrating the scalability of our symbolic (SAT-based) approach. Decentralized POMDPs (DEC-POMDPs) extend POMDPs to a multi-agent setting, where several agents operate in an uncertain environment independently to achieve a joint objective. In this work we consider Goal DEC-POMDPs, where given a set of target states, the objective is to ensure that the target set is reached with minimal cost. We consider the indefinite-horizon (infinite-horizon with either discounted-sum, or undiscounted-sum, where absorbing goal states have zero-cost) problem. We present a new and novel method to solve the problem that extends methods for finite-horizon DEC-POMDPs and the real-time dynamic programming approach for POMDPs. We present experimental results on several examples, and show that our approach presents promising results. In the end we present a short summary of a few other results related to verification of MDPs and POMDPs.},
author = {Chmelik, Martin},
pages = {232},
publisher = {IST Austria},
title = {{Algorithms for partially observable markov decision processes}},
year = {2016},
}
@article{1423,
abstract = {Direct reciprocity is a mechanism for the evolution of cooperation based on repeated interactions. When individuals meet repeatedly, they can use conditional strategies to enforce cooperative outcomes that would not be feasible in one-shot social dilemmas. Direct reciprocity requires that individuals keep track of their past interactions and find the right response. However, there are natural bounds on strategic complexity: Humans find it difficult to remember past interactions accurately, especially over long timespans. Given these limitations, it is natural to ask how complex strategies need to be for cooperation to evolve. Here, we study stochastic evolutionary game dynamics in finite populations to systematically compare the evolutionary performance of reactive strategies, which only respond to the co-player's previous move, and memory-one strategies, which take into account the own and the co-player's previous move. In both cases, we compare deterministic strategy and stochastic strategy spaces. For reactive strategies and small costs, we find that stochasticity benefits cooperation, because it allows for generous-tit-for-tat. For memory one strategies and small costs, we find that stochasticity does not increase the propensity for cooperation, because the deterministic rule of win-stay, lose-shift works best. For memory one strategies and large costs, however, stochasticity can augment cooperation.},
author = {Baek, Seung and Jeong, Hyeongchai and Hilbe, Christian and Nowak, Martin},
journal = {Scientific Reports},
publisher = {Nature Publishing Group},
title = {{Comparing reactive and memory-one strategies of direct reciprocity}},
doi = {10.1038/srep25676},
volume = {6},
year = {2016},
}
@article{1426,
abstract = {Brood parasites exploit their host in order to increase their own fitness. Typically, this results in an arms race between parasite trickery and host defence. Thus, it is puzzling to observe hosts that accept parasitism without any resistance. The ‘mafia’ hypothesis suggests that these hosts accept parasitism to avoid retaliation. Retaliation has been shown to evolve when the hosts condition their response to mafia parasites, who use depredation as a targeted response to rejection. However, it is unclear if acceptance would also emerge when ‘farming’ parasites are present in the population. Farming parasites use depredation to synchronize the timing with the host, destroying mature clutches to force the host to re-nest. Herein, we develop an evolutionary model to analyse the interaction between depredatory parasites and their hosts. We show that coevolutionary cycles between farmers and mafia can still induce host acceptance of brood parasites. However, this equilibrium is unstable and in the long-run the dynamics of this host–parasite interaction exhibits strong oscillations: when farmers are the majority, accepters conditional to mafia (the host will reject first and only accept after retaliation by the parasite) have a higher fitness than unconditional accepters (the host always accepts parasitism). This leads to an increase in mafia parasites’ fitness and in turn induce an optimal environment for accepter hosts.},
author = {Chakra, Maria and Hilbe, Christian and Traulsen, Arne},
journal = {Royal Society Open Science},
number = {5},
publisher = {Royal Society, The},
title = {{Coevolutionary interactions between farmers and mafia induce host acceptance of avian brood parasites}},
doi = {10.1098/rsos.160036},
volume = {3},
year = {2016},
}
@inproceedings{1438,
abstract = {In this paper, we consider termination of probabilistic programs with real-valued variables. The questions concerned are: (a) qualitative ones that ask (i) whether the program terminates with probability 1 (almost-sure termination) and (ii) whether the expected termination time is finite (finite termination); (b) quantitative ones that ask (i) to approximate the expected termination time (expectation problem) and (ii) to compute a bound B such that the probability to terminate after B steps decreases exponentially (concentration problem). To solve these questions, we utilize the notion of ranking supermartingales which is a powerful approach for proving termination of probabilistic programs. In detail, we focus on algorithmic synthesis of linear ranking-supermartingales over affine probabilistic programs (APP's) with both angelic and demonic non-determinism. An important subclass of APP's is LRAPP which is defined as the class of all APP's over which a linear ranking-supermartingale exists. Our main contributions are as follows. Firstly, we show that the membership problem of LRAPP (i) can be decided in polynomial time for APP's with at most demonic non-determinism, and (ii) is NP-hard and in PSPACE for APP's with angelic non-determinism; moreover, the NP-hardness result holds already for APP's without probability and demonic non-determinism. Secondly, we show that the concentration problem over LRAPP can be solved in the same complexity as for the membership problem of LRAPP. Finally, we show that the expectation problem over LRAPP can be solved in 2EXPTIME and is PSPACE-hard even for APP's without probability and non-determinism (i.e., deterministic programs). Our experimental results demonstrate the effectiveness of our approach to answer the qualitative and quantitative questions over APP's with at most demonic non-determinism.},
author = {Chatterjee, Krishnendu and Fu, Hongfei and Novotny, Petr and Hasheminezhad, Rouzbeh},
location = {St. Petersburg, FL, USA},
pages = {327 -- 342},
publisher = {ACM},
title = {{Algorithmic analysis of qualitative and quantitative termination problems for affine probabilistic programs}},
doi = {10.1145/2837614.2837639},
volume = {20-22},
year = {2016},
}