@inproceedings{12767, abstract = {Several problems in planning and reactive synthesis can be reduced to the analysis of two-player quantitative graph games. Optimization is one form of analysis. We argue that in many cases it may be better to replace the optimization problem with the satisficing problem, where instead of searching for optimal solutions, the goal is to search for solutions that adhere to a given threshold bound. This work defines and investigates the satisficing problem on a two-player graph game with the discounted-sum cost model. We show that while the satisficing problem can be solved using numerical methods just like the optimization problem, this approach does not render compelling benefits over optimization. When the discount factor is, however, an integer, we present another approach to satisficing, which is purely based on automata methods. We show that this approach is algorithmically more performant – both theoretically and empirically – and demonstrates the broader applicability of satisficing over optimization.}, author = {Bansal, Suguman and Chatterjee, Krishnendu and Vardi, Moshe Y.}, booktitle = {27th International Conference on Tools and Algorithms for the Construction and Analysis of Systems}, isbn = {9783030720155}, issn = {1611-3349}, location = {Luxembourg City, Luxembourg}, pages = {20--37}, publisher = {Springer Nature}, title = {{On satisficing in quantitative games}}, doi = {10.1007/978-3-030-72016-2}, volume = {12651}, year = {2021}, } @inproceedings{10667, abstract = {Bayesian neural networks (BNNs) place distributions over the weights of a neural network to model uncertainty in the data and the network's prediction. We consider the problem of verifying safety when running a Bayesian neural network policy in a feedback loop with infinite time horizon systems. Compared to the existing sampling-based approaches, which are inapplicable to the infinite time horizon setting, we train a separate deterministic neural network that serves as an infinite time horizon safety certificate. In particular, we show that the certificate network guarantees the safety of the system over a subset of the BNN weight posterior's support. Our method first computes a safe weight set and then alters the BNN's weight posterior to reject samples outside this set. Moreover, we show how to extend our approach to a safe-exploration reinforcement learning setting, in order to avoid unsafe trajectories during the training of the policy. We evaluate our approach on a series of reinforcement learning benchmarks, including non-Lyapunovian safety specifications.}, author = {Lechner, Mathias and Žikelić, Ðorđe and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {35th Conference on Neural Information Processing Systems}, location = {Virtual}, title = {{Infinite time horizon safety of Bayesian neural networks}}, doi = {10.48550/arXiv.2111.03165}, year = {2021}, } @article{8793, abstract = {We study optimal election sequences for repeatedly selecting a (very) small group of leaders among a set of participants (players) with publicly known unique ids. In every time slot, every player has to select exactly one player that it considers to be the current leader, oblivious to the selection of the other players, but with the overarching goal of maximizing a given parameterized global (“social”) payoff function in the limit. We consider a quite generic model, where the local payoff achieved by a given player depends, weighted by some arbitrary but fixed real parameter, on the number of different leaders chosen in a round, the number of players that choose the given player as the leader, and whether the chosen leader has changed w.r.t. the previous round or not. The social payoff can be the maximum, average or minimum local payoff of the players. Possible applications include quite diverse examples such as rotating coordinator-based distributed algorithms and long-haul formation flying of social birds. Depending on the weights and the particular social payoff, optimal sequences can be very different, from simple round-robin where all players chose the same leader alternatingly every time slot to very exotic patterns, where a small group of leaders (at most 2) is elected in every time slot. Moreover, we study the question if and when a single player would not benefit w.r.t. its local payoff when deviating from the given optimal sequence, i.e., when our optimal sequences are Nash equilibria in the restricted strategy space of oblivious strategies. As this is the case for many parameterizations of our model, our results reveal that no punishment is needed to make it rational for the players to optimize the social payoff.}, author = {Zeiner, Martin and Schmid, Ulrich and Chatterjee, Krishnendu}, issn = {0166218X}, journal = {Discrete Applied Mathematics}, number = {1}, pages = {392--415}, publisher = {Elsevier}, title = {{Optimal strategies for selecting coordinators}}, doi = {10.1016/j.dam.2020.10.022}, volume = {289}, year = {2021}, } @article{9381, abstract = {A game of rock-paper-scissors is an interesting example of an interaction where none of the pure strategies strictly dominates all others, leading to a cyclic pattern. In this work, we consider an unstable version of rock-paper-scissors dynamics and allow individuals to make behavioural mistakes during the strategy execution. We show that such an assumption can break a cyclic relationship leading to a stable equilibrium emerging with only one strategy surviving. We consider two cases: completely random mistakes when individuals have no bias towards any strategy and a general form of mistakes. Then, we determine conditions for a strategy to dominate all other strategies. However, given that individuals who adopt a dominating strategy are still prone to behavioural mistakes in the observed behaviour, we may still observe extinct strategies. That is, behavioural mistakes in strategy execution stabilise evolutionary dynamics leading to an evolutionary stable and, potentially, mixed co-existence equilibrium.}, author = {Kleshnina, Maria and Streipert, Sabrina S. and Filar, Jerzy A. and Chatterjee, Krishnendu}, issn = {15537358}, journal = {PLoS Computational Biology}, number = {4}, publisher = {Public Library of Science}, title = {{Mistakes can stabilise the dynamics of rock-paper-scissors games}}, doi = {10.1371/journal.pcbi.1008523}, volume = {17}, year = {2021}, } @article{9640, abstract = {Selection and random drift determine the probability that novel mutations fixate in a population. Population structure is known to affect the dynamics of the evolutionary process. Amplifiers of selection are population structures that increase the fixation probability of beneficial mutants compared to well-mixed populations. Over the past 15 years, extensive research has produced remarkable structures called strong amplifiers which guarantee that every beneficial mutation fixates with high probability. But strong amplification has come at the cost of considerably delaying the fixation event, which can slow down the overall rate of evolution. However, the precise relationship between fixation probability and time has remained elusive. Here we characterize the slowdown effect of strong amplification. First, we prove that all strong amplifiers must delay the fixation event at least to some extent. Second, we construct strong amplifiers that delay the fixation event only marginally as compared to the well-mixed populations. Our results thus establish a tight relationship between fixation probability and time: Strong amplification always comes at a cost of a slowdown, but more than a marginal slowdown is not needed.}, author = {Tkadlec, Josef and Pavlogiannis, Andreas and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {20411723}, journal = {Nature Communications}, number = {1}, publisher = {Springer Nature}, title = {{Fast and strong amplifiers of natural selection}}, doi = {10.1038/s41467-021-24271-w}, volume = {12}, year = {2021}, }