@article{14820, abstract = {We consider a natural problem dealing with weighted packet selection across a rechargeable link, which, e.g., finds applications in cryptocurrency networks. The capacity of a link (u, v) is determined by how much nodes u and v allocate for this link. Specifically, the input is a finite ordered sequence of packets that arrive in both directions along a link. Given (u, v) and a packet of weight x going from u to v, node u can either accept or reject the packet. If u accepts the packet, the capacity on link (u, v) decreases by x. Correspondingly, v's capacity on (u, v) increases by x. If a node rejects the packet, this will entail a cost affinely linear in the weight of the packet. A link is “rechargeable” in the sense that the total capacity of the link has to remain constant, but the allocation of capacity at the ends of the link can depend arbitrarily on the nodes' decisions. The goal is to minimise the sum of the capacity injected into the link and the cost of rejecting packets. We show that the problem is NP-hard, but can be approximated efficiently with a ratio of (1+ε)⋅(1+√3) for some arbitrary ε>0.}, author = {Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X}, issn = {0304-3975}, journal = {Theoretical Computer Science}, keywords = {General Computer Science, Theoretical Computer Science}, publisher = {Elsevier}, title = {{Weighted packet selection for rechargeable links in cryptocurrency networks: Complexity and approximation}}, doi = {10.1016/j.tcs.2023.114353}, volume = {989}, year = {2024}, } @inproceedings{15006, abstract = {Graphical games are a useful framework for modeling the interactions of (selfish) agents who are connected via an underlying topology and whose behaviors influence each other. They have wide applications ranging from computer science to economics and biology. Yet, even though an agent’s payoff only depends on the actions of their direct neighbors in graphical games, computing the Nash equilibria and making statements about the convergence time of "natural" local dynamics in particular can be highly challenging. In this work, we present a novel approach for classifying the complexity of Nash equilibria in graphical games by establishing a connection to local graph algorithms, a subfield of distributed computing. In particular, we make the observation that the equilibria of graphical games are equivalent to locally verifiable labelings (LVL) in graphs: vertex labelings which are verifiable with constant-round local algorithms. This connection allows us to derive novel lower bounds on the convergence time to equilibrium of best-response dynamics in graphical games. Since we establish that distributed convergence can sometimes be provably slow, we also introduce and give bounds on an intuitive notion of "time-constrained" inefficiency of best responses. We exemplify how our results can be used in the implementation of mechanisms that ensure convergence of best responses to a Nash equilibrium.
Our results thus also give insight into the convergence of strategy-proof algorithms for graphical games, which is still not well understood.}, author = {Hirvonen, Juho and Schmid, Laura and Chatterjee, Krishnendu and Schmid, Stefan}, booktitle = {27th International Conference on Principles of Distributed Systems}, isbn = {9783959773089}, issn = {1868-8969}, location = {Tokyo, Japan}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{On the convergence time in graphical games: A locality-sensitive approach}}, doi = {10.4230/LIPIcs.OPODIS.2023.11}, volume = {286}, year = {2024}, } @article{15083, abstract = {Direct reciprocity is a powerful mechanism for cooperation in social dilemmas. The very logic of reciprocity, however, seems to require that individuals are symmetric, and that everyone has the same means to influence each other’s payoffs. Yet in many applications, individuals are asymmetric. Herein, we study the effect of asymmetry in linear public good games. Individuals may differ in their endowments (their ability to contribute to a public good) and in their productivities (how effective their contributions are). Given the individuals’ productivities, we ask which allocation of endowments is optimal for cooperation. To this end, we consider two notions of optimality. The first notion focuses on the resilience of cooperation. The respective endowment distribution ensures that full cooperation is feasible even under the most adverse conditions. The second notion focuses on efficiency. The corresponding endowment distribution maximizes group welfare. Using analytical methods, we fully characterize these two endowment distributions. This analysis reveals that both optimality notions favor some endowment inequality: More productive players ought to get higher endowments. Yet the two notions disagree on how unequal endowments are supposed to be. A focus on resilience results in less inequality. With additional simulations, we show that the optimal endowment allocation needs to account for both the resilience and the efficiency of cooperation.}, author = {Hübner, Valentin and Staab, Manuel and Hilbe, Christian and Chatterjee, Krishnendu and Kleshnina, Maria}, issn = {1091-6490}, journal = {Proceedings of the National Academy of Sciences}, number = {10}, publisher = {Proceedings of the National Academy of Sciences}, title = {{Efficiency and resilience of cooperation in asymmetric social dilemmas}}, doi = {10.1073/pnas.2315558121}, volume = {121}, year = {2024}, } @misc{15108, abstract = {Computer code used in the research article "Efficiency and resilience of cooperation in asymmetric social dilemmas" (by Valentin Hübner, Manuel Staab, Christian Hilbe, Krishnendu Chatterjee, and Maria Kleshnina). We used different implementations for the case of two and three players, both described below.}, author = {Hübner, Valentin and Kleshnina, Maria}, publisher = {Zenodo}, title = {{Computer code for "Efficiency and resilience of cooperation in asymmetric social dilemmas"}}, doi = {10.5281/ZENODO.10639167}, year = {2024}, } @inproceedings{12676, abstract = {Turn-based stochastic games (aka simple stochastic games) are two-player zero-sum games played on directed graphs with probabilistic transitions. The goal of player-max is to maximize the probability to reach a target state against the adversarial player-min.
These games lie in NP ∩ coNP and are among the rare combinatorial problems that belong to this complexity class for which the existence of a polynomial-time algorithm is a major open question. While a randomized sub-exponential time algorithm exists, all known deterministic algorithms require exponential time in the worst case. An important open question has been whether faster algorithms can be obtained parameterized by the treewidth of the game graph. Even a deterministic sub-exponential time algorithm for turn-based stochastic games of constant treewidth has remained elusive. In this work, our main result is a deterministic algorithm to solve turn-based stochastic games that, given a game with n states, treewidth at most t, and the bit-complexity of the probabilistic transition function log D, has running time O((t n² log D)^(t log n)). In particular, our algorithm is quasi-polynomial time for games with constant or poly-logarithmic treewidth.}, author = {Chatterjee, Krishnendu and Meggendorfer, Tobias and Saona Urmeneta, Raimundo J and Svoboda, Jakub}, booktitle = {Proceedings of the 2023 Annual ACM-SIAM Symposium on Discrete Algorithms}, isbn = {9781611977554}, location = {Florence, Italy}, pages = {4590--4605}, publisher = {Society for Industrial and Applied Mathematics}, title = {{Faster algorithm for turn-based stochastic games with bounded treewidth}}, doi = {10.1137/1.9781611977554.ch173}, year = {2023}, } @inproceedings{13142, abstract = {Reinforcement learning has received much attention for learning controllers of deterministic systems. We consider a learner-verifier framework for stochastic control systems and survey recent methods that formally guarantee a conjunction of reachability and safety properties. Given a property and a lower bound on the probability of the property being satisfied, our framework jointly learns a control policy and a formal certificate to ensure the satisfaction of the property with a desired probability threshold. Both the control policy and the formal certificate are continuous functions from states to reals, which are learned as parameterized neural networks. While in the deterministic case, the certificates are invariant and barrier functions for safety, or Lyapunov and ranking functions for liveness, in the stochastic case the certificates are supermartingales. For certificate verification, we use interval arithmetic abstract interpretation to bound the expected values of neural network functions.}, author = {Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde}, booktitle = {Tools and Algorithms for the Construction and Analysis of Systems}, isbn = {9783031308222}, issn = {1611-3349}, location = {Paris, France}, pages = {3--25}, publisher = {Springer Nature}, title = {{A learner-verifier framework for neural network controllers and certificates of stochastic systems}}, doi = {10.1007/978-3-031-30823-9_1}, volume = {13993}, year = {2023}, } @article{12787, abstract = {Populations evolve in spatially heterogeneous environments. While a certain trait might bring a fitness advantage in some patch of the environment, a different trait might be advantageous in another patch. Here, we study the Moran birth–death process with two types of individuals in a population stretched across two patches of size N, each patch favouring one of the two types. We show that the long-term fate of such populations crucially depends on the migration rate μ between the patches.
To classify the possible fates, we use the distinction between polynomial (short) and exponential (long) timescales. We show that when μ is high, one of the two types fixates on the whole population after a number of steps that is only polynomial in N. By contrast, when μ is low, each type holds the majority in the patch where it is favoured for a number of steps that is at least exponential in N. Moreover, we precisely identify the threshold migration rate μ⋆ that separates those two scenarios, thereby exactly delineating the situations that support long-term coexistence of the two types. We also discuss the case of various cycle graphs and we present computer simulations that perfectly match our analytical results.}, author = {Svoboda, Jakub and Tkadlec, Josef and Kaveh, Kamran and Chatterjee, Krishnendu}, issn = {1471-2946}, journal = {Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences}, number = {2271}, publisher = {The Royal Society}, title = {{Coexistence times in the Moran process with environmental heterogeneity}}, doi = {10.1098/rspa.2022.0685}, volume = {479}, year = {2023}, } @article{12861, abstract = {The field of indirect reciprocity investigates how social norms can foster cooperation when individuals continuously monitor and assess each other’s social interactions. By adhering to certain social norms, cooperating individuals can improve their reputation and, in turn, receive benefits from others. Eight social norms, known as the “leading eight”, have been shown to effectively promote the evolution of cooperation as long as information is public and reliable. These norms categorize group members as either ‘good’ or ‘bad’. In this study, we examine a scenario where individuals instead assign nuanced reputation scores to each other, and only cooperate with those whose reputation exceeds a certain threshold. We find both analytically and through simulations that such quantitative assessments are error-correcting, thus facilitating cooperation in situations where information is private and unreliable. Moreover, our results identify four specific norms that are robust to such conditions, and may be relevant for helping to sustain cooperation in natural populations.}, author = {Schmid, Laura and Ekbatani, Farbod and Hilbe, Christian and Chatterjee, Krishnendu}, issn = {2041-1723}, journal = {Nature Communications}, publisher = {Springer Nature}, title = {{Quantitative assessment can stabilize indirect reciprocity under imperfect information}}, doi = {10.1038/s41467-023-37817-x}, volume = {14}, year = {2023}, } @inproceedings{14242, abstract = {We study the problem of training and certifying adversarially robust quantized neural networks (QNNs). Quantization is a technique for making neural networks more efficient by running them using low-bit integer arithmetic and is therefore commonly adopted in industry. Recent work has shown that floating-point neural networks that have been verified to be robust can become vulnerable to adversarial attacks after quantization, and certification of the quantized representation is necessary to guarantee robustness. In this work, we present quantization-aware interval bound propagation (QA-IBP), a novel method for training robust QNNs. Inspired by advances in robust learning of non-quantized networks, our training algorithm computes the gradient of an abstract representation of the actual network. Unlike existing approaches, our method can handle the discrete semantics of QNNs.
Based on QA-IBP, we also develop a complete verification procedure for verifying the adversarial robustness of QNNs, which is guaranteed to terminate and produce a correct answer. Compared to existing approaches, the key advantage of our verification procedure is that it runs entirely on GPU or other accelerator devices. We demonstrate experimentally that our approach significantly outperforms existing methods and establishes a new state of the art for training and certifying the robustness of QNNs.}, author = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A and Rus, Daniela}, booktitle = {Proceedings of the 37th AAAI Conference on Artificial Intelligence}, isbn = {9781577358800}, location = {Washington, DC, United States}, number = {12}, pages = {14964--14973}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Quantization-aware interval bound propagation for training certifiably robust quantized neural networks}}, doi = {10.1609/aaai.v37i12.26747}, volume = {37}, year = {2023}, } @inproceedings{14243, abstract = {Two-player zero-sum "graph games" are central in logic, verification, and multi-agent systems. The game proceeds by placing a token on a vertex of a graph, and allowing the players to move it to produce an infinite path, which determines the winner or payoff of the game. Traditionally, the players alternate turns in moving the token. In "bidding games", however, the players have budgets and in each turn, an auction (bidding) determines which player moves the token. So far, bidding games have only been studied as full-information games. In this work we initiate the study of partial-information bidding games: we study bidding games in which a player's initial budget is drawn from a known probability distribution. We show that while for some bidding mechanisms and objectives, it is straightforward to adapt the results from the full-information setting to the partial-information setting, for others, the analysis is significantly more challenging, requires new techniques, and gives rise to interesting results. Specifically, we study games with "mean-payoff" objectives in combination with "poorman" bidding. We construct optimal strategies for a partially-informed player who plays against a fully-informed adversary. We show that, somewhat surprisingly, the "value" under pure strategies does not necessarily exist in such games.}, author = {Avni, Guy and Jecker, Ismael R and Zikelic, Dorde}, booktitle = {Proceedings of the 37th AAAI Conference on Artificial Intelligence}, isbn = {9781577358800}, location = {Washington, DC, United States}, number = {5}, pages = {5464--5471}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Bidding graph games with partially-observable budgets}}, doi = {10.1609/aaai.v37i5.25679}, volume = {37}, year = {2023}, } @inproceedings{14259, abstract = {We provide a learning-based technique for guessing a winning strategy in a parity game originating from an LTL synthesis problem. A cheaply obtained guess can be useful in several applications. Not only can the guessed strategy be applied as best-effort in cases where the game’s huge size prohibits rigorous approaches, but it can also increase the scalability of rigorous LTL synthesis in several ways. Firstly, checking whether a guessed strategy is winning is easier than constructing one. Secondly, even if the guess is wrong in some places, it can be fixed by strategy iteration faster than constructing one from scratch.
Thirdly, the guess can be used in on-the-fly approaches to prioritize exploration in the most fruitful directions. In contrast to previous works, we (i) reflect the highly structured logical information in the game’s states, the so-called semantic labelling, coming from the recent LTL-to-automata translations, and (ii) learn to reflect it properly by learning from previously solved games, bringing the solving process closer to human-like reasoning.}, author = {Kretinsky, Jan and Meggendorfer, Tobias and Prokop, Maximilian and Rieder, Sabine}, booktitle = {35th International Conference on Computer Aided Verification}, isbn = {9783031377051}, issn = {1611-3349}, location = {Paris, France}, pages = {390--414}, publisher = {Springer Nature}, title = {{Guessing winning policies in LTL synthesis by semantic learning}}, doi = {10.1007/978-3-031-37706-8_20}, volume = {13964}, year = {2023}, } @inproceedings{14318, abstract = {Probabilistic recurrence relations (PRRs) are a standard formalism for describing the runtime of a randomized algorithm. Given a PRR and a time limit κ, we consider the tail probability Pr[T≥κ], i.e., the probability that the randomized runtime T of the PRR exceeds κ. Our focus is the formal analysis of tail bounds that aims at finding a tight asymptotic upper bound u≥Pr[T≥κ]. To address this problem, the classical and most well-known approach is the cookbook method by Karp (JACM 1994), while other approaches are mostly limited to deriving tail bounds of specific PRRs via involved custom analysis. In this work, we propose a novel approach for deriving the common exponentially-decreasing tail bounds for PRRs whose preprocessing time and random passed sizes observe a discrete or (piecewise) uniform distribution and whose recursive call is either a single procedure call or a divide-and-conquer. We first establish a theoretical approach via Markov’s inequality, and then instantiate the theoretical approach with a template-based algorithmic approach via a refined treatment of exponentiation. Experimental evaluation shows that our algorithmic approach is capable of deriving tail bounds that are (i) asymptotically tighter than Karp’s method, (ii) match the best-known manually-derived asymptotic tail bound for QuickSelect, and (iii) are only slightly worse (by a log log n factor) than the manually-proven optimal asymptotic tail bound for QuickSort. Moreover, our algorithmic approach handles all examples (including realistic PRRs such as QuickSort, QuickSelect, DiameterComputation, etc.) in less than 0.1 s, showing that our approach is efficient in practice.}, author = {Sun, Yican and Fu, Hongfei and Chatterjee, Krishnendu and Goharshady, Amir Kafshdar}, booktitle = {Computer Aided Verification}, isbn = {9783031377082}, issn = {1611-3349}, location = {Paris, France}, pages = {16--39}, publisher = {Springer Nature}, title = {{Automated tail bound analysis for probabilistic recurrence relations}}, doi = {10.1007/978-3-031-37709-9_2}, volume = {13966}, year = {2023}, } @inproceedings{14317, abstract = {Markov decision processes can be viewed as transformers of probability distributions. While this view is useful from a practical standpoint to reason about trajectories of distributions, basic reachability and safety problems are known to be computationally intractable (i.e., Skolem-hard) to solve in such models. Further, we show that even for simple examples of MDPs, strategies for safety objectives over distributions can require infinite memory and randomization.
In light of this, we present a novel overapproximation approach to synthesize strategies in an MDP, such that a safety objective over the distributions is met. More precisely, we develop a new framework for template-based synthesis of certificates as affine distributional and inductive invariants for safety objectives in MDPs. We provide two algorithms within this framework. One can only synthesize memoryless strategies, but has relative completeness guarantees, while the other can synthesize general strategies. The runtime complexity of both algorithms is in PSPACE. We implement these algorithms and show that they can solve several non-trivial examples.}, author = {Akshay, S. and Chatterjee, Krishnendu and Meggendorfer, Tobias and Zikelic, Dorde}, booktitle = {International Conference on Computer Aided Verification}, isbn = {9783031377082}, issn = {1611-3349}, location = {Paris, France}, pages = {86--112}, publisher = {Springer Nature}, title = {{MDPs as distribution transformers: Affine invariant synthesis for safety objectives}}, doi = {10.1007/978-3-031-37709-9_5}, volume = {13966}, year = {2023}, } @article{12738, abstract = {We study turn-based stochastic zero-sum games with lexicographic preferences over objectives. Stochastic games are standard models in control, verification, and synthesis of stochastic reactive systems that exhibit both randomness as well as controllable and adversarial non-determinism. Lexicographic order allows one to consider multiple objectives with a strict preference order. To the best of our knowledge, stochastic games with lexicographic objectives have not been studied before. For a mixture of reachability and safety objectives, we show that deterministic lexicographically optimal strategies exist and memory is only required to remember the already satisfied and violated objectives. For a constant number of objectives, we show that the relevant decision problem is in NP∩coNP, matching the currently known bound for single objectives; and in general the decision problem is PSPACE-hard and can be solved in NEXPTIME∩coNEXPTIME. We present an algorithm that computes the lexicographically optimal strategies via a reduction to the computation of optimal strategies in a sequence of single-objective games. For omega-regular objectives, we restrict our analysis to one-player games, also known as Markov decision processes. We show that lexicographically optimal strategies exist and need either randomization or finite memory. We present an algorithm that solves the relevant decision problem in polynomial time. We have implemented our algorithms and report experimental results on various case studies.}, author = {Chatterjee, Krishnendu and Katoen, Joost P and Mohr, Stefanie and Weininger, Maximilian and Winkler, Tobias}, issn = {1572-8102}, journal = {Formal Methods in System Design}, publisher = {Springer Nature}, title = {{Stochastic games with lexicographic objectives}}, doi = {10.1007/s10703-023-00411-4}, year = {2023}, } @article{10770, abstract = {Mathematical models often aim to describe a complicated mechanism in a cohesive and simple manner. However, striking the right balance between being simple enough and being overly simplistic is a challenging task. Frequently, game-theoretic models have an underlying assumption that players, whenever they choose to execute a specific action, do so perfectly. In fact, it is rare that action execution perfectly coincides with the intentions of individuals, giving rise to behavioural mistakes.
The concept of incompetence of players was suggested to address this issue in game-theoretic settings. Under the assumption of incompetence, players have non-zero probabilities of executing a different strategy from the one they chose, leading to stochastic outcomes of the interactions. In this article, we survey results related to the concept of incompetence in classic as well as evolutionary game theory and provide several new results. We also suggest future extensions of the model and argue why it is important to take into account behavioural mistakes when analysing interactions among players in both economic and biological settings.}, author = {Graham, Thomas and Kleshnina, Maria and Filar, Jerzy A.}, issn = {2153-0793}, journal = {Dynamic Games and Applications}, pages = {231--264}, publisher = {Springer Nature}, title = {{Where do mistakes lead? A survey of games with incompetent players}}, doi = {10.1007/s13235-022-00425-3}, volume = {13}, year = {2023}, } @inproceedings{14417, abstract = {Entropic risk (ERisk) is an established risk measure in finance, quantifying risk by an exponential re-weighting of rewards. We study ERisk for the first time in the context of turn-based stochastic games with the total reward objective. This gives rise to an objective function that demands the control of systems in a risk-averse manner. We show that the resulting games are determined and, in particular, admit optimal memoryless deterministic strategies. This contrasts with risk measures that have previously been considered in the special case of Markov decision processes and that require randomization and/or memory. We provide several results on the decidability and the computational complexity of the threshold problem, i.e. whether the optimal value of ERisk exceeds a given threshold. In the most general case, the problem is decidable subject to Schanuel’s conjecture. If all inputs are rational, the resulting threshold problem can be solved using algebraic numbers, leading to decidability via a polynomial-time reduction to the existential theory of the reals. Further restrictions on the encoding of the input allow the solution of the threshold problem in NP∩coNP. Finally, an approximation algorithm for the optimal value of ERisk is provided.}, author = {Baier, Christel and Chatterjee, Krishnendu and Meggendorfer, Tobias and Piribauer, Jakob}, booktitle = {48th International Symposium on Mathematical Foundations of Computer Science}, isbn = {9783959772921}, issn = {1868-8969}, location = {Bordeaux, France}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Entropic risk for turn-based stochastic games}}, doi = {10.4230/LIPIcs.MFCS.2023.15}, volume = {272}, year = {2023}, } @article{12706, abstract = {Allometric settings of population dynamics models are appealing due to their parsimonious nature and broad utility when studying system-level effects. Here, we parameterise the size-scaled Rosenzweig-MacArthur differential equations to eliminate prey-mass dependency, facilitating an in-depth analytic study of the equations which incorporates scaling parameters’ contributions to coexistence. We define the functional response term to match empirical findings, and examine situations where metabolic theory derivations and observation diverge.
The dynamical properties of the Rosenzweig-MacArthur system, encompassing the distribution of size-abundance equilibria, the scaling of period and amplitude of population cycling, and relationships between predator and prey abundances, are consistent with empirical observation. Our parameterisation is an accurate minimal model across more than 15 orders of magnitude in mass.}, author = {Mckerral, Jody C. and Kleshnina, Maria and Ejov, Vladimir and Bartle, Louise and Mitchell, James G. and Filar, Jerzy A.}, issn = {1932-6203}, journal = {PLoS One}, number = {2}, pages = {e0279838}, publisher = {Public Library of Science}, title = {{Empirical parameterisation and dynamical analysis of the allometric Rosenzweig-MacArthur equations}}, doi = {10.1371/journal.pone.0279838}, volume = {18}, year = {2023}, } @inproceedings{14518, abstract = {We consider bidding games, a class of two-player zero-sum graph games. The game proceeds as follows. Both players have bounded budgets. A token is placed on a vertex of a graph; in each turn the players simultaneously submit bids, and the higher bidder moves the token, where we break bidding ties in favor of Player 1. Player 1 wins the game iff the token visits a designated target vertex. We consider, for the first time, poorman discrete-bidding in which the granularity of the bids is restricted and the higher bid is paid to the bank. Previous work either did not impose granularity restrictions or considered Richman bidding (bids are paid to the opponent). While the latter mechanisms are technically more accessible, the former is more appealing from a practical standpoint. Our study focuses on threshold budgets: the necessary and sufficient initial budget required for Player 1 to ensure winning against a given Player 2 budget. We first show the existence of thresholds. In DAGs, we show that threshold budgets can be approximated with error bounds by thresholds under continuous bidding and that they exhibit a periodic behavior. We identify closed-form solutions in special cases. We implement and experiment with an algorithm to find threshold budgets.}, author = {Avni, Guy and Meggendorfer, Tobias and Sadhukhan, Suman and Tkadlec, Josef and Zikelic, Dorde}, booktitle = {Frontiers in Artificial Intelligence and Applications}, isbn = {9781643684369}, issn = {0922-6389}, location = {Krakow, Poland}, pages = {141--148}, publisher = {IOS Press}, title = {{Reachability poorman discrete-bidding games}}, doi = {10.3233/FAIA230264}, volume = {372}, year = {2023}, } @inproceedings{14559, abstract = {We consider the problem of learning control policies in discrete-time stochastic systems which guarantee that the system stabilizes within some specified stabilization region with probability 1. Our approach is based on the novel notion of stabilizing ranking supermartingales (sRSMs) that we introduce in this work. Our sRSMs overcome the limitation of methods proposed in previous works whose applicability is restricted to systems in which the stabilizing region cannot be left once entered under any control policy. We present a learning procedure that learns a control policy together with an sRSM that formally certifies probability-1 stability, both learned as neural networks. We show that this procedure can also be adapted to formally verifying that, under a given Lipschitz continuous control policy, the stochastic system stabilizes within some stabilizing region with probability 1.
Our experimental evaluation shows that our learning procedure can successfully learn provably stabilizing policies in practice.}, author = {Ansaripour, Matin and Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde}, booktitle = {21st International Symposium on Automated Technology for Verification and Analysis}, isbn = {9783031453281}, issn = {1611-3349}, location = {Singapore, Singapore}, pages = {357--379}, publisher = {Springer Nature}, title = {{Learning provably stabilizing neural controllers for discrete-time stochastic systems}}, doi = {10.1007/978-3-031-45329-8_17}, volume = {14215}, year = {2023}, } @inproceedings{13238, abstract = {We consider a natural problem dealing with weighted packet selection across a rechargeable link, which, e.g., finds applications in cryptocurrency networks. The capacity of a link (u, v) is determined by how much nodes u and v allocate for this link. Specifically, the input is a finite ordered sequence of packets that arrive in both directions along a link. Given (u, v) and a packet of weight x going from u to v, node u can either accept or reject the packet. If u accepts the packet, the capacity on link (u, v) decreases by x. Correspondingly, v’s capacity on (u, v) increases by x. If a node rejects the packet, this will entail a cost affinely linear in the weight of the packet. A link is “rechargeable” in the sense that the total capacity of the link has to remain constant, but the allocation of capacity at the ends of the link can depend arbitrarily on the nodes’ decisions. The goal is to minimise the sum of the capacity injected into the link and the cost of rejecting packets. We show that the problem is NP-hard, but can be approximated efficiently with a ratio of (1+ε)⋅(1+√3) for some arbitrary ε>0.}, author = {Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X}, booktitle = {SIROCCO 2023: Structural Information and Communication Complexity}, isbn = {9783031327322}, issn = {1611-3349}, location = {Alcala de Henares, Spain}, pages = {576--594}, publisher = {Springer Nature}, title = {{Weighted packet selection for rechargeable links in cryptocurrency networks: Complexity and approximation}}, doi = {10.1007/978-3-031-32733-9_26}, volume = {13892}, year = {2023}, } @article{14657, abstract = {Natural selection is usually studied between mutants that differ in reproductive rate, but are subject to the same population structure. Here we explore how natural selection acts on mutants that have the same reproductive rate, but different population structures. In our framework, population structure is given by a graph that specifies where offspring can disperse. The invading mutant disperses offspring on a different graph than the resident wild-type. We find that more densely connected dispersal graphs tend to increase the invader’s fixation probability, but the exact relationship between structure and fixation probability is subtle. We present three main results. First, we prove that if both invader and resident are on complete dispersal graphs, then removing a single edge in the invader’s dispersal graph reduces its fixation probability. Second, we show that for certain island models, higher connectivity of the invader increases its fixation probability, but the magnitude of the effect depends on the exact layout of the connections.
Third, we show that for lattices the effect of different connectivity is comparable to that of different fitness: for large population size, the invader’s fixation probability is either constant or exponentially small, depending on whether it is more or less connected than the resident.}, author = {Tkadlec, Josef and Kaveh, Kamran and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {1742-5662}, journal = {Journal of the Royal Society, Interface}, number = {208}, publisher = {The Royal Society}, title = {{Evolutionary dynamics of mutants that modify population structure}}, doi = {10.1098/rsif.2023.0355}, volume = {20}, year = {2023}, } @article{13258, abstract = {Many human interactions feature the characteristics of social dilemmas where individual actions have consequences for the group and the environment. The feedback between behavior and environment can be studied with the framework of stochastic games. In stochastic games, the state of the environment can change, depending on the choices made by group members. Past work suggests that such feedback can reinforce cooperative behaviors. In particular, cooperation can evolve in stochastic games even if it is infeasible in each separate repeated game. In stochastic games, participants have an interest in conditioning their strategies on the state of the environment. Yet in many applications, precise information about the state could be scarce. Here, we study how the availability of information (or lack thereof) shapes the evolution of cooperation. Already for simple examples of two-state games we find surprising effects. In some cases, cooperation is only possible if there is precise information about the state of the environment. In other cases, cooperation is most abundant when there is no information about the state of the environment. We systematically analyze all stochastic games of a given complexity class, to determine when receiving information about the environment is better, neutral, or worse for the evolution of cooperation.}, author = {Kleshnina, Maria and Hilbe, Christian and Simsa, Stepan and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {2041-1723}, journal = {Nature Communications}, publisher = {Springer Nature}, title = {{The effect of environmental information on evolution of cooperation in stochastic games}}, doi = {10.1038/s41467-023-39625-9}, volume = {14}, year = {2023}, } @misc{13336, author = {Kleshnina, Maria}, publisher = {Zenodo}, title = {{kleshnina/stochgames_info: The effect of environmental information on evolution of cooperation in stochastic games}}, doi = {10.5281/ZENODO.8059564}, year = {2023}, } @inproceedings{13967, abstract = {A classic solution technique for Markov decision processes (MDP) and stochastic games (SG) is value iteration (VI). Due to its good practical performance, this approximative approach is typically preferred over exact techniques, even though no practical bounds on the imprecision of the result could be given until recently. As a consequence, even the most widely used model checkers could return arbitrarily wrong results. Over the past decade, different works derived stopping criteria, indicating when the precision reaches the desired level, for various settings, in particular MDP with reachability, total reward, and mean payoff, and SG with reachability. In this paper, we provide the first stopping criteria for VI on SG with total reward and mean payoff, yielding the first anytime algorithms in these settings.
To this end, we provide the solution in two flavours: first through a reduction to the MDP case and second directly on SG. The former is simpler and automatically utilizes any advances on MDP. The latter allows for more local computations, heading towards better practical efficiency. Our solution unifies the previously mentioned approaches for MDP and SG and their underlying ideas. To achieve this, we isolate objective-specific subroutines as well as identify objective-independent concepts. These structural concepts, while surprisingly simple, form the very essence of the unified solution.}, author = {Kretinsky, Jan and Meggendorfer, Tobias and Weininger, Maximilian}, booktitle = {38th Annual ACM/IEEE Symposium on Logic in Computer Science}, isbn = {9798350335873}, issn = {1043-6871}, location = {Boston, MA, United States}, publisher = {Institute of Electrical and Electronics Engineers}, title = {{Stopping criteria for value iteration on stochastic games with quantitative objectives}}, doi = {10.1109/LICS56636.2023.10175771}, volume = {2023}, year = {2023}, } @article{12833, abstract = {The input to the token swapping problem is a graph with vertices v_1, v_2, …, v_n, and n tokens with labels 1, 2, …, n, one on each vertex. The goal is to get token i to vertex v_i for all i = 1, …, n using a minimum number of swaps, where a swap exchanges the tokens on the endpoints of an edge. Token swapping on a tree, also known as “sorting with a transposition tree,” is known neither to be in P nor to be NP-complete. We present some partial results: 1. An optimum swap sequence may need to perform a swap on a leaf vertex that has the correct token (a “happy leaf”), disproving a conjecture of Vaughan. 2. Any algorithm that fixes happy leaves—as all known approximation algorithms for the problem do—has approximation factor at least 4/3. Furthermore, the two best-known 2-approximation algorithms have approximation factor exactly 2. 3. A generalized problem—weighted coloured token swapping—is NP-complete on trees, but solvable in polynomial time on paths and stars. In this version, tokens and vertices have colours, and colours have weights. The goal is to get every token to a vertex of the same colour, and the cost of a swap is the sum of the weights of the two tokens involved.}, author = {Biniaz, Ahmad and Jain, Kshitij and Lubiw, Anna and Masárová, Zuzana and Miltzow, Tillmann and Mondal, Debajyoti and Naredla, Anurag Murty and Tkadlec, Josef and Turcotte, Alexi}, issn = {1365-8050}, journal = {Discrete Mathematics and Theoretical Computer Science}, number = {2}, publisher = {EPI Sciences}, title = {{Token swapping on trees}}, doi = {10.46298/DMTCS.8383}, volume = {24}, year = {2023}, } @inproceedings{14736, abstract = {Payment channel networks (PCNs) are a promising technology to improve the scalability of cryptocurrencies. PCNs, however, face the challenge that the frequent usage of certain routes may deplete channels in one direction, and hence prevent further transactions. In order to reap the full potential of PCNs, recharging and rebalancing mechanisms are required to provision channels, as well as an admission control logic to decide which transactions to reject in case capacity is insufficient. This paper presents a formal model of this optimisation problem. In particular, we consider an online algorithms perspective, where transactions arrive over time in an unpredictable manner. Our main contributions are competitive online algorithms which come with provable guarantees over time.
We empirically evaluate our algorithms on randomly generated transactions to compare their average performance to our theoretical bounds. We also show how this model and approach differ from related problems in classic communication networks.}, author = {Bastankhah, Mahsa and Chatterjee, Krishnendu and Maddah-Ali, Mohammad Ali and Schmid, Stefan and Svoboda, Jakub and Yeo, Michelle X}, booktitle = {27th International Conference on Financial Cryptography and Data Security}, isbn = {9783031477539}, issn = {1611-3349}, location = {Bol, Brac, Croatia}, pages = {309--325}, publisher = {Springer Nature}, title = {{R2: Boosting liquidity in payment channel networks with online admission control}}, doi = {10.1007/978-3-031-47754-6_18}, volume = {13950}, year = {2023}, } @phdthesis{14539, abstract = {Stochastic systems provide a formal framework for modelling and quantifying uncertainty in systems and have been widely adopted in many application domains. Formal verification and control of finite state stochastic systems, a subfield of formal methods also known as probabilistic model checking, is well studied. In contrast, formal verification and control of infinite state stochastic systems have received comparatively less attention. However, infinite state stochastic systems commonly arise in practice. For instance, probabilistic models that contain continuous probability distributions such as normal or uniform, or stochastic dynamical systems which are a classical model for control under uncertainty, both give rise to infinite state systems. The goal of this thesis is to contribute to laying theoretical and algorithmic foundations of fully automated formal verification and control of infinite state stochastic systems, with a particular focus on systems that may be executed over a long or infinite time. We consider formal verification of infinite state stochastic systems in the setting of static analysis of probabilistic programs and formal control in the setting of controller synthesis in stochastic dynamical systems. For both problems, we present some of the first fully automated methods for probabilistic (a.k.a. quantitative) reachability and safety analysis applicable to infinite time horizon systems. We also advance the state of the art of probability 1 (a.k.a. qualitative) reachability analysis for both problems. Finally, for formal controller synthesis in stochastic dynamical systems, we present a novel framework for learning neural network control policies in stochastic dynamical systems with formal guarantees on correctness with respect to quantitative reachability, safety or reach-avoid specifications.}, author = {Zikelic, Dorde}, isbn = {978-3-99078-036-7}, issn = {2663-337X}, pages = {256}, publisher = {Institute of Science and Technology Austria}, title = {{Automated verification and control of infinite state stochastic systems}}, doi = {10.15479/14539}, year = {2023}, } @article{14778, abstract = {We consider the almost-sure (a.s.) termination problem for probabilistic programs, which are a stochastic extension of classical imperative programs. Lexicographic ranking functions provide a sound and practical approach for termination of non-probabilistic programs, and their extension to probabilistic programs is achieved via lexicographic ranking supermartingales (LexRSMs). However, LexRSMs introduced in previous work have a limitation that impedes their automation: all of their components have to be non-negative in all reachable states.
This might result in a LexRSM not existing even for simple terminating programs. Our contributions are twofold. First, we introduce a generalization of LexRSMs that allows for some components to be negative. This standard feature of non-probabilistic termination proofs was hitherto not known to be sound in the probabilistic setting, as the soundness proof requires a careful analysis of the underlying stochastic process. Second, we present polynomial-time algorithms using our generalized LexRSMs for proving a.s. termination in broad classes of linear-arithmetic programs.}, author = {Chatterjee, Krishnendu and Kafshdar Goharshady, Ehsan and Novotný, Petr and Zárevúcky, Jiří and Zikelic, Dorde}, issn = {1433-299X}, journal = {Formal Aspects of Computing}, keywords = {Theoretical Computer Science, Software}, number = {2}, publisher = {Association for Computing Machinery}, title = {{On lexicographic proof rules for probabilistic termination}}, doi = {10.1145/3585391}, volume = {35}, year = {2023}, } @inproceedings{14456, abstract = {In this paper, we present novel algorithms that efficiently compute a shortest reconfiguration sequence between two given dominating sets in trees and interval graphs under the TOKEN SLIDING model. In this problem, a graph is provided along with two of its dominating sets, which can be imagined as tokens placed on vertices. The objective is to find a shortest sequence of dominating sets that transforms one set into the other, with each set in the sequence resulting from sliding a single token in the previous set. While identifying any such sequence has been well studied, our work presents the first polynomial-time algorithms for this optimization variant in the context of dominating sets.}, author = {Křišťan, Jan Matyáš and Svoboda, Jakub}, booktitle = {24th International Symposium on Fundamentals of Computation Theory}, isbn = {9783031435867}, issn = {1611-3349}, location = {Trier, Germany}, pages = {333--347}, publisher = {Springer Nature}, title = {{Shortest dominating set reconfiguration under token sliding}}, doi = {10.1007/978-3-031-43587-4_24}, volume = {14292}, year = {2023}, } @inproceedings{14830, abstract = {We study the problem of learning controllers for discrete-time non-linear stochastic dynamical systems with formal reach-avoid guarantees. This work presents the first method for providing formal reach-avoid guarantees, which combine and generalize stability and safety guarantees, with a tolerable probability threshold p ∈ [0,1] over the infinite time horizon. Our method leverages advances in the machine learning literature and represents formal certificates as neural networks. In particular, we learn a certificate in the form of a reach-avoid supermartingale (RASM), a novel notion that we introduce in this work. Our RASMs provide reachability and avoidance guarantees by imposing constraints on what can be viewed as a stochastic extension of level sets of Lyapunov functions for deterministic systems. Our approach solves several important problems: it can be used to learn a control policy from scratch, to verify a reach-avoid specification for a fixed control policy, or to fine-tune a pre-trained policy if it does not satisfy the reach-avoid specification.
We validate our approach on three stochastic non-linear reinforcement learning tasks.}, author = {Zikelic, Dorde and Lechner, Mathias and Henzinger, Thomas A and Chatterjee, Krishnendu}, booktitle = {Proceedings of the 37th AAAI Conference on Artificial Intelligence}, issn = {2374-3468}, keywords = {General Medicine}, location = {Washington, DC, United States}, number = {10}, pages = {11926--11935}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Learning control policies for stochastic systems with reach-avoid guarantees}}, doi = {10.1609/aaai.v37i10.26407}, volume = {37}, year = {2023}, } @inproceedings{13139, abstract = {A classical problem for Markov chains is determining their stationary (or steady-state) distribution. This problem has an equally classical solution based on eigenvectors and linear equation systems. However, this approach does not scale to large instances, and iterative solutions are desirable. It turns out that a naive approach, as used by current model checkers, may yield completely wrong results. We present a new approach, which utilizes recent advances in partial exploration and mean payoff computation to obtain a correct, converging approximation.}, author = {Meggendorfer, Tobias}, booktitle = {TACAS 2023: Tools and Algorithms for the Construction and Analysis of Systems}, isbn = {9783031308222}, issn = {1611-3349}, location = {Paris, France}, pages = {489--507}, publisher = {Springer Nature}, title = {{Correct approximation of stationary distributions}}, doi = {10.1007/978-3-031-30823-9_25}, volume = {13993}, year = {2023}, } @misc{14990, abstract = {The software artefact for evaluating the implementation of the stationary-distribution approximation.}, author = {Meggendorfer, Tobias}, publisher = {Zenodo}, title = {{Artefact for: Correct Approximation of Stationary Distributions}}, doi = {10.5281/ZENODO.7548214}, year = {2023}, } @inproceedings{15023, abstract = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL, to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph’s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound than previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies.
We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.}, author = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {37th Conference on Neural Information Processing Systems}, location = {New Orleans, LA, United States}, title = {{Compositional policy learning in stochastic control systems with formal guarantees}}, year = {2023}, } @inproceedings{12102, abstract = {Given a Markov chain M = (V, v_0, δ), with state space V and a starting state v_0, and a probability threshold ε, an ε-core is a subset C of states that is left with probability at most ε. More formally, C ⊆ V is an ε-core iff ℙ[reach (V\C)] ≤ ε. Cores have been applied in a wide variety of verification problems over Markov chains, Markov decision processes, and probabilistic programs, as a means of discarding uninteresting and low-probability parts of a probabilistic system and instead being able to focus on the states that are likely to be encountered in a real-world run. In this work, we focus on the problem of computing a minimal ε-core in a Markov chain. Our contributions include both negative and positive results: (i) We show that the decision problem on the existence of an ε-core of a given size is NP-complete. This solves an open problem posed in [Jan Kretínský and Tobias Meggendorfer, 2020]. We additionally show that the problem remains NP-complete even when limited to acyclic Markov chains with bounded maximal vertex degree; (ii) We provide a polynomial-time algorithm for computing a minimal ε-core on Markov chains over control-flow graphs of structured programs. A straightforward combination of our algorithm with standard branch prediction techniques allows one to apply the idea of cores to find a subset of program lines that are left with low probability and then focus any desired static analysis on this core subset.}, author = {Ahmadi, Ali and Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Meggendorfer, Tobias and Safavi Hemami, Roodabeh and Zikelic, Dorde}, booktitle = {42nd IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science}, isbn = {9783959772617}, issn = {1868-8969}, location = {Madras, India}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Algorithms and hardness results for computing cores of Markov chains}}, doi = {10.4230/LIPIcs.FSTTCS.2022.29}, volume = {250}, year = {2022}, } @inproceedings{12101, abstract = {Spatial games form a widely-studied class of games from biology and physics modeling the evolution of social behavior. Formally, such a game is defined by a square (d by d) payoff matrix M and an undirected graph G. Each vertex of G represents an individual that initially follows some strategy i ∈ {1,2,…,d}. In each round of the game, every individual plays the matrix game with each of its neighbors: An individual following strategy i meeting a neighbor following strategy j receives a payoff equal to the entry (i,j) of M. Then, each individual updates its strategy to its neighbors' strategy with the highest sum of payoffs, and the next round starts. The basic computational problems consist of reachability between configurations and the average frequency of a strategy. For general spatial games and graphs, these problems are in PSPACE. In this paper, we examine a restricted setting: the game is a prisoner’s dilemma, and G is a subgraph of a grid.
We prove that basic computational problems for spatial games with the prisoner’s dilemma on a subgraph of a grid are PSPACE-hard.}, author = {Chatterjee, Krishnendu and Ibsen-Jensen, Rasmus and Jecker, Ismael R and Svoboda, Jakub}, booktitle = {42nd IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science}, isbn = {9783959772617}, issn = {1868-8969}, location = {Madras, India}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Complexity of spatial games}}, doi = {10.4230/LIPIcs.FSTTCS.2022.11}, volume = {250}, year = {2022}, } @inproceedings{12568, abstract = {We treat the problem of risk-aware control for stochastic shortest path (SSP) on Markov decision processes (MDP). Typically, expectation is considered for SSP, which however is oblivious to the incurred risk. We present an alternative view, instead optimizing conditional value-at-risk (CVaR), an established risk measure. We treat both Markov chains and MDPs and introduce, through novel insights, two algorithms, based on linear programming and value iteration, respectively. Both algorithms offer precise and provably correct solutions. Evaluation of our prototype implementation shows that risk-aware control is feasible on several moderately sized models.}, author = {Meggendorfer, Tobias}, booktitle = {Proceedings of the 36th AAAI Conference on Artificial Intelligence, AAAI 2022}, isbn = {1577358767}, issn = {2374-3468}, location = {Virtual}, number = {9}, pages = {9858--9867}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Risk-aware stochastic shortest path}}, doi = {10.1609/aaai.v36i9.21222}, volume = {36}, year = {2022}, } @article{11938, abstract = {A matching is compatible to two or more labeled point sets of size n with labels {1, …, n} if its straight-line drawing on each of these point sets is crossing-free. We study the maximum number of edges in a matching compatible to two or more labeled point sets in general position in the plane. We show that for any two labeled sets of n points in convex position there exists a compatible matching with ⌊√(2n+1) − 1⌋ edges. More generally, for any ℓ labeled point sets we construct compatible matchings of size Ω(n^(1/ℓ)). As a corresponding upper bound, we use probabilistic arguments to show that for any ℓ given sets of n points there exists a labeling of each set such that the largest compatible matching has O(n^(2/(ℓ+1))) edges. Finally, we show that Θ(log n) copies of any set of n points are necessary and sufficient for the existence of labelings of these point sets such that any compatible matching consists only of a single edge.}, author = {Aichholzer, Oswin and Arroyo Guevara, Alan M and Masárová, Zuzana and Parada, Irene and Perz, Daniel and Pilz, Alexander and Tkadlec, Josef and Vogtenhuber, Birgit}, issn = {1526-1719}, journal = {Journal of Graph Algorithms and Applications}, number = {2}, pages = {225--240}, publisher = {Brown University}, title = {{On compatible matchings}}, doi = {10.7155/jgaa.00591}, volume = {26}, year = {2022}, } @unpublished{12677, abstract = {In modern sample-driven Prophet Inequality, an adversary chooses a sequence of n items with values v_1, v_2, …, v_n to be presented to a decision maker (DM). The process proceeds in two phases. In the first phase (sampling phase), some items, possibly selected at random, are revealed to the DM, but she can never accept them. In the second phase, the DM is presented with the other items in a random order and online fashion.
For each item, she must make an irrevocable decision to either accept the item and stop the process or reject the item forever and proceed to the next item. The goal of the DM is to maximize the expected value as compared to a Prophet (or offline algorithm) that has access to all information. In this setting, the sampling phase has no cost and is not part of the optimization process. However, in many scenarios, the samples are obtained as part of the decision-making process. We model this aspect as a two-phase Prophet Inequality where an adversary chooses a sequence of 2n items with values v_1, v_2, …, v_{2n} and the items are randomly ordered. The Prophet Inequality problem then unfolds in two phases, over the first n items and the remaining n items, respectively. We show that some basic algorithms achieve a ratio of at most 0.450. We present an algorithm that achieves a ratio of at least 0.495. Finally, we show that for every algorithm the ratio it can achieve is at most 0.502. Hence, our algorithm is near-optimal.}, author = {Chatterjee, Krishnendu and Mohammadi, Mona and Saona Urmeneta, Raimundo J}, booktitle = {arXiv}, title = {{Repeated prophet inequality with near-optimal bounds}}, doi = {10.48550/ARXIV.2209.14368}, year = {2022}, } @article{10602, abstract = {Transforming ω-automata into parity automata is traditionally done using appearance records. We present an efficient variant of this idea, tailored to Rabin automata, and several optimizations applicable to all appearance records. We compare the methods experimentally and show that our method produces significantly smaller automata than previous approaches.}, author = {Kretinsky, Jan and Meggendorfer, Tobias and Waldmann, Clara and Weininger, Maximilian}, issn = {1432-0525}, journal = {Acta Informatica}, keywords = {computer networks and communications, information systems, software}, pages = {585--618}, publisher = {Springer Nature}, title = {{Index appearance record with preorders}}, doi = {10.1007/s00236-021-00412-y}, volume = {59}, year = {2022}, } @article{10731, abstract = {Motivated by COVID-19, we develop and analyze a simple stochastic model for the spread of disease in a human population. We track how the number of infected and critically ill people develops over time in order to estimate the demand imposed on the hospital system. To keep this demand under control, we consider a class of simple policies for slowing down and reopening society, and we compare their efficiency in mitigating the spread of the virus from several different points of view. We find that in order to avoid overwhelming the hospital system, a policy must impose a harsh lockdown or react swiftly (or both). While reacting swiftly is universally beneficial, being harsh pays off only when the country is patient about reopening and when the neighboring countries coordinate their mitigation efforts.
Our work highlights the importance of acting decisively when closing down and the importance of patience and coordination between neighboring countries when reopening.}, author = {Svoboda, Jakub and Tkadlec, Josef and Pavlogiannis, Andreas and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {2045-2322}, journal = {Scientific Reports}, number = {1}, publisher = {Springer Nature}, title = {{Infection dynamics of COVID-19 virus under lockdown and reopening}}, doi = {10.1038/s41598-022-05333-5}, volume = {12}, year = {2022}, } @inproceedings{11459, abstract = {We present a novel approach to differential cost analysis that, given a program revision, attempts to statically bound the difference in resource usage, or cost, between the two program versions. Differential cost analysis is particularly interesting because of the many compelling applications for it, such as detecting resource-use regressions at code-review time or proving the absence of certain side-channel vulnerabilities. One prior approach to differential cost analysis is to apply relational reasoning that conceptually constructs a product program on which one can over-approximate the difference in costs between the two program versions. However, a significant challenge in any relational approach is effectively aligning the program versions to get precise results. In this paper, our key insight is that we can avoid the need for and the limitations of program alignment if, instead, we bound the difference of two cost-bound summaries rather than directly bounding the concrete cost difference. In particular, our method computes a threshold value for the maximal difference in cost between two program versions simultaneously using two kinds of cost-bound summaries---a potential function that evaluates to an upper bound for the cost incurred in the first program and an anti-potential function that evaluates to a lower bound for the cost incurred in the second. Our method has a number of desirable properties: it can be fully automated, it allows optimizing the threshold value on relative cost, it is suitable for programs that are not syntactically similar, and it supports non-determinism. We have evaluated an implementation of our approach on a number of program pairs collected from the literature, and we find that our method computes tight threshold values on relative cost in most examples.}, author = {Zikelic, Dorde and Chang, Bor-Yuh Evan and Bolignano, Pauline and Raimondi, Franco}, booktitle = {Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation}, isbn = {9781450392655}, location = {San Diego, CA, United States}, pages = {442--457}, publisher = {Association for Computing Machinery}, title = {{Differential cost analysis with simultaneous potentials and anti-potentials}}, doi = {10.1145/3519939.3523435}, year = {2022}, } @article{12257, abstract = {Structural balance theory is an established framework for studying social relationships of friendship and enmity. These relationships are modeled by a signed network whose energy potential measures the level of imbalance, while stochastic dynamics drives the network toward a state of minimum energy that captures social balance. It is known that this energy landscape has local minima that can trap socially aware dynamics, preventing it from reaching balance. Here we first study the robustness and attractor properties of these local minima. 
We show that a stochastic process can reach them from an abundance of initial states and that some local minima cannot be escaped by mild perturbations of the network. Motivated by these anomalies, we introduce best-edge dynamics (BED), a new, plausible stochastic process. We prove that BED always reaches balance and that it does so quickly in various interesting settings.}, author = {Chatterjee, Krishnendu and Svoboda, Jakub and Zikelic, Dorde and Pavlogiannis, Andreas and Tkadlec, Josef}, issn = {2470-0053}, journal = {Physical Review E}, number = {3}, publisher = {American Physical Society}, title = {{Social balance on networks: Local minima and best-edge dynamics}}, doi = {10.1103/physreve.106.034321}, volume = {106}, year = {2022}, } @article{12280, abstract = {In repeated interactions, players can use strategies that respond to the outcome of previous rounds. Much of the existing literature on direct reciprocity assumes that all competing individuals use the same strategy space. Here, we study both learning and evolutionary dynamics of players that differ in the strategy space they explore. We focus on the infinitely repeated donation game and compare three natural strategy spaces: memory-1 strategies, which consider the last moves of both players; reactive strategies, which respond to the last move of the co-player; and unconditional strategies. These three strategy spaces differ in the memory capacity that is needed. We compute the long-term average payoff that is achieved in a pairwise learning process. We find that smaller strategy spaces can dominate larger ones. For weak selection, unconditional players dominate both reactive and memory-1 players. For intermediate selection, reactive players dominate memory-1 players. Only for strong selection and a low cost-to-benefit ratio do memory-1 players dominate the others. We observe that the supergame between strategy spaces can be a social dilemma: maximum payoff is achieved if both players explore a larger strategy space, but smaller strategy spaces dominate.}, author = {Schmid, Laura and Hilbe, Christian and Chatterjee, Krishnendu and Nowak, Martin}, issn = {1553-7358}, journal = {PLOS Computational Biology}, keywords = {Computational Theory and Mathematics, Cellular and Molecular Neuroscience, Genetics, Molecular Biology, Ecology, Modeling and Simulation, Ecology, Evolution, Behavior and Systematics}, number = {6}, publisher = {Public Library of Science}, title = {{Direct reciprocity between individuals that use different strategy spaces}}, doi = {10.1371/journal.pcbi.1010149}, volume = {18}, year = {2022}, } @article{9311, abstract = {Partially observable Markov decision processes (POMDPs) are standard models for dynamic systems with probabilistic and nondeterministic behaviour in uncertain environments. We prove that in POMDPs with long-run average objective, the decision maker has approximately optimal strategies with finite memory. This implies notably that approximating the long-run value is recursively enumerable, as well as a weak continuity property of the value with respect to the transition function.
}, author = {Chatterjee, Krishnendu and Saona Urmeneta, Raimundo J and Ziliotto, Bruno}, issn = {1526-5471}, journal = {Mathematics of Operations Research}, keywords = {Management Science and Operations Research, General Mathematics, Computer Science Applications}, number = {1}, pages = {100--119}, publisher = {Institute for Operations Research and the Management Sciences}, title = {{Finite-memory strategies in POMDPs with long-run average objectives}}, doi = {10.1287/moor.2020.1116}, volume = {47}, year = {2022}, } @inproceedings{12170, abstract = {We present PET, a specialized and highly optimized framework for partial exploration on probabilistic systems. Over the last decade, several significant advances in the analysis of Markov decision processes employed partial exploration. In a nutshell, this idea allows one to focus computation on specific parts of the system, guided by heuristics, while maintaining correctness. In particular, only relevant parts of the system are constructed on demand, which in turn potentially allows one to omit constructing large parts of the system. Depending on the model, this leads to dramatic speed-ups, in extreme cases even up to an arbitrary factor. PET unifies several previous implementations and provides a flexible framework to easily implement partial exploration for many further problems. Our experimental evaluation shows significant improvements compared to the previous implementations while vastly reducing the overhead required to add support for additional properties.}, author = {Meggendorfer, Tobias}, booktitle = {20th International Symposium on Automated Technology for Verification and Analysis}, isbn = {9783031199912}, issn = {1611-3349}, location = {Virtual}, pages = {320--326}, publisher = {Springer Nature}, title = {{PET – A partial exploration tool for probabilistic verification}}, doi = {10.1007/978-3-031-19992-9_20}, volume = {13505}, year = {2022}, } @article{11402, abstract = {Fixed-horizon planning considers a weighted graph and asks one to construct a path that maximizes the sum of weights for a given time horizon T. However, in many scenarios, the time horizon is not fixed; rather, the stopping time is chosen according to some distribution such that the expected stopping time is T. If the stopping-time distribution is not known, then to ensure robustness, the distribution is chosen by an adversary as the worst-case scenario. A stationary plan always chooses the same outgoing edge at every vertex. For a fixed horizon or a fixed stopping-time distribution, stationary plans are not sufficient for optimality. Quite surprisingly, we show that when an adversary chooses the stopping-time distribution with expected stopping time T, then stationary plans are sufficient. While computing optimal stationary plans for a fixed horizon is NP-complete, we show that computing optimal stationary plans under an adversarial stopping-time distribution can be achieved in polynomial time.}, author = {Chatterjee, Krishnendu and Doyen, Laurent}, issn = {1090-2724}, journal = {Journal of Computer and System Sciences}, pages = {1--21}, publisher = {Elsevier}, title = {{Graph planning with expected finite horizon}}, doi = {10.1016/j.jcss.2022.04.003}, volume = {129}, year = {2022}, } @inproceedings{12775, abstract = {We consider the problem of approximating the reachability probabilities in Markov decision processes (MDP) with uncountable (continuous) state and action spaces.
While there are algorithms that, for special classes of such MDP, provide a sequence of approximations converging to the true value in the limit, our aim is to obtain an algorithm with guarantees on the precision of the approximation. As this problem is undecidable in general, assumptions on the MDP are necessary. Our main contribution is to identify sufficient assumptions that are as weak as possible, thus approaching the "boundary" of which systems can be correctly and reliably analyzed. To this end, we also argue why each of our assumptions is necessary for algorithms based on processing finitely many observations. We present two solution variants. The first one provides converging lower bounds under weaker assumptions than typical ones from previous works concerned with guarantees. The second one then utilizes stronger assumptions to additionally provide converging upper bounds. Altogether, we obtain an anytime algorithm, i.e., one yielding a sequence of approximants with known and iteratively improving precision, converging to the true value in the limit. Moreover, due to the generality of our assumptions, our algorithms are very general templates, readily allowing for various heuristics from the literature, in contrast to, e.g., a specific discretization algorithm. Our theoretical contribution thus paves the way for future practical improvements without sacrificing correctness guarantees.}, author = {Grover, Kush and Kretinsky, Jan and Meggendorfer, Tobias and Weininger, Maximilian}, booktitle = {33rd International Conference on Concurrency Theory}, issn = {1868-8969}, location = {Warsaw, Poland}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Anytime guarantees for reachability in uncountable Markov decision processes}}, doi = {10.4230/LIPIcs.CONCUR.2022.11}, volume = {243}, year = {2022}, } @inproceedings{12000, abstract = {We consider the quantitative problem of obtaining lower bounds on the probability of termination of a given non-deterministic probabilistic program. Specifically, given a non-termination threshold p ∈ [0,1], we aim for certificates proving that the program terminates with probability at least 1−p. The basic idea of our approach is to find a terminating stochastic invariant, i.e., a subset SI of program states such that (i) the probability of the program ever leaving SI is no more than p, and (ii) almost-surely, the program either leaves SI or terminates. While stochastic invariants are already well-known, we provide the first proof that the idea above is not only sound, but also complete for quantitative termination analysis. We then introduce a novel sound and complete characterization of stochastic invariants that enables template-based approaches for easy synthesis of quantitative termination certificates, especially in affine or polynomial forms. Finally, by combining this idea with the existing martingale-based methods that are relatively complete for qualitative termination analysis, we obtain the first automated, sound, and relatively complete algorithm for quantitative termination analysis. Notably, our completeness guarantees for quantitative termination analysis are as strong as the best-known methods for the qualitative variant. Our prototype implementation demonstrates the effectiveness of our approach on various probabilistic programs.
We also demonstrate that our algorithm certifies lower bounds on termination probability for probabilistic programs that are beyond the reach of previous methods.}, author = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Meggendorfer, Tobias and Zikelic, Dorde}, booktitle = {Proceedings of the 34th International Conference on Computer Aided Verification}, isbn = {9783031131844}, issn = {1611-3349}, location = {Haifa, Israel}, pages = {55--78}, publisher = {Springer}, title = {{Sound and complete certificates for quantitative termination analysis of probabilistic programs}}, doi = {10.1007/978-3-031-13185-1_4}, volume = {13371}, year = {2022}, } @article{12511, abstract = {We consider the problem of formally verifying almost-sure (a.s.) asymptotic stability in discrete-time nonlinear stochastic control systems. While verifying stability in deterministic control systems is extensively studied in the literature, verifying stability in stochastic control systems is an open problem. The few existing works on this topic either consider only specialized forms of stochasticity or make restrictive assumptions on the system, rendering them inapplicable to learning algorithms with neural network policies. In this work, we present an approach for general nonlinear stochastic control problems with two novel aspects: (a) instead of classical stochastic extensions of Lyapunov functions, we use ranking supermartingales (RSMs) to certify a.s. asymptotic stability, and (b) we present a method for learning neural network RSMs. We prove that our approach guarantees a.s. asymptotic stability of the system and provides the first method to obtain bounds on the stabilization time, which stochastic Lyapunov functions do not. Finally, we validate our approach experimentally on a set of nonlinear stochastic reinforcement learning environments with neural network policies.}, author = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A}, isbn = {9781577358350}, issn = {2374-3468}, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, keywords = {General Medicine}, number = {7}, pages = {7326--7336}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Stability verification in stochastic control systems via neural network supermartingales}}, doi = {10.1609/aaai.v36i7.20695}, volume = {36}, year = {2022}, } @unpublished{14601, abstract = {In this work, we address the problem of learning provably stable neural network policies for stochastic control systems. While recent work has demonstrated the feasibility of certifying given policies using martingale theory, the problem of how to learn such policies is little explored. Here, we study the effectiveness of jointly learning a policy together with a martingale certificate that proves its stability, using a single learning algorithm. We observe that the joint optimization problem easily becomes stuck in local minima when starting from a randomly initialized policy. Our results suggest that some form of pre-training of the policy is required for the joint optimization to repair and verify the policy successfully.}, author = {Zikelic, Dorde and Lechner, Mathias and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {arXiv}, title = {{Learning stabilizing policies in stochastic control systems}}, doi = {10.48550/arXiv.2205.11991}, year = {2022}, }
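To make the ε-core definition of entry 12102 concrete, the following self-contained Python sketch checks whether a candidate set C is an ε-core by computing ℙ[reach(V\C)] from v_0. This is an illustration only, not the cited paper's algorithm: the function names, the value-iteration cutoff, and the example chain are our own choices. Value iteration converges from below to the least fixed point of the reachability equations, which is exactly the reachability probability.

    # Illustrative sketch (not from the cited paper): is C an eps-core?
    # Definition (entry 12102): C is an eps-core iff P[reach(V \ C)] <= eps from v0.
    import numpy as np

    def reach_outside(P, C, iters=10000):
        """P: row-stochastic n x n matrix. Returns, per state, the
        probability of ever leaving the set C (value iteration)."""
        n = P.shape[0]
        outside = np.array([v not in C for v in range(n)])
        x = outside.astype(float)        # states outside C count as already left
        for _ in range(iters):
            x = P @ x                    # one Bellman step for reachability
            x[outside] = 1.0             # keep target states clamped to 1
        return x

    def is_eps_core(P, v0, C, eps):
        return reach_outside(P, C)[v0] <= eps

    # Example: state 1 is absorbing inside C = {0, 1}; state 2 lies outside C
    # and is reached from state 0 with probability 0.1 / (1 - 0.5) = 0.2.
    P = np.array([[0.5, 0.4, 0.1],
                  [0.0, 1.0, 0.0],
                  [0.0, 0.0, 1.0]])
    print(is_eps_core(P, v0=0, C={0, 1}, eps=0.25))   # True: 0.2 <= 0.25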
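Similarly, one round of the spatial-game dynamics of entry 12101 can be spelled out in a few lines. This toy sketch is ours, not the authors' code; following the abstract's wording, a vertex imitates only its neighbors (whether it also compares its own payoff is left implicit there), and ties between equally successful neighbors are broken by adjacency-list order, an assumption of this sketch.

    # Illustrative sketch: one synchronous round of the imitation dynamics of
    # entry 12101. M is the d x d payoff matrix; adj is an adjacency list for
    # the undirected graph G; strat[v] in {0, ..., d-1}.
    def spatial_game_round(M, adj, strat):
        # Each vertex plays the matrix game with every neighbor and sums payoffs.
        payoff = {v: sum(M[strat[v]][strat[u]] for u in adj[v]) for v in adj}
        # Each vertex adopts the strategy of its highest-earning neighbor
        # (ties broken by list order -- an assumption of this sketch).
        return {v: strat[max(adj[v], key=lambda u: payoff[u])] for v in adj}

    # Prisoner's dilemma on a 4-cycle (a subgraph of a grid):
    # strategy 0 = cooperate, 1 = defect.
    M = [[3, 0],
         [5, 1]]
    adj = {0: [1, 2], 1: [0, 3], 2: [0, 3], 3: [1, 2]}
    print(spatial_game_round(M, adj, {0: 0, 1: 0, 2: 1, 3: 1}))
    # -> {0: 1, 1: 1, 2: 1, 3: 1}: defection spreads in a single round.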
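Finally, the two-phase setting of entry 12677 is easy to simulate. The sketch below is a naive baseline, not the paper's near-optimal algorithm: it uses the maximum of the first n items as an acceptance threshold for the second phase, and it estimates the ratio E[ALG]/E[prophet] by Monte Carlo with i.i.d. uniform values standing in for an adversarial sequence (purely for illustration; the model lets an adversary pick the values).

    # Illustrative sketch (not the cited paper's algorithm): max-of-phase-one
    # threshold baseline for the two-phase Prophet Inequality.
    import random

    def two_phase_baseline(values):
        """values: list of 2n item values, already in random order."""
        n = len(values) // 2
        threshold = max(values[:n])      # phase one: observe only, never accept
        for v in values[n:]:             # phase two: online, irrevocable choice
            if v >= threshold:
                return v
        return 0.0                       # nothing accepted

    random.seed(0)
    alg_total, prophet_total = 0.0, 0.0
    for _ in range(100000):
        vals = [random.random() for _ in range(20)]   # 2n = 20 items, i.i.d. demo
        alg_total += two_phase_baseline(vals)
        prophet_total += max(vals)                    # offline optimum
    print(alg_total / prophet_total)                  # empirical ratio estimate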