@inproceedings{13139, abstract = {A classical problem for Markov chains is determining their stationary (or steady-state) distribution. This problem has an equally classical solution based on eigenvectors and linear equation systems. However, this approach does not scale to large instances, and iterative solutions are desirable. It turns out that a naive approach, as used by current model checkers, may yield completely wrong results. We present a new approach, which utilizes recent advances in partial exploration and mean payoff computation to obtain a correct, converging approximation.}, author = {Meggendorfer, Tobias}, booktitle = {TACAS 2023: Tools and Algorithms for the Construction and Analysis of Systems}, isbn = {9783031308222}, issn = {1611-3349}, location = {Paris, France}, pages = {489--507}, publisher = {Springer Nature}, title = {{Correct approximation of stationary distributions}}, doi = {10.1007/978-3-031-30823-9_25}, volume = {13993}, year = {2023}, } @misc{14990, abstract = {The software artefact containing the implementation used to evaluate the approximation of stationary distributions.}, author = {Meggendorfer, Tobias}, publisher = {Zenodo}, title = {{Artefact for: Correct Approximation of Stationary Distributions}}, doi = {10.5281/ZENODO.7548214}, year = {2023}, } @inproceedings{15023, abstract = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph’s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.}, author = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {37th Conference on Neural Information Processing Systems}, location = {New Orleans, LA, United States}, title = {{Compositional policy learning in stochastic control systems with formal guarantees}}, year = {2023}, } @inproceedings{12102, abstract = {Given a Markov chain M = (V, v_0, δ), with state space V and a starting state v_0, and a probability threshold ε, an ε-core is a subset C of states that is left with probability at most ε. More formally, C ⊆ V is an ε-core iff ℙ[reach (V\C)] ≤ ε. Cores have been applied in a wide variety of verification problems over Markov chains, Markov decision processes, and probabilistic programs, as a means of discarding uninteresting and low-probability parts of a probabilistic system and instead being able to focus on the states that are likely to be encountered in a real-world run. 
In this work, we focus on the problem of computing a minimal ε-core in a Markov chain. Our contributions include both negative and positive results: (i) We show that the decision problem on the existence of an ε-core of a given size is NP-complete. This solves an open problem posed in [Jan Kretínský and Tobias Meggendorfer, 2020]. We additionally show that the problem remains NP-complete even when limited to acyclic Markov chains with bounded maximal vertex degree; (ii) We provide a polynomial time algorithm for computing a minimal ε-core on Markov chains over control-flow graphs of structured programs. A straightforward combination of our algorithm with standard branch prediction techniques allows one to apply the idea of cores to find a subset of program lines that are left with low probability and then focus any desired static analysis on this core subset.}, author = {Ahmadi, Ali and Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Meggendorfer, Tobias and Safavi Hemami, Roodabeh and Zikelic, Dorde}, booktitle = {42nd IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science}, isbn = {9783959772617}, issn = {1868-8969}, location = {Chennai, India}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Algorithms and hardness results for computing cores of Markov chains}}, doi = {10.4230/LIPIcs.FSTTCS.2022.29}, volume = {250}, year = {2022}, } @inproceedings{12101, abstract = {Spatial games form a widely-studied class of games from biology and physics modeling the evolution of social behavior. Formally, such a game is defined by a square (d by d) payoff matrix M and an undirected graph G. Each vertex of G represents an individual that initially follows some strategy i ∈ {1,2,…,d}. In each round of the game, every individual plays the matrix game with each of its neighbors: An individual following strategy i meeting a neighbor following strategy j receives a payoff equal to the entry (i,j) of M. Then, each individual updates its strategy to its neighbors' strategy with the highest sum of payoffs, and the next round starts. The basic computational problems consist of reachability between configurations and the average frequency of a strategy. For general spatial games and graphs, these problems are in PSPACE. In this paper, we examine a restricted setting: the game is a prisoner’s dilemma, and G is a subgraph of a grid. We prove that basic computational problems for spatial games with the prisoner’s dilemma on a subgraph of a grid are PSPACE-hard.}, author = {Chatterjee, Krishnendu and Ibsen-Jensen, Rasmus and Jecker, Ismael R and Svoboda, Jakub}, booktitle = {42nd IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science}, isbn = {9783959772617}, issn = {1868-8969}, location = {Chennai, India}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Complexity of spatial games}}, doi = {10.4230/LIPIcs.FSTTCS.2022.11}, volume = {250}, year = {2022}, } @inproceedings{12568, abstract = {We treat the problem of risk-aware control for stochastic shortest path (SSP) on Markov decision processes (MDP). Typically, the expectation is considered for SSP, which, however, is oblivious to the incurred risk. We present an alternative view, instead optimizing conditional value-at-risk (CVaR), an established risk measure. We treat both Markov chains as well as MDP and introduce, through novel insights, two algorithms, based on linear programming and value iteration, respectively. 
Both algorithms offer precise and provably correct solutions. An evaluation of our prototype implementation shows that risk-aware control is feasible on several moderately sized models.}, author = {Meggendorfer, Tobias}, booktitle = {Proceedings of the 36th AAAI Conference on Artificial Intelligence, AAAI 2022}, isbn = {1577358767}, issn = {2374-3468}, location = {Virtual}, number = {9}, pages = {9858--9867}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Risk-aware stochastic shortest path}}, doi = {10.1609/aaai.v36i9.21222}, volume = {36}, year = {2022}, } @article{11938, abstract = {A matching is compatible to two or more labeled point sets of size n with labels {1, …, n} if its straight-line drawing on each of these point sets is crossing-free. We study the maximum number of edges in a matching compatible to two or more labeled point sets in general position in the plane. We show that for any two labeled sets of n points in convex position there exists a compatible matching with ⌊√(2n+1) − 1⌋ edges. More generally, for any ℓ labeled point sets we construct compatible matchings of size Ω(n^(1/ℓ)). As a corresponding upper bound, we use probabilistic arguments to show that for any ℓ given sets of n points there exists a labeling of each set such that the largest compatible matching has O(n^(2/(ℓ+1))) edges. Finally, we show that Θ(log n) copies of any set of n points are necessary and sufficient for the existence of labelings of these point sets such that any compatible matching consists only of a single edge.}, author = {Aichholzer, Oswin and Arroyo Guevara, Alan M and Masárová, Zuzana and Parada, Irene and Perz, Daniel and Pilz, Alexander and Tkadlec, Josef and Vogtenhuber, Birgit}, issn = {1526-1719}, journal = {Journal of Graph Algorithms and Applications}, number = {2}, pages = {225--240}, publisher = {Brown University}, title = {{On compatible matchings}}, doi = {10.7155/jgaa.00591}, volume = {26}, year = {2022}, } @unpublished{12677, abstract = {In the modern sample-driven Prophet Inequality, an adversary chooses a sequence of n items with values v_1, v_2, …, v_n to be presented to a decision maker (DM). The process proceeds in two phases. In the first phase (the sampling phase), some items, possibly selected at random, are revealed to the DM, but she can never accept them. In the second phase, the DM is presented with the other items in a random order and in an online fashion. For each item, she must make an irrevocable decision to either accept the item and stop the process or reject the item forever and proceed to the next item. The goal of the DM is to maximize the expected value as compared to a Prophet (or offline algorithm) that has access to all information. In this setting, the sampling phase has no cost and is not part of the optimization process. However, in many scenarios, the samples are obtained as part of the decision-making process. We model this aspect as a two-phase Prophet Inequality where an adversary chooses a sequence of 2n items with values v_1, v_2, …, v_{2n} and the items are randomly ordered. Finally, the Prophet Inequality problem proceeds in two phases, over the first n items and the remaining n items, respectively. We show that some basic algorithms achieve a ratio of at most 0.450. We present an algorithm that achieves a ratio of at least 0.495. Finally, we show that for every algorithm the ratio it can achieve is at most 0.502. 
Hence, our algorithm is near-optimal.}, author = {Chatterjee, Krishnendu and Mohammadi, Mona and Saona Urmeneta, Raimundo J}, booktitle = {arXiv}, title = {{Repeated prophet inequality with near-optimal bounds}}, doi = {10.48550/ARXIV.2209.14368}, year = {2022}, } @article{10602, abstract = {Transforming ω-automata into parity automata is traditionally done using appearance records. We present an efficient variant of this idea, tailored to Rabin automata, and several optimizations applicable to all appearance records. We compare the methods experimentally and show that our method produces significantly smaller automata than previous approaches.}, author = {Kretinsky, Jan and Meggendorfer, Tobias and Waldmann, Clara and Weininger, Maximilian}, issn = {1432-0525}, journal = {Acta Informatica}, keywords = {computer networks and communications, information systems, software}, pages = {585--618}, publisher = {Springer Nature}, title = {{Index appearance record with preorders}}, doi = {10.1007/s00236-021-00412-y}, volume = {59}, year = {2022}, } @article{10731, abstract = {Motivated by COVID-19, we develop and analyze a simple stochastic model for the spread of disease in a human population. We track how the number of infected and critically ill people develops over time in order to estimate the demand that is imposed on the hospital system. To keep this demand under control, we consider a class of simple policies for slowing down and reopening society and we compare their efficiency in mitigating the spread of the virus from several different points of view. We find that in order to avoid overwhelming the hospital system, a policy must impose a harsh lockdown or it must react swiftly (or both). While reacting swiftly is universally beneficial, being harsh pays off only when the country is patient about reopening and when the neighboring countries coordinate their mitigation efforts. Our work highlights the importance of acting decisively when closing down and the importance of patience and coordination between neighboring countries when reopening.}, author = {Svoboda, Jakub and Tkadlec, Josef and Pavlogiannis, Andreas and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {2045-2322}, journal = {Scientific Reports}, number = {1}, publisher = {Springer Nature}, title = {{Infection dynamics of COVID-19 virus under lockdown and reopening}}, doi = {10.1038/s41598-022-05333-5}, volume = {12}, year = {2022}, } @inproceedings{11459, abstract = {We present a novel approach to differential cost analysis that, given a program revision, attempts to statically bound the difference in resource usage, or cost, between the two program versions. Differential cost analysis is particularly interesting because of the many compelling applications for it, such as detecting resource-use regressions at code-review time or proving the absence of certain side-channel vulnerabilities. One prior approach to differential cost analysis is to apply relational reasoning that conceptually constructs a product program on which one can over-approximate the difference in costs between the two program versions. However, a significant challenge in any relational approach is effectively aligning the program versions to get precise results. In this paper, our key insight is that we can avoid the need for and the limitations of program alignment if, instead, we bound the difference of two cost-bound summaries rather than directly bounding the concrete cost difference. 
In particular, our method computes a threshold value for the maximal difference in cost between two program versions simultaneously using two kinds of cost-bound summaries---a potential function that evaluates to an upper bound for the cost incurred in the first program and an anti-potential function that evaluates to a lower bound for the cost incurred in the second. Our method has a number of desirable properties: it can be fully automated, it allows optimizing the threshold value on relative cost, it is suitable for programs that are not syntactically similar, and it supports non-determinism. We have evaluated an implementation of our approach on a number of program pairs collected from the literature, and we find that our method computes tight threshold values on relative cost in most examples.}, author = {Zikelic, Dorde and Chang, Bor-Yuh Evan and Bolignano, Pauline and Raimondi, Franco}, booktitle = {Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation}, isbn = {9781450392655}, location = {San Diego, CA, United States}, pages = {442--457}, publisher = {Association for Computing Machinery}, title = {{Differential cost analysis with simultaneous potentials and anti-potentials}}, doi = {10.1145/3519939.3523435}, year = {2022}, } @article{12257, abstract = {Structural balance theory is an established framework for studying social relationships of friendship and enmity. These relationships are modeled by a signed network whose energy potential measures the level of imbalance, while stochastic dynamics drives the network toward a state of minimum energy that captures social balance. It is known that this energy landscape has local minima that can trap socially aware dynamics, preventing it from reaching balance. Here we first study the robustness and attractor properties of these local minima. We show that a stochastic process can reach them from an abundance of initial states and that some local minima cannot be escaped by mild perturbations of the network. Motivated by these anomalies, we introduce best-edge dynamics (BED), a new plausible stochastic process. We prove that BED always reaches balance and that it does so fast in various interesting settings.}, author = {Chatterjee, Krishnendu and Svoboda, Jakub and Zikelic, Dorde and Pavlogiannis, Andreas and Tkadlec, Josef}, issn = {2470-0053}, journal = {Physical Review E}, number = {3}, publisher = {American Physical Society}, title = {{Social balance on networks: Local minima and best-edge dynamics}}, doi = {10.1103/physreve.106.034321}, volume = {106}, year = {2022}, } @article{12280, abstract = {In repeated interactions, players can use strategies that respond to the outcome of previous rounds. Much of the existing literature on direct reciprocity assumes that all competing individuals use the same strategy space. Here, we study both learning and evolutionary dynamics of players that differ in the strategy space they explore. We focus on the infinitely repeated donation game and compare three natural strategy spaces: memory-1 strategies, which consider the last moves of both players, reactive strategies, which respond to the last move of the co-player, and unconditional strategies. These three strategy spaces differ in the memory capacity that is needed. We compute the long-term average payoff that is achieved in a pairwise learning process. We find that smaller strategy spaces can dominate larger ones. 
For weak selection, unconditional players dominate both reactive and memory-1 players. For intermediate selection, reactive players dominate memory-1 players. Only for strong selection and a low cost-to-benefit ratio do memory-1 players dominate the others. We observe that the supergame between strategy spaces can be a social dilemma: the maximum payoff is achieved if both players explore a larger strategy space, but smaller strategy spaces dominate.}, author = {Schmid, Laura and Hilbe, Christian and Chatterjee, Krishnendu and Nowak, Martin}, issn = {1553-7358}, journal = {PLOS Computational Biology}, keywords = {Computational Theory and Mathematics, Cellular and Molecular Neuroscience, Genetics, Molecular Biology, Ecology, Modeling and Simulation, Ecology, Evolution, Behavior and Systematics}, number = {6}, publisher = {Public Library of Science}, title = {{Direct reciprocity between individuals that use different strategy spaces}}, doi = {10.1371/journal.pcbi.1010149}, volume = {18}, year = {2022}, } @article{9311, abstract = {Partially observable Markov decision processes (POMDPs) are standard models for dynamic systems with probabilistic and nondeterministic behaviour in uncertain environments. We prove that in POMDPs with a long-run average objective, the decision maker has approximately optimal strategies with finite memory. This implies notably that approximating the long-run value is recursively enumerable, as well as a weak continuity property of the value with respect to the transition function.}, author = {Chatterjee, Krishnendu and Saona Urmeneta, Raimundo J and Ziliotto, Bruno}, issn = {1526-5471}, journal = {Mathematics of Operations Research}, keywords = {Management Science and Operations Research, General Mathematics, Computer Science Applications}, number = {1}, pages = {100--119}, publisher = {Institute for Operations Research and the Management Sciences}, title = {{Finite-memory strategies in POMDPs with long-run average objectives}}, doi = {10.1287/moor.2020.1116}, volume = {47}, year = {2022}, } @inproceedings{12170, abstract = {We present PET, a specialized and highly optimized framework for partial exploration on probabilistic systems. Over the last decade, several significant advances in the analysis of Markov decision processes have employed partial exploration. In a nutshell, this idea allows computation to be focused on specific parts of the system, guided by heuristics, while maintaining correctness. In particular, only relevant parts of the system are constructed on demand, which in turn potentially allows the construction of large parts of the system to be omitted entirely. Depending on the model, this leads to dramatic speed-ups, in extreme cases even up to an arbitrary factor. PET unifies several previous implementations and provides a flexible framework to easily implement partial exploration for many further problems. 
Our experimental evaluation shows significant improvements compared to the previous implementations while vastly reducing the overhead required to add support for additional properties.}, author = {Meggendorfer, Tobias}, booktitle = {20th International Symposium on Automated Technology for Verification and Analysis}, isbn = {9783031199912}, issn = {1611-3349}, location = {Virtual}, pages = {320--326}, publisher = {Springer Nature}, title = {{PET – A partial exploration tool for probabilistic verification}}, doi = {10.1007/978-3-031-19992-9_20}, volume = {13505}, year = {2022}, } @article{11402, abstract = {Fixed-horizon planning considers a weighted graph and asks to construct a path that maximizes the sum of weights for a given time horizon T. However, in many scenarios, the time horizon is not fixed, but the stopping time is chosen according to some distribution such that the expected stopping time is T. If the stopping-time distribution is not known, then to ensure robustness, the distribution is chosen by an adversary as the worst-case scenario. A stationary plan always chooses the same outgoing edge at every vertex. For a fixed horizon or a fixed stopping-time distribution, stationary plans are not sufficient for optimality. Quite surprisingly, we show that when an adversary chooses the stopping-time distribution with expected stopping time T, then stationary plans are sufficient. While computing optimal stationary plans for a fixed horizon is NP-complete, we show that computing optimal stationary plans under an adversarial stopping-time distribution can be achieved in polynomial time.}, author = {Chatterjee, Krishnendu and Doyen, Laurent}, issn = {1090-2724}, journal = {Journal of Computer and System Sciences}, pages = {1--21}, publisher = {Elsevier}, title = {{Graph planning with expected finite horizon}}, doi = {10.1016/j.jcss.2022.04.003}, volume = {129}, year = {2022}, } @inproceedings{12775, abstract = {We consider the problem of approximating the reachability probabilities in Markov decision processes (MDP) with uncountable (continuous) state and action spaces. While there are algorithms that, for special classes of such MDP, provide a sequence of approximations converging to the true value in the limit, our aim is to obtain an algorithm with guarantees on the precision of the approximation. As this problem is undecidable in general, assumptions on the MDP are necessary. Our main contribution is to identify sufficient assumptions that are as weak as possible, thus approaching the "boundary" of which systems can be correctly and reliably analyzed. To this end, we also argue why each of our assumptions is necessary for algorithms based on processing finitely many observations. We present two solution variants. The first one provides converging lower bounds under weaker assumptions than typical ones from previous works concerned with guarantees. The second one then utilizes stronger assumptions to additionally provide converging upper bounds. Altogether, we obtain an anytime algorithm, i.e. one yielding a sequence of approximants with known and iteratively improving precision, converging to the true value in the limit. Besides, due to the generality of our assumptions, our algorithms are very general templates, readily allowing for various heuristics from the literature, in contrast to, e.g., a specific discretization algorithm. 
Our theoretical contribution thus paves the way for future practical improvements without sacrificing correctness guarantees.}, author = {Grover, Kush and Kretinsky, Jan and Meggendorfer, Tobias and Weininger, Maximilian}, booktitle = {33rd International Conference on Concurrency Theory}, issn = {1868-8969}, location = {Warsaw, Poland}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik}, title = {{Anytime guarantees for reachability in uncountable Markov decision processes}}, doi = {10.4230/LIPIcs.CONCUR.2022.11}, volume = {243}, year = {2022}, } @inproceedings{12000, abstract = {We consider the quantitative problem of obtaining lower bounds on the probability of termination of a given non-deterministic probabilistic program. Specifically, given a non-termination threshold p ∈ [0,1], we aim for certificates proving that the program terminates with probability at least 1−p. The basic idea of our approach is to find a terminating stochastic invariant, i.e. a subset SI of program states such that (i) the probability of the program ever leaving SI is no more than p, and (ii) almost-surely, the program either leaves SI or terminates. While stochastic invariants are already well-known, we provide the first proof that the idea above is not only sound, but also complete for quantitative termination analysis. We then introduce a novel sound and complete characterization of stochastic invariants that enables template-based approaches for easy synthesis of quantitative termination certificates, especially in affine or polynomial forms. Finally, by combining this idea with the existing martingale-based methods that are relatively complete for qualitative termination analysis, we obtain the first automated, sound, and relatively complete algorithm for quantitative termination analysis. Notably, our completeness guarantees for quantitative termination analysis are as strong as the best-known methods for the qualitative variant. Our prototype implementation demonstrates the effectiveness of our approach on various probabilistic programs. We also demonstrate that our algorithm certifies lower bounds on termination probability for probabilistic programs that are beyond the reach of previous methods.}, author = {Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Meggendorfer, Tobias and Zikelic, Dorde}, booktitle = {Proceedings of the 34th International Conference on Computer Aided Verification}, isbn = {9783031131844}, issn = {1611-3349}, location = {Haifa, Israel}, pages = {55--78}, publisher = {Springer}, title = {{Sound and complete certificates for quantitative termination analysis of probabilistic programs}}, doi = {10.1007/978-3-031-13185-1_4}, volume = {13371}, year = {2022}, } @article{12511, abstract = {We consider the problem of formally verifying almost-sure (a.s.) asymptotic stability in discrete-time nonlinear stochastic control systems. While verifying stability in deterministic control systems is extensively studied in the literature, verifying stability in stochastic control systems is an open problem. The few existing works on this topic either consider only specialized forms of stochasticity or make restrictive assumptions on the system, rendering them inapplicable to learning algorithms with neural network policies. In this work, we present an approach for general nonlinear stochastic control problems with two novel aspects: (a) instead of classical stochastic extensions of Lyapunov functions, we use ranking supermartingales (RSMs) to certify a.s. 
asymptotic stability, and (b) we present a method for learning neural network RSMs. We prove that our approach guarantees a.s. asymptotic stability of the system and provides the first method to obtain bounds on the stabilization time, which stochastic Lyapunov functions do not. Finally, we validate our approach experimentally on a set of nonlinear stochastic reinforcement learning environments with neural network policies.}, author = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A}, isbn = {9781577358350}, issn = {2374-3468}, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, keywords = {General Medicine}, number = {7}, pages = {7326--7336}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Stability verification in stochastic control systems via neural network supermartingales}}, doi = {10.1609/aaai.v36i7.20695}, volume = {36}, year = {2022}, } @unpublished{14601, abstract = {In this work, we address the problem of learning provably stable neural network policies for stochastic control systems. While recent work has demonstrated the feasibility of certifying given policies using martingale theory, the problem of how to learn such policies is little explored. Here, we study the effectiveness of jointly learning a policy together with a martingale certificate that proves its stability using a single learning algorithm. We observe that the joint optimization problem becomes easily stuck in local minima when starting from a randomly initialized policy. Our results suggest that some form of pre-training of the policy is required for the joint optimization to repair and verify the policy successfully.}, author = {Zikelic, Dorde and Lechner, Mathias and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {arXiv}, title = {{Learning stabilizing policies in stochastic control systems}}, doi = {10.48550/arXiv.2205.11991}, year = {2022}, }