% Bibliography database: ten publication records (2022--2023), keyed by numeric record id.
% Conventions used below:
%   - one field per line, braces as delimiters, bare DOIs (no resolver prefix);
%   - titles carry a single pair of braces; only words that must keep their
%     capitalisation (proper nouns, acronyms, quoted titles) are brace-protected;
%   - the conference place of a proceedings paper is stored in `venue`, since
%     `location`/`address` denotes the publisher's city;
%   - the file contains non-ASCII (UTF-8) characters, so it presumably targets a
%     UTF-8-aware backend such as Biber -- confirm before using classic 8-bit BibTeX.

% TACAS 2023 paper: approximating stationary distributions of Markov chains.
@inproceedings{13139,
  author    = {Meggendorfer, Tobias},
  title     = {Correct approximation of stationary distributions},
  booktitle = {TACAS 2023: Tools and Algorithms for the Construction and Analysis of Systems},
  series    = {Lecture Notes in Computer Science},
  volume    = {13993},
  pages     = {489--507},
  publisher = {Springer Nature},
  venue     = {Paris, France},
  year      = {2023},
  isbn      = {9783031308222},
  issn      = {1611-3349},
  doi       = {10.1007/978-3-031-30823-9_25},
  abstract  = {A classical problem for Markov chains is determining their stationary (or steady-state) distribution. This problem has an equally classical solution based on eigenvectors and linear equation systems. However, this approach does not scale to large instances, and iterative solutions are desirable. It turns out that a naive approach, as used by current model checkers, may yield completely wrong results. We present a new approach, which utilizes recent advances in partial exploration and mean payoff computation to obtain a correct, converging approximation.},
}

% Software artefact accompanying record 13139 (hosted on Zenodo).
@misc{14990,
  author    = {Meggendorfer, Tobias},
  title     = {Artefact for: {Correct Approximation of Stationary Distributions}},
  publisher = {Zenodo},
  year      = {2023},
  doi       = {10.5281/zenodo.7548214},
  abstract  = {The software artefact to evaluate the approximation of stationary distributions implementation.},
}

% NeurIPS 2023 paper: compositional policy learning with formal guarantees.
@inproceedings{15023,
  author    = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A.},
  title     = {Compositional policy learning in stochastic control systems with formal guarantees},
  booktitle = {37th Conference on Neural Information Processing Systems},
  venue     = {New Orleans, LA, United States},
  year      = {2023},
  abstract  = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL, to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASM) for the graph’s sub-tasks and then composing them into a global policy. We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.},
}

% FSTTCS 2022 paper: computing minimal cores of Markov chains.
@inproceedings{12102,
  author    = {Ahmadi, Ali and Chatterjee, Krishnendu and Goharshady, Amir Kafshdar and Meggendorfer, Tobias and Safavi Hemami, Roodabeh and Zikelic, Dorde},
  title     = {Algorithms and hardness results for computing cores of {Markov} chains},
  booktitle = {42nd IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science},
  series    = {Leibniz International Proceedings in Informatics},
  volume    = {250},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  venue     = {Madras, India},
  year      = {2022},
  isbn      = {9783959772617},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.FSTTCS.2022.29},
  abstract  = {Given a Markov chain M = (V, v_0, δ), with state space V and a starting state v_0, and a probability threshold ε, an ε-core is a subset C of states that is left with probability at most ε. More formally, C ⊆ V is an ε-core, iff ℙ[reach (V\C)] ≤ ε. Cores have been applied in a wide variety of verification problems over Markov chains, Markov decision processes, and probabilistic programs, as a means of discarding uninteresting and low-probability parts of a probabilistic system and instead being able to focus on the states that are likely to be encountered in a real-world run. In this work, we focus on the problem of computing a minimal ε-core in a Markov chain. Our contributions include both negative and positive results: (i) We show that the decision problem on the existence of an ε-core of a given size is NP-complete. This solves an open problem posed in [Jan Kretínský and Tobias Meggendorfer, 2020]. We additionally show that the problem remains NP-complete even when limited to acyclic Markov chains with bounded maximal vertex degree; (ii) We provide a polynomial time algorithm for computing a minimal ε-core on Markov chains over control-flow graphs of structured programs. A straightforward combination of our algorithm with standard branch prediction techniques allows one to apply the idea of cores to find a subset of program lines that are left with low probability and then focus any desired static analysis on this core subset.},
}

% FSTTCS 2022 paper: complexity of spatial games.
@inproceedings{12101,
  author    = {Chatterjee, Krishnendu and Ibsen-Jensen, Rasmus and Jecker, Ismael R. and Svoboda, Jakub},
  title     = {Complexity of spatial games},
  booktitle = {42nd IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science},
  series    = {Leibniz International Proceedings in Informatics},
  volume    = {250},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  venue     = {Madras, India},
  year      = {2022},
  isbn      = {9783959772617},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.FSTTCS.2022.11},
  abstract  = {Spatial games form a widely-studied class of games from biology and physics modeling the evolution of social behavior. Formally, such a game is defined by a square (d by d) payoff matrix M and an undirected graph G. Each vertex of G represents an individual, that initially follows some strategy i ∈ {1,2,…,d}. In each round of the game, every individual plays the matrix game with each of its neighbors: An individual following strategy i meeting a neighbor following strategy j receives a payoff equal to the entry (i,j) of M. Then, each individual updates its strategy to its neighbors' strategy with the highest sum of payoffs, and the next round starts. The basic computational problems consist of reachability between configurations and the average frequency of a strategy. For general spatial games and graphs, these problems are in PSPACE. In this paper, we examine restricted setting: the game is a prisoner’s dilemma; and G is a subgraph of grid. We prove that basic computational problems for spatial games with prisoner’s dilemma on a subgraph of a grid are PSPACE-hard.},
}

% AAAI 2022 paper: risk-aware stochastic shortest path (CVaR objective).
@inproceedings{12568,
  author    = {Meggendorfer, Tobias},
  title     = {Risk-aware stochastic shortest path},
  booktitle = {Proceedings of the 36th AAAI Conference on Artificial Intelligence, AAAI 2022},
  volume    = {36},
  number    = {9},
  pages     = {9858--9867},
  publisher = {Association for the Advancement of Artificial Intelligence},
  venue     = {Virtual},
  year      = {2022},
  isbn      = {1577358767},
  issn      = {2374-3468},
  doi       = {10.1609/aaai.v36i9.21222},
  abstract  = {We treat the problem of risk-aware control for stochastic shortest path (SSP) on Markov decision processes (MDP). Typically, expectation is considered for SSP, which however is oblivious to the incurred risk. We present an alternative view, instead optimizing conditional value-at-risk (CVaR), an established risk measure. We treat both Markov chains as well as MDP and introduce, through novel insights, two algorithms, based on linear programming and value iteration, respectively. Both algorithms offer precise and provably correct solutions. Evaluation of our prototype implementation shows that risk-aware control is feasible on several moderately sized models.},
}

% Journal article: compatible matchings on labeled point sets.
% The formulas in the abstract were restored from a flattened export (lost
% superscripts and radical scope); plain-text exponent notation is used.
@article{11938,
  author    = {Aichholzer, Oswin and Arroyo Guevara, Alan M. and Masárová, Zuzana and Parada, Irene and Perz, Daniel and Pilz, Alexander and Tkadlec, Josef and Vogtenhuber, Birgit},
  title     = {On compatible matchings},
  journal   = {Journal of Graph Algorithms and Applications},
  volume    = {26},
  number    = {2},
  pages     = {225--240},
  publisher = {Brown University},
  year      = {2022},
  issn      = {1526-1719},
  doi       = {10.7155/jgaa.00591},
  abstract  = {A matching is compatible to two or more labeled point sets of size n with labels {1, …, n} if its straight-line drawing on each of these point sets is crossing-free. We study the maximum number of edges in a matching compatible to two or more labeled point sets in general position in the plane. We show that for any two labeled sets of n points in convex position there exists a compatible matching with ⌊√(2n+1) − 1⌋ edges. More generally, for any ℓ labeled point sets we construct compatible matchings of size Ω(n^{1/ℓ}). As a corresponding upper bound, we use probabilistic arguments to show that for any ℓ given sets of n points there exists a labeling of each set such that the largest compatible matching has O(n^{2/(ℓ+1)}) edges. Finally, we show that Θ(log n) copies of any set of n points are necessary and sufficient for the existence of labelings of these point sets such that any compatible matching consists only of a single edge.},
}

% arXiv preprint: repeated (two-phase) prophet inequality.
% Stored as a misc entry with eprint fields; the arXiv identifier is taken from the DOI.
@misc{12677,
  author        = {Chatterjee, Krishnendu and Mohammadi, Mona and Saona Urmeneta, Raimundo J.},
  title         = {Repeated prophet inequality with near-optimal bounds},
  year          = {2022},
  eprint        = {2209.14368},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2209.14368},
  abstract      = {In modern sample-driven Prophet Inequality, an adversary chooses a sequence of n items with values v_1,v_2,…,v_n to be presented to a decision maker (DM). The process follows in two phases. In the first phase (sampling phase), some items, possibly selected at random, are revealed to the DM, but she can never accept them. In the second phase, the DM is presented with the other items in a random order and online fashion. For each item, she must make an irrevocable decision to either accept the item and stop the process or reject the item forever and proceed to the next item. The goal of the DM is to maximize the expected value as compared to a Prophet (or offline algorithm) that has access to all information. In this setting, the sampling phase has no cost and is not part of the optimization process. However, in many scenarios, the samples are obtained as part of the decision-making process. We model this aspect as a two-phase Prophet Inequality where an adversary chooses a sequence of 2n items with values v_1,v_2,…,v_{2n} and the items are randomly ordered. Finally, there are two phases of the Prophet Inequality problem with the first n-items and the rest of the items, respectively. We show that some basic algorithms achieve a ratio of at most 0.450. We present an algorithm that achieves a ratio of at least 0.495. Finally, we show that for every algorithm the ratio it can achieve is at most 0.502. Hence our algorithm is near-optimal.},
}

% Journal article: index appearance records for translating Rabin to parity automata.
@article{10602,
  author    = {Kretinsky, Jan and Meggendorfer, Tobias and Waldmann, Clara and Weininger, Maximilian},
  title     = {Index appearance record with preorders},
  journal   = {Acta Informatica},
  volume    = {59},
  pages     = {585--618},
  publisher = {Springer Nature},
  year      = {2022},
  issn      = {1432-0525},
  doi       = {10.1007/s00236-021-00412-y},
  keywords  = {computer networks and communications, information systems, software},
  abstract  = {Transforming ω-automata into parity automata is traditionally done using appearance records. We present an efficient variant of this idea, tailored to Rabin automata, and several optimizations applicable to all appearance records. We compare the methods experimentally and show that our method produces significantly smaller automata than previous approaches.},
}

% Journal article: stochastic model of COVID-19 spread under lockdown policies.
@article{10731,
  author    = {Svoboda, Jakub and Tkadlec, Josef and Pavlogiannis, Andreas and Chatterjee, Krishnendu and Nowak, Martin A.},
  title     = {Infection dynamics of {COVID-19} virus under lockdown and reopening},
  journal   = {Scientific Reports},
  volume    = {12},
  number    = {1},
  publisher = {Springer Nature},
  year      = {2022},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-022-05333-5},
  abstract  = {Motivated by COVID-19, we develop and analyze a simple stochastic model for the spread of disease in human population. We track how the number of infected and critically ill people develops over time in order to estimate the demand that is imposed on the hospital system. To keep this demand under control, we consider a class of simple policies for slowing down and reopening society and we compare their efficiency in mitigating the spread of the virus from several different points of view. We find that in order to avoid overwhelming of the hospital system, a policy must impose a harsh lockdown or it must react swiftly (or both). While reacting swiftly is universally beneficial, being harsh pays off only when the country is patient about reopening and when the neighboring countries coordinate their mitigation efforts. Our work highlights the importance of acting decisively when closing down and the importance of patience and coordination between neighboring countries when reopening.},
}