@inproceedings{6462, abstract = {A controller is a device that interacts with a plant. At each time point,it reads the plant’s state and issues commands with the goal that the plant oper-ates optimally. Constructing optimal controllers is a fundamental and challengingproblem. Machine learning techniques have recently been successfully applied totrain controllers, yet they have limitations. Learned controllers are monolithic andhard to reason about. In particular, it is difficult to add features without retraining,to guarantee any level of performance, and to achieve acceptable performancewhen encountering untrained scenarios. These limitations can be addressed bydeploying quantitative run-timeshieldsthat serve as a proxy for the controller.At each time point, the shield reads the command issued by the controller andmay choose to alter it before passing it on to the plant. We show how optimalshields that interfere as little as possible while guaranteeing a desired level ofcontroller performance, can be generated systematically and automatically usingreactive synthesis. First, we abstract the plant by building a stochastic model.Second, we consider the learned controller to be a black box. Third, we mea-surecontroller performanceandshield interferenceby two quantitative run-timemeasures that are formally defined using weighted automata. Then, the problemof constructing a shield that guarantees maximal performance with minimal inter-ference is the problem of finding an optimal strategy in a stochastic2-player game“controller versus shield” played on the abstract state space of the plant with aquantitative objective obtained from combining the performance and interferencemeasures. We illustrate the effectiveness of our approach by automatically con-structing lightweight shields for learned traffic-light controllers in various roadnetworks. The shields we generate avoid liveness bugs, improve controller per-formance in untrained and changing traffic situations, and add features to learnedcontrollers, such as giving priority to emergency vehicles.}, author = {Avni, Guy and Bloem, Roderick and Chatterjee, Krishnendu and Henzinger, Thomas A and Konighofer, Bettina and Pranger, Stefan}, booktitle = {31st International Conference on Computer-Aided Verification}, isbn = {9783030255398}, issn = {0302-9743}, location = {New York, NY, United States}, pages = {630--649}, publisher = {Springer}, title = {{Run-time optimization for learned controllers through quantitative games}}, doi = {10.1007/978-3-030-25540-4_36}, volume = {11561}, year = {2019}, } @article{6836, abstract = {Direct reciprocity is a powerful mechanism for the evolution of cooperation on the basis of repeated interactions1,2,3,4. It requires that interacting individuals are sufficiently equal, such that everyone faces similar consequences when they cooperate or defect. Yet inequality is ubiquitous among humans5,6 and is generally considered to undermine cooperation and welfare7,8,9,10. Most previous models of reciprocity do not include inequality11,12,13,14,15. These models assume that individuals are the same in all relevant aspects. Here we introduce a general framework to study direct reciprocity among unequal individuals. Our model allows for multiple sources of inequality. Subjects can differ in their endowments, their productivities and in how much they benefit from public goods. We find that extreme inequality prevents cooperation. But if subjects differ in productivity, some endowment inequality can be necessary for cooperation to prevail. Our mathematical predictions are supported by a behavioural experiment in which we vary the endowments and productivities of the subjects. We observe that overall welfare is maximized when the two sources of heterogeneity are aligned, such that more productive individuals receive higher endowments. By contrast, when endowments and productivities are misaligned, cooperation quickly breaks down. Our findings have implications for policy-makers concerned with equity, efficiency and the provisioning of public goods.}, author = {Hauser, Oliver P. and Hilbe, Christian and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {14764687}, journal = {Nature}, number = {7770}, pages = {524--527}, publisher = {Springer Nature}, title = {{Social dilemmas among unequals}}, doi = {10.1038/s41586-019-1488-5}, volume = {572}, year = {2019}, } @inproceedings{6942, abstract = {Graph games and Markov decision processes (MDPs) are standard models in reactive synthesis and verification of probabilistic systems with nondeterminism. The class of 𝜔 -regular winning conditions; e.g., safety, reachability, liveness, parity conditions; provides a robust and expressive specification formalism for properties that arise in analysis of reactive systems. The resolutions of nondeterminism in games and MDPs are represented as strategies, and we consider succinct representation of such strategies. The decision-tree data structure from machine learning retains the flavor of decisions of strategies and allows entropy-based minimization to obtain succinct trees. However, in contrast to traditional machine-learning problems where small errors are allowed, for winning strategies in graph games and MDPs no error is allowed, and the decision tree must represent the entire strategy. In this work we propose decision trees with linear classifiers for representation of strategies in graph games and MDPs. We have implemented strategy representation using this data structure and we present experimental results for problems on graph games and MDPs, which show that this new data structure presents a much more efficient strategy representation as compared to standard decision trees.}, author = {Ashok, Pranav and Brázdil, Tomáš and Chatterjee, Krishnendu and Křetínský, Jan and Lampert, Christoph and Toman, Viktor}, booktitle = {16th International Conference on Quantitative Evaluation of Systems}, isbn = {9783030302801}, issn = {0302-9743}, location = {Glasgow, United Kingdom}, pages = {109--128}, publisher = {Springer Nature}, title = {{Strategy representation by decision trees with linear classifiers}}, doi = {10.1007/978-3-030-30281-8_7}, volume = {11785}, year = {2019}, } @inproceedings{7183, abstract = {A probabilistic vector addition system with states (pVASS) is a finite state Markov process augmented with non-negative integer counters that can be incremented or decremented during each state transition, blocking any behaviour that would cause a counter to decrease below zero. The pVASS can be used as abstractions of probabilistic programs with many decidable properties. The use of pVASS as abstractions requires the presence of nondeterminism in the model. In this paper, we develop techniques for checking fast termination of pVASS with nondeterminism. That is, for every initial configuration of size n, we consider the worst expected number of transitions needed to reach a configuration with some counter negative (the expected termination time). We show that the problem whether the asymptotic expected termination time is linear is decidable in polynomial time for a certain natural class of pVASS with nondeterminism. Furthermore, we show the following dichotomy: if the asymptotic expected termination time is not linear, then it is at least quadratic, i.e., in Ω(n2).}, author = {Brázdil, Tomás and Chatterjee, Krishnendu and Kucera, Antonín and Novotný, Petr and Velan, Dominik}, booktitle = {International Symposium on Automated Technology for Verification and Analysis}, isbn = {9783030317836}, issn = {16113349}, location = {Taipei, Taiwan}, pages = {462--478}, publisher = {Springer Nature}, title = {{Deciding fast termination for probabilistic VASS with nondeterminism}}, doi = {10.1007/978-3-030-31784-3_27}, volume = {11781}, year = {2019}, } @article{7210, abstract = {The rate of biological evolution depends on the fixation probability and on the fixation time of new mutants. Intensive research has focused on identifying population structures that augment the fixation probability of advantageous mutants. But these amplifiers of natural selection typically increase fixation time. Here we study population structures that achieve a tradeoff between fixation probability and time. First, we show that no amplifiers can have an asymptotically lower absorption time than the well-mixed population. Then we design population structures that substantially augment the fixation probability with just a minor increase in fixation time. Finally, we show that those structures enable higher effective rate of evolution than the well-mixed population provided that the rate of generating advantageous mutants is relatively low. Our work sheds light on how population structure affects the rate of evolution. Moreover, our structures could be useful for lab-based, medical, or industrial applications of evolutionary optimization.}, author = {Tkadlec, Josef and Pavlogiannis, Andreas and Chatterjee, Krishnendu and Nowak, Martin A.}, issn = {2399-3642}, journal = {Communications Biology}, publisher = {Springer Nature}, title = {{Population structure determines the tradeoff between fixation probability and fixation time}}, doi = {10.1038/s42003-019-0373-y}, volume = {2}, year = {2019}, }