@inproceedings{8193,
  abstract     = {Multiple-environment Markov decision processes (MEMDPs) are MDPs equipped with not one, but multiple probabilistic transition functions, which represent the various possible unknown environments. While the previous research on MEMDPs focused on theoretical properties for long-run average payoff, we study them with discounted-sum payoff and focus on their practical advantages and applications. MEMDPs can be viewed as a special case of Partially observable and Mixed observability MDPs: the state of the system is perfectly observable, but not the environment. We show that the specific structure of MEMDPs allows for more efficient algorithmic analysis, in particular for faster belief updates. We demonstrate the applicability of MEMDPs in several domains. In particular, we formalize the sequential decision-making approach to contextual recommendation systems as MEMDPs and substantially improve over the previous MDP approach.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Karkhanis, Deep and Novotný, Petr and Royer, Amélie},
  booktitle    = {Proceedings of the 30th International Conference on Automated Planning and Scheduling},
  issn         = {23340843},
  location     = {Nancy, France},
  pages        = {48--56},
  publisher    = {Association for the Advancement of Artificial Intelligence},
  title        = {{Multiple-environment Markov decision processes: Efficient analysis and applications}},
  volume       = {30},
  year         = {2020},
}

@article{1407,
  abstract     = {We consider the problem of computing the set of initial states of a dynamical system such that there exists a control strategy to ensure that the trajectories satisfy a temporal logic specification with probability 1 (almost-surely). We focus on discrete-time, stochastic linear dynamics and specifications given as formulas of the Generalized Reactivity(1) fragment of Linear Temporal Logic over linear predicates in the states of the system. We propose a solution based on iterative abstraction-refinement, and turn-based 2-player probabilistic games. While the theoretical guarantee of our algorithm after any finite number of iterations is only a partial solution, we show that if our algorithm terminates, then the result is the set of all satisfying initial states. Moreover, for any (partial) solution our algorithm synthesizes witness control strategies to ensure almost-sure satisfaction of the temporal logic specification. While the proposed algorithm guarantees progress and soundness in every iteration, it is computationally demanding. We offer an alternative, more efficient solution for the reachability properties that decomposes the problem into a series of smaller problems of the same type. All algorithms are demonstrated on an illustrative case study.},
  author       = {Svoreňová, Mária and Kretinsky, Jan and Chmelik, Martin and Chatterjee, Krishnendu and Cěrná, Ivana and Belta, Cǎlin},
  journal      = {Nonlinear Analysis: Hybrid Systems},
  number       = {2},
  pages        = {230 -- 253},
  publisher    = {Elsevier},
  title        = {{Temporal logic control for stochastic linear systems using abstraction refinement of probabilistic games}},
  doi          = {10.1016/j.nahs.2016.04.006},
  volume       = {23},
  year         = {2017},
}

@inproceedings{1324,
  abstract     = {DEC-POMDPs extend POMDPs to a multi-agent setting, where several agents operate in an uncertain environment independently to achieve a joint objective. DEC-POMDPs have been studied with finite-horizon and infinite-horizon discounted-sum objectives, and there exist solvers both for exact and approximate solutions. In this work we consider Goal-DEC-POMDPs, where given a set of target states, the objective is to ensure that the target set is reached with minimal cost. We consider the indefinite-horizon (infinite-horizon with either discounted-sum, or undiscounted-sum, where absorbing goal states have zero-cost) problem. We present a new and novel method to solve the problem that extends methods for finite-horizon DEC-POMDPs and the RTDP-Bel approach for POMDPs. We present experimental results on several examples, and show that our approach presents promising results. Copyright },
  author       = {Chatterjee, Krishnendu and Chmelik, Martin},
  booktitle    = {Proceedings of the Twenty-Sixth International Conference on International Conference on Automated Planning and Scheduling},
  location     = {London, United Kingdom},
  pages        = {88 -- 96},
  publisher    = {AAAI Press},
  title        = {{Indefinite-horizon reachability in Goal-DEC-POMDPs}},
  volume       = {2016-January},
  year         = {2016},
}

@inproceedings{1327,
  abstract     = {We consider partially observable Markov decision processes (POMDPs) with a set of target states and positive integer costs associated with every transition. The traditional optimization objective (stochastic shortest path) asks to minimize the expected total cost until the target set is reached. We extend the traditional framework of POMDPs to model energy consumption, which represents a hard constraint. The energy levels may increase and decrease with transitions, and the hard constraint requires that the energy level must remain positive in all steps till the target is reached. First, we present a novel algorithm for solving POMDPs with energy levels, developing on existing POMDP solvers and using RTDP as its main method. Our second contribution is related to policy representation. For larger POMDP instances the policies computed by existing solvers are too large to be understandable. We present an automated procedure based on machine learning techniques that automatically extracts important decisions of the policy allowing us to compute succinct human readable policies. Finally, we show experimentally that our algorithm performs well and computes succinct policies on a number of POMDP instances from the literature that were naturally enhanced with energy levels. },
  author       = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Gupta, Anchit and Novotny, Petr},
  booktitle    = {Proceedings of the 15th International Conference on Autonomous Agents and Multiagent Systems},
  location     = {Singapore},
  pages        = {1465 -- 1466},
  publisher    = {ACM},
  title        = {{Stochastic shortest path with energy constraints in POMDPs}},
  year         = {2016},
}

@article{1518,
  abstract     = {The inference of demographic history from genome data is hindered by a lack of efficient computational approaches. In particular, it has proved difficult to exploit the information contained in the distribution of genealogies across the genome. We have previously shown that the generating function (GF) of genealogies can be used to analytically compute likelihoods of demographic models from configurations of mutations in short sequence blocks (Lohse et al. 2011). Although the GF has a simple, recursive form, the size of such likelihood calculations explodes quickly with the number of individuals and applications of this framework have so far been mainly limited to small samples (pairs and triplets) for which the GF can be written by hand. Here we investigate several strategies for exploiting the inherent symmetries of the coalescent. In particular, we show that the GF of genealogies can be decomposed into a set of equivalence classes that allows likelihood calculations from nontrivial samples. Using this strategy, we automated blockwise likelihood calculations for a general set of demographic scenarios in Mathematica. These histories may involve population size changes, continuous migration, discrete divergence, and admixture between multiple populations. To give a concrete example, we calculate the likelihood for a model of isolation with migration (IM), assuming two diploid samples without phase and outgroup information. We demonstrate the new inference scheme with an analysis of two individual butterfly genomes from the sister species Heliconius melpomene rosina and H. cydno.},
  author       = {Lohse, Konrad and Chmelik, Martin and Martin, Simon and Barton, Nicholas H},
  journal      = {Genetics},
  number       = {2},
  pages        = {775 -- 786},
  publisher    = {Genetics Society of America},
  title        = {{Efficient strategies for calculating blockwise likelihoods under the coalescent}},
  doi          = {10.1534/genetics.115.183814},
  volume       = {202},
  year         = {2016},
}

@article{1477,
  abstract     = {We consider partially observable Markov decision processes (POMDPs) with ω-regular conditions specified as parity objectives. The class of ω-regular languages provides a robust specification language to express properties in verification, and parity objectives are canonical forms to express them. The qualitative analysis problem given a POMDP and a parity objective asks whether there is a strategy to ensure that the objective is satisfied with probability 1 (resp. positive probability). While the qualitative analysis problems are undecidable even for special cases of parity objectives, we establish decidability (with optimal complexity) for POMDPs with all parity objectives under finite-memory strategies. We establish optimal (exponential) memory bounds and EXPTIME-completeness of the qualitative analysis problems under finite-memory strategies for POMDPs with parity objectives. We also present a practical approach, where we design heuristics to deal with the exponential complexity, and have applied our implementation on a number of POMDP examples.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Tracol, Mathieu},
  journal      = {Journal of Computer and System Sciences},
  number       = {5},
  pages        = {878 -- 911},
  publisher    = {Elsevier},
  title        = {{What is decidable about partially observable Markov decision processes with ω-regular objectives}},
  doi          = {10.1016/j.jcss.2016.02.009},
  volume       = {82},
  year         = {2016},
}

@article{1529,
  abstract     = {We consider partially observable Markov decision processes (POMDPs) with a set of target states and an integer cost associated with every transition. The optimization objective we study asks to minimize the expected total cost of reaching a state in the target set, while ensuring that the target set is reached almost surely (with probability 1). We show that for integer costs approximating the optimal cost is undecidable. For positive costs, our results are as follows: (i) we establish matching lower and upper bounds for the optimal cost, both double exponential in the POMDP state space size; (ii) we show that the problem of approximating the optimal cost is decidable and present approximation algorithms developing on the existing algorithms for POMDPs with finite-horizon objectives. While the worst-case running time of our algorithm is double exponential, we also present efficient stopping criteria for the algorithm and show experimentally that it performs well in many examples of interest.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Gupta, Raghav and Kanodia, Ayush},
  journal      = {Artificial Intelligence},
  pages        = {26 -- 48},
  publisher    = {Elsevier},
  title        = {{Optimal cost almost-sure reachability in POMDPs}},
  doi          = {10.1016/j.artint.2016.01.007},
  volume       = {234},
  year         = {2016},
}

@inproceedings{1166,
  abstract     = {POMDPs are standard models for probabilistic planning problems, where an agent interacts with an uncertain environment. We study the problem of almost-sure reachability, where given a set of target states, the question is to decide whether there is a policy to ensure that the target set is reached with probability 1 (almost-surely). While in general the problem is EXPTIMEcomplete, in many practical cases policies with a small amount of memory suffice. Moreover, the existing solution to the problem is explicit, which first requires to construct explicitly an exponential reduction to a belief-support MDP. In this work, we first study the existence of observation-stationary strategies, which is NP-complete, and then small-memory strategies. We present a symbolic algorithm by an efficient encoding to SAT and using a SAT solver for the problem. We report experimental results demonstrating the scalability of our symbolic (SAT-based) approach. © 2016, Association for the Advancement of Artificial Intelligence (www.aaai.org). All rights reserved.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Davies, Jessica},
  booktitle    = {Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence},
  location     = {Phoenix, AZ, USA},
  pages        = {3225 -- 3232},
  publisher    = {AAAI Press},
  title        = {{A symbolic SAT based algorithm for almost sure reachability with small strategies in pomdps}},
  volume       = {2016},
  year         = {2016},
}

@phdthesis{1397,
  abstract     = {We study partially observable Markov decision processes (POMDPs) with objectives used in verification and artificial intelligence. The qualitative analysis problem given a POMDP and an objective asks whether there is a strategy (policy) to ensure that the objective is satisfied almost surely (with probability 1), resp. with positive probability (with probability greater than 0). For POMDPs with limit-average payoff, where a reward value in the interval [0,1] is associated to every transition, and the payoff of an infinite path is the long-run average of the rewards, we consider two types of path constraints: (i) a quantitative limit-average constraint defines the set of paths where the payoff is at least a given threshold L1 = 1. Our main results for qualitative limit-average constraint under almost-sure winning are as follows: (i) the problem of deciding the existence of a finite-memory controller is EXPTIME-complete; and (ii) the problem of deciding the existence of an infinite-memory controller is undecidable. For quantitative limit-average constraints we show that the problem of deciding the existence of a finite-memory controller is undecidable. We present a prototype implementation of our EXPTIME algorithm. For POMDPs with w-regular conditions specified as parity objectives, while the qualitative analysis problems are known to be undecidable even for very special case of parity objectives, we establish decidability (with optimal complexity) of the qualitative analysis problems for POMDPs with parity objectives under finite-memory strategies. We establish optimal (exponential) memory bounds and EXPTIME-completeness of the qualitative analysis problems under finite-memory strategies for POMDPs with parity objectives. Based on our theoretical algorithms we also present a practical approach, where we design heuristics to deal with the exponential complexity, and have applied our implementation on a number of well-known POMDP examples for robotics applications. For POMDPs with a set of target states and an integer cost associated with every transition, we study the optimization objective that asks to minimize the expected total cost of reaching a state in the target set, while ensuring that the target set is reached almost surely. We show that for general integer costs approximating the optimal cost is undecidable. For positive costs, our results are as follows: (i) we establish matching lower and upper bounds for the optimal cost, both double and exponential in the POMDP state space size; (ii) we show that the problem of approximating the optimal cost is decidable and present approximation algorithms that extend existing algorithms for POMDPs with finite-horizon objectives. We show experimentally that it performs well in many examples of interest. We study more deeply the problem of almost-sure reachability, where  given a set of target states, the question is to decide whether there is a strategy to ensure that the target set is reached almost surely. While in general the problem EXPTIME-complete, in many practical cases strategies with a small amount of memory suffice. Moreover, the existing solution to the problem is explicit, which first requires to construct explicitly an exponential reduction to a belief-support MDP. We first study the existence of observation-stationary strategies, which is NP-complete, and then small-memory strategies. We present a symbolic algorithm by an efficient encoding to SAT and using a SAT solver for the problem. We report experimental results demonstrating the scalability of our symbolic (SAT-based) approach. Decentralized POMDPs (DEC-POMDPs) extend POMDPs to a multi-agent setting, where several agents operate in an uncertain environment independently to achieve a joint objective. In this work we consider Goal DEC-POMDPs, where given a set of target states, the objective is to ensure that the target set is reached with minimal cost. We consider the indefinite-horizon (infinite-horizon with either discounted-sum, or undiscounted-sum, where absorbing goal states have zero-cost) problem. We present a new and novel method to solve the problem that extends methods for finite-horizon DEC-POMDPs and the real-time dynamic programming approach for POMDPs. We present experimental results on several examples, and show that our approach presents promising results. In the end we present a short summary of a few other results related to verification of MDPs and POMDPs.},
  author       = {Chmelik, Martin},
  issn         = {2663-337X},
  pages        = {232},
  publisher    = {Institute of Science and Technology Austria},
  title        = {{Algorithms for partially observable markov decision processes}},
  year         = {2016},
}

@inproceedings{1691,
  abstract     = {We consider a case study of the problem of deploying an autonomous air vehicle in a partially observable, dynamic, indoor environment from a specification given as a linear temporal logic (LTL) formula over regions of interest. We model the motion and sensing capabilities of the vehicle as a partially observable Markov decision process (POMDP). We adapt recent results for solving POMDPs with parity objectives to generate a control policy. We also extend the existing framework with a policy minimization technique to obtain a better implementable policy, while preserving its correctness. The proposed techniques are illustrated in an experimental setup involving an autonomous quadrotor performing surveillance in a dynamic environment.},
  author       = {Svoreňová, Mária and Chmelik, Martin and Leahy, Kevin and Eniser, Hasan and Chatterjee, Krishnendu and Cěrná, Ivana and Belta, Cǎlin},
  booktitle    = {Proceedings of the 18th International Conference on Hybrid Systems: Computation and Control},
  location     = {Seattle, WA, United States},
  pages        = {233 -- 238},
  publisher    = {ACM},
  title        = {{Temporal logic motion planning using POMDPs with parity objectives: Case study paper}},
  doi          = {10.1145/2728606.2728617},
  year         = {2015},
}

@inproceedings{1820,
  abstract     = {We consider partially observable Markov decision processes (POMDPs) with a set of target states and every transition is associated with an integer cost. The optimization objec- tive we study asks to minimize the expected total cost till the target set is reached, while ensuring that the target set is reached almost-surely (with probability 1). We show that for integer costs approximating the optimal cost is undecidable. For positive costs, our results are as follows: (i) we establish matching lower and upper bounds for the optimal cost and the bound is double exponential; (ii) we show that the problem of approximating the optimal cost is decidable and present ap- proximation algorithms developing on the existing algorithms for POMDPs with finite-horizon objectives. While the worst- case running time of our algorithm is double exponential, we present efficient stopping criteria for the algorithm and show experimentally that it performs well in many examples.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Gupta, Raghav and Kanodia, Ayush},
  booktitle    = {Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence },
  location     = {Austin, TX, USA},
  pages        = {3496--3502},
  publisher    = {AAAI Press},
  title        = {{Optimal cost almost-sure reachability in POMDPs}},
  volume       = {5},
  year         = {2015},
}

@article{1873,
  abstract     = {We consider partially observable Markov decision processes (POMDPs) with limit-average payoff, where a reward value in the interval [0,1] is associated with every transition, and the payoff of an infinite path is the long-run average of the rewards. We consider two types of path constraints: (i) a quantitative constraint defines the set of paths where the payoff is at least a given threshold λ1ε(0,1]; and (ii) a qualitative constraint which is a special case of the quantitative constraint with λ1=1. We consider the computation of the almost-sure winning set, where the controller needs to ensure that the path constraint is satisfied with probability 1. Our main results for qualitative path constraints are as follows: (i) the problem of deciding the existence of a finite-memory controller is EXPTIME-complete; and (ii) the problem of deciding the existence of an infinite-memory controller is undecidable. For quantitative path constraints we show that the problem of deciding the existence of a finite-memory controller is undecidable. We also present a prototype implementation of our EXPTIME algorithm and experimental results on several examples.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin},
  journal      = {Artificial Intelligence},
  pages        = {46 -- 72},
  publisher    = {Elsevier},
  title        = {{POMDPs under probabilistic semantics}},
  doi          = {10.1016/j.artint.2014.12.009},
  volume       = {221},
  year         = {2015},
}

@inproceedings{1732,
  abstract     = {We consider partially observable Markov decision processes (POMDPs), that are a standard framework for robotics applications to model uncertainties present in the real world, with temporal logic specifications. All temporal logic specifications in linear-time temporal logic (LTL) can be expressed as parity objectives. We study the qualitative analysis problem for POMDPs with parity objectives that asks whether there is a controller (policy) to ensure that the objective holds with probability 1 (almost-surely). While the qualitative analysis of POMDPs with parity objectives is undecidable, recent results show that when restricted to finite-memory policies the problem is EXPTIME-complete. While the problem is intractable in theory, we present a practical approach to solve the qualitative analysis problem. We designed several heuristics to deal with the exponential complexity, and have used our implementation on a number of well-known POMDP examples for robotics applications. Our results provide the first practical approach to solve the qualitative analysis of robot motion planning with LTL properties in the presence of uncertainty.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Gupta, Raghav and Kanodia, Ayush},
  location     = {Seattle, WA, United States},
  pages        = {325 -- 330},
  publisher    = {IEEE},
  title        = {{Qualitative analysis of POMDPs with temporal logic specifications for robotics applications}},
  doi          = {10.1109/ICRA.2015.7139019},
  year         = {2015},
}

@misc{5443,
  abstract     = {POMDPs are standard models for probabilistic planning problems, where an agent interacts with an uncertain environment. We study the problem of almost-sure reachability, where given a set of target states, the question is to decide whether there is a policy to ensure that the target set is reached with probability 1 (almost-surely). While in general the problem is EXPTIME-complete, in many practical cases policies with a small amount of memory suffice. Moreover, the existing solution to the problem is explicit, which first requires to construct explicitly an exponential reduction to a belief-support MDP. In this work, we first study the existence of observation-stationary strategies, which is NP-complete, and then small-memory strategies. We present a symbolic algorithm by an efficient encoding to SAT and using a SAT solver for the problem. We report experimental results demonstrating the scalability of our symbolic (SAT-based) approach.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Davies, Jessica},
  issn         = {2664-1690},
  pages        = {23},
  publisher    = {IST Austria},
  title        = {{A symbolic SAT-based algorithm for almost-sure reachability with small strategies in POMDPs}},
  doi          = {10.15479/AT:IST-2015-325-v2-1},
  year         = {2015},
}

@article{1501,
  abstract     = {We consider Markov decision processes (MDPs) which are a standard model for probabilistic systems. We focus on qualitative properties for MDPs that can express that desired behaviors of the system arise almost-surely (with probability 1) or with positive probability. We introduce a new simulation relation to capture the refinement relation of MDPs with respect to qualitative properties, and present discrete graph algorithms with quadratic complexity to compute the simulation relation. We present an automated technique for assume-guarantee style reasoning for compositional analysis of two-player games by giving a counterexample guided abstraction-refinement approach to compute our new simulation relation. We show a tight link between two-player games and MDPs, and as a consequence the results for games are lifted to MDPs with qualitative properties. We have implemented our algorithms and show that the compositional analysis leads to significant improvements. },
  author       = {Chatterjee, Krishnendu and Chmelik, Martin and Daca, Przemyslaw},
  journal      = {Formal Methods in System Design},
  number       = {2},
  pages        = {230 -- 264},
  publisher    = {Springer},
  title        = {{CEGAR for compositional analysis of qualitative properties in Markov decision processes}},
  doi          = {10.1007/s10703-015-0235-2},
  volume       = {47},
  year         = {2015},
}

@inproceedings{1689,
  abstract     = {We consider the problem of computing the set of initial states of a dynamical system such that there exists a control strategy to ensure that the trajectories satisfy a temporal logic specification with probability 1 (almost-surely). We focus on discrete-time, stochastic linear dynamics and specifications given as formulas of the Generalized Reactivity(1) fragment of Linear Temporal Logic over linear predicates in the states of the system. We propose a solution based on iterative abstraction-refinement, and turn-based 2-player probabilistic games. While the theoretical guarantee of our algorithm after any finite number of iterations is only a partial solution, we show that if our algorithm terminates, then the result is the set of satisfying initial states. Moreover, for any (partial) solution our algorithm synthesizes witness control strategies to ensure almost-sure satisfaction of the temporal logic specification. We demonstrate our approach on an illustrative case study.},
  author       = {Svoreňová, Mária and Kretinsky, Jan and Chmelik, Martin and Chatterjee, Krishnendu and Cěrná, Ivana and Belta, Cǎlin},
  booktitle    = {Proceedings of the 18th International Conference on Hybrid Systems: Computation and Control},
  location     = {Seattle, WA, United States},
  pages        = {259 -- 268},
  publisher    = {ACM},
  title        = {{Temporal logic control for stochastic linear systems using abstraction refinement of probabilistic games}},
  doi          = {10.1145/2728606.2728608},
  year         = {2015},
}

@inproceedings{1603,
  abstract     = {For deterministic systems, a counterexample to a property can simply be an error trace, whereas counterexamples in probabilistic systems are necessarily more complex. For instance, a set of erroneous traces with a sufficient cumulative probability mass can be used. Since these are too large objects to understand and manipulate, compact representations such as subchains have been considered. In the case of probabilistic systems with non-determinism, the situation is even more complex. While a subchain for a given strategy (or scheduler, resolving non-determinism) is a straightforward choice, we take a different approach. Instead, we focus on the strategy itself, and extract the most important decisions it makes, and present its succinct representation.
The key tools we employ to achieve this are (1) introducing a concept of importance of a state w.r.t. the strategy, and (2) learning using decision trees. There are three main consequent advantages of our approach. Firstly, it exploits the quantitative information on states, stressing the more important decisions. Secondly, it leads to a greater variability and degree of freedom in representing the strategies. Thirdly, the representation uses a self-explanatory data structure. In summary, our approach produces more succinct and more explainable strategies, as opposed to e.g. binary decision diagrams. Finally, our experimental results show that we can extract several rules describing the strategy even for very large systems that do not fit in memory, and based on the rules explain the erroneous behaviour.},
  author       = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Fellner, Andreas and Kretinsky, Jan},
  location     = {San Francisco, CA, United States},
  pages        = {158 -- 177},
  publisher    = {Springer},
  title        = {{Counterexample explanation by learning small strategies in Markov decision processes}},
  doi          = {10.1007/978-3-319-21690-4_10},
  volume       = {9206},
  year         = {2015},
}

@inproceedings{2027,
  abstract     = {We present a general framework for applying machine-learning algorithms to the verification of Markov decision processes (MDPs). The primary goal of these techniques is to improve performance by avoiding an exhaustive exploration of the state space. Our framework focuses on probabilistic reachability, which is a core property for verification, and is illustrated through two distinct instantiations. The first assumes that full knowledge of the MDP is available, and performs a heuristic-driven partial exploration of the model, yielding precise lower and upper bounds on the required probability. The second tackles the case where we may only sample the MDP, and yields probabilistic guarantees, again in terms of both the lower and upper bounds, which provides efficient stopping criteria for the approximation. The latter is the first extension of statistical model checking for unbounded properties inMDPs. In contrast with other related techniques, our approach is not restricted to time-bounded (finite-horizon) or discounted properties, nor does it assume any particular properties of the MDP. We also show how our methods extend to LTL objectives. We present experimental results showing the performance of our framework on several examples.},
  author       = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Forejt, Vojtěch and Kretinsky, Jan and Kwiatkowska, Marta and Parker, David and Ujma, Mateusz},
  booktitle    = { Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  editor       = {Cassez, Franck and Raskin, Jean-François},
  location     = {Sydney, Australia},
  pages        = {98 -- 114},
  publisher    = {Society of Industrial and Applied Mathematics},
  title        = {{Verification of markov decision processes using learning algorithms}},
  doi          = {10.1007/978-3-319-11936-6_8},
  volume       = {8837},
  year         = {2014},
}

@article{1733,
  abstract     = {The classical (boolean) notion of refinement for behavioral interfaces of system components is the alternating refinement preorder. In this paper, we define a distance for interfaces, called interface simulation distance. It makes the alternating refinement preorder quantitative by, intuitively, tolerating errors (while counting them) in the alternating simulation game. We show that the interface simulation distance satisfies the triangle inequality, that the distance between two interfaces does not increase under parallel composition with a third interface, that the distance between two interfaces can be bounded from above and below by distances between abstractions of the two interfaces, and how to synthesize an interface from incompatible requirements. We illustrate the framework, and the properties of the distances under composition of interfaces, with two case studies.},
  author       = {Cerny, Pavol and Chmelik, Martin and Henzinger, Thomas A and Radhakrishna, Arjun},
  journal      = {Theoretical Computer Science},
  number       = {3},
  pages        = {348 -- 363},
  publisher    = {Elsevier},
  title        = {{Interface simulation distances}},
  doi          = {10.1016/j.tcs.2014.08.019},
  volume       = {560},
  year         = {2014},
}

@misc{5413,
  abstract     = {We consider Markov decision processes (MDPs) which are a standard model for probabilistic systems. We focus on qualitative properties for MDPs that can express that desired behaviors of the system arise almost-surely (with probability 1) or with positive probability.
We introduce a new simulation relation to capture the refinement relation of MDPs with respect to qualitative properties, and present discrete graph theoretic algorithms with quadratic complexity to compute the simulation relation.
We present an automated technique for assume-guarantee style reasoning for compositional analysis of MDPs with qualitative properties by giving a counter-example guided abstraction-refinement approach to compute our new simulation relation. We have implemented our algorithms and show that the compositional analysis leads to significant improvements. },
  author       = {Chatterjee, Krishnendu and Daca, Przemyslaw and Chmelik, Martin},
  issn         = {2664-1690},
  pages        = {33},
  publisher    = {IST Austria},
  title        = {{CEGAR for qualitative analysis of probabilistic systems}},
  doi          = {10.15479/AT:IST-2014-153-v2-2},
  year         = {2014},
}