@inproceedings{3337,
abstract = {Playing table tennis is a difficult task for robots, especially due to their limitations of acceleration. A key bottleneck is the amount of time needed to reach the desired hitting position and velocity of the racket for returning the incoming ball. Here, it often does not suffice to simply extrapolate the ball's trajectory after the opponent returns it but more information is needed. Humans are able to predict the ball's trajectory based on the opponent's moves and, thus, have a considerable advantage. Hence, we propose to incorporate an anticipation system into robot table tennis players, which enables the robot to react earlier while the opponent is performing the striking movement. Based on visual observation of the opponent's racket movement, the robot can predict the aim of the opponent and adjust its movement generation accordingly. The policies for deciding how and when to react are obtained by reinforcement learning. We conduct experiments with an existing robot player to show that the learned reaction policy can significantly improve the performance of the overall system.},
author = {Wang, Zhikun and Lampert, Christoph and Mülling, Katharina and Schölkopf, Bernhard and Peters, Jan},
location = {San Francisco, USA},
pages = {332 -- 337},
publisher = {IEEE},
title = {{Learning anticipation policies for robot table tennis}},
doi = {10.1109/IROS.2011.6094892},
year = {2011},
}
@unpublished{3338,
abstract = {We consider 2-player games played on a finite state space for an infinite number of rounds. The games are concurrent: in each round, the two players (player 1 and player 2) choose their moves inde- pendently and simultaneously; the current state and the two moves determine the successor state. We study concurrent games with ω-regular winning conditions specified as parity objectives. We consider the qualitative analysis problems: the computation of the almost-sure and limit-sure winning set of states, where player 1 can ensure to win with probability 1 and with probability arbitrarily close to 1, respec- tively. In general the almost-sure and limit-sure winning strategies require both infinite-memory as well as infinite-precision (to describe probabilities). We study the bounded-rationality problem for qualitative analysis of concurrent parity games, where the strategy set for player 1 is restricted to bounded-resource strategies. In terms of precision, strategies can be deterministic, uniform, finite-precision or infinite- precision; and in terms of memory, strategies can be memoryless, finite-memory or infinite-memory. We present a precise and complete characterization of the qualitative winning sets for all combinations of classes of strategies. In particular, we show that uniform memoryless strategies are as powerful as finite-precision infinite-memory strategies, and infinite-precision memoryless strategies are as power- ful as infinite-precision finite-memory strategies. We show that the winning sets can be computed in O(n2d+3) time, where n is the size of the game structure and 2d is the number of priorities (or colors), and our algorithms are symbolic. The membership problem of whether a state belongs to a winning set can be decided in NP ∩ coNP. While this complexity is the same as for the simpler class of turn-based parity games, where in each state only one of the two players has a choice of moves, our algorithms, that are obtained by characterization of the winning sets as μ-calculus formulas, are considerably more involved than those for turn-based games.},
author = {Chatterjee, Krishnendu},
booktitle = {arXiv},
pages = {1 -- 51},
publisher = {ArXiv},
title = {{Bounded rationality in concurrent parity games}},
year = {2011},
}
@unpublished{3339,
abstract = {Turn-based stochastic games and its important subclass Markov decision processes (MDPs) provide models for systems with both probabilistic and nondeterministic behaviors. We consider turn-based stochastic games with two classical quantitative objectives: discounted-sum and long-run average objectives. The game models and the quantitative objectives are widely used in probabilistic verification, planning, optimal inventory control, network protocol and performance analysis. Games and MDPs that model realistic systems often have very large state spaces, and probabilistic abstraction techniques are necessary to handle the state-space explosion. The commonly used full-abstraction techniques do not yield space-savings for systems that have many states with similar value, but does not necessarily have similar transition structure. A semi-abstraction technique, namely Magnifying-lens abstractions (MLA), that clusters states based on value only, disregarding differences in their transition relation was proposed for qualitative objectives (reachability and safety objectives). In this paper we extend the MLA technique to solve stochastic games with discounted-sum and long-run average objectives. We present the MLA technique based abstraction-refinement algorithm for stochastic games and MDPs with discounted-sum objectives. For long-run average objectives, our solution works for all MDPs and a sub-class of stochastic games where every state has the same value. },
author = {Chatterjee, Krishnendu and De Alfaro, Luca and Pritam, Roy},
booktitle = {arXiv},
pages = {17},
publisher = {ArXiv},
title = {{Magnifying lens abstraction for stochastic games with discounted and long-run average objectives}},
year = {2011},
}
@inproceedings{3342,
abstract = {We consider Markov decision processes (MDPs) with ω-regular specifications given as parity objectives. We consider the problem of computing the set of almost-sure winning states from where the objective can be ensured with probability 1. The algorithms for the computation of the almost-sure winning set for parity objectives iteratively use the solutions for the almost-sure winning set for Büchi objectives (a special case of parity objectives). Our contributions are as follows: First, we present the first subquadratic symbolic algorithm to compute the almost-sure winning set for MDPs with Büchi objectives; our algorithm takes O(nm) symbolic steps as compared to the previous known algorithm that takes O(n 2) symbolic steps, where n is the number of states and m is the number of edges of the MDP. In practice MDPs often have constant out-degree, and then our symbolic algorithm takes O(nn) symbolic steps, as compared to the previous known O(n 2) symbolic steps algorithm. Second, we present a new algorithm, namely win-lose algorithm, with the following two properties: (a) the algorithm iteratively computes subsets of the almost-sure winning set and its complement, as compared to all previous algorithms that discover the almost-sure winning set upon termination; and (b) requires O(nK) symbolic steps, where K is the maximal number of edges of strongly connected components (scc’s) of the MDP. The win-lose algorithm requires symbolic computation of scc’s. Third, we improve the algorithm for symbolic scc computation; the previous known algorithm takes linear symbolic steps, and our new algorithm improves the constants associated with the linear number of steps. In the worst case the previous known algorithm takes 5·n symbolic steps, whereas our new algorithm takes 4 ·n symbolic steps.},
author = {Chatterjee, Krishnendu and Henzinger, Monika and Joglekar, Manas and Nisarg, Shah},
editor = {Gopalakrishnan, Ganesh and Qadeer, Shaz},
location = {Snowbird, USA},
pages = {260 -- 276},
publisher = {Springer},
title = {{Symbolic algorithms for qualitative analysis of Markov decision processes with Büchi objectives}},
doi = {10.1007/978-3-642-22110-1_21},
volume = {6806},
year = {2011},
}
@inproceedings{3343,
abstract = {We present faster and dynamic algorithms for the following problems arising in probabilistic verification: Computation of the maximal end-component (mec) decomposition of Markov decision processes (MDPs), and of the almost sure winning set for reachability and parity objectives in MDPs. We achieve the following running time for static algorithms in MDPs with graphs of n vertices and m edges: (1) O(m · min{ √m, n2/3 }) for the mec decomposition, improving the longstanding O(m·n) bound; (2) O(m·n2/3) for reachability objectives, improving the previous O(m · √m) bound for m > n4/3; and (3) O(m · min{ √m, n2/3 } · log(d)) for parity objectives with d priorities, improving the previous O(m · √m · d) bound. We also give incremental and decremental algorithms in linear time for mec decomposition and reachability objectives and O(m · log d) time for parity ob jectives.},
author = {Chatterjee, Krishnendu and Henzinger, Monika},
location = {San Francisco, USA},
pages = {1318 -- 1336},
publisher = {SIAM},
title = {{Faster and dynamic algorithms for maximal end component decomposition and related graph problems in probabilistic verification}},
doi = {10.1137/1.9781611973082.101},
year = {2011},
}
@inproceedings{3344,
abstract = {Games played on graphs provide the mathematical framework to analyze several important problems in computer science as well as mathematics, such as the synthesis problem of Church, model checking of open reactive systems and many others. On the basis of mode of interaction of the players these games can be classified as follows: (a) turn-based (players make moves in turns); and (b) concurrent (players make moves simultaneously). On the basis of the information available to the players these games can be classified as follows: (a) perfect-information (players have perfect view of the game); and (b) partial-information (players have partial view of the game). In this talk we will consider all these classes of games with reachability objectives, where the goal of one player is to reach a set of target vertices of the graph, and the goal of the opponent player is to prevent the player from reaching the target. We will survey the results for various classes of games, and the results range from linear time decision algorithms to EXPTIME-complete problems to undecidable problems.},
author = {Chatterjee, Krishnendu},
editor = {Delzanno, Giorgo and Potapov, Igor},
location = {Genoa, Italy},
pages = {1 -- 1},
publisher = {Springer},
title = {{Graph games with reachability objectives}},
doi = {10.1007/978-3-642-24288-5_1},
volume = {6945},
year = {2011},
}
@inproceedings{3345,
abstract = {We consider Markov Decision Processes (MDPs) with mean-payoff parity and energy parity objectives. In system design, the parity objective is used to encode ω-regular specifications, and the mean-payoff and energy objectives can be used to model quantitative resource constraints. The energy condition re- quires that the resource level never drops below 0, and the mean-payoff condi- tion requires that the limit-average value of the resource consumption is within a threshold. While these two (energy and mean-payoff) classical conditions are equivalent for two-player games, we show that they differ for MDPs. We show that the problem of deciding whether a state is almost-sure winning (i.e., winning with probability 1) in energy parity MDPs is in NP ∩ coNP, while for mean- payoff parity MDPs, the problem is solvable in polynomial time, improving a recent PSPACE bound.},
author = {Chatterjee, Krishnendu and Doyen, Laurent},
location = {Warsaw, Poland},
pages = {206 -- 218},
publisher = {Springer},
title = {{Energy and mean-payoff parity Markov Decision Processes}},
doi = {10.1007/978-3-642-22993-0_21},
volume = {6907},
year = {2011},
}
@inproceedings{3346,
abstract = {We study Markov decision processes (MDPs) with multiple limit-average (or mean-payoff) functions. We consider two different objectives, namely, expectation and satisfaction objectives. Given an MDP with k reward functions, in the expectation objective the goal is to maximize the expected limit-average value, and in the satisfaction objective the goal is to maximize the probability of runs such that the limit-average value stays above a given vector. We show that under the expectation objective, in contrast to the single-objective case, both randomization and memory are necessary for strategies, and that finite-memory randomized strategies are sufficient. Under the satisfaction objective, in contrast to the single-objective case, infinite memory is necessary for strategies, and that randomized memoryless strategies are sufficient for epsilon-approximation, for all epsilon>;0. We further prove that the decision problems for both expectation and satisfaction objectives can be solved in polynomial time and the trade-off curve (Pareto curve) can be epsilon-approximated in time polynomial in the size of the MDP and 1/epsilon, and exponential in the number of reward functions, for all epsilon>;0. Our results also reveal flaws in previous work for MDPs with multiple mean-payoff functions under the expectation objective, correct the flaws and obtain improved results.},
author = {Brázdil, Tomáš and Brožek, Václav and Chatterjee, Krishnendu and Forejt, Vojtěch and Kučera, Antonín},
location = {Toronto, Canada},
publisher = {IEEE},
title = {{Two views on multiple mean payoff objectives in Markov Decision Processes}},
doi = {10.1109/LICS.2011.10},
year = {2011},
}
@inproceedings{3347,
abstract = {The class of omega-regular languages provides a robust specification language in verification. Every omega-regular condition can be decomposed into a safety part and a liveness part. The liveness part ensures that something good happens "eventually". Finitary liveness was proposed by Alur and Henzinger as a stronger formulation of liveness. It requires that there exists an unknown, fixed bound b such that something good happens within b transitions. In this work we consider automata with finitary acceptance conditions defined by finitary Buchi, parity and Streett languages. We study languages expressible by such automata: we give their topological complexity and present a regular-expression characterization. We compare the expressive power of finitary automata and give optimal algorithms for classical decisions questions. We show that the finitary languages are Sigma 2-complete; we present a complete picture of the expressive power of various classes of automata with finitary and infinitary acceptance conditions; we show that the languages defined by finitary parity automata exactly characterize the star-free fragment of omega B-regular languages; and we show that emptiness is NLOGSPACE-complete and universality as well as language inclusion are PSPACE-complete for finitary parity and Streett automata.},
author = {Chatterjee, Krishnendu and Fijalkow, Nathanaël},
location = {Tarragona, Spain},
pages = {216 -- 226},
publisher = {Springer},
title = {{Finitary languages}},
doi = {10.1007/978-3-642-21254-3_16},
volume = {6638},
year = {2011},
}
@inproceedings{3348,
abstract = {We study synthesis of controllers for real-time systems, where the objective is to stay in a given safe set. The problem is solved by obtaining winning strategies in the setting of concurrent two-player timed automaton games with safety objectives. To prevent a player from winning by blocking time, we restrict each player to strategies that ensure that the player cannot be responsible for causing a zeno run. We construct winning strategies for the controller which require access only to (1) the system clocks (thus, controllers which require their own internal infinitely precise clocks are not necessary), and (2) a linear (in the number of clocks) number of memory bits. Precisely, we show that for safety objectives, a memory of size (3 · |C|+lg(|C|+1)) bits suffices for winning controller strategies, where C is the set of clocks of the timed automaton game, significantly improving the previous known exponential bound. We also settle the open question of whether winning region controller strategies require memory for safety objectives by showing with an example the necessity of memory for region strategies to win for safety objectives.},
author = {Chatterjee, Krishnendu and Prabhu, Vinayak},
location = {Chicago, USA},
pages = {221 -- 230},
publisher = {Springer},
title = {{Synthesis of memory efficient real time controllers for safety objectives}},
doi = {10.1145/1967701.1967734},
year = {2011},
}