@article{1200,
  author       = {Hilbe, Christian and Traulsen, Arne},
  journal      = {Physics of Life Reviews},
  pages        = {29--31},
  publisher    = {Elsevier},
  title        = {{Only the combination of mathematics and agent based simulations can leverage the full potential of evolutionary modeling: Comment on “Evolutionary game theory using agent-based methods” by C. Adami, J. Schossau and A. Hintze}},
  doi          = {10.1016/j.plrev.2016.10.004},
  volume       = {19},
  year         = {2016},
}

@inproceedings{1245,
  abstract     = {To facilitate collaboration in massive online classrooms, instructors must make many decisions. For instance, the following parameters need to be decided when designing a peer-feedback system where students review each others' essays: the number of students each student must provide feedback to, an algorithm to map feedback providers to receivers, constraints that ensure students do not become free-riders (receiving feedback but not providing it), the best times to receive feedback to improve learning etc. While instructors can answer these questions by running experiments or invoking past experience, game-theoretic models with data from online learning platforms can identify better initial designs for further improvements. As an example, we explore the design space of a peer feedback system by modeling it using game theory. Our simulations show that incentivizing students to provide feedback requires the value obtained from receiving a feedback to exceed the cost of providing it by a large factor (greater than 7). Furthermore, hiding feedback from low-effort students incentivizes them to provide more feedback.},
  author       = {Pandey, Vineet and Chatterjee, Krishnendu},
  booktitle    = {Proceedings of the ACM Conference on Computer Supported Cooperative Work},
  location     = {San Francisco, CA, USA},
  month        = feb,
  pages        = {365--368},
  publisher    = {ACM},
  title        = {{Game-theoretic models identify useful principles for peer collaboration in online learning platforms}},
  doi          = {10.1145/2818052.2869122},
  year         = {2016},
}

@inproceedings{1325,
  abstract     = {We study graphs and two-player games in which rewards are assigned to states, and the goal of the players is to satisfy or dissatisfy certain property of the generated outcome, given as a mean payoff property. Since the notion of mean-payoff does not reflect possible fluctuations from the mean-payoff along a run, we propose definitions and algorithms for capturing the stability of the system, and give algorithms for deciding if a given mean payoff and stability objective can be ensured in the system.},
  author       = {Brázdil, Tomáš and Forejt, Vojtěch and Kučera, Antonín and Novotny, Petr},
  booktitle    = {Proceedings of the 27th International Conference on Concurrency Theory (CONCUR 2016)},
  location     = {Quebec City, Canada},
  publisher    = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  series       = {Leibniz International Proceedings in Informatics (LIPIcs)},
  title        = {{Stability in graphs and games}},
  doi          = {10.4230/LIPIcs.CONCUR.2016.10},
  volume       = {59},
  year         = {2016},
}

@inproceedings{1324,
  abstract     = {DEC-POMDPs extend POMDPs to a multi-agent setting, where several agents operate in an uncertain environment independently to achieve a joint objective. DEC-POMDPs have been studied with finite-horizon and infinite-horizon discounted-sum objectives, and there exist solvers both for exact and approximate solutions. In this work we consider Goal-DEC-POMDPs, where given a set of target states, the objective is to ensure that the target set is reached with minimal cost. We consider the indefinite-horizon (infinite-horizon with either discounted-sum, or undiscounted-sum, where absorbing goal states have zero-cost) problem. We present a new and novel method to solve the problem that extends methods for finite-horizon DEC-POMDPs and the RTDP-Bel approach for POMDPs. We present experimental results on several examples, and show that our approach presents promising results.},
  author       = {Chatterjee, Krishnendu and Chmelik, Martin},
  booktitle    = {Proceedings of the Twenty-Sixth International Conference on Automated Planning and Scheduling},
  location     = {London, United Kingdom},
  pages        = {88--96},
  publisher    = {AAAI Press},
  title        = {{Indefinite-horizon reachability in Goal-DEC-POMDPs}},
  year         = {2016},
}

@inproceedings{1327,
  abstract     = {We consider partially observable Markov decision processes (POMDPs) with a set of target states and positive integer costs associated with every transition. The traditional optimization objective (stochastic shortest path) asks to minimize the expected total cost until the target set is reached. We extend the traditional framework of POMDPs to model energy consumption, which represents a hard constraint. The energy levels may increase and decrease with transitions, and the hard constraint requires that the energy level must remain positive in all steps till the target is reached. First, we present a novel algorithm for solving POMDPs with energy levels, developing on existing POMDP solvers and using RTDP as its main method. Our second contribution is related to policy representation. For larger POMDP instances the policies computed by existing solvers are too large to be understandable. We present an automated procedure based on machine learning techniques that automatically extracts important decisions of the policy allowing us to compute succinct human readable policies. Finally, we show experimentally that our algorithm performs well and computes succinct policies on a number of POMDP instances from the literature that were naturally enhanced with energy levels.},
  author       = {Brázdil, Tomáš and Chatterjee, Krishnendu and Chmelik, Martin and Gupta, Anchit and Novotny, Petr},
  booktitle    = {Proceedings of the 15th International Conference on Autonomous Agents and Multiagent Systems},
  location     = {Singapore},
  pages        = {1465--1466},
  publisher    = {ACM},
  title        = {{Stochastic shortest path with energy constraints in POMDPs}},
  year         = {2016},
}

@inproceedings{1326,
  abstract     = {Energy Markov Decision Processes (EMDPs) are finite-state Markov decision processes where each transition is assigned an integer counter update and a rational payoff. An EMDP configuration is a pair s(n), where s is a control state and n is the current counter value. The configurations are changed by performing transitions in the standard way. We consider the problem of computing a safe strategy (i.e., a strategy that keeps the counter non-negative) which maximizes the expected mean payoff.},
  author       = {Brázdil, Tomáš and Kučera, Antonín and Novotny, Petr},
  booktitle    = {Proceedings of the 14th International Symposium on Automated Technology for Verification and Analysis (ATVA 2016)},
  location     = {Chiba, Japan},
  pages        = {32--49},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  title        = {{Optimizing the expected mean payoff in Energy Markov Decision Processes}},
  doi          = {10.1007/978-3-319-46520-3_3},
  volume       = {9938},
  year         = {2016},
}

@article{1333,
  abstract     = {Social dilemmas force players to balance between personal and collective gain. In many dilemmas, such as elected governments negotiating climate-change mitigation measures, the decisions are made not by individual players but by their representatives. However, the behaviour of representatives in social dilemmas has not been investigated experimentally. Here inspired by the negotiations for greenhouse-gas emissions reductions, we experimentally study a collective-risk social dilemma that involves representatives deciding on behalf of their fellow group members. Representatives can be re-elected or voted out after each consecutive collective-risk game. Selfish players are preferentially elected and are hence found most frequently in the “representatives” treatment. Across all treatments, we identify the selfish players as extortioners. As predicted by our mathematical model, their steadfast strategies enforce cooperation from fair players who finally compensate almost completely the deficit caused by the extortionate co-players. Everybody gains, but the extortionate representatives and their groups gain the most.},
  author       = {Milinski, Manfred and Hilbe, Christian and Semmann, Dirk and Sommerfeld, Ralf and Marotzke, Jochem},
  journal      = {Nature Communications},
  pages        = {10915},
  publisher    = {Nature Publishing Group},
  title        = {{Humans choose representatives who enforce cooperation in social dilemmas through extortion}},
  doi          = {10.1038/ncomms10915},
  volume       = {7},
  year         = {2016},
}

@inproceedings{1335,
  abstract     = {In this paper we review various automata-theoretic formalisms for expressing quantitative properties. We start with finite-state Boolean automata that express the traditional regular properties. We then consider weighted ω-automata that can measure the average density of events, which finite-state Boolean automata cannot. However, even weighted ω-automata cannot express basic performance properties like average response time. We finally consider two formalisms of weighted ω-automata with monitors, where the monitors are either (a) counters or (b) weighted automata themselves. We present a translation result to establish that these two formalisms are equivalent. Weighted ω-automata with monitors generalize weighted ω-automata, and can express average response time property. They present a natural, robust, and expressive framework for quantitative specifications, with important decidable properties.},
  author       = {Chatterjee, Krishnendu and Henzinger, Thomas A. and Otop, Jan},
  booktitle    = {Proceedings of the 23rd International Static Analysis Symposium (SAS 2016)},
  location     = {Edinburgh, United Kingdom},
  pages        = {23--38},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  title        = {{Quantitative monitor automata}},
  doi          = {10.1007/978-3-662-53413-7_2},
  volume       = {9837},
  year         = {2016},
}

@inproceedings{1340,
  abstract     = {We study repeated games with absorbing states, a type of two-player, zero-sum concurrent mean-payoff games with the prototypical example being the Big Match of Gillette (1957). These games may not allow optimal strategies but they always have ε-optimal strategies. In this paper we design ε-optimal strategies for Player 1 in these games that use only O(log log T) space. Furthermore, we construct strategies for Player 1 that use space s(T), for an arbitrary small unbounded non-decreasing function s, and which guarantee an ε-optimal value for Player 1 in the limit superior sense. The previously known strategies use space Ω(log T) and it was known that no strategy can use constant space if it is ε-optimal even in the limit superior sense. We also give a complementary lower bound. Furthermore, we also show that no Markov strategy, even extended with finite memory, can ensure value greater than 0 in the Big Match, answering a question posed by Neyman [11].},
  author       = {Hansen, Kristoffer and Ibsen-Jensen, Rasmus and Koucký, Michal},
  booktitle    = {Proceedings of the 9th International Symposium on Algorithmic Game Theory (SAGT 2016)},
  location     = {Liverpool, United Kingdom},
  pages        = {64--76},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  title        = {{The big match in small space}},
  doi          = {10.1007/978-3-662-53354-3_6},
  volume       = {9928},
  year         = {2016},
}

@article{1380,
  abstract     = {We consider higher-dimensional versions of Kannan and Lipton's Orbit Problem - determining whether a target vector space V may be reached from a starting point x under repeated applications of a linear transformation A. Answering two questions posed by Kannan and Lipton in the 1980s, we show that when V has dimension one, this problem is solvable in polynomial time, and when V has dimension two or three, the problem is in $\mathrm{NP}^{\mathrm{RP}}$.},
  author       = {Chonev, Ventsislav K. and Ouaknine, Joël and Worrell, James},
  journal      = {Journal of the ACM},
  number       = {3},
  publisher    = {ACM},
  title        = {{On the complexity of the orbit problem}},
  doi          = {10.1145/2857050},
  volume       = {63},
  year         = {2016},
}