@comment{
  Conventions used in this file:
  - One field per line; text is UTF-8 (author names carry their diacritics).
  - Whole-title double braces are kept from the original records, so styles do not recase titles.
  - Thesis entries carry both school and publisher, holding the same institution.
}

@article{5945,
  abstract  = {In developing organisms, spatially prescribed cell identities are thought to be determined by the expression levels of multiple genes. Quantitative tests of this idea, however, require a theoretical framework capable of exposing the rules and precision of cell specification over developmental time. We use the gap gene network in the early fly embryo as an example to show how expression levels of the four gap genes can be jointly decoded into an optimal specification of position with 1\% accuracy. The decoder correctly predicts, with no free parameters, the dynamics of pair-rule expression patterns at different developmental time points and in various mutant backgrounds. Precise cellular identities are thus available at the earliest stages of development, contrasting the prevailing view of positional information being slowly refined across successive layers of the patterning network. Our results suggest that developmental enhancers closely approximate a mathematically optimal decoding strategy.},
  author    = {Petkova, Mariela D. and Tkačik, Gašper and Bialek, William and Wieschaus, Eric F. and Gregor, Thomas},
  journal   = {Cell},
  number    = {4},
  pages     = {844--855.e15},
  publisher = {Cell Press},
  title     = {{Optimal decoding of cellular identities in a genetic network}},
  doi       = {10.1016/j.cell.2019.01.007},
  volume    = {176},
  year      = {2019},
}

@article{6049,
  abstract  = {In this article it is shown that large systems with many interacting units endowing multiple phases display self-oscillations in the presence of linear feedback between the control and order parameters, where an Andronov–Hopf bifurcation takes over the phase transition. This is simply illustrated through the mean field Landau theory whose feedback dynamics turn out to be described by the Van der Pol equation and it is then validated for the fully connected Ising model following heat bath dynamics. Despite its simplicity, this theory accounts potentially for a rich range of phenomena: here it is applied to describe in a stylized way (i) excess demand-price cycles due to strong herding in a simple agent-based market model; (ii) congestion waves in queuing networks triggered by user feedback to delays in overloaded conditions; and (iii) metabolic network oscillations resulting from cell growth control in a bistable phenotypic landscape.},
  author    = {De Martino, Daniele},
  journal   = {Journal of Physics A: Mathematical and Theoretical},
  number    = {4},
  publisher = {IOP Publishing},
  title     = {{Feedback-induced self-oscillations in large interacting systems subjected to phase transitions}},
  doi       = {10.1088/1751-8121/aaf2dd},
  volume    = {52},
  year      = {2019},
}

@article{6046,
  abstract  = {Sudden stress often triggers diverse, temporally structured gene expression responses in microbes, but it is largely unknown how variable in time such responses are and if genes respond in the same temporal order in every single cell. Here, we quantified timing variability of individual promoters responding to sublethal antibiotic stress using fluorescent reporters, microfluidics, and time‐lapse microscopy. We identified lower and upper bounds that put definite constraints on timing variability, which varies strongly among promoters and conditions. Timing variability can be interpreted using results from statistical kinetics, which enable us to estimate the number of rate‐limiting molecular steps underlying different responses. We found that just a few critical steps control some responses while others rely on dozens of steps. To probe connections between different stress responses, we then tracked the temporal order and response time correlations of promoter pairs in individual cells. Our results support that, when bacteria are exposed to the antibiotic nitrofurantoin, the ensuing oxidative stress and SOS responses are part of the same causal chain of molecular events. In contrast, under trimethoprim, the acid stress response and the SOS response are part of different chains of events running in parallel. Our approach reveals fundamental constraints on gene expression timing and provides new insights into the molecular events that underlie the timing of stress responses.},
  author    = {Mitosch, Karin and Rieckh, Georg and Bollenbach, Mark Tobias},
  journal   = {Molecular Systems Biology},
  number    = {2},
  publisher = {EMBO Press},
  title     = {{Temporal order and precision of complex stress responses in individual bacteria}},
  doi       = {10.15252/msb.20188470},
  volume    = {15},
  year      = {2019},
}

@article{6784,
  abstract  = {Mathematical models have been used successfully at diverse scales of biological organization, ranging from ecology and population dynamics to stochastic reaction events occurring between individual molecules in single cells. Generally, many biological processes unfold across multiple scales, with mutations being the best studied example of how stochasticity at the molecular scale can influence outcomes at the population scale. In many other contexts, however, an analogous link between micro- and macro-scale remains elusive, primarily due to the challenges involved in setting up and analyzing multi-scale models. Here, we employ such a model to investigate how stochasticity propagates from individual biochemical reaction events in the bacterial innate immune system to the ecology of bacteria and bacterial viruses. We show analytically how the dynamics of bacterial populations are shaped by the activities of immunity-conferring enzymes in single cells and how the ecological consequences imply optimal bacterial defense strategies against viruses. Our results suggest that bacterial populations in the presence of viruses can either optimize their initial growth rate or their population size, with the first strategy favoring simple immunity featuring a single restriction modification system and the second strategy favoring complex bacterial innate immunity featuring several simultaneously active restriction modification systems.},
  author    = {Ruess, Jakob and Pleska, Maros and Guet, Calin C. and Tkačik, Gašper},
  issn      = {1553-7358},
  journal   = {PLoS Computational Biology},
  number    = {7},
  publisher = {Public Library of Science},
  title     = {{Molecular noise of innate immunity shapes bacteria-phage ecologies}},
  doi       = {10.1371/journal.pcbi.1007168},
  volume    = {15},
  year      = {2019},
}

@misc{9786,
  author    = {Ruess, Jakob and Pleska, Maros and Guet, Calin C. and Tkačik, Gašper},
  publisher = {Public Library of Science},
  title     = {{Supporting text and results}},
  doi       = {10.1371/journal.pcbi.1007168.s001},
  year      = {2019},
}

@article{7422,
  abstract  = {Biochemical reactions often occur at low copy numbers but at once in crowded and diverse environments. Space and stochasticity therefore play an essential role in biochemical networks. Spatial-stochastic simulations have become a prominent tool for understanding how stochasticity at the microscopic level influences the macroscopic behavior of such systems. While particle-based models guarantee the level of detail necessary to accurately describe the microscopic dynamics at very low copy numbers, the algorithms used to simulate them typically imply trade-offs between computational efficiency and biochemical accuracy. eGFRD (enhanced Green’s Function Reaction Dynamics) is an exact algorithm that evades such trade-offs by partitioning the N-particle system into M ≤ N analytically tractable one- and two-particle systems; the analytical solutions (Green’s functions) then are used to implement an event-driven particle-based scheme that allows particles to make large jumps in time and space while retaining access to their state variables at arbitrary simulation times. Here we present “eGFRD2,” a new eGFRD version that implements the principle of eGFRD in all dimensions, thus enabling efficient particle-based simulation of biochemical reaction-diffusion processes in the 3D cytoplasm, on 2D planes representing membranes, and on 1D elongated cylinders representative of, e.g., cytoskeletal tracks or DNA; in 1D, it also incorporates convective motion used to model active transport. We find that, for low particle densities, eGFRD2 is up to 6 orders of magnitude faster than conventional Brownian dynamics. We exemplify the capabilities of eGFRD2 by simulating an idealized model of Pom1 gradient formation, which involves 3D diffusion, active transport on microtubules, and autophosphorylation on the membrane, confirming recent experimental and theoretical results on this system to hold under genuinely stochastic conditions.},
  author    = {Sokolowski, Thomas R. and Paijmans, Joris and Bossen, Laurens and Miedema, Thomas and Wehrens, Martijn and Becker, Nils B. and Kaizu, Kazunari and Takahashi, Koichi and Dogterom, Marileen and ten Wolde, Pieter Rein},
  issn      = {1089-7690},
  journal   = {The Journal of Chemical Physics},
  number    = {5},
  publisher = {AIP Publishing},
  title     = {{eGFRD in all dimensions}},
  doi       = {10.1063/1.5064867},
  volume    = {150},
  year      = {2019},
}

@article{6900,
  abstract  = {Across diverse biological systems—ranging from neural networks to intracellular signaling and genetic regulatory networks—the information about changes in the environment is frequently encoded in the full temporal dynamics of the network nodes. A pressing data-analysis challenge has thus been to efficiently estimate the amount of information that these dynamics convey from experimental data. Here we develop and evaluate decoding-based estimation methods to lower bound the mutual information about a finite set of inputs, encoded in single-cell high-dimensional time series data. For biological reaction networks governed by the chemical Master equation, we derive model-based information approximations and analytical upper bounds, against which we benchmark our proposed model-free decoding estimators. In contrast to the frequently-used k-nearest-neighbor estimator, decoding-based estimators robustly extract a large fraction of the available information from high-dimensional trajectories with a realistic number of data samples. We apply these estimators to previously published data on Erk and Ca2+ signaling in mammalian cells and to yeast stress-response, and find that substantial amount of information about environmental state can be encoded by non-trivial response statistics even in stationary signals. We argue that these single-cell, decoding-based information estimates, rather than the commonly-used tests for significant differences between selected population response statistics, provide a proper and unbiased measure for the performance of biological signaling networks.},
  author    = {Cepeda Humerez, Sarah A. and Ruess, Jakob and Tkačik, Gašper},
  issn      = {1553-7358},
  journal   = {PLoS Computational Biology},
  number    = {9},
  pages     = {e1007290},
  publisher = {Public Library of Science},
  title     = {{Estimating information in time-varying signals}},
  doi       = {10.1371/journal.pcbi.1007290},
  volume    = {15},
  year      = {2019},
}

@article{196,
  abstract  = {The abelian sandpile serves as a model to study self-organized criticality, a phenomenon occurring in biological, physical and social processes. The identity of the abelian group is a fractal composed of self-similar patches, and its limit is subject of extensive collaborative research. Here, we analyze the evolution of the sandpile identity under harmonic fields of different orders. We show that this evolution corresponds to periodic cycles through the abelian group characterized by the smooth transformation and apparent conservation of the patches constituting the identity. The dynamics induced by second and third order harmonics resemble smooth stretchings, respectively translations, of the identity, while the ones induced by fourth order harmonics resemble magnifications and rotations. Starting with order three, the dynamics pass through extended regions of seemingly random configurations which spontaneously reassemble into accentuated patterns. We show that the space of harmonic functions projects to the extended analogue of the sandpile group, thus providing a set of universal coordinates identifying configurations between different domains. Since the original sandpile group is a subgroup of the extended one, this directly implies that it admits a natural renormalization. Furthermore, we show that the harmonic fields can be induced by simple Markov processes, and that the corresponding stochastic dynamics show remarkable robustness over hundreds of periods. Finally, we encode information into seemingly random configurations, and decode this information with an algorithm requiring minimal prior knowledge. Our results suggest that harmonic fields might split the sandpile group into sub-sets showing different critical coefficients, and that it might be possible to extend the fractal structure of the identity beyond the boundaries of its domain.},
  author    = {Lang, Moritz and Shkolnikov, Mikhail},
  issn      = {1091-6490},
  journal   = {Proceedings of the National Academy of Sciences},
  number    = {8},
  pages     = {2821--2830},
  publisher = {National Academy of Sciences},
  title     = {{Harmonic dynamics of the Abelian sandpile}},
  doi       = {10.1073/pnas.1812015116},
  volume    = {116},
  year      = {2019},
}

@article{5817,
  abstract  = {We theoretically study the shapes of lipid vesicles confined to a spherical cavity, elaborating a framework based on the so-called limiting shapes constructed from geometrically simple structural elements such as double-membrane walls and edges. Partly inspired by numerical results, the proposed non-compartmentalized and compartmentalized limiting shapes are arranged in the bilayer-couple phase diagram which is then compared to its free-vesicle counterpart. We also compute the area-difference-elasticity phase diagram of the limiting shapes and we use it to interpret shape transitions experimentally observed in vesicles confined within another vesicle. The limiting-shape framework may be generalized to theoretically investigate the structure of certain cell organelles such as the mitochondrion.},
  author    = {Kavcic, Bor and Sakashita, A. and Noguchi, H. and Ziherl, P.},
  issn      = {1744-6848},
  journal   = {Soft Matter},
  number    = {4},
  pages     = {602--614},
  publisher = {Royal Society of Chemistry},
  title     = {{Limiting shapes of confined lipid vesicles}},
  doi       = {10.1039/c8sm01956h},
  volume    = {15},
  year      = {2019},
}

@phdthesis{6473,
  abstract  = {Single cells are constantly interacting with their environment and each other, more importantly, the accurate perception of environmental cues is crucial for growth, survival, and reproduction. This communication between cells and their environment can be formalized in mathematical terms and be quantified as the information flow between them, as prescribed by information theory. The recent availability of real–time dynamical patterns of signaling molecules in single cells has allowed us to identify encoding about the identity of the environment in the time–series. However, efficient estimation of the information transmitted by these signals has been a data–analysis challenge due to the high dimensionality of the trajectories and the limited number of samples. In the first part of this thesis, we develop and evaluate decoding–based estimation methods to lower bound the mutual information and derive model–based precise information estimates for biological reaction networks governed by the chemical master equation. This is followed by applying the decoding-based methods to study the intracellular representation of extracellular changes in budding yeast, by observing the transient dynamics of nuclear translocation of 10 transcription factors in response to 3 stress conditions. Additionally, we apply these estimators to previously published data on ERK and Ca2+ signaling and yeast stress response. We argue that this single cell decoding-based measure of information provides an unbiased, quantitative and interpretable measure for the fidelity of biological signaling processes. Finally, in the last section, we deal with gene regulation which is primarily controlled by transcription factors (TFs) that bind to the DNA to activate gene expression. The possibility that non-cognate TFs activate transcription diminishes the accuracy of regulation with potentially disastrous effects for the cell. This ’crosstalk’ acts as a previously unexplored source of noise in biochemical networks and puts a strong constraint on their performance. To mitigate erroneous initiation we propose an out of equilibrium scheme that implements kinetic proofreading. We show that such architectures are favored over their equilibrium counterparts for complex organisms despite introducing noise in gene expression.},
  author    = {Cepeda Humerez, Sarah A.},
  issn      = {2663-337X},
  keywords  = {Information estimation, Time-series, data analysis},
  pages     = {135},
  publisher = {Institute of Science and Technology Austria},
  school    = {Institute of Science and Technology Austria},
  title     = {{Estimating information flow in single cells}},
  doi       = {10.15479/AT:ISTA:6473},
  year      = {2019},
}

@phdthesis{6071,
  abstract  = {Transcription factors, by binding to specific sequences on the DNA, control the precise spatio-temporal expression of genes inside a cell. However, this specificity is limited, leading to frequent incorrect binding of transcription factors that might have deleterious consequences on the cell. By constructing a biophysical model of TF-DNA binding in the context of gene regulation, I will first explore how regulatory constraints can strongly shape the distribution of a population in sequence space. Then, by directly linking this to a picture of multiple types of transcription factors performing their functions simultaneously inside the cell, I will explore the extent of regulatory crosstalk -- incorrect binding interactions between transcription factors and binding sites that lead to erroneous regulatory states -- and understand the constraints this places on the design of regulatory systems. I will then develop a generic theoretical framework to investigate the coevolution of multiple transcription factors and multiple binding sites, in the context of a gene regulatory network that performs a certain function. As a particular tractable version of this problem, I will consider the evolution of two transcription factors when they transmit upstream signals to downstream target genes. Specifically, I will describe the evolutionary steady states and the evolutionary pathways involved, along with their timescales, of a system that initially undergoes a transcription factor duplication event. To connect this important theoretical model to the prominent biological event of transcription factor duplication giving rise to paralogous families, I will then describe a bioinformatics analysis of C2H2 Zn-finger transcription factors, a major family in humans, and focus on the patterns of evolution that paralogs have undergone in their various protein domains in the recent past.},
  author    = {Prizak, Roshan},
  issn      = {2663-337X},
  pages     = {189},
  publisher = {Institute of Science and Technology Austria},
  school    = {Institute of Science and Technology Austria},
  title     = {{Coevolution of transcription factors and their binding sites in sequence space}},
  doi       = {10.15479/at:ista:th6071},
  year      = {2019},
}

@article{7103,
  abstract  = {Origin and functions of intermittent transitions among sleep stages, including short awakenings and arousals, constitute a challenge to the current homeostatic framework for sleep regulation, focusing on factors modulating sleep over large time scales. Here we propose that the complex micro-architecture characterizing the sleep-wake cycle results from an underlying non-equilibrium critical dynamics, bridging collective behaviors across spatio-temporal scales. We investigate θ and δ wave dynamics in control rats and in rats with lesions of sleep-promoting neurons in the parafacial zone. We demonstrate that intermittent bursts in θ and δ rhythms exhibit a complex temporal organization, with long-range power-law correlations and a robust duality of power law (θ-bursts, active phase) and exponential-like (δ-bursts, quiescent phase) duration distributions, typical features of non-equilibrium systems self-organizing at criticality. Crucially, such temporal organization relates to anti-correlated coupling between θ- and δ-bursts, and is independent of the dominant physiologic state and lesions, a solid indication of a basic principle in sleep dynamics.},
  author    = {Wang, Jilin W. J. L. and Lombardi, Fabrizio and Zhang, Xiyun and Anaclet, Christelle and Ivanov, Plamen Ch.},
  issn      = {1553-7358},
  journal   = {PLoS Computational Biology},
  number    = {11},
  publisher = {Public Library of Science},
  title     = {{Non-equilibrium critical dynamics of bursts in θ and δ rhythms as fundamental characteristic of sleep and wake micro-architecture}},
  doi       = {10.1371/journal.pcbi.1007268},
  volume    = {15},
  year      = {2019},
}

@article{6090,
  abstract  = {Cells need to reliably sense external ligand concentrations to achieve various biological functions such as chemotaxis or signaling. The molecular recognition of ligands by surface receptors is degenerate in many systems, leading to crosstalk between ligand-receptor pairs. Crosstalk is often thought of as a deviation from optimal specific recognition, as the binding of noncognate ligands can interfere with the detection of the receptor's cognate ligand, possibly leading to a false triggering of a downstream signaling pathway. Here we quantify the optimal precision of sensing the concentrations of multiple ligands by a collection of promiscuous receptors. We demonstrate that crosstalk can improve precision in concentration sensing and discrimination tasks. To achieve superior precision, the additional information about ligand concentrations contained in short binding events of the noncognate ligand should be exploited. We present a proofreading scheme to realize an approximate estimation of multiple ligand concentrations that reaches a precision close to the derived optimal bounds. Our results help rationalize the observed ubiquity of receptor crosstalk in molecular sensing.},
  author    = {Carballo-Pacheco, Martín and Desponds, Jonathan and Gavrilchenko, Tatyana and Mayer, Andreas and Prizak, Roshan and Reddy, Gautam and Nemenman, Ilya and Mora, Thierry},
  journal   = {Physical Review E},
  number    = {2},
  publisher = {American Physical Society},
  title     = {{Receptor crosstalk improves concentration sensing of multiple ligands}},
  doi       = {10.1103/PhysRevE.99.022423},
  volume    = {99},
  year      = {2019},
}

@inproceedings{7606,
  abstract  = {We derive a tight lower bound on equivocation (conditional entropy), or equivalently a tight upper bound on mutual information between a signal variable and channel outputs. The bound is in terms of the joint distribution of the signals and maximum a posteriori decodes (most probable signals given channel output). As part of our derivation, we describe the key properties of the distribution of signals, channel outputs and decodes, that minimizes equivocation and maximizes mutual information. This work addresses a problem in data analysis, where mutual information between signals and decodes is sometimes used to lower bound the mutual information between signals and channel outputs. Our result provides a corresponding upper bound.},
  author    = {Hledik, Michal and Sokolowski, Thomas R. and Tkačik, Gašper},
  booktitle = {IEEE Information Theory Workshop, ITW 2019},
  isbn      = {9781538669006},
  location  = {Visby, Sweden},
  publisher = {IEEE},
  title     = {{A tight upper bound on mutual information}},
  doi       = {10.1109/ITW44776.2019.8989292},
  year      = {2019},
}

@article{306,
  abstract  = {A cornerstone of statistical inference, the maximum entropy framework is being increasingly applied to construct descriptive and predictive models of biological systems, especially complex biological networks, from large experimental data sets. Both its broad applicability and the success it obtained in different contexts hinge upon its conceptual simplicity and mathematical soundness. Here we try to concisely review the basic elements of the maximum entropy principle, starting from the notion of ‘entropy’, and describe its usefulness for the analysis of biological systems. As examples, we focus specifically on the problem of reconstructing gene interaction networks from expression data and on recent work attempting to expand our system-level understanding of bacterial metabolism. Finally, we highlight some extensions and potential limitations of the maximum entropy approach, and point to more recent developments that are likely to play a key role in the upcoming challenges of extracting structures and information from increasingly rich, high-throughput biological data.},
  author    = {De Martino, Andrea and De Martino, Daniele},
  journal   = {Heliyon},
  number    = {4},
  publisher = {Elsevier},
  title     = {{An introduction to the maximum entropy approach and its application to inference problems in biology}},
  doi       = {10.1016/j.heliyon.2018.e00596},
  volume    = {4},
  year      = {2018},
}

@article{305,
  abstract  = {The hanging-drop network (HDN) is a technology platform based on a completely open microfluidic network at the bottom of an inverted, surface-patterned substrate. The platform is predominantly used for the formation, culturing, and interaction of self-assembled spherical microtissues (spheroids) under precisely controlled flow conditions. Here, we describe design, fabrication, and operation of microfluidic hanging-drop networks.},
  author    = {Misun, Patrick and Birchler, Axel and Lang, Moritz and Hierlemann, Andreas and Frey, Olivier},
  journal   = {Methods in Molecular Biology},
  pages     = {183--202},
  publisher = {Springer},
  title     = {{Fabrication and operation of microfluidic hanging drop networks}},
  doi       = {10.1007/978-1-4939-7792-5_15},
  volume    = {1771},
  year      = {2018},
}

@article{281,
  abstract  = {Although cells respond specifically to environments, how environmental identity is encoded intracellularly is not understood. Here, we study this organization of information in budding yeast by estimating the mutual information between environmental transitions and the dynamics of nuclear translocation for 10 transcription factors. Our method of estimation is general, scalable, and based on decoding from single cells. The dynamics of the transcription factors are necessary to encode the highest amounts of extracellular information, and we show that information is transduced through two channels: Generalists (Msn2/4, Tod6 and Dot6, Maf1, and Sfp1) can encode the nature of multiple stresses, but only if stress is high; specialists (Hog1, Yap1, and Mig1/2) encode one particular stress, but do so more quickly and for a wider range of magnitudes. In particular, Dot6 encodes almost as much information as Msn2, the master regulator of the environmental stress response. Each transcription factor reports differently, and it is only their collective behavior that distinguishes between multiple environmental states. Changes in the dynamics of the localization of transcription factors thus constitute a precise, distributed internal representation of extracellular change. We predict that such multidimensional representations are common in cellular decision-making.},
  author    = {Granados, Alejandro and Pietsch, Julian and Cepeda Humerez, Sarah A. and Farquhar, Isebail and Tkačik, Gašper and Swain, Peter},
  journal   = {Proceedings of the National Academy of Sciences},
  number    = {23},
  pages     = {6088--6093},
  publisher = {National Academy of Sciences},
  title     = {{Distributed and dynamic intracellular organization of extracellular information}},
  doi       = {10.1073/pnas.1716659115},
  volume    = {115},
  year      = {2018},
}

@article{316,
  abstract  = {Self-incompatibility (SI) is a genetically based recognition system that functions to prevent self-fertilization and mating among related plants. An enduring puzzle in SI is how the high diversity observed in nature arises and is maintained. Based on the underlying recognition mechanism, SI can be classified into two main groups: self- and non-self recognition. Most work has focused on diversification within self-recognition systems despite expected differences between the two groups in the evolutionary pathways and outcomes of diversification. Here, we use a deterministic population genetic model and stochastic simulations to investigate how novel S-haplotypes evolve in a gametophytic non-self recognition (SRNase/S Locus F-box (SLF)) SI system. For this model the pathways for diversification involve either the maintenance or breakdown of SI and can vary in the order of mutations of the female (SRNase) and male (SLF) components. We show analytically that diversification can occur with high inbreeding depression and self-pollination, but this varies with evolutionary pathway and level of completeness (which determines the number of potential mating partners in the population), and in general is more likely for lower haplotype number. The conditions for diversification are broader in stochastic simulations of finite population size. However, the number of haplotypes observed under high inbreeding and moderate to high self-pollination is less than that commonly observed in nature. Diversification was observed through pathways that maintain SI as well as through self-compatible intermediates. Yet the lifespan of diversified haplotypes was sensitive to their level of completeness. By examining diversification in a non-self recognition SI system, this model extends our understanding of the evolution and maintenance of haplotype diversity observed in a self recognition system common in flowering plants.},
  author    = {Boďová, Katarína and Priklopil, Tadeas and Field, David and Barton, Nicholas H. and Pickup, Melinda},
  journal   = {Genetics},
  number    = {3},
  pages     = {861--883},
  publisher = {Genetics Society of America},
  title     = {{Evolutionary pathways for the generation of new self-incompatibility haplotypes in a non-self recognition system}},
  doi       = {10.1534/genetics.118.300748},
  volume    = {209},
  year      = {2018},
}

@misc{9813,
  abstract  = {File S1 contains figures that clarify the following features: (i) effect of population size on the average number/frequency of SI classes, (ii) changes in the minimal completeness deficit in time for a single class, and (iii) diversification diagrams for all studied pathways, including the summary figure for k = 8. File S2 contains the code required for a stochastic simulation of the SLF system with an example. This file also includes the output in the form of figures and tables.},
  author    = {Boďová, Katarína and Priklopil, Tadeas and Field, David and Barton, Nicholas H. and Pickup, Melinda},
  publisher = {Genetics Society of America},
  title     = {{Supplemental material for Bodova et al., 2018}},
  doi       = {10.25386/genetics.6148304.v1},
  year      = {2018},
}

@article{406,
  abstract  = {Recent developments in automated tracking allow uninterrupted, high-resolution recording of animal trajectories, sometimes coupled with the identification of stereotyped changes of body pose or other behaviors of interest. Analysis and interpretation of such data represents a challenge: the timing of animal behaviors may be stochastic and modulated by kinematic variables, by the interaction with the environment or with the conspecifics within the animal group, and dependent on internal cognitive or behavioral state of the individual. Existing models for collective motion typically fail to incorporate the discrete, stochastic, and internal-state-dependent aspects of behavior, while models focusing on individual animal behavior typically ignore the spatial aspects of the problem. Here we propose a probabilistic modeling framework to address this gap. Each animal can switch stochastically between different behavioral states, with each state resulting in a possibly different law of motion through space. Switching rates for behavioral transitions can depend in a very general way, which we seek to identify from data, on the effects of the environment as well as the interaction between the animals. We represent the switching dynamics as a Generalized Linear Model and show that: (i) forward simulation of multiple interacting animals is possible using a variant of the Gillespie’s Stochastic Simulation Algorithm; (ii) formulated properly, the maximum likelihood inference of switching rate functions is tractably solvable by gradient descent; (iii) model selection can be used to identify factors that modulate behavioral state switching and to appropriately adjust model complexity to data. To illustrate our framework, we apply it to two synthetic models of animal motion and to real zebrafish tracking data.},
  author    = {Boďová, Katarína and Mitchell, Gabriel and Harpaz, Roy and Schneidman, Elad and Tkačik, Gašper},
  journal   = {PLoS One},
  number    = {3},
  publisher = {Public Library of Science},
  title     = {{Probabilistic models of individual and collective animal behavior}},
  doi       = {10.1371/journal.pone.0193049},
  volume    = {13},
  year      = {2018},
}