@comment{
  Conventions used for the entries below:
  one field per line, abstract last; page ranges written as first--last
  with no surrounding spaces; ISSN and ISBN values hyphenated; thesis
  entries carry both school and publisher.
}

@article{430,
  author    = {Novembre, John and Barton, Nicholas H},
  title     = {{Tread lightly interpreting polygenic tests of selection}},
  journal   = {Genetics},
  volume    = {208},
  number    = {4},
  pages     = {1351--1355},
  year      = {2018},
  publisher = {Genetics Society of America},
  doi       = {10.1534/genetics.118.300786},
  abstract  = {In this issue of GENETICS, a new method for detecting natural selection on polygenic traits is developed and applied to several human examples (Racimo et al. 2018). By definition, many loci contribute to variation in polygenic traits, and a challenge for evolutionary geneticists has been that these traits can evolve by small, nearly undetectable shifts in allele frequencies across each of many, typically unknown, loci. Recently, a helpful remedy has arisen. Genome-wide association studies (GWAS) have been illuminating sets of loci that can be interrogated jointly for changes in allele frequencies. By aggregating small signals of change across many such loci, directional natural selection is now in principle detectable using genetic data, even for highly polygenic traits. This is an exciting arena of progress – with these methods, tests can be made for selection associated with traits, and we can now study selection in what may be its most prevalent mode. The continuing fast pace of GWAS publications suggest there will be many more polygenic tests of selection in the near future, as every new GWAS is an opportunity for an accompanying test of polygenic selection. However, it is important to be aware of complications that arise in interpretation, especially given that these studies may easily be misinterpreted both in and outside the evolutionary genetics community. Here, we provide context for understanding polygenic tests and urge caution regarding how these results are interpreted and reported upon more broadly.},
}

@article{607,
  author    = {Bodova, Katarina and Haskovec, Jan and Markowich, Peter},
  title     = {{Well posedness and maximum entropy approximation for the dynamics of quantitative traits}},
  journal   = {Physica D: Nonlinear Phenomena},
  volume    = {376-377},
  pages     = {108--120},
  year      = {2018},
  publisher = {Elsevier},
  doi       = {10.1016/j.physd.2017.10.015},
  abstract  = {We study the Fokker-Planck equation derived in the large system limit of the Markovian process describing the dynamics of quantitative traits. The Fokker-Planck equation is posed on a bounded domain and its transport and diffusion coefficients vanish on the domain's boundary. We first argue that, despite this degeneracy, the standard no-flux boundary condition is valid. We derive the weak formulation of the problem and prove the existence and uniqueness of its solutions by constructing the corresponding contraction semigroup on a suitable function space. Then, we prove that for the parameter regime with high enough mutation rate the problem exhibits a positive spectral gap, which implies exponential convergence to equilibrium. Next, we provide a simple derivation of the so-called Dynamic Maximum Entropy (DynMaxEnt) method for approximation of observables (moments) of the Fokker-Planck solution, which can be interpreted as a nonlinear Galerkin approximation. The limited applicability of the DynMaxEnt method inspires us to introduce its modified version that is valid for the whole range of admissible parameters. Finally, we present several numerical experiments to demonstrate the performance of both the original and modified DynMaxEnt methods. We observe that in the parameter regimes where both methods are valid, the modified one exhibits slightly better approximation properties compared to the original one.},
}

@phdthesis{200,
  author    = {Ringbauer, Harald},
  title     = {{Inferring recent demography from spatial genetic structure}},
  school    = {Institute of Science and Technology Austria},
  year      = {2018},
  pages     = {146},
  publisher = {Institute of Science and Technology Austria},
  issn      = {2663-337X},
  doi       = {10.15479/AT:ISTA:th_963},
  abstract  = {This thesis is concerned with the inference of current population structure based on geo-referenced genetic data. The underlying idea is that population structure affects its spatial genetic structure. Therefore, genotype information can be utilized to estimate important demographic parameters such as migration rates. These indirect estimates of population structure have become very attractive, as genotype data is now widely available. However, there also has been much concern about these approaches. Importantly, genetic structure can be influenced by many complex patterns, which often cannot be disentangled. Moreover, many methods merely fit heuristic patterns of genetic structure, and do not build upon population genetics theory. Here, I describe two novel inference methods that address these shortcomings. In Chapter 2, I introduce an inference scheme based on a new type of signal, identity by descent (IBD) blocks. Recently, it has become feasible to detect such long blocks of genome shared between pairs of samples. These blocks are direct traces of recent coalescence events. As such, they contain ample signal for inferring recent demography. I examine sharing of IBD blocks in two-dimensional populations with local migration. Using a diffusion approximation, I derive formulas for an isolation by distance pattern of long IBD blocks and show that sharing of long IBD blocks approaches rapid exponential decay for growing sample distance. I describe an inference scheme based on these results. It can robustly estimate the dispersal rate and population density, which is demonstrated on simulated data. I also show an application to estimate mean migration and the rate of recent population growth within Eastern Europe. Chapter 3 is about a novel method to estimate barriers to gene flow in a two dimensional population. This inference scheme utilizes geographically localized allele frequency fluctuations - a classical isolation by distance signal. The strength of these local fluctuations increases on average next to a barrier, and there is less correlation across it. I again use a framework of diffusion of ancestral lineages to model this effect, and provide an efficient numerical implementation to fit the results to geo-referenced biallelic SNP data. This inference scheme is able to robustly estimate strong barriers to gene flow, as tests on simulated data confirm.},
}

@article{139,
  author    = {Fraisse, Christelle and Roux, Camille and Gagnaire, Pierre and Romiguier, Jonathan and Faivre, Nicolas and Welch, John and Bierne, Nicolas},
  title     = {{The divergence history of European blue mussel species reconstructed from Approximate Bayesian Computation: The effects of sequencing techniques and sampling strategies}},
  journal   = {PeerJ},
  volume    = {2018},
  number    = {7},
  year      = {2018},
  publisher = {PeerJ},
  doi       = {10.7717/peerj.5198},
  abstract  = {Genome-scale diversity data are increasingly available in a variety of biological systems, and can be used to reconstruct the past evolutionary history of species divergence. However, extracting the full demographic information from these data is not trivial, and requires inferential methods that account for the diversity of coalescent histories throughout the genome. Here, we evaluate the potential and limitations of one such approach. We reexamine a well-known system of mussel sister species, using the joint site frequency spectrum (jSFS) of synonymous mutations computed either from exome capture or RNA-seq, in an Approximate Bayesian Computation (ABC) framework. We first assess the best sampling strategy (number of: individuals, loci, and bins in the jSFS), and show that model selection is robust to variation in the number of individuals and loci. In contrast, different binning choices when summarizing the jSFS, strongly affect the results: including classes of low and high frequency shared polymorphisms can more effectively reveal recent migration events. We then take advantage of the flexibility of ABC to compare more realistic models of speciation, including variation in migration rates through time (i.e., periodic connectivity) and across genes (i.e., genome-wide heterogeneity in migration rates). We show that these models were consistently selected as the most probable, suggesting that mussels have experienced a complex history of gene flow during divergence and that the species boundary is semi-permeable. Our work provides a comprehensive evaluation of ABC demographic inference in mussels based on the coding jSFS, and supplies guidelines for employing different sequencing techniques and sampling strategies. We emphasize, perhaps surprisingly, that inferences are less limited by the volume of data, than by the way in which they are analyzed.},
}

@article{33,
  author    = {Bertl, Johanna and Ringbauer, Harald and Blum, Michaël},
  title     = {{Can secondary contact following range expansion be distinguished from barriers to gene flow?}},
  journal   = {PeerJ},
  volume    = {2018},
  number    = {10},
  year      = {2018},
  publisher = {PeerJ},
  doi       = {10.7717/peerj.5325},
  abstract  = {Secondary contact is the reestablishment of gene flow between sister populations that have diverged. For instance, at the end of the Quaternary glaciations in Europe, secondary contact occurred during the northward expansion of the populations which had found refugia in the southern peninsulas. With the advent of multi-locus markers, secondary contact can be investigated using various molecular signatures including gradients of allele frequency, admixture clines, and local increase of genetic differentiation. We use coalescent simulations to investigate if molecular data provide enough information to distinguish between secondary contact following range expansion and an alternative evolutionary scenario consisting of a barrier to gene flow in an isolation-by-distance model. We find that an excess of linkage disequilibrium and of genetic diversity at the suture zone is a unique signature of secondary contact. We also find that the directionality index ψ, which was proposed to study range expansion, is informative to distinguish between the two hypotheses. However, although evidence for secondary contact is usually conveyed by statistics related to admixture coefficients, we find that they can be confounded by isolation-by-distance. We recommend to account for the spatial repartition of individuals when investigating secondary contact in order to better reflect the complex spatio-temporal evolution of populations and species.},
}

@article{286,
  author    = {Ellis, Thomas and Field, David and Barton, Nicholas H},
  title     = {{Efficient inference of paternity and sibship inference given known maternity via hierarchical clustering}},
  journal   = {Molecular Ecology Resources},
  volume    = {18},
  number    = {5},
  pages     = {988--999},
  year      = {2018},
  publisher = {Wiley},
  doi       = {10.1111/1755-0998.12782},
  abstract  = {Pedigree and sibship reconstruction are important methods in quantifying relationships and fitness of individuals in natural populations. Current methods employ a Markov chain-based algorithm to explore plausible possible pedigrees iteratively. This provides accurate results, but is time-consuming. Here, we develop a method to infer sibship and paternity relationships from half-sibling arrays of known maternity using hierarchical clustering. Given 50 or more unlinked SNP markers and empirically derived error rates, the method performs as well as the widely used package Colony, but is faster by two orders of magnitude. Using simulations, we show that the method performs well across contrasting mating scenarios, even when samples are large. We then apply the method to open-pollinated arrays of the snapdragon Antirrhinum majus and find evidence for a high degree of multiple mating. Although we focus on diploid SNP data, the method does not depend on marker type and as such has broad applications in nonmodel systems.},
}

@inproceedings{1112,
  author    = {Paixao, Tiago and Pérez Heredia, Jorge},
  title     = {{An application of stochastic differential equations to evolutionary algorithms}},
  booktitle = {Proceedings of the 14th ACM/SIGEVO Conference on Foundations of Genetic Algorithms},
  location  = {Copenhagen, Denmark},
  pages     = {3--11},
  year      = {2017},
  publisher = {ACM},
  isbn      = {978-1-4503-4651-1},
  doi       = {10.1145/3040718.3040729},
  abstract  = {There has been renewed interest in modelling the behaviour of evolutionary algorithms by more traditional mathematical objects, such as ordinary differential equations or Markov chains. The advantage is that the analysis becomes greatly facilitated due to the existence of well established methods. However, this typically comes at the cost of disregarding information about the process. Here, we introduce the use of stochastic differential equations (SDEs) for the study of EAs. SDEs can produce simple analytical results for the dynamics of stochastic processes, unlike Markov chains which can produce rigorous but unwieldy expressions about the dynamics. On the other hand, unlike ordinary differential equations (ODEs), they do not discard information about the stochasticity of the process. We show that these are especially suitable for the analysis of fixed budget scenarios and present analogs of the additive and multiplicative drift theorems for SDEs. We exemplify the use of these methods for two model algorithms ((1+1) EA and RLS) on two canonical problems (OneMax and LeadingOnes).},
}

@article{1191,
  author    = {Kollár, Richard and Novak, Sebastian},
  title     = {{Existence of traveling waves for the generalized F–KPP equation}},
  journal   = {Bulletin of Mathematical Biology},
  volume    = {79},
  number    = {3},
  pages     = {525--559},
  year      = {2017},
  publisher = {Springer},
  doi       = {10.1007/s11538-016-0244-3},
  abstract  = {Variation in genotypes may be responsible for differences in dispersal rates, directional biases, and growth rates of individuals. These traits may favor certain genotypes and enhance their spatiotemporal spreading into areas occupied by the less advantageous genotypes. We study how these factors influence the speed of spreading in the case of two competing genotypes under the assumption that spatial variation of the total population is small compared to the spatial variation of the frequencies of the genotypes in the population. In that case, the dynamics of the frequency of one of the genotypes is approximately described by a generalized Fisher–Kolmogorov–Petrovskii–Piskunov (F–KPP) equation. This generalized F–KPP equation with (nonlinear) frequency-dependent diffusion and advection terms admits traveling wave solutions that characterize the invasion of the dominant genotype. Our existence results generalize the classical theory for traveling waves for the F–KPP with constant coefficients. Moreover, in the particular case of the quadratic (monostable) nonlinear growth–decay rate in the generalized F–KPP we study in detail the influence of the variance in diffusion and mean displacement rates of the two genotypes on the minimal wave propagation speed.},
}

@article{570,
  author    = {Lagator, Mato and Sarikas, Srdjan and Acar, Hande and Bollback, Jonathan P and Guet, Calin C},
  title     = {{Regulatory network structure determines patterns of intermolecular epistasis}},
  journal   = {eLife},
  volume    = {6},
  year      = {2017},
  publisher = {eLife Sciences Publications},
  issn      = {2050-084X},
  doi       = {10.7554/eLife.28921},
  abstract  = {Most phenotypes are determined by molecular systems composed of specifically interacting molecules. However, unlike for individual components, little is known about the distributions of mutational effects of molecular systems as a whole. We ask how the distribution of mutational effects of a transcriptional regulatory system differs from the distributions of its components, by first independently, and then simultaneously, mutating a transcription factor and the associated promoter it represses. We find that the system distribution exhibits increased phenotypic variation compared to individual component distributions - an effect arising from intermolecular epistasis between the transcription factor and its DNA-binding site. In large part, this epistasis can be qualitatively attributed to the structure of the transcriptional regulatory system and could therefore be a common feature in prokaryotes. Counter-intuitively, intermolecular epistasis can alleviate the constraints of individual components, thereby increasing phenotypic variation that selection could act on and facilitating adaptive evolution.},
}

@article{611,
  author    = {Bradley, Desmond and Xu, Ping and Mohorianu, Irina and Whibley, Annabel and Field, David and Tavares, Hugo and Couchman, Matthew and Copsey, Lucy and Carpenter, Rosemary and Li, Miaomiao and Li, Qun and Xue, Yongbiao and Dalmay, Tamas and Coen, Enrico},
  title     = {{Evolution of flower color pattern through selection on regulatory small RNAs}},
  journal   = {Science},
  volume    = {358},
  number    = {6365},
  pages     = {925--928},
  year      = {2017},
  publisher = {American Association for the Advancement of Science},
  issn      = {0036-8075},
  doi       = {10.1126/science.aao3526},
  abstract  = {Small RNAs (sRNAs) regulate genes in plants and animals. Here, we show that population-wide differences in color patterns in snapdragon flowers are caused by an inverted duplication that generates sRNAs. The complexity and size of the transcripts indicate that the duplication represents an intermediate on the pathway to microRNA evolution. The sRNAs repress a pigment biosynthesis gene, creating a yellow highlight at the site of pollinator entry. The inverted duplication exhibits steep clines in allele frequency in a natural hybrid zone, showing that the allele is under selection. Thus, regulatory interactions of evolutionarily recent sRNAs can be acted upon by selection and contribute to the evolution of phenotypic diversity.},
}

@article{626,
  author    = {Barton, Nicholas H and Etheridge, Alison and Véber, Amandine},
  title     = {{The infinitesimal model: Definition, derivation, and implications}},
  journal   = {Theoretical Population Biology},
  volume    = {118},
  pages     = {50--73},
  year      = {2017},
  publisher = {Academic Press},
  issn      = {0040-5809},
  doi       = {10.1016/j.tpb.2017.06.001},
  abstract  = {Our focus here is on the infinitesimal model. In this model, one or several quantitative traits are described as the sum of a genetic and a non-genetic component, the first being distributed within families as a normal random variable centred at the average of the parental genetic components, and with a variance independent of the parental traits. Thus, the variance that segregates within families is not perturbed by selection, and can be predicted from the variance components. This does not necessarily imply that the trait distribution across the whole population should be Gaussian, and indeed selection or population structure may have a substantial effect on the overall trait distribution. One of our main aims is to identify some general conditions on the allelic effects for the infinitesimal model to be accurate. We first review the long history of the infinitesimal model in quantitative genetics. Then we formulate the model at the phenotypic level in terms of individual trait values and relationships between individuals, but including different evolutionary processes: genetic drift, recombination, selection, mutation, population structure, …. We give a range of examples of its application to evolutionary questions related to stabilising selection, assortative mating, effective population size and response to selection, habitat preference and speciation. We provide a mathematical justification of the model as the limit as the number M of underlying loci tends to infinity of a model with Mendelian inheritance, mutation and environmental noise, when the genetic component of the trait is purely additive. We also show how the model generalises to include epistatic effects. We prove in particular that, within each family, the genetic components of the individual trait values in the current generation are indeed normally distributed with a variance independent of ancestral traits, up to an error of order 1∕√M. Simulations suggest that in some cases the convergence may be as fast as 1∕M.},
}

@misc{9849,
  author    = {Lukacisinova, Marta and Novak, Sebastian and Paixao, Tiago},
  title     = {{Modelling and simulation details}},
  year      = {2017},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pcbi.1005609.s001},
  abstract  = {This text provides additional information about the model, a derivation of the analytic results in Eq (4), and details about simulations of an additional parameter set.},
}

@misc{9850,
  author    = {Lukacisinova, Marta and Novak, Sebastian and Paixao, Tiago},
  title     = {{Extensions of the model}},
  year      = {2017},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pcbi.1005609.s002},
  abstract  = {In this text, we discuss how a cost of resistance and the possibility of lethal mutations impact our model.},
}

@misc{9851,
  author    = {Lukacisinova, Marta and Novak, Sebastian and Paixao, Tiago},
  title     = {{Heuristic prediction for multiple stresses}},
  year      = {2017},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pcbi.1005609.s003},
  abstract  = {Based on the intuitive derivation of the dynamics of SIM allele frequency pM in the main text, we present a heuristic prediction for the long-term SIM allele frequencies with χ > 1 stresses and compare it to numerical simulations.},
}

@misc{9852,
  author    = {Lukacisinova, Marta and Novak, Sebastian and Paixao, Tiago},
  title     = {{Resistance frequencies for different combination strategies}},
  year      = {2017},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pcbi.1005609.s004},
  abstract  = {We show how different combination strategies affect the fraction of individuals that are multi-resistant.},
}

@phdthesis{6291,
  author    = {Payne, Pavel},
  title     = {{Bacterial herd and social immunity to phages}},
  school    = {Institute of Science and Technology Austria},
  year      = {2017},
  pages     = {83},
  publisher = {Institute of Science and Technology Austria},
  issn      = {2663-337X},
  abstract  = {Bacteria and their pathogens – phages – are the most abundant living entities on Earth. Throughout their coevolution, bacteria have evolved multiple immune systems to overcome the ubiquitous threat from the phages. Although the molecular details of these immune systems’ functions are relatively well understood, their epidemiological consequences for the phage-bacterial communities have been largely neglected. In this thesis we employed both experimental and theoretical methods to explore whether herd and social immunity may arise in bacterial populations. Using our experimental system consisting of Escherichia coli strains with a CRISPR based immunity to the T7 phage we show that herd immunity arises in phage-bacterial communities and that it is accentuated when the populations are spatially structured. By fitting a mathematical model, we inferred expressions for the herd immunity threshold and the velocity of spread of a phage epidemic in partially resistant bacterial populations, which both depend on the bacterial growth rate, phage burst size and phage latent period. We also investigated the potential for social immunity in Streptococcus thermophilus and its phage 2972 using a bioinformatic analysis of potentially coding short open reading frames with a signalling signature, encoded within the CRISPR associated genes. Subsequently, we tested one identified potentially signalling peptide and found that its addition to a phage-challenged culture increases probability of survival of bacteria two fold, although the results were only marginally significant. Together, these results demonstrate that the ubiquitous arms races between bacteria and phages have further consequences at the level of the population.},
}

@misc{9842,
  author    = {Etheridge, Alison and Barton, Nicholas H},
  title     = {{Data for: Establishment in a new habitat by polygenic adaptation}},
  year      = {2017},
  publisher = {Mendeley Data},
  doi       = {10.17632/nw68fxzjpm.1},
  abstract  = {Mathematica notebooks used to generate figures.},
}

@article{1351,
  author    = {Giacobbe, Mirco and Guet, Calin C and Gupta, Ashutosh and Henzinger, Thomas A and Paixao, Tiago and Petrov, Tatjana},
  title     = {{Model checking the evolution of gene regulatory networks}},
  journal   = {Acta Informatica},
  volume    = {54},
  number    = {8},
  pages     = {765--787},
  year      = {2017},
  publisher = {Springer},
  issn      = {0001-5903},
  doi       = {10.1007/s00236-016-0278-x},
  abstract  = {The behaviour of gene regulatory networks (GRNs) is typically analysed using simulation-based statistical testing-like methods. In this paper, we demonstrate that we can replace this approach by a formal verification-like method that gives higher assurance and scalability. We focus on Wagner’s weighted GRN model with varying weights, which is used in evolutionary biology. In the model, weight parameters represent the gene interaction strength that may change due to genetic mutations. For a property of interest, we synthesise the constraints over the parameter space that represent the set of GRNs satisfying the property. We experimentally show that our parameter synthesis procedure computes the mutational robustness of GRNs—an important problem of interest in evolutionary biology—more efficiently than the classical simulation method. We specify the property in linear temporal logic. We employ symbolic bounded model checking and SMT solving to compute the space of GRNs that satisfy the property, which amounts to synthesizing a set of linear constraints on the weights.},
}

@article{1336,
  author    = {Paixao, Tiago and Pérez Heredia, Jorge and Sudholt, Dirk and Trubenova, Barbora},
  title     = {{Towards a runtime comparison of natural and artificial evolution}},
  journal   = {Algorithmica},
  volume    = {78},
  number    = {2},
  pages     = {681--713},
  year      = {2017},
  publisher = {Springer},
  issn      = {0178-4617},
  doi       = {10.1007/s00453-016-0212-1},
  abstract  = {Evolutionary algorithms (EAs) form a popular optimisation paradigm inspired by natural evolution. In recent years the field of evolutionary computation has developed a rigorous analytical theory to analyse the runtimes of EAs on many illustrative problems. Here we apply this theory to a simple model of natural evolution. In the Strong Selection Weak Mutation (SSWM) evolutionary regime the time between occurrences of new mutations is much longer than the time it takes for a mutated genotype to take over the population. In this situation, the population only contains copies of one genotype and evolution can be modelled as a stochastic process evolving one genotype by means of mutation and selection between the resident and the mutated genotype. The probability of accepting the mutated genotype then depends on the change in fitness. We study this process, SSWM, from an algorithmic perspective, quantifying its expected optimisation time for various parameters and investigating differences to a similar evolutionary algorithm, the well-known (1+1) EA. We show that SSWM can have a moderate advantage over the (1+1) EA at crossing fitness valleys and study an example where SSWM outperforms the (1+1) EA by taking advantage of information on the fitness gradient.},
}

@article{1199,
  author    = {Barton, Nicholas H},
  title     = {{How does epistasis influence the response to selection?}},
  journal   = {Heredity},
  volume    = {118},
  pages     = {96--109},
  year      = {2017},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/hdy.2016.109},
  abstract  = {Much of quantitative genetics is based on the ‘infinitesimal model’, under which selection has a negligible effect on the genetic variance. This is typically justified by assuming a very large number of loci with additive effects. However, it applies even when genes interact, provided that the number of loci is large enough that selection on each of them is weak relative to random drift. In the long term, directional selection will change allele frequencies, but even then, the effects of epistasis on the ultimate change in trait mean due to selection may be modest. Stabilising selection can maintain many traits close to their optima, even when the underlying alleles are weakly selected. However, the number of traits that can be optimised is apparently limited to ~4Ne by the ‘drift load’, and this is hard to reconcile with the apparent complexity of many organisms. Just as for the mutation load, this limit can be evaded by a particular form of negative epistasis. A more robust limit is set by the variance in reproductive success. This suggests that selection accumulates information most efficiently in the infinitesimal regime, when selection on individual alleles is weak, and comparable with random drift. A review of evidence on selection strength suggests that although most variance in fitness may be because of alleles with large Nes, substantial amounts of adaptation may be because of alleles in the infinitesimal regime, in which epistasis has modest effects.},
}