% Steinrück (2018), PhD thesis, IST Austria.
@phdthesis{26,
  author   = {Steinrück, Magdalena},
  title    = {The influence of sequence context on the evolution of bacterial gene expression},
  school   = {IST Austria},
  year     = {2018},
  pages    = {109},
  doi      = {10.15479/AT:ISTA:th1059},
  abstract = {Expression of genes is a fundamental molecular phenotype that is subject to evolution by different types of mutations. Both the rate and the effect of mutations may depend on the DNA sequence context of a particular gene or a particular promoter sequence. In this thesis I investigate the nature of this dependence using simple genetic systems in Escherichia coli. With these systems I explore the evolution of constitutive gene expression from random starting sequences at different loci on the chromosome and at different locations in sequence space. First, I dissect chromosomal neighborhood effects that underlie locus-dependent differences in the potential of a gene under selection to become more highly expressed. Next, I find that the effects of point mutations in promoter sequences are dependent on sequence context, and that an existing energy matrix model performs poorly in predicting relative expression of unrelated sequences. Finally, I show that a substantial fraction of random sequences contain functional promoters and I present an extended thermodynamic model that predicts promoter strength in full sequence space. Taken together, these results provide new insights and guides on how to integrate information on sequence context to improve our qualitative and quantitative understanding of bacterial gene expression, with implications for rapid evolution of drug resistance, de novo evolution of genes, and horizontal gene transfer.},
}
% Kolesnikov (2018), PhD thesis, IST Austria.
@phdthesis{197,
  author   = {Kolesnikov, Alexander},
  title    = {Weakly-Supervised Segmentation and Unsupervised Modeling of Natural Images},
  school   = {IST Austria},
  year     = {2018},
  pages    = {113},
  doi      = {10.15479/AT:ISTA:th_1021},
  abstract = {Modern computer vision systems heavily rely on statistical machine learning models, which typically require large amounts of labeled data to be learned reliably. Moreover, very recently computer vision research widely adopted techniques for representation learning, which further increase the demand for labeled data. However, for many important practical problems there is relatively small amount of labeled data available, so it is problematic to leverage full potential of the representation learning methods. One way to overcome this obstacle is to invest substantial resources into producing large labelled datasets. Unfortunately, this can be prohibitively expensive in practice. In this thesis we focus on the alternative way of tackling the aforementioned issue. We concentrate on methods, which make use of weakly-labeled or even unlabeled data. Specifically, the first half of the thesis is dedicated to the semantic image segmentation task. We develop a technique, which achieves competitive segmentation performance and only requires annotations in a form of global image-level labels instead of dense segmentation masks. Subsequently, we present a new methodology, which further improves segmentation performance by leveraging tiny additional feedback from a human annotator. By using our methods practitioners can greatly reduce the amount of data annotation effort, which is required to learn modern image segmentation models. In the second half of the thesis we focus on methods for learning from unlabeled visual data. We study a family of autoregressive models for modeling structure of natural images and discuss potential applications of these models. Moreover, we conduct in-depth study of one of these applications, where we develop the state-of-the-art model for the probabilistic image colorization task.},
}
% Laukoter (2018), PhD thesis, IST Austria.
@phdthesis{10,
  author   = {Laukoter, Susanne},
  title    = {Role of genomic imprinting in cerebral cortex development},
  school   = {IST Austria},
  year     = {2018},
  pages    = {1--139},
  doi      = {10.15479/AT:ISTA:th1057},
  abstract = {Genomic imprinting is an epigenetic process that leads to parent of origin-specific gene expression in a subset of genes. Imprinted genes are essential for brain development, and deregulation of imprinting is associated with neurodevelopmental diseases and the pathogenesis of psychiatric disorders. However, the cell-type specificity of imprinting at single cell resolution, and how imprinting and thus gene dosage regulates neuronal circuit assembly is still largely unknown. Here, MADM (Mosaic Analysis with Double Markers) technology was employed to assess genomic imprinting at single cell level. By visualizing MADM-induced uniparental disomies (UPDs) in distinct colors at single cell level in genetic mosaic animals, this experimental paradigm provides a unique quantitative platform to systematically assay the UPD-mediated imbalances in imprinted gene expression at unprecedented resolution. An experimental pipeline based on FACS, RNA-seq and bioinformatics analysis was established and applied to systematically map cell-type-specific ‘imprintomes’ in the mouse brain. The results revealed that parental-specific expression of imprinted genes per se is rarely cell-type-specific even at the individual cell level. Conversely, when we extended the comparison to downstream responses resulting from imbalanced imprinted gene expression, we discovered an unexpectedly high degree of cell-type specificity. Furthermore, we determined a novel function of genomic imprinting in cortical astrocyte production and in olfactory bulb (OB) granule cell generation. These results suggest important functional implication of genomic imprinting for generating cell-type diversity in the brain. In addition, MADM provides a powerful tool to study candidate genes by concomitant genetic manipulation and fluorescent labelling of single cells.
MADM-based candidate gene approach was utilized to identify potential imprinted genes involved in the generation of cortical astrocytes and OB granule cells. We investigated p57Kip2, a maternally expressed gene and known cell cycle regulator. Although we found that p57Kip2 does not play a role in these processes, we detected an unexpected function of the paternal allele previously thought to be silent. Finally, we took advantage of a key property of MADM which is to allow unambiguous investigation of environmental impact on single cells. The experimental pipeline based on FACS and RNA-seq analysis of MADM-labeled cells was established to probe the functional differences of single cell loss of gene function compared to global loss of function on a transcriptional level. With this method, both common and distinct responses were isolated due to cell-autonomous and non-autonomous effects acting on genotypically identical cells. As a result, transcriptional changes were identified which result solely from the surrounding environment. Using the MADM technology to study genomic imprinting at single cell resolution, we have identified cell-type-specific gene expression, novel gene function and the impact of environment on single cell transcriptomes. Together, these provide important insights to the understanding of mechanisms regulating cell-type specificity and thus diversity in the brain.},
}
% Mckenzie (2018), PhD thesis, IST Austria.
@phdthesis{6266,
  author   = {Mckenzie, Catherine},
  title    = {Design and characterization of methods and biological components to realize synthetic neurotransmission},
  school   = {IST Austria},
  year     = {2018},
  pages    = {95},
  doi      = {10.15479/at:ista:th_1055},
  abstract = {A major challenge in neuroscience research is to dissect the circuits that orchestrate behavior in health and disease. Proteins from a wide range of non-mammalian species, such as microbial opsins, have been successfully transplanted to specific neuronal targets to override their natural communication patterns. The goal of our work is to manipulate synaptic communication in a manner that closely incorporates the functional intricacies of synapses by preserving temporal encoding (i.e. the firing pattern of the presynaptic neuron) and connectivity (i.e. target specific synapses rather than specific neurons). Our strategy to achieve this goal builds on the use of non-mammalian transplants to create a synthetic synapse. The mode of modulation comes from pre-synaptic uptake of a synthetic neurotransmitter (SN) into synaptic vesicles by means of a genetically targeted transporter selective for the SN. Upon natural vesicular release, exposure of the SN to the synaptic cleft will modify the post-synaptic potential through an orthogonal ligand gated ion channel. To achieve this goal we have functionally characterized a mixed cationic methionine-gated ion channel from Arabidopsis thaliana, designed a method to functionally characterize a synthetic transporter in isolated synaptic vesicles without the need for transgenic animals, identified and extracted multiple prokaryotic uptake systems that are substrate specific for methionine (Met), and established a primary/cell line co-culture system that would allow future combinatorial testing of this orthogonal transmitter-transporter-channel trifecta. Synthetic synapses will provide a unique opportunity to manipulate synaptic communication while maintaining the electrophysiological integrity of the pre-synaptic cell. In this way, information may be preserved that was generated in upstream circuits and that could be essential for concerted function and information processing.},
}
% Ringbauer (2018), PhD thesis, IST Austria.
@phdthesis{200,
  author   = {Ringbauer, Harald},
  title    = {Inferring recent demography from spatial genetic structure},
  school   = {IST Austria},
  year     = {2018},
  pages    = {146},
  doi      = {10.15479/AT:ISTA:th_963},
  abstract = {This thesis is concerned with the inference of current population structure based on geo-referenced genetic data. The underlying idea is that population structure affects its spatial genetic structure. Therefore, genotype information can be utilized to estimate important demographic parameters such as migration rates. These indirect estimates of population structure have become very attractive, as genotype data is now widely available. However, there also has been much concern about these approaches. Importantly, genetic structure can be influenced by many complex patterns, which often cannot be disentangled. Moreover, many methods merely fit heuristic patterns of genetic structure, and do not build upon population genetics theory. Here, I describe two novel inference methods that address these shortcomings. In Chapter 2, I introduce an inference scheme based on a new type of signal, identity by descent (IBD) blocks. Recently, it has become feasible to detect such long blocks of genome shared between pairs of samples. These blocks are direct traces of recent coalescence events. As such, they contain ample signal for inferring recent demography. I examine sharing of IBD blocks in two-dimensional populations with local migration. Using a diffusion approximation, I derive formulas for an isolation by distance pattern of long IBD blocks and show that sharing of long IBD blocks approaches rapid exponential decay for growing sample distance. I describe an inference scheme based on these results. It can robustly estimate the dispersal rate and population density, which is demonstrated on simulated data. I also show an application to estimate mean migration and the rate of recent population growth within Eastern Europe. Chapter 3 is about a novel method to estimate barriers to gene flow in a two dimensional population. This inference scheme utilizes geographically localized allele frequency fluctuations - a classical isolation by distance signal.
The strength of these local fluctuations increases on average next to a barrier, and there is less correlation across it. I again use a framework of diffusion of ancestral lineages to model this effect, and provide an efficient numerical implementation to fit the results to geo-referenced biallelic SNP data. This inference scheme is able to robustly estimate strong barriers to gene flow, as tests on simulated data confirm.},
}
% Hurny (2018), PhD thesis, IST Austria.
@phdthesis{539,
  author   = {Hurny, Andrej},
  title    = {Identification and characterization of novel auxin-cytokinin cross-talk components},
  school   = {IST Austria},
  year     = {2018},
  pages    = {147},
  doi      = {10.15479/AT:ISTA:th_930},
  abstract = {The whole life cycle of plants as well as their responses to environmental stimuli is governed by a complex network of hormonal regulations. A number of studies have demonstrated an essential role of both auxin and cytokinin in the regulation of many aspects of plant growth and development including embryogenesis, postembryonic organogenic processes such as root, and shoot branching, root and shoot apical meristem activity and phyllotaxis. Over the last decades essential knowledge on the key molecular factors and pathways that spatio-temporally define auxin and cytokinin activities in the plant body has accumulated. However, how both hormonal pathways are interconnected by a complex network of interactions and feedback circuits that determines the final outcome of the individual hormone actions is still largely unknown. Root system architecture establishment and in particular formation of lateral organs is prime example of developmental process at whose regulation both auxin and cytokinin pathways converge. To dissect convergence points and pathways that tightly balance auxin - cytokinin antagonistic activities that determine the root branching pattern transcriptome profiling was applied. Genome wide expression analyses of the xylem pole pericycle, a tissue giving rise to lateral roots, led to identification of genes that are highly responsive to combinatorial auxin and cytokinin treatments and play an essential function in the auxin-cytokinin regulated root branching. SYNERGISTIC AUXIN CYTOKININ 1 (SYAC1) gene, which encodes for a protein of unknown function, was detected among the top candidate genes of which expression was synergistically up-regulated by simultaneous hormonal treatment. Plants with modulated SYAC1 activity exhibit severe defects in the root system establishment and attenuate developmental responses to both auxin and cytokinin.
To explore the biological function of the SYAC1, we employed different strategies including expression pattern analysis, subcellular localization and phenotypic analyses of the syac1 loss-of-function and gain-of-function transgenic lines along with the identification of the SYAC1 interaction partners. Detailed functional characterization revealed that SYAC1 acts as a developmentally specific regulator of the secretory pathway to control deposition of cell wall components and thereby rapidly fine tune elongation growth.},
}
% Pavlogiannis (2017), PhD thesis, IST Austria.
@phdthesis{821,
  author   = {Pavlogiannis, Andreas},
  title    = {Algorithmic advances in program analysis and their applications},
  school   = {IST Austria},
  year     = {2017},
  pages    = {418},
  doi      = {10.15479/AT:ISTA:th_854},
  abstract = {This dissertation focuses on algorithmic aspects of program verification, and presents modeling and complexity advances on several problems related to the
static analysis of programs, the stateless model checking of concurrent programs, and the competitive analysis of real-time scheduling algorithms.
Our contributions can be broadly grouped into five categories.
Our first contribution is a set of new algorithms and data structures for the quantitative and data-flow analysis of programs, based on the graph-theoretic notion of treewidth.
It has been observed that the control-flow graphs of typical programs have special structure, and are characterized as graphs of small treewidth.
We utilize this structural property to provide faster algorithms for the quantitative and data-flow analysis of recursive and concurrent programs.
In most cases we make an algebraic treatment of the considered problem,
where several interesting analyses, such as the reachability, shortest path, and certain kind of data-flow analysis problems follow as special cases.
We exploit the constant-treewidth property to obtain algorithmic improvements for on-demand versions of the problems,
and provide data structures with various tradeoffs between the resources spent in the preprocessing and querying phase.
We also improve on the algorithmic complexity of quantitative problems outside the algebraic path framework,
namely of the minimum mean-payoff, minimum ratio, and minimum initial credit for energy problems.
Our second contribution is a set of algorithms for Dyck reachability with applications to data-dependence analysis and alias analysis.
In particular, we develop an optimal algorithm for Dyck reachability on bidirected graphs, which are ubiquitous in context-insensitive, field-sensitive points-to analysis.
Additionally, we develop an efficient algorithm for context-sensitive data-dependence analysis via Dyck reachability,
where the task is to obtain analysis summaries of library code in the presence of callbacks.
Our algorithm preprocesses libraries in almost linear time, after which the contribution of the library in the complexity of the client analysis is (i)~linear in the number of call sites and (ii)~only logarithmic in the size of the whole library, as opposed to linear in the size of the whole library.
Finally, we prove that Dyck reachability is Boolean Matrix Multiplication-hard in general, and the hardness also holds for graphs of constant treewidth.
This hardness result strongly indicates that there exist no combinatorial algorithms for Dyck reachability with truly subcubic complexity.
Our third contribution is the formalization and algorithmic treatment of the Quantitative Interprocedural Analysis framework.
In this framework, the transitions of a recursive program are annotated as good, bad or neutral, and receive a weight which measures
the magnitude of their respective effect.
The Quantitative Interprocedural Analysis problem asks to determine whether there exists an infinite run of the program where the long-run ratio of the bad weights over the good weights is above a given threshold.
We illustrate how several quantitative problems related to static analysis of recursive programs can be instantiated in this framework,
and present some case studies to this direction.
Our fourth contribution is a new dynamic partial-order reduction for the stateless model checking of concurrent programs. Traditional approaches rely on the standard Mazurkiewicz equivalence between traces, by means of partitioning the trace space into equivalence classes, and attempting to explore a few representatives from each class.
We present a new dynamic partial-order reduction method called the Data-centric Partial Order Reduction (DC-DPOR).
Our algorithm is based on a new equivalence between traces, called the observation equivalence.
DC-DPOR explores a coarser partitioning of the trace space than any exploration method based on the standard Mazurkiewicz equivalence.
Depending on the program, the new partitioning can be even exponentially coarser.
Additionally, DC-DPOR spends only polynomial time in each explored class.
Our fifth contribution is the use of automata and game-theoretic verification techniques in the competitive analysis and synthesis of real-time scheduling algorithms for firm-deadline tasks.
On the analysis side, we leverage automata on infinite words to compute the competitive ratio of real-time schedulers subject to various environmental constraints.
On the synthesis side, we introduce a new instance of two-player mean-payoff partial-information games, and show
how the synthesis of an optimal real-time scheduler can be reduced to computing winning strategies in this new type of games.},
}
% Pull (2017), PhD thesis, IST Austria.
@phdthesis{819,
  author   = {Pull, Christopher},
  title    = {Disease defence in garden ants},
  school   = {IST Austria},
  year     = {2017},
  pages    = {122},
  doi      = {10.15479/AT:ISTA:th_861},
  abstract = {Contagious diseases must transmit from infectious to susceptible hosts in order to reproduce. Whilst vectored pathogens can rely on intermediaries to find new hosts for them, many infectious pathogens require close contact or direct interaction between hosts for transmission. Hence, this means that conspecifics are often the main source of infection for most animals and so, in theory, animals should avoid conspecifics to reduce their risk of infection. Of course, in reality animals must interact with one another, as a bare minimum, to mate. However, being social provides many additional benefits and group living has become a taxonomically diverse and widespread trait. How then do social animals overcome the issue of increased disease? Over the last few decades, the social insects (ants, termites and some bees and wasps) have become a model system for studying disease in social animals. On paper, a social insect colony should be particularly susceptible to disease, given that they often contain thousands of potential hosts that are closely related and frequently interact, as well as exhibiting stable environmental conditions that encourage microbial growth. Yet, disease outbreaks appear to be rare and attempts to eradicate pest species using pathogens have failed time and again. Evolutionary biologists investigating this observation have discovered that the reduced disease susceptibility in social insects is, in part, due to collectively performed disease defences of the workers. These defences act like a “social immune system” for the colony, resulting in a per capita decrease in disease, termed social immunity. Our understanding of social immunity, and its importance in relation to the immunological defences of each insect, continues to grow, but there remain many open questions. In this thesis I have studied disease defence in garden ants.
In the first data chapter, I use the invasive garden ant, Lasius neglectus, to investigate how colonies mitigate lethal infections and prevent them from spreading systemically. I find that ants have evolved ‘destructive disinfection’ – a behaviour that uses endogenously produced acidic poison to kill diseased brood and to prevent the pathogen from replicating. In the second experimental chapter, I continue to study the use of poison in invasive garden ant colonies, finding that it is sprayed prophylactically within the nest. However, this spraying has negative effects on developing pupae when they have had their cocoons artificially removed. Hence, I suggest that acidic nest sanitation may be maintaining larval cocoon spinning in this species. In the next experimental chapter, I investigated how colony founding black garden ant queens (Lasius niger) prevent disease when a co-foundress dies. I show that ant queens prophylactically perform undertaking behaviours, similar to those performed by the workers in mature nests. When a co-foundress was infected, these undertaking behaviours improved the survival of the healthy queen. In the final data chapter, I explored how immunocompetence (measured as antifungal activity) changes as incipient black garden ant colonies grow and mature, from the solitary queen phase to colonies with several hundred workers. Queen and worker antifungal activity varied throughout this time period, but despite social immunity, did not decrease as colonies matured. In addition to the above data chapters, this thesis includes two co-authored reviews. In the first, we examine the state of the art in the field of social immunity and how it might develop in the future. In the second, we identify several challenges and open questions in the study of disease defence in animals. We highlight how social insects offer a unique model to tackle some of these problems, as disease defence can be studied from the cell to the society.},
}
% Rybar (2017), PhD thesis, IST Austria.
% Abstract formulas were reconstructed as LaTeX math from extraction-garbled text.
@phdthesis{838,
  author   = {Rybar, Michal},
  title    = {({The} exact security of) {Message} authentication codes},
  school   = {IST Austria},
  year     = {2017},
  pages    = {86},
  doi      = {10.15479/AT:ISTA:th_828},
  abstract = {In this thesis we discuss the exact security of message authentication codes HMAC, NMAC, and PMAC. NMAC is a mode of operation which turns a fixed input-length keyed hash function $f$ into a variable input-length function. A practical single-key variant of NMAC called HMAC is a very popular and widely deployed message authentication code (MAC). PMAC is a block-cipher based mode of operation, which also happens to be the most famous fully parallel MAC. NMAC was introduced by Bellare, Canetti and Krawczyk [Crypto’96], who proved it to be a secure pseudorandom function (PRF), and thus also a MAC, under two assumptions. Unfortunately, for many instantiations of HMAC one of them has been found to be wrong. To restore the provable guarantees for NMAC, Bellare [Crypto’06] showed its security without this assumption. PMAC was introduced by Black and Rogaway at Eurocrypt 2002. If instantiated with a pseudorandom permutation over $n$-bit strings, PMAC constitutes a provably secure variable input-length PRF. For adversaries making $q$ queries, each of length at most $\ell$ (in $n$-bit blocks), and of total length $\sigma \le q\ell$, the original paper proves an upper bound on the distinguishing advantage of $O(\sigma^2/2^n)$, while the currently best bound is $O(q\sigma/2^n)$. In this work we show that this bound is tight by giving an attack with advantage $\Omega(q^2\ell/2^n)$. In the PMAC construction one initially XORs a mask to every message block, where the mask for the $i$-th block is computed as $\tau_i := \gamma_i \cdot L$, where $L$ is a (secret) random value, and $\gamma_i$ is the $i$-th codeword of the Gray code. Our attack applies more generally to any sequence of $\gamma_i$’s which contains a large coset of a subgroup of $\mathrm{GF}(2^n)$. As for NMAC, our first contribution is a simpler and uniform proof: If $f$ is an $\varepsilon$-secure PRF (against $q$ queries) and a $\delta$-non-adaptively secure PRF (against $q$ queries), then NMAC$^f$ is an $(\varepsilon + \ell q\delta)$-secure PRF against $q$ queries of length at most $\ell$ blocks each.
We also show that this $\varepsilon + \ell q\delta$ bound is basically tight by constructing an $f$ for which an attack with advantage $\ell q\delta$ exists. Moreover, we analyze the PRF-security of a modification of NMAC called NI by An and Bellare that avoids the constant rekeying on multi-block messages in NMAC and allows for an information-theoretic analysis. We carry out such an analysis, obtaining a tight $\ell q^2/2^c$ bound for this step, improving over the trivial bound of $\ell^2 q^2/2^c$. Finally, we investigate, if the security of PMAC can be further improved by using $\tau_i$’s that are $k$-wise independent, for $k > 1$ (the original has $k = 1$). We observe that the security of PMAC will not increase in general if $k = 2$, and then prove that the security increases to $O(q^2/2^n)$, if $k = 4$. Due to simple extension attacks, this is the best bound one can hope for, using any distribution on the masks. Whether $k = 3$ is already sufficient to get this level of security is left as an open problem. Keywords: Message authentication codes, Pseudorandom functions, HMAC, PMAC.},
}
% Daca (2017), PhD thesis, IST Austria.
@phdthesis{1155,
  author   = {Daca, Przemyslaw},
  title    = {Statistical and logical methods for property checking},
  school   = {IST Austria},
  year     = {2017},
  pages    = {163},
  doi      = {10.15479/AT:ISTA:TH_730},
  abstract = {This dissertation concerns the automatic verification of probabilistic systems and programs with arrays by statistical and logical methods. Although statistical and logical methods are different in nature, we show that they can be successfully combined for system analysis. In the first part of the dissertation we present a new statistical algorithm for the verification of probabilistic systems with respect to unbounded properties, including linear temporal logic. Our algorithm often performs faster than the previous approaches, and at the same time requires less information about the system. In addition, our method can be generalized to unbounded quantitative properties such as mean-payoff bounds. In the second part, we introduce two techniques for comparing probabilistic systems. Probabilistic systems are typically compared using the notion of equivalence, which requires the systems to have the equal probability of all behaviors. However, this notion is often too strict, since probabilities are typically only empirically estimated, and any imprecision may break the relation between processes. On the one hand, we propose to replace the Boolean notion of equivalence by a quantitative distance of similarity. For this purpose, we introduce a statistical framework for estimating distances between Markov chains based on their simulation runs, and we investigate which distances can be approximated in our framework. On the other hand, we propose to compare systems with respect to a new qualitative logic, which expresses that behaviors occur with probability one or a positive probability. This qualitative analysis is robust with respect to modeling errors and applicable to many domains. In the last part, we present a new quantifier-free logic for integer arrays, which allows us to express counting. Counting properties are prevalent in array-manipulating programs, however they cannot be expressed in the quantified fragments of the theory of arrays.
We present a decision procedure for our logic, and provide several complexity results.},
}