@inproceedings{14201, abstract = {Variational inference is a popular technique to approximate a possibly intractable Bayesian posterior with a more tractable one. Recently, boosting variational inference has been proposed as a new paradigm to approximate the posterior by a mixture of densities by greedily adding components to the mixture. However, as is the case with many other variational inference algorithms, its theoretical properties have not been studied. In the present work, we study the convergence properties of this approach from a modern optimization viewpoint by establishing connections to the classic Frank-Wolfe algorithm. Our analysis yields novel theoretical insights regarding the sufficient conditions for convergence, explicit rates, and algorithmic simplifications. Since much of the focus in previous work on variational inference has been on tractability, our work is especially important as a much-needed attempt to bridge the gap between probabilistic models and their corresponding theoretical properties.}, author = {Locatello, Francesco and Khanna, Rajiv and Ghosh, Joydeep and Rätsch, Gunnar}, booktitle = {Proceedings of the 21st International Conference on Artificial Intelligence and Statistics}, location = {Playa Blanca, Lanzarote}, pages = {464--472}, publisher = {ML Research Press}, title = {{Boosting variational inference: An optimization perspective}}, volume = {84}, year = {2018}, } @inproceedings{14198, abstract = {High-dimensional time series are common in many domains. Since human cognition is not optimized to work well in high-dimensional spaces, these areas could benefit from interpretable low-dimensional representations. However, most representation learning algorithms for time series data are difficult to interpret. This is due to non-intuitive mappings from data features to salient properties of the representation and non-smoothness over time. To address this problem, we propose a new representation learning framework building on ideas from interpretable discrete dimensionality reduction and deep generative modeling. This framework allows us to learn discrete representations of time series, which give rise to smooth and interpretable embeddings with superior clustering performance. We introduce a new way to overcome the non-differentiability in discrete representation learning and present a gradient-based version of the traditional self-organizing map algorithm that is more performant than the original. Furthermore, to allow for a probabilistic interpretation of our method, we integrate a Markov model in the representation space. This model uncovers the temporal transition structure, improves clustering performance even further, and provides additional explanatory insights as well as a natural representation of uncertainty. We evaluate our model in terms of clustering performance and interpretability on static (Fashion-)MNIST data, a time series of linearly interpolated (Fashion-)MNIST images, a chaotic Lorenz attractor system with two macro states, as well as on a challenging real-world medical time series application on the eICU data set.
Our learned representations compare favorably with competitor methods and facilitate downstream tasks on the real-world data.}, author = {Fortuin, Vincent and Hüser, Matthias and Locatello, Francesco and Strathmann, Heiko and Rätsch, Gunnar}, booktitle = {International Conference on Learning Representations}, location = {New Orleans, LA, United States}, title = {{SOM-VAE: Interpretable discrete representation learning on time series}}, year = {2018}, } @inproceedings{14203, abstract = {We propose a conditional gradient framework for a composite convex minimization template with broad applications. Our approach combines smoothing and homotopy techniques under the CGM framework, and provably achieves the optimal O(1/√k) convergence rate. We demonstrate that the same rate holds if the linear subproblems are solved approximately with additive or multiplicative error. In contrast with related work, we are able to characterize the convergence when the non-smooth term is an indicator function. Specific applications of our framework include non-smooth minimization, semidefinite programming, and minimization with linear inclusion constraints over a compact domain. Numerical evidence demonstrates the benefits of our framework.}, author = {Yurtsever, Alp and Fercoq, Olivier and Locatello, Francesco and Cevher, Volkan}, booktitle = {Proceedings of the 35th International Conference on Machine Learning}, location = {Stockholm, Sweden}, pages = {5727--5736}, publisher = {ML Research Press}, title = {{A conditional gradient framework for composite convex minimization with applications to semidefinite programming}}, volume = {80}, year = {2018}, } @article{282, abstract = {Adaptive introgression is common in nature and can be driven by selection acting on multiple, linked genes. We explore the effects of polygenic selection on introgression under the infinitesimal model with linkage. This model assumes that the introgressing block has an effectively infinite number of genes, each with an infinitesimal effect on the trait under selection. The block is assumed to introgress under directional selection within a native population that is genetically homogeneous. We use individual-based simulations and a branching process approximation to compute various statistics of the introgressing block, and explore how these depend on parameters such as the map length and initial trait value associated with the introgressing block, the genetic variability along the block, and the strength of selection. Our results show that the introgression dynamics of a block under infinitesimal selection are qualitatively different from the dynamics of neutral introgression. We also find that in the long run, surviving descendant blocks are likely to have intermediate lengths, and clarify how the length is shaped by the interplay between linkage and infinitesimal selection. Our results suggest that it may be difficult to distinguish introgression of single loci from that of genomic blocks with multiple, tightly linked and weakly selected loci.}, author = {Sachdeva, Himani and Barton, Nicholas H}, journal = {Genetics}, number = {4}, pages = {1279--1303}, publisher = {Genetics Society of America}, title = {{Introgression of a block of genome under infinitesimal selection}}, doi = {10.1534/genetics.118.301018}, volume = {209}, year = {2018}, } @inproceedings{108, abstract = {Universal hashing has found many applications in computer science.
In cryptography, the most important fact about universal families is the so-called Leftover Hash Lemma, proved by Impagliazzo, Levin, and Luby. In the language of modern cryptography, it states that almost universal families are good extractors. In this work we provide a somewhat surprising characterization in the opposite direction. Namely, every extractor with sufficiently good parameters yields a universal family on a noticeable fraction of its inputs. Our proof technique is based on tools from extremal graph theory applied to the ``collision graph'' induced by the extractor, and may be of independent interest. We discuss possible applications to the theory of randomness extractors and non-malleable codes.}, author = {Obremski, Maciej and Skorski, Maciej}, booktitle = {2018 IEEE International Symposium on Information Theory (ISIT)}, location = {Vail, CO, USA}, publisher = {IEEE}, title = {{Inverted leftover hash lemma}}, doi = {10.1109/ISIT.2018.8437654}, volume = {2018}, year = {2018}, } @inproceedings{14204, abstract = {Two popular examples of first-order optimization methods over linear spaces are coordinate descent and matching pursuit algorithms, with their randomized variants. While the former targets the optimization by moving along coordinates, the latter considers a generalized notion of directions. Exploiting the connection between the two algorithms, we present a unified analysis of both, providing affine-invariant sublinear O(1/t) rates on smooth objectives and linear convergence on strongly convex objectives. As a byproduct of our affine-invariant analysis of matching pursuit, our rates for steepest coordinate descent are the tightest known. Furthermore, we show the first accelerated convergence rate O(1/t²) for matching pursuit and steepest coordinate descent on convex objectives.}, author = {Locatello, Francesco and Raj, Anant and Karimireddy, Sai Praneeth and Rätsch, Gunnar and Schölkopf, Bernhard and Stich, Sebastian U. and Jaggi, Martin}, booktitle = {Proceedings of the 35th International Conference on Machine Learning}, location = {Stockholm, Sweden}, pages = {3198--3207}, publisher = {ML Research Press}, title = {{On matching pursuit and coordinate descent}}, volume = {80}, year = {2018}, } @inproceedings{160, abstract = {We present layered concurrent programs, a compact and expressive notation for specifying refinement proofs of concurrent programs. A layered concurrent program specifies a sequence of connected concurrent programs, from most concrete to most abstract, such that common parts of different programs are written exactly once. These programs are expressed in the ordinary syntax of imperative concurrent programs using gated atomic actions, sequencing, choice, and (recursive) procedure calls. Each concurrent program is automatically extracted from the layered program. We reduce refinement to the safety of a sequence of concurrent checker programs, one each to justify the connection between every two consecutive concurrent programs. These checker programs are also automatically extracted from the layered program.
Layered concurrent programs have been implemented in the CIVL verifier, which has been successfully used for the verification of several complex concurrent programs.}, author = {Kragl, Bernhard and Qadeer, Shaz}, booktitle = {Computer Aided Verification}, location = {Oxford, UK}, pages = {79--102}, publisher = {Springer}, title = {{Layered Concurrent Programs}}, doi = {10.1007/978-3-319-96145-3_5}, volume = {10981}, year = {2018}, } @article{82, abstract = {In experimental cultures, when bacteria are mixed with lytic (virulent) bacteriophage, bacterial cells resistant to the phage commonly emerge and become the dominant population of bacteria. Following the ascent of resistant mutants, the densities of bacteria in these simple communities become limited by resources rather than the phage. Despite the evolution of resistant hosts, upon which the phage cannot replicate, the lytic phage population is most commonly maintained in an apparently stable state with the resistant bacteria. Several mechanisms have been put forward to account for this result. Here we report the results of population dynamic/evolution experiments with a virulent mutant of phage Lambda, λVIR, and Escherichia coli in serial transfer cultures. We show that, following the ascent of λVIR-resistant bacteria, λVIR is maintained in the majority of cases in maltose-limited minimal media and in all cases in nutrient-rich broth. Using mathematical models and experiments, we show that the dominant mechanism responsible for the maintenance of λVIR in these resource-limited populations dominated by resistant E. coli is a high rate of either phenotypic or genetic transition from resistance to susceptibility—a hitherto undemonstrated mechanism we term "leaky resistance." We discuss the implications of leaky resistance for our understanding of the conditions for the maintenance of phage in populations of bacteria—their "existence conditions."}, author = {Chaudhry, Waqas and Pleska, Maros and Shah, Nilang and Weiss, Howard and McCall, Ingrid and Meyer, Justin and Gupta, Animesh and Guet, Calin C and Levin, Bruce}, journal = {PLoS Biology}, number = {8}, publisher = {Public Library of Science}, title = {{Leaky resistance and the conditions for the existence of lytic bacteriophage}}, doi = {10.1371/journal.pbio.2005971}, volume = {16}, year = {2018}, } @article{4, abstract = {We present a data-driven technique to instantly predict how fluid flows around various three-dimensional objects. Such simulation is useful for computational fabrication and engineering, but is usually computationally expensive since it requires solving the Navier-Stokes equation for many time steps. To accelerate the process, we propose a machine learning framework which predicts aerodynamic forces and velocity and pressure fields given a three-dimensional shape input. Handling detailed free-form three-dimensional shapes in a data-driven framework is challenging because machine learning approaches usually require a consistent parametrization of input and output. We present a novel PolyCube maps-based parametrization that can be computed for three-dimensional shapes at interactive rates. This allows us to efficiently learn the nonlinear response of the flow using Gaussian process regression. We demonstrate the effectiveness of our approach for the interactive design and optimization of a car body.}, author = {Umetani, Nobuyuki and Bickel, Bernd}, journal = {ACM Transactions on Graphics},
number = {4}, publisher = {ACM}, title = {{Learning three-dimensional flow for interactive aerodynamic design}}, doi = {10.1145/3197517.3201325}, volume = {37}, year = {2018}, } @article{566, abstract = {We consider large random matrices X with centered, independent entries which have comparable but not necessarily identical variances. Girko's circular law asserts that the spectrum is supported in a disk and that, in the case of identical variances, the limiting density is uniform. In this special case, the local circular law by Bourgade et al. [11, 12] shows that the empirical density converges even locally on scales slightly above the typical eigenvalue spacing. In the general case, the limiting density is typically inhomogeneous and is obtained by solving a system of deterministic equations. Our main result is the local inhomogeneous circular law in the bulk spectrum on the optimal scale for a general variance profile of the entries of X.}, author = {Alt, Johannes and Erdös, László and Krüger, Torben H}, journal = {Annals of Applied Probability}, number = {1}, pages = {148--203}, publisher = {Institute of Mathematical Statistics}, title = {{Local inhomogeneous circular law}}, doi = {10.1214/17-AAP1302}, volume = {28}, year = {2018}, }