@article{9571, abstract = {As the size and complexity of models and datasets grow, so does the need for communication-efficient variants of stochastic gradient descent that can be deployed to perform parallel model training. One popular communication-compression method for data-parallel SGD is QSGD (Alistarh et al., 2017), which quantizes and encodes gradients to reduce communication costs. The baseline variant of QSGD provides strong theoretical guarantees; for practical purposes, however, the authors proposed a heuristic variant (which we call QSGDinf) that demonstrated impressive empirical gains for distributed training of large neural networks. In this paper, we build on this work to propose a new gradient quantization scheme, and show that it has stronger theoretical guarantees than QSGD while matching and exceeding the empirical performance of the QSGDinf heuristic and of other compression methods.}, author = {Ramezani-Kebrya, Ali and Faghri, Fartash and Markov, Ilya and Aksenov, Vitalii and Alistarh, Dan-Adrian and Roy, Daniel M.}, issn = {1533-7928}, journal = {Journal of Machine Learning Research}, number = {114}, pages = {1--43}, publisher = {Journal of Machine Learning Research}, title = {{NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization}}, volume = {22}, year = {2021}, }

@article{8544, abstract = {The synaptotrophic hypothesis posits that synapse formation stabilizes dendritic branches, yet this hypothesis has not been causally tested in vivo in the mammalian brain. Presynaptic ligand cerebellin-1 (Cbln1) and postsynaptic receptor GluD2 mediate synaptogenesis between granule cells and Purkinje cells in the molecular layer of the cerebellar cortex. Here we show that sparse but not global knockout of GluD2 causes under-elaboration of Purkinje cell dendrites in the deep molecular layer and over-elaboration in the superficial molecular layer. Developmental, overexpression, structure-function, and genetic epistasis analyses indicate that the dendrite morphogenesis defects result from competitive synaptogenesis in a Cbln1/GluD2-dependent manner. A generative model of dendritic growth based on competitive synaptogenesis largely recapitulates the GluD2 sparse and global knockout phenotypes. Our results support the synaptotrophic hypothesis at initial stages of dendrite development, suggest a second mode in which cumulative synapse formation inhibits further dendrite growth, and highlight the importance of competition in dendrite morphogenesis.}, author = {Takeo, Yukari H. and Shuster, S. Andrew and Jiang, Linnie and Hu, Miley and Luginbuhl, David J. and Rülicke, Thomas and Contreras, Ximena and Hippenmeyer, Simon and Wagner, Mark J. and Ganguli, Surya and Luo, Liqun}, issn = {1097-4199}, journal = {Neuron}, number = {4}, pages = {629--644.e8}, publisher = {Elsevier}, title = {{GluD2- and Cbln1-mediated competitive synaptogenesis shapes the dendritic arbors of cerebellar Purkinje cells}}, doi = {10.1016/j.neuron.2020.11.028}, volume = {109}, year = {2021}, }
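Aside on entry 9571 (not part of the bibliographic record): the quantizer at the core of QSGD-style compression is compact enough to sketch. The NumPy snippet below is our own hedged illustration, not the authors' code; the function name qsgd_quantize is ours. It implements the baseline unbiased quantizer with s + 1 uniform levels; QSGDinf normalizes by the infinity norm instead of the Euclidean norm, and NUQSGD's contribution is to space the levels nonuniformly.

import numpy as np

def qsgd_quantize(v, s, rng=None):
    # Baseline QSGD quantizer (Alistarh et al., 2017): each coordinate is
    # encoded by its sign and one of the s + 1 uniform levels 0, 1/s, ..., 1,
    # scaled by the Euclidean norm of v. Stochastic rounding makes the
    # quantizer unbiased: E[Q(v)] = v.
    rng = rng or np.random.default_rng()
    norm = np.linalg.norm(v)
    if norm == 0.0:
        return np.zeros_like(v)
    r = s * np.abs(v) / norm              # position of each entry in [0, s]
    lower = np.floor(r)                   # nearest level from below
    round_up = rng.random(v.shape) < (r - lower)
    return np.sign(v) * (lower + round_up) * (norm / s)

Only the sign bits, the integer level indices, and one float (the norm) need to be communicated, which is where the bandwidth savings come from.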
@unpublished{9791, abstract = {We provide a definition of the effective mass for the classical polaron described by the Landau-Pekar equations. It is based on a novel variational principle, minimizing the energy functional over states with given (initial) velocity. The resulting formula for the polaron's effective mass agrees with the prediction by Landau and Pekar.}, author = {Feliciangeli, Dario and Rademacher, Simone Anna Elvira and Seiringer, Robert}, booktitle = {arXiv}, title = {{The effective mass problem for the Landau-Pekar equations}}, year = {2021}, }

@article{7553, abstract = {Normative theories and statistical inference provide complementary approaches for the study of biological systems. A normative theory postulates that organisms have adapted to efficiently solve essential tasks, and proceeds to mathematically work out testable consequences of such optimality; parameters that maximize the hypothesized organismal function can be derived ab initio, without reference to experimental data. In contrast, statistical inference focuses on efficient utilization of data to learn model parameters, without reference to any a priori notion of biological function, utility, or fitness. Traditionally, these two approaches were developed independently and applied separately. Here we unify them in a coherent Bayesian framework that embeds a normative theory into a family of maximum-entropy “optimization priors.” This family defines a smooth interpolation between a data-rich inference regime (characteristic of “bottom-up” statistical models) and a data-limited ab initio prediction regime (characteristic of “top-down” normative theory). We demonstrate the applicability of our framework using data from the visual cortex, and argue that the flexibility it affords is essential to address a number of fundamental challenges relating to inference and prediction in complex, high-dimensional biological problems.}, author = {Mlynarski, Wiktor F. and Hledik, Michal and Sokolowski, Thomas R. and Tkačik, Gašper}, journal = {Neuron}, number = {7}, pages = {1227--1241.e5}, publisher = {Cell Press}, title = {{Statistical analysis and optimality of neural systems}}, doi = {10.1016/j.neuron.2021.01.020}, volume = {109}, year = {2021}, }
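Aside on entry 7553 (not part of the bibliographic record): the interpolation the abstract describes can be written down in one line. The LaTeX sketch below is our own paraphrase of the idea under stated assumptions; the symbols U (the hypothesized utility or fitness function) and beta (confidence in optimality) are our notation, and the paper's exact parameterization may differ.

\[
  P_\beta(\theta) \;\propto\; \exp\{\beta\, U(\theta)\},
  \qquad
  P_\beta(\theta \mid \mathcal{D}) \;\propto\; P(\mathcal{D} \mid \theta)\, \exp\{\beta\, U(\theta)\}.
\]

For \beta = 0 the prior is flat and the posterior is purely data-driven (the “bottom-up” inference regime); as \beta \to \infty the prior concentrates on \arg\max_\theta U(\theta), recovering the ab initio normative prediction independently of the data (the “top-down” regime). The exponential form is the maximum-entropy distribution among all priors with a given expected utility, which is what motivates the name “optimization prior.”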
@inproceedings{10598, abstract = {We consider the problem of estimating a signal from measurements obtained via a generalized linear model. We focus on estimators based on approximate message passing (AMP), a family of iterative algorithms with many appealing features: the performance of AMP in the high-dimensional limit can be succinctly characterized under suitable model assumptions; AMP can also be tailored to the empirical distribution of the signal entries; and, for a wide class of estimation problems, AMP is conjectured to be optimal among all polynomial-time algorithms. However, a major issue with AMP is that in many models (such as phase retrieval) it requires an initialization correlated with the ground-truth signal and independent of the measurement matrix. Assuming that such an initialization is available is typically not realistic. In this paper, we solve this problem by proposing an AMP algorithm initialized with a spectral estimator. With such an initialization, the standard AMP analysis fails, since the spectral estimator depends in a complicated way on the design matrix. Our main contribution is a rigorous characterization of the performance of AMP with spectral initialization in the high-dimensional limit. The key technical idea is to define and analyze a two-phase artificial AMP algorithm that first produces the spectral estimator and then closely approximates the iterates of the true AMP. We also provide numerical results that demonstrate the validity of the proposed approach.}, author = {Mondelli, Marco and Venkataramanan, Ramji}, booktitle = {Proceedings of the 24th International Conference on Artificial Intelligence and Statistics}, editor = {Banerjee, Arindam and Fukumizu, Kenji}, issn = {2640-3498}, location = {Virtual, San Diego, CA, United States}, pages = {397--405}, publisher = {ML Research Press}, title = {{Approximate message passing with spectral initialization for generalized linear models}}, volume = {130}, year = {2021}, }
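Aside on entry 10598 (not part of the bibliographic record): the spectral initialization the abstract builds on is easy to sketch. The NumPy snippet below is our own hedged illustration, not the authors' code; the name spectral_init and the positive-part preprocessing are our choices. It computes the principal eigenvector of D = (1/n) A^T diag(T(y)) A by power iteration; the AMP iterates that refine this estimate, with their model-specific Onsager corrections and denoisers, are beyond a short sketch and omitted here.

import numpy as np

def spectral_init(A, y, preprocess=lambda t: np.maximum(t, 0.0),
                  iters=200, seed=0):
    # Power iteration for the principal eigenvector of
    #   D = (1/n) * A^T diag(T(y)) A,
    # where T is a model-dependent, nonnegative preprocessing of the
    # measurements (a simple positive part here). With T >= 0, D is
    # positive semidefinite, so power iteration converges to its top
    # eigenvector, which serves as the correlated initializer x^0 for AMP.
    n, d = A.shape
    t = preprocess(y)
    x = np.random.default_rng(seed).standard_normal(d)
    x /= np.linalg.norm(x)
    for _ in range(iters):
        x = A.T.dot(t * A.dot(x)) / n     # apply D without forming it
        x /= np.linalg.norm(x)
    return x

Usage would be spectral_init(A, y) for, e.g., a phase-retrieval-style model y = (A x)**2; the choice of preprocessing T that maximizes the correlation of the estimate with x is model-dependent and is itself a well-studied question.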