% Bibliography database: ten entries, all dated 2021.
%
% Conventions used below:
%   - Citation keys are the numeric identifiers already in use; do not rename them.
%   - One field per line, in the order: author, editor, title, venue, volume,
%     number, pages, year, publisher, identifiers, abstract.
%   - Page ranges use a double ASCII hyphen, never a Unicode dash or minus sign.
%   - Titles are stored in sentence case; only acronyms and proper nouns are
%     brace-protected so that bibliography styles remain free to recase the rest.
%   - Mathematics inside abstracts is written as LaTeX math.
%
% NOTE(review): several journal entries carry a volume but no issue number;
% the source data did not provide one, so none has been added here.

@article{9571,
  author    = {Ramezani-Kebrya, Ali and Faghri, Fartash and Markov, Ilya and Aksenov, Vitalii and Alistarh, Dan-Adrian and Roy, Daniel M.},
  title     = {{NUQSGD}: Provably communication-efficient data-parallel {SGD} via nonuniform quantization},
  journal   = {Journal of Machine Learning Research},
  volume    = {22},
  number    = {114},
  pages     = {1--43},
  year      = {2021},
  publisher = {Journal of Machine Learning Research},
  issn      = {1533-7928},
  abstract  = {As the size and complexity of models and datasets grow, so does the need for communication-efficient variants of stochastic gradient descent that can be deployed to perform parallel model training. One popular communication-compression method for data-parallel SGD is QSGD (Alistarh et al., 2017), which quantizes and encodes gradients to reduce communication costs. The baseline variant of QSGD provides strong theoretical guarantees, however, for practical purposes, the authors proposed a heuristic variant which we call QSGDinf, which demonstrated impressive empirical gains for distributed training of large neural networks. In this paper, we build on this work to propose a new gradient quantization scheme, and show that it has both stronger theoretical guarantees than QSGD, and matches and exceeds the empirical performance of the QSGDinf heuristic and of other compression methods.},
}

@article{8544,
  author    = {Takeo, Yukari H. and Shuster, S. Andrew and Jiang, Linnie and Hu, Miley and Luginbuhl, David J. and Rülicke, Thomas and Contreras, Ximena and Hippenmeyer, Simon and Wagner, Mark J. and Ganguli, Surya and Luo, Liqun},
  title     = {{GluD2}- and {Cbln1}-mediated competitive synaptogenesis shapes the dendritic arbors of cerebellar {Purkinje} cells},
  journal   = {Neuron},
  volume    = {109},
  number    = {4},
  pages     = {629--644.e8},
  year      = {2021},
  publisher = {Elsevier},
  issn      = {1097-4199},
  doi       = {10.1016/j.neuron.2020.11.028},
  abstract  = {The synaptotrophic hypothesis posits that synapse formation stabilizes dendritic branches, yet this hypothesis has not been causally tested in vivo in the mammalian brain. Presynaptic ligand cerebellin-1 (Cbln1) and postsynaptic receptor GluD2 mediate synaptogenesis between granule cells and Purkinje cells in the molecular layer of the cerebellar cortex. Here we show that sparse but not global knockout of GluD2 causes under-elaboration of Purkinje cell dendrites in the deep molecular layer and overelaboration in the superficial molecular layer. Developmental, overexpression, structure-function, and genetic epistasis analyses indicate that dendrite morphogenesis defects result from competitive synaptogenesis in a Cbln1/GluD2-dependent manner. A generative model of dendritic growth based on competitive synaptogenesis largely recapitulates GluD2 sparse and global knockout phenotypes. Our results support the synaptotrophic hypothesis at initial stages of dendrite development, suggest a second mode in which cumulative synapse formation inhibits further dendrite growth, and highlight the importance of competition in dendrite morphogenesis.},
}

@unpublished{9791,
  author    = {Feliciangeli, Dario and Rademacher, Simone Anna Elvira and Seiringer, Robert},
  title     = {The effective mass problem for the {Landau-Pekar} equations},
  booktitle = {arXiv},
  note      = {arXiv preprint},
  year      = {2021},
  abstract  = {We provide a definition of the effective mass for the classical polaron described by the Landau-Pekar equations. It is based on a novel variational principle, minimizing the energy functional over states with given (initial) velocity. The resulting formula for the polaron's effective mass agrees with the prediction by Landau and Pekar.},
}

@article{7553,
  author    = {Mlynarski, Wiktor F. and Hledik, Michal and Sokolowski, Thomas R. and Tkačik, Gašper},
  title     = {Statistical analysis and optimality of neural systems},
  journal   = {Neuron},
  volume    = {109},
  number    = {7},
  pages     = {1227--1241.e5},
  year      = {2021},
  publisher = {Cell Press},
  doi       = {10.1016/j.neuron.2021.01.020},
  abstract  = {Normative theories and statistical inference provide complementary approaches for the study of biological systems. A normative theory postulates that organisms have adapted to efficiently solve essential tasks, and proceeds to mathematically work out testable consequences of such optimality; parameters that maximize the hypothesized organismal function can be derived ab initio, without reference to experimental data. In contrast, statistical inference focuses on efficient utilization of data to learn model parameters, without reference to any a priori notion of biological function, utility, or fitness. Traditionally, these two approaches were developed independently and applied separately. Here we unify them in a coherent Bayesian framework that embeds a normative theory into a family of maximum-entropy “optimization priors.” This family defines a smooth interpolation between a data-rich inference regime (characteristic of “bottom-up” statistical models), and a data-limited ab initio prediction regime (characteristic of “top-down” normative theory). We demonstrate the applicability of our framework using data from the visual cortex, and argue that the flexibility it affords is essential to address a number of fundamental challenges relating to inference and prediction in complex, high-dimensional biological problems.},
}

@inproceedings{10598,
  author    = {Mondelli, Marco and Venkataramanan, Ramji},
  editor    = {Banerjee, Arindam and Fukumizu, Kenji},
  title     = {Approximate message passing with spectral initialization for generalized linear models},
  booktitle = {Proceedings of The 24th International Conference on Artificial Intelligence and Statistics},
  volume    = {130},
  pages     = {397--405},
  year      = {2021},
  publisher = {ML Research Press},
  location  = {Virtual, San Diego, CA, United States},
  issn      = {2640-3498},
  abstract  = {We consider the problem of estimating a signal from measurements obtained via a generalized linear model. We focus on estimators based on approximate message passing (AMP), a family of iterative algorithms with many appealing features: the performance of AMP in the high-dimensional limit can be succinctly characterized under suitable model assumptions; AMP can also be tailored to the empirical distribution of the signal entries, and for a wide class of estimation problems, AMP is conjectured to be optimal among all polynomial-time algorithms. However, a major issue of AMP is that in many models (such as phase retrieval), it requires an initialization correlated with the ground-truth signal and independent from the measurement matrix. Assuming that such an initialization is available is typically not realistic. In this paper, we solve this problem by proposing an AMP algorithm initialized with a spectral estimator. With such an initialization, the standard AMP analysis fails since the spectral estimator depends in a complicated way on the design matrix. Our main contribution is a rigorous characterization of the performance of AMP with spectral initialization in the high-dimensional limit. The key technical idea is to define and analyze a two-phase artificial AMP algorithm that first produces the spectral estimator, and then closely approximates the iterates of the true AMP. We also provide numerical results that demonstrate the validity of the proposed approach.},
}

@article{8196,
  author    = {Shehu, Yekini and Dong, Qiao-Li and Liu, Lu-Lu and Yao, Jen-Chih},
  title     = {New strong convergence method for the sum of two maximal monotone operators},
  journal   = {Optimization and Engineering},
  volume    = {22},
  pages     = {2627--2653},
  year      = {2021},
  publisher = {Springer Nature},
  issn      = {1573-2924},
  doi       = {10.1007/s11081-020-09544-5},
  abstract  = {This paper aims to obtain a strong convergence result for a Douglas–Rachford splitting method with inertial extrapolation step for finding a zero of the sum of two set-valued maximal monotone operators without any further assumption of uniform monotonicity on any of the involved maximal monotone operators. Furthermore, our proposed method is easy to implement and the inertial factor in our proposed method is a natural choice. Our method of proof is of independent interest. Finally, some numerical implementations are given to confirm the theoretical analysis.},
}

@article{8911,
  author    = {Scappucci, Giordano and Kloeffel, Christoph and Zwanenburg, Floris A. and Loss, Daniel and Myronov, Maksym and Zhang, Jian-Jun and De Franceschi, Silvano and Katsaros, Georgios and Veldhorst, Menno},
  title     = {The germanium quantum information route},
  journal   = {Nature Reviews Materials},
  volume    = {6},
  pages     = {926--943},
  year      = {2021},
  publisher = {Springer Nature},
  issn      = {2058-8437},
  doi       = {10.1038/s41578-020-00262-z},
  abstract  = {In the worldwide endeavor for disruptive quantum technologies, germanium is emerging as a versatile material to realize devices capable of encoding, processing, or transmitting quantum information. These devices leverage special properties of the germanium valence-band states, commonly known as holes, such as their inherently strong spin-orbit coupling and the ability to host superconducting pairing correlations. In this Review, we initially introduce the physics of holes in low-dimensional germanium structures with key insights from a theoretical perspective. We then examine the material science progress underpinning germanium-based planar heterostructures and nanowires. We review the most significant experimental results demonstrating key building blocks for quantum technology, such as an electrically driven universal quantum gate set with spin qubits in quantum dots and superconductor-semiconductor devices for hybrid quantum systems. We conclude by identifying the most promising prospects toward scalable quantum information processing.},
}

@article{8338,
  author    = {Akopyan, Arseniy and Bobenko, Alexander I. and Schief, Wolfgang K. and Techter, Jan},
  title     = {On mutually diagonal nets on (confocal) quadrics and 3-dimensional webs},
  journal   = {Discrete and Computational Geometry},
  volume    = {66},
  pages     = {938--976},
  year      = {2021},
  publisher = {Springer Nature},
  issn      = {1432-0444},
  doi       = {10.1007/s00454-020-00240-w},
  abstract  = {Canonical parametrisations of classical confocal coordinate systems are introduced and exploited to construct non-planar analogues of incircular (IC) nets on individual quadrics and systems of confocal quadrics. Intimate connections with classical deformations of quadrics that are isometric along asymptotic lines and circular cross-sections of quadrics are revealed. The existence of octahedral webs of surfaces of Blaschke type generated by asymptotic and characteristic lines that are diagonally related to lines of curvature is proved theoretically and established constructively. Appropriate samplings (grids) of these webs lead to three-dimensional extensions of non-planar IC nets. Three-dimensional octahedral grids composed of planes and spatially extending (checkerboard) IC-nets are shown to arise in connection with systems of confocal quadrics in Minkowski space. In this context, the Laguerre geometric notion of conical octahedral grids of planes is introduced. The latter generalise the octahedral grids derived from systems of confocal quadrics in Minkowski space. An explicit construction of conical octahedral grids is presented. The results are accompanied by various illustrations which are based on the explicit formulae provided by the theory.},
}

@article{7939,
  author    = {Censor-Hillel, Keren and Dory, Michal and Korhonen, Janne and Leitersdorf, Dean},
  title     = {Fast approximate shortest paths in the congested clique},
  journal   = {Distributed Computing},
  volume    = {34},
  pages     = {463--487},
  year      = {2021},
  publisher = {Springer Nature},
  issn      = {1432-0452},
  doi       = {10.1007/s00446-020-00380-5},
  abstract  = {We design fast deterministic algorithms for distance computation in the Congested Clique model. Our key contributions include: A $(2+\epsilon)$-approximation for all-pairs shortest paths in $O(\log^2 n/\epsilon)$ rounds on unweighted undirected graphs. With a small additional additive factor, this also applies for weighted graphs. This is the first sub-polynomial constant-factor approximation for APSP in this model. A $(1+\epsilon)$-approximation for multi-source shortest paths from $O(\sqrt{n})$ sources in $O(\log^2 n/\epsilon)$ rounds on weighted undirected graphs. This is the first sub-polynomial algorithm obtaining this approximation for a set of sources of polynomial size. Our main techniques are new distance tools that are obtained via improved algorithms for sparse matrix multiplication, which we leverage to construct efficient hopsets and shortest paths. Furthermore, our techniques extend to additional distance problems for which we improve upon the state-of-the-art, including diameter approximation, and an exact single-source shortest paths algorithm for weighted undirected graphs in $\tilde{O}(n^{1/6})$ rounds.},
}

@article{8248,
  author    = {Boissonnat, Jean-Daniel and Dyer, Ramsay and Ghosh, Arijit and Lieutier, Andre and Wintraecken, Mathijs},
  title     = {Local conditions for triangulating submanifolds of {Euclidean} space},
  journal   = {Discrete and Computational Geometry},
  volume    = {66},
  pages     = {666--686},
  year      = {2021},
  publisher = {Springer Nature},
  issn      = {1432-0444},
  doi       = {10.1007/s00454-020-00233-9},
  abstract  = {We consider the following setting: suppose that we are given a manifold $M$ in $\mathbb{R}^d$ with positive reach. Moreover assume that we have an embedded simplicial complex $A$ without boundary, whose vertex set lies on the manifold, is sufficiently dense and such that all simplices in $A$ have sufficient quality. We prove that if, locally, interiors of the projection of the simplices onto the tangent space do not intersect, then $A$ is a triangulation of the manifold, that is, they are homeomorphic.},
}