% Bibliography database: five records (one conference paper, four journal articles), all dated 2024.
% Layout: one field per line, in the order author, title, container, volume/number/pages,
% year, publisher, identifiers, keywords, abstract.
% Titles are stored in sentence case with braces only around tokens whose capitalisation
% must survive style recasing (acronyms, proper names, quoted benchmark name).
% NOTE(review): names and abstracts contain raw UTF-8 characters; process with Biber or
% another UTF-8-aware toolchain, or convert them to LaTeX escapes for classic 8-bit BibTeX.

% Conference paper; the conference place is kept in the biblatex "venue" field so that it
% is not printed as the publisher's city.
@inproceedings{15011,
  author    = {Kurtic, Eldar and Hoefler, Torsten and Alistarh, Dan-Adrian},
  title     = {How to prune your language model: Recovering accuracy on the {``Sparsity May Cry''} benchmark},
  booktitle = {Proceedings of Machine Learning Research},
  volume    = {234},
  pages     = {542--553},
  year      = {2024},
  publisher = {ML Research Press},
  venue     = {Hong Kong, China},
  issn      = {2640-3498},
  abstract  = {Pruning large language models (LLMs) from the BERT family has emerged as a standard compression benchmark, and several pruning methods have been proposed for this task. The recent “Sparsity May Cry” (SMC) benchmark put into question the validity of all existing methods, exhibiting a more complex setup where many known pruning methods appear to fail. We revisit the question of accurate BERT-pruning during fine-tuning on downstream datasets, and propose a set of general guidelines for successful pruning, even on the challenging SMC benchmark. First, we perform a cost-vs-benefits analysis of pruning model components, such as the embeddings and the classification head; second, we provide a simple-yet-general way of scaling training, sparsification and learning rate schedules relative to the desired target sparsity; finally, we investigate the importance of proper parametrization for Knowledge Distillation in the context of LLMs. Our simple insights lead to state-of-the-art results, both on classic BERT-pruning benchmarks, as well as on the SMC benchmark, showing that even classic gradual magnitude pruning (GMP) can yield competitive results, with the right approach.},
}

% Journal article; no page range or article number is recorded for this entry.
@article{15024,
  author    = {Robin, Paul},
  title     = {Correlation-induced viscous dissipation in concentrated electrolytes},
  journal   = {Journal of Chemical Physics},
  volume    = {160},
  number    = {6},
  year      = {2024},
  publisher = {AIP Publishing},
  issn      = {1089-7690},
  doi       = {10.1063/5.0188215},
  abstract  = {Electrostatic correlations between ions dissolved in water are known to impact their transport properties in numerous ways, from conductivity to ion selectivity. The effects of these correlations on the solvent itself remain, however, much less clear. In particular, the addition of salt has been consistently reported to affect the solution’s viscosity, but most modeling attempts fail to reproduce experimental data even at moderate salt concentrations. Here, we use an approach based on stochastic density functional theory, which accurately captures charge fluctuations and correlations. We derive a simple analytical expression for the viscosity correction in concentrated electrolytes, by directly linking it to the liquid’s structure factor. Our prediction compares quantitatively to experimental data at all temperatures and all salt concentrations up to the saturation limit. This universal link between the microscopic structure and viscosity allows us to shed light on the nanoscale dynamics of water and ions under highly concentrated and correlated conditions.},
}

% Journal article; the issue designator "1B" is stored verbatim in the number field.
@article{15025,
  author    = {Erdös, László and McKenna, Benjamin},
  title     = {Extremal statistics of quadratic forms of {GOE/GUE} eigenvectors},
  journal   = {Annals of Applied Probability},
  volume    = {34},
  number    = {1B},
  pages     = {1623--1662},
  year      = {2024},
  publisher = {Institute of Mathematical Statistics},
  issn      = {1050-5164},
  doi       = {10.1214/23-AAP2000},
  abstract  = {We consider quadratic forms of deterministic matrices A evaluated at the random eigenvectors of a large N×N GOE or GUE matrix, or equivalently evaluated at the columns of a Haar-orthogonal or Haar-unitary random matrix. We prove that, as long as the deterministic matrix has rank much smaller than √N, the distributions of the extrema of these quadratic forms are asymptotically the same as if the eigenvectors were independent Gaussians. This reduces the problem to Gaussian computations, which we carry out in several cases to illustrate our result, finding Gumbel or Weibull limiting distributions depending on the signature of A. Our result also naturally applies to the eigenvectors of any invariant ensemble.},
}

% Journal article; no issue number or pages are recorded for this entry.
% NOTE(review): the keywords read like generic journal subject categories rather than
% paper-specific terms; kept as exported, confirm before relying on them.
@article{15033,
  author    = {Adamowski, Maciek and Matijevic, Ivana and Friml, Jiří},
  title     = {Developmental patterning function of {GNOM} {ARF-GEF} mediated from the cell periphery},
  journal   = {eLife},
  volume    = {13},
  year      = {2024},
  publisher = {eLife Sciences Publications},
  issn      = {2050-084X},
  doi       = {10.7554/elife.68993},
  keywords  = {General Immunology and Microbiology, General Biochemistry, Genetics and Molecular Biology, General Medicine, General Neuroscience},
  abstract  = {The GNOM (GN) Guanine nucleotide Exchange Factor for ARF small GTPases (ARF-GEF) is among the best studied trafficking regulators in plants, playing crucial and unique developmental roles in patterning and polarity. The current models place GN at the Golgi apparatus (GA), where it mediates secretion/recycling, and at the plasma membrane (PM) presumably contributing to clathrin-mediated endocytosis (CME). The mechanistic basis of the developmental function of GN, distinct from the other ARF-GEFs including its closest homologue GNOM-LIKE1 (GNL1), remains elusive. Insights from this study largely extend the current notions of GN function. We show that GN, but not GNL1, localizes to the cell periphery at long-lived structures distinct from clathrin-coated pits, while CME and secretion proceed normally in gn knockouts. The functional GN mutant variant GNfewerroots, absent from the GA, suggests that the cell periphery is the major site of GN action responsible for its developmental function. Following inhibition by Brefeldin A, GN, but not GNL1, relocates to the PM likely on exocytic vesicles, suggesting selective molecular associations en route to the cell periphery. A study of GN-GNL1 chimeric ARF-GEFs indicates that all GN domains contribute to the specific GN function in a partially redundant manner. Together, this study offers significant steps toward the elucidation of the mechanism underlying unique cellular and development functions of GNOM.},
}

% Journal article; the page range ends in an electronic supplement page ("e6").
% The abstract no longer carries the superscript citation numbers that had been fused
% into the text, since they refer to no reference list in this file.
% NOTE(review): the given name of author Heinze was corrected from a truncated form;
% verify against the publisher record for the DOI below.
@article{14479,
  author    = {Csata, Eniko and Perez-Escudero, Alfonso and Laury, Emmanuel and Leitner, Hanna and Latil, Gerard and Heinze, Juergen and Simpson, Stephen and Cremer, Sylvia and Dussutour, Audrey},
  title     = {Fungal infection alters collective nutritional intake of ant colonies},
  journal   = {Current Biology},
  volume    = {34},
  number    = {4},
  pages     = {902--909.e6},
  year      = {2024},
  publisher = {Elsevier},
  issn      = {1879-0445},
  doi       = {10.1016/j.cub.2024.01.017},
  abstract  = {In animals, parasitic infections impose significant fitness costs. Infected animals can alter their feeding behavior to resist infection, but parasites can manipulate animal foraging behavior to their own benefits. How nutrition influences host-parasite interactions is not well understood, as studies have mainly focused on the host and less on the parasite. We used the nutritional geometry framework to investigate the role of amino acids (AA) and carbohydrates (C) in a host-parasite system: the Argentine ant, Linepithema humile, and the entomopathogenic fungus, Metarhizium brunneum. First, using 18 diets varying in AA:C composition, we established that the fungus performed best on the high-amino-acid diet 1:4. Second, we found that the fungus reached this optimal diet when given various diet pairings, revealing its ability to cope with nutritional challenges. Third, we showed that the optimal fungal diet reduced the lifespan of healthy ants when compared with a high-carbohydrate diet but had no effect on infected ants. Fourth, we revealed that infected ant colonies, given a choice between the optimal fungal diet and a high-carbohydrate diet, chose the optimal fungal diet, whereas healthy colonies avoided it. Lastly, by disentangling fungal infection from host immune response, we demonstrated that infected ants foraged on the optimal fungal diet in response to immune activation and not as a result of parasite manipulation. Therefore, we revealed that infected ant colonies chose a diet that is costly for survival in the long term but beneficial in the short term—a form of collective self-medication.},
}