@article{12330, abstract = {The design and implementation of efficient concurrent data structures has seen significant attention. However, most of this work has focused on concurrent data structures providing good worst-case guarantees, although, in real workloads, objects are often accessed at different rates. Efficient distribution-adaptive data structures, such as splay-trees, are known in the sequential case; however, they often are hard to translate efficiently to the concurrent case. We investigate distribution-adaptive concurrent data structures, and propose a new design called the splay-list. At a high level, the splay-list is similar to a standard skip-list, with the key distinction that the height of each element adapts dynamically to its access rate: popular elements “move up,” whereas rarely-accessed elements decrease in height. We show that the splay-list provides order-optimal amortized complexity bounds for a subset of operations, while being amenable to efficient concurrent implementation. Experiments show that the splay-list can leverage distribution-adaptivity for performance, and can outperform the only previously-known distribution-adaptive concurrent design in certain workloads.}, author = {Aksenov, Vitalii and Alistarh, Dan-Adrian and Drozdova, Alexandra and Mohtashami, Amirkeivan}, issn = {1432-0452}, journal = {Distributed Computing}, pages = {395--418}, publisher = {Springer Nature}, title = {{The splay-list: A distribution-adaptive concurrent skip-list}}, doi = {10.1007/s00446-022-00441-x}, volume = {36}, year = {2023}, } @article{9571, abstract = {As the size and complexity of models and datasets grow, so does the need for communication-efficient variants of stochastic gradient descent that can be deployed to perform parallel model training. One popular communication-compression method for data-parallel SGD is QSGD (Alistarh et al., 2017), which quantizes and encodes gradients to reduce communication costs. The baseline variant of QSGD provides strong theoretical guarantees, however, for practical purposes, the authors proposed a heuristic variant which we call QSGDinf, which demonstrated impressive empirical gains for distributed training of large neural networks. In this paper, we build on this work to propose a new gradient quantization scheme, and show that it has both stronger theoretical guarantees than QSGD, and matches and exceeds the empirical performance of the QSGDinf heuristic and of other compression methods.}, author = {Ramezani-Kebrya, Ali and Faghri, Fartash and Markov, Ilya and Aksenov, Vitalii and Alistarh, Dan-Adrian and Roy, Daniel M.}, issn = {15337928}, journal = {Journal of Machine Learning Research}, number = {114}, pages = {1−43}, publisher = {Journal of Machine Learning Research}, title = {{NUQSGD: Provably communication-efficient data-parallel SGD via nonuniform quantization}}, volume = {22}, year = {2021}, } @article{7214, abstract = {Background: Many cancer genomes are extensively rearranged with highly aberrant chromosomal karyotypes. Structural and copy number variations in cancer genomes can be determined via abnormal mapping of sequenced reads to the reference genome. Recently it became possible to reconcile both of these types of large-scale variations into a karyotype graph representation of the rearranged cancer genomes. Such a representation, however, does not directly describe the linear and/or circular structure of the underlying rearranged cancer chromosomes, thus limiting possible analysis of cancer genomes somatic evolutionary process as well as functional genomic changes brought by the large-scale genome rearrangements. Results: Here we address the aforementioned limitation by introducing a novel methodological framework for recovering rearranged cancer chromosomes from karyotype graphs. For a cancer karyotype graph we formulate an Eulerian Decomposition Problem (EDP) of finding a collection of linear and/or circular rearranged cancer chromosomes that are determined by the graph. We derive and prove computational complexities for several variations of the EDP. We then demonstrate that Eulerian decomposition of the cancer karyotype graphs is not always unique and present the Consistent Contig Covering Problem (CCCP) of recovering unambiguous cancer contigs from the cancer karyotype graph, and describe a novel algorithm CCR capable of solving CCCP in polynomial time. We apply CCR on a prostate cancer dataset and demonstrate that it is capable of consistently recovering large cancer contigs even when underlying cancer genomes are highly rearranged. Conclusions: CCR can recover rearranged cancer contigs from karyotype graphs thereby addressing existing limitation in inferring chromosomal structures of rearranged cancer genomes and advancing our understanding of both patient/cancer-specific as well as the overall genetic instability in cancer.}, author = {Aganezov, Sergey and Zban, Ilya and Aksenov, Vitalii and Alexeev, Nikita and Schatz, Michael C.}, issn = {14712105}, journal = {BMC Bioinformatics}, publisher = {BMC}, title = {{Recovering rearranged cancer chromosomes from karyotype graphs}}, doi = {10.1186/s12859-019-3208-4}, volume = {20}, year = {2019}, }