@inproceedings{10216,
  abstract  = {This paper reports a new concurrent graph data structure that supports updates of both edges and vertices and queries: Breadth-first search, Single-source shortest-path, and Betweenness centrality. The operations are provably linearizable and non-blocking.},
  author    = {Chatterjee, Bapi and Peri, Sathya and Sa, Muktikanta},
  booktitle = {35th International Symposium on Distributed Computing},
  isbn      = {978-3-95977-210-5},
  issn      = {1868-8969},
  location  = {Freiburg, Germany},
  publisher = {Schloss Dagstuhl - Leibniz Zentrum für Informatik},
  title     = {Brief announcement: Non-blocking dynamic unbounded graphs with worst-case amortized bounds},
  doi       = {10.4230/LIPIcs.DISC.2021.52},
  volume    = {209},
  year      = {2021},
}

@inproceedings{11436,
  abstract  = {Asynchronous distributed algorithms are a popular way to reduce synchronization costs in large-scale optimization, and in particular for neural network training. However, for nonsmooth and nonconvex objectives, few convergence guarantees exist beyond cases where closed-form proximal operator solutions are available. As training most popular deep neural networks corresponds to optimizing nonsmooth and nonconvex objectives, there is a pressing need for such convergence guarantees. In this paper, we analyze for the first time the convergence of stochastic asynchronous optimization for this general class of objectives. In particular, we focus on stochastic subgradient methods allowing for block variable partitioning, where the shared model is asynchronously updated by concurrent processes. To this end, we use a probabilistic model which captures key features of real asynchronous scheduling between concurrent processes. Under this model, we establish convergence with probability one to an invariant set for stochastic subgradient methods with momentum. From a practical perspective, one issue with the family of algorithms that we consider is that they are not efficiently supported by machine learning frameworks, which mostly focus on distributed data-parallel strategies. To address this, we propose a new implementation strategy for shared-memory based training of deep neural networks for a partitioned but shared model in single- and multi-GPU settings. Based on this implementation, we achieve on average 1.2x speed-up in comparison to state-of-the-art training methods for popular image classification tasks, without compromising accuracy.},
  author    = {Kungurtsev, Vyacheslav and Egan, Malcolm and Chatterjee, Bapi and Alistarh, Dan-Adrian},
  booktitle = {35th {AAAI} Conference on Artificial Intelligence, {AAAI} 2021},
  isbn      = {9781713835974},
  issn      = {2374-3468},
  location  = {Virtual, Online},
  number    = {9B},
  pages     = {8209--8216},
  publisher = {AAAI Press},
  title     = {Asynchronous optimization methods for efficient training of deep neural networks with guarantees},
  volume    = {35},
  year      = {2021},
}

@article{9827,
  abstract  = {The Nearest neighbour search (NNS) is a fundamental problem in many application domains dealing with multidimensional data. In a concurrent setting, where dynamic modifications are allowed, a linearizable implementation of the NNS is highly desirable. This paper introduces the LockFree-kD-tree (LFkD-tree): a lock-free concurrent kD-tree, which implements an abstract data type (ADT) that provides the operations Add, Remove, Contains, and NNS. Our implementation is linearizable. The operations in the LFkD-tree use single-word read and compare-and-swap (CAS) atomic primitives, which are readily supported on available multi-core processors. We experimentally evaluate the LFkD-tree using several benchmarks comprising real-world and synthetic datasets. The experiments show that the presented design is scalable and achieves significant speed-up compared to the implementations of an existing sequential kD-tree and a recently proposed multidimensional indexing structure, PH-tree.},
  author    = {Chatterjee, Bapi and Walulya, Ivan and Tsigas, Philippas},
  issn      = {0304-3975},
  journal   = {Theoretical Computer Science},
  keywords  = {Concurrent data structure, kD-tree, Nearest neighbor search, Similarity search, Lock-free, Linearizability},
  pages     = {27--48},
  publisher = {Elsevier},
  title     = {Concurrent linearizable nearest neighbour search in {LockFree-kD-tree}},
  doi       = {10.1016/j.tcs.2021.06.041},
  volume    = {886},
  year      = {2021},
}

@inproceedings{10432,
  abstract  = {One key element behind the recent progress of machine learning has been the ability to train machine learning models in large-scale distributed shared-memory and message-passing environments. Most of these models are trained employing variants of stochastic gradient descent (SGD) based optimization, but most methods involve some type of consistency relaxation relative to sequential SGD, to mitigate its large communication or synchronization costs at scale. In this paper, we introduce a general consistency condition covering communication-reduced and asynchronous distributed SGD implementations. Our framework, called elastic consistency, decouples the system-specific aspects of the implementation from the SGD convergence requirements, giving a general way to obtain convergence bounds for a wide variety of distributed SGD methods used in practice. Elastic consistency can be used to re-derive or improve several previous convergence bounds in message-passing and shared-memory settings, but also to analyze new models and distribution schemes. As a direct application, we propose and analyze a new synchronization-avoiding scheduling scheme for distributed SGD, and show that it can be used to efficiently train deep convolutional models for image classification.},
  author    = {Nadiradze, Giorgi and Markov, Ilia and Chatterjee, Bapi and Kungurtsev, Vyacheslav and Alistarh, Dan-Adrian},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  location  = {Virtual},
  number    = {10},
  pages     = {9037--9045},
  title     = {Elastic consistency: A practical consistency model for distributed stochastic gradient descent},
  volume    = {35},
  year      = {2021},
}

@inproceedings{7213,
  abstract  = {Persistent homology is a powerful tool in Topological Data Analysis (TDA) to capture the topological properties of data succinctly at different spatial resolutions. For graphical data, the shape, and structure of the neighborhood of individual data items (nodes) are an essential means of characterizing their properties. We propose the use of persistent homology methods to capture structural and topological properties of graphs and use it to address the problem of link prediction. We achieve encouraging results on nine different real-world datasets that attest to the potential of persistent homology-based methods for network analysis.},
  author    = {Bhatia, Sumit and Chatterjee, Bapi and Nathani, Deepak and Kaul, Manohar},
  booktitle = {Complex Networks and their applications VIII},
  isbn      = {9783030366865},
  issn      = {1860-9503},
  location  = {Lisbon, Portugal},
  pages     = {27--39},
  publisher = {Springer Nature},
  title     = {A persistent homology perspective to the link prediction problem},
  doi       = {10.1007/978-3-030-36687-2_3},
  volume    = {881},
  year      = {2020},
}

@inproceedings{5947,
  abstract  = {Graph algorithms applied in many applications, including social networks, communication networks, VLSI design, graphics, and several others, require dynamic modifications - addition and removal of vertices and/or edges - in the graph. This paper presents a novel concurrent non-blocking algorithm to implement a dynamic unbounded directed graph in a shared-memory machine. The addition and removal operations of vertices and edges are lock-free. For a finite sized graph, the lookup operations are wait-free. Most significant component of the presented algorithm is the reachability query in a concurrent graph. The reachability queries in our algorithm are obstruction-free and thus impose minimal additional synchronization cost over other operations. We prove that each of the data structure operations are linearizable. We extensively evaluate a sample C/C++ implementation of the algorithm through a number of micro-benchmarks. The experimental results show that the proposed algorithm scales well with the number of threads and on an average provides 5 to 7x performance improvement over a concurrent graph implementation using coarse-grained locking.},
  author    = {Chatterjee, Bapi and Peri, Sathya and Sa, Muktikanta and Singhal, Nandini},
  booktitle = {ACM International Conference Proceeding Series},
  isbn      = {978-1-4503-6094-4},
  location  = {Bangalore, India},
  pages     = {168--177},
  publisher = {ACM},
  title     = {A simple and practical concurrent non-blocking unbounded graph with linearizable reachability queries},
  doi       = {10.1145/3288599.3288617},
  year      = {2019},
}