@article{703,
  abstract = {We consider the NP-hard problem of MAP-inference for undirected discrete graphical models. We propose a polynomial-time and practically efficient algorithm for finding a part of its optimal solution. Specifically, our algorithm marks some labels of the considered graphical model either as (i) optimal, meaning that they belong to all optimal solutions of the inference problem, or (ii) non-optimal, meaning that they provably do not belong to any optimal solution. With access to an exact solver for a linear programming relaxation of the MAP-inference problem, our algorithm marks the maximal possible (in a specified sense) number of labels. We also present a version of the algorithm that has access only to a suboptimal dual solver and can still guarantee the (non-)optimality of the marked labels, although the overall number of marked labels may decrease. We propose an efficient implementation, which runs in time comparable to a single run of a suboptimal dual solver. Our method scales well and shows state-of-the-art results on computational benchmarks from machine learning and computer vision.},
  author = {Shekhovtsov, Alexander and Swoboda, Paul and Savchynskyy, Bogdan},
  issn = {0162-8828},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number = {7},
  pages = {1668--1682},
  publisher = {IEEE},
  title = {{Maximum persistency via iterative relaxed inference with graphical models}},
  doi = {10.1109/TPAMI.2017.2730884},
  volume = {40},
  year = {2018},
}

@inproceedings{7116,
  abstract = {Training deep learning models has received tremendous research interest recently. In particular, there has been intensive research on reducing the communication cost of training when using multiple computational devices, by reducing the precision of the underlying data representation. Naturally, such methods induce system trade-offs: lowering communication precision can decrease communication overheads and improve scalability, but it can also reduce the accuracy of training. In this paper, we study this trade-off space and ask: Can low-precision communication consistently improve the end-to-end performance of training modern neural networks, with no accuracy loss? From the performance point of view, the answer to this question may appear deceptively easy: compressing communication through low precision should help when the ratio between communication and computation is high. However, this answer is less straightforward when we try to generalize this principle across various neural network architectures (e.g., AlexNet vs. ResNet), numbers of GPUs (e.g., 2 vs. 8 GPUs), machine configurations (e.g., EC2 instances vs. NVIDIA DGX-1), communication primitives (e.g., MPI vs. NCCL), and even different GPU architectures (e.g., Kepler vs. Pascal). Currently, it is not clear how a realistic realization of all these factors maps to the speedup provided by low-precision communication.
In this paper, we conduct an empirical study to answer this question and report the insights.},
  author = {Grubic, Demjan and Tam, Leo and Alistarh, Dan-Adrian and Zhang, Ce},
  booktitle = {Proceedings of the 21st International Conference on Extending Database Technology},
  isbn = {978-3-89318-078-3},
  issn = {2367-2005},
  location = {Vienna, Austria},
  pages = {145--156},
  publisher = {OpenProceedings},
  title = {{Synchronous multi-GPU training for deep learning with low-precision communications: An empirical study}},
  doi = {10.5441/002/EDBT.2018.14},
  year = {2018},
}

@inproceedings{7407,
  abstract = {Proofs of space (PoS) [Dziembowski et al., CRYPTO'15] are proof systems where a prover can convince a verifier that he "wastes" disk space. PoS were introduced as a more ecological and economical replacement for the proofs of work that are currently used to secure blockchains like Bitcoin. In this work we investigate extensions of PoS that allow the prover to embed useful data into the dedicated space, which can later be recovered. Our first contribution is a security proof for the original PoS from CRYPTO'15 in the random oracle model (the original proof applied only to a restricted class of adversaries that store a subset of the data an honest prover would store). When this PoS is instantiated with recent constructions of maximally depth-robust graphs, our proof implies essentially optimal security. As a second contribution we show three different extensions of this PoS in which useful data can be embedded into the space required by the prover. Our security proof for the PoS extends (non-trivially) to these constructions. We discuss how some of these variants can be used as proofs of catalytic space (PoCS), a notion we put forward in this work, which is essentially a PoS where most of the space required by the prover can be used to back up useful data. Finally, we discuss how one of the extensions is a candidate construction for a proof of replication (PoR), a proof system recently suggested in the Filecoin whitepaper.},
  author = {Pietrzak, Krzysztof Z},
  booktitle = {10th Innovations in Theoretical Computer Science Conference (ITCS 2019)},
  isbn = {978-3-95977-095-8},
  issn = {1868-8969},
  location = {San Diego, CA, United States},
  pages = {59:1--59:25},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum für Informatik},
  title = {{Proofs of catalytic space}},
  doi = {10.4230/LIPICS.ITCS.2019.59},
  volume = {124},
  year = {2018},
}

@article{6001,
  abstract = {The concurrent memory reclamation problem is that of devising a way for a deallocating thread to verify that no other concurrent threads hold references to a memory block being deallocated. To date, in the absence of automatic garbage collection, there is no satisfactory solution to this problem; existing tracking methods like hazard pointers, reference counters, or epoch-based techniques like RCU are either prohibitively expensive or require significant programming expertise, to the extent that implementing them efficiently can be worthy of a publication. None of the existing techniques are automatic or even semi-automated. In this article, we take a new approach to concurrent memory reclamation. Instead of manually tracking accesses to memory locations, as done in techniques like hazard pointers, or restricting shared accesses to specific epoch boundaries, as in RCU, our algorithm, called ThreadScan, leverages operating system signaling to automatically detect which memory locations are being accessed by concurrent threads.
Initial empirical evidence shows that ThreadScan scales surprisingly well and requires negligible programming effort beyond the standard use of malloc and free.},
  author = {Alistarh, Dan-Adrian and Leiserson, William and Matveev, Alexander and Shavit, Nir},
  issn = {2329-4949},
  journal = {ACM Transactions on Parallel Computing},
  number = {4},
  publisher = {Association for Computing Machinery},
  title = {{ThreadScan: Automatic and scalable memory reclamation}},
  doi = {10.1145/3201897},
  volume = {4},
  year = {2018},
}

@inproceedings{7812,
  abstract = {Deep neural networks (DNNs) continue to make significant advances, solving tasks from image classification to translation or reinforcement learning. One aspect of the field receiving considerable attention is efficiently executing deep models in resource-constrained environments, such as mobile or embedded devices. This paper focuses on this problem and proposes two new compression methods, which jointly leverage weight quantization and distillation of larger teacher networks into smaller student networks. The first method we propose is called quantized distillation and leverages distillation during the training process, by incorporating distillation loss, expressed with respect to the teacher, into the training of a student network whose weights are quantized to a limited set of levels. The second method, differentiable quantization, optimizes the location of quantization points through stochastic gradient descent, to better fit the behavior of the teacher model. We validate both methods through experiments on convolutional and recurrent architectures. We show that quantized shallow students can reach accuracy levels similar to those of full-precision teacher models, while providing order-of-magnitude compression and inference speedup that is linear in the depth reduction. In sum, our results enable DNNs for resource-constrained environments to leverage architecture and accuracy advances developed on more powerful devices.},
  author = {Polino, Antonio and Pascanu, Razvan and Alistarh, Dan-Adrian},
  booktitle = {6th International Conference on Learning Representations},
  location = {Vancouver, Canada},
  title = {{Model compression via distillation and quantization}},
  year = {2018},
}