@inproceedings{10599,
abstract = {A two-part successive syndrome-check decoding of polar codes is proposed with the first part successively refining the received codeword and the second part checking its syndrome. A new formulation of the successive-cancellation (SC) decoding algorithm is presented that allows for successively refining the received codeword by comparing the log-likelihood ratio value of a frozen bit with its predefined value. The syndrome of the refined received codeword is then checked for possible errors. In case there are no errors, the decoding process is terminated. Otherwise, the decoder continues to refine the received codeword. The proposed method is extended to the case of SC list (SCL) decoding by terminating the decoding process when the syndrome of the best candidate in the list indicates no errors. Simulation results show that the proposed method reduces the time-complexity of SC and SCL decoders and their fast variants, especially at high signal-to-noise ratios.},
author = {Hashemi, Seyyed Ali and Mondelli, Marco and Cioffi, John and Goldsmith, Andrea},
booktitle = {Proceedings of the 55th Asilomar Conference on Signals, Systems, and Computers},
isbn = {9781665458283},
issn = {1058-6393},
location = {Virtual, Pacific Grove, CA, United States},
pages = {943--947},
publisher = {IEEE Signal Processing Society},
title = {{Successive syndrome-check decoding of polar codes}},
doi = {10.1109/IEEECONF53345.2021.9723394},
volume = {2021-October},
year = {2022},
}
@inproceedings{10593,
abstract = {We study the problem of estimating a rank-$1$ signal in the presence of rotationally invariant noise-a class of perturbations more general than Gaussian noise. Principal Component Analysis (PCA) provides a natural estimator, and sharp results on its performance have been obtained in the high-dimensional regime. Recently, an Approximate Message Passing (AMP) algorithm has been proposed as an alternative estimator with the potential to improve the accuracy of PCA. However, the existing analysis of AMP requires an initialization that is both correlated with the signal and independent of the noise, which is often unrealistic in practice. In this work, we combine the two methods, and propose to initialize AMP with PCA. Our main result is a rigorous asymptotic characterization of the performance of this estimator. Both the AMP algorithm and its analysis differ from those previously derived in the Gaussian setting: at every iteration, our AMP algorithm requires a specific term to account for PCA initialization, while in the Gaussian case, PCA initialization affects only the first iteration of AMP. The proof is based on a two-phase artificial AMP that first approximates the PCA estimator and then mimics the true AMP. Our numerical simulations show an excellent agreement between AMP results and theoretical predictions, and suggest an interesting open direction on achieving Bayes-optimal performance.},
author = {Mondelli, Marco and Venkataramanan, Ramji},
booktitle = {35th Conference on Neural Information Processing Systems},
location = {Virtual},
publisher = {NeurIPS},
title = {{PCA initialization for approximate message passing in rotationally invariant models}},
year = {2021},
}
@inproceedings{10594,
abstract = {The question of how and why the phenomenon of mode connectivity occurs in training deep neural networks has gained remarkable attention in the research community. From a theoretical perspective, two possible explanations have been proposed: (i) the loss function has connected sublevel sets, and (ii) the solutions found by stochastic gradient descent are dropout stable. While these explanations provide insights into the phenomenon, their assumptions are not always satisfied in practice. In particular, the first approach requires the network to have one layer with order of N neurons (N being the number of training samples), while the second one requires the loss to be almost invariant after removing half of the neurons at each layer (up to some rescaling of the remaining ones). In this work, we improve both conditions by exploiting the quality of the features at every intermediate layer together with a milder over-parameterization condition. More specifically, we show that: (i) under generic assumptions on the features of intermediate layers, it suffices that the last two hidden layers have order of $\sqrt{N}$ neurons, and (ii) if subsets of features at each layer are linearly separable, then no over-parameterization is needed to show the connectivity. Our experiments confirm that the proposed condition ensures the connectivity of solutions found by stochastic gradient descent, even in settings where the previous requirements do not hold.},
author = {Nguyen, Quynh and Bréchet, Pierre and Mondelli, Marco},
booktitle = {35th Conference on Neural Information Processing Systems},
location = {Virtual},
publisher = {NeurIPS},
title = {{When are solutions connected in deep networks?}},
year = {2021},
}
@inproceedings{10595,
abstract = {A recent line of work has analyzed the theoretical properties of deep neural networks via the Neural Tangent Kernel (NTK). In particular, the smallest eigenvalue of the NTK has been related to the memorization capacity, the global convergence of gradient descent algorithms and the generalization of deep nets. However, existing results either provide bounds in the two-layer setting or assume that the spectrum of the NTK matrices is bounded away from 0 for multi-layer networks. In this paper, we provide tight bounds on the smallest eigenvalue of NTK matrices for deep ReLU nets, both in the limiting case of infinite widths and for finite widths. In the finite-width setting, the network architectures we consider are fairly general: we require the existence of a wide layer with roughly order of $N$ neurons, $N$ being the number of data samples; and the scaling of the remaining layer widths is arbitrary (up to logarithmic factors). To obtain our results, we analyze various quantities of independent interest: we give lower bounds on the smallest singular value of hidden feature matrices, and upper bounds on the Lipschitz constant of input-output feature maps.},
author = {Nguyen, Quynh and Mondelli, Marco and Montufar, Guido F.},
booktitle = {Proceedings of the 38th International Conference on Machine Learning},
editor = {Meila, Marina and Zhang, Tong},
location = {Virtual},
pages = {8119--8129},
publisher = {ML Research Press},
title = {{Tight bounds on the smallest eigenvalue of the neural tangent kernel for deep ReLU networks}},
volume = {139},
year = {2021},
}
@inproceedings{10597,
acknowledgement = {We thank Emmanuel Abbe and Min Ye for providing us the implementation of RPA decoding. D. Fathollahi and M. Mondelli are partially supported by the 2019 Lopez-Loreta Prize. N. Farsad is supported by Discovery Grant from the Natural Sciences and Engineering Research Council of Canada (NSERC) and Canada Foundation for Innovation (CFI), John R. Evans Leader Fund. S. A. Hashemi is supported by a Postdoctoral Fellowship from NSERC.},
author = {Fathollahi, Dorsa and Farsad, Nariman and Hashemi, Seyyed Ali and Mondelli, Marco},
booktitle = {2021 IEEE International Symposium on Information Theory},
isbn = {978-1-5386-8210-4},
location = {Virtual, Melbourne, Australia},
pages = {1082--1087},
publisher = {Institute of Electrical and Electronics Engineers},
title = {{Sparse multi-decoder recursive projection aggregation for Reed-Muller codes}},
doi = {10.1109/isit45174.2021.9517887},
year = {2021},
}
@inproceedings{10598,
abstract = { We consider the problem of estimating a signal from measurements obtained via a generalized linear model. We focus on estimators based on approximate message passing (AMP), a family of iterative algorithms with many appealing features: the performance of AMP in the high-dimensional limit can be succinctly characterized under suitable model assumptions; AMP can also be tailored to the empirical distribution of the signal entries, and for a wide class of estimation problems, AMP is conjectured to be optimal among all polynomial-time algorithms. However, a major issue of AMP is that in many models (such as phase retrieval), it requires an initialization correlated with the ground-truth signal and independent from the measurement matrix. Assuming that such an initialization is available is typically not realistic. In this paper, we solve this problem by proposing an AMP algorithm initialized with a spectral estimator. With such an initialization, the standard AMP analysis fails since the spectral estimator depends in a complicated way on the design matrix. Our main contribution is a rigorous characterization of the performance of AMP with spectral initialization in the high-dimensional limit. The key technical idea is to define and analyze a two-phase artificial AMP algorithm that first produces the spectral estimator, and then closely approximates the iterates of the true AMP. We also provide numerical results that demonstrate the validity of the proposed approach. },
author = {Mondelli, Marco and Venkataramanan, Ramji},
booktitle = {Proceedings of The 24th International Conference on Artificial Intelligence and Statistics},
editor = {Banerjee, Arindam and Fukumizu, Kenji},
issn = {2640-3498},
location = {Virtual, San Diego, CA, United States},
pages = {397--405},
publisher = {ML Research Press},
title = {{Approximate message passing with spectral initialization for generalized linear models}},
volume = {130},
year = {2021},
}
@article{9047,
abstract = {This work analyzes the latency of the simplified successive cancellation (SSC) decoding scheme for polar codes proposed by Alamdar-Yazdi and Kschischang. It is shown that, unlike conventional successive cancellation decoding, where latency is linear in the block length, the latency of SSC decoding is sublinear. More specifically, the latency of SSC decoding is O(N1−1/μ) , where N is the block length and μ is the scaling exponent of the channel, which captures the speed of convergence of the rate to capacity. Numerical results demonstrate the tightness of the bound and show that most of the latency reduction arises from the parallel decoding of subcodes of rate 0 or 1.},
author = {Mondelli, Marco and Hashemi, Seyyed Ali and Cioffi, John M. and Goldsmith, Andrea},
issn = {1558-2248},
journal = {IEEE Transactions on Wireless Communications},
number = {1},
pages = {18--27},
publisher = {IEEE},
title = {{Sublinear latency for simplified successive cancellation decoding of polar codes}},
doi = {10.1109/TWC.2020.3022922},
volume = {20},
year = {2021},
}
@inproceedings{10053,
abstract = {This paper characterizes the latency of the simplified successive-cancellation (SSC) decoding scheme for polar codes under hardware resource constraints. In particular, when the number of processing elements P that can perform SSC decoding operations in parallel is limited, as is the case in practice, the latency of SSC decoding is O(N1−1 μ+NPlog2log2NP), where N is the block length of the code and μ is the scaling exponent of polar codes for the channel. Three direct consequences of this bound are presented. First, in a fully-parallel implementation where P=N2 , the latency of SSC decoding is O(N1−1/μ) , which is sublinear in the block length. This recovers a result from an earlier work. Second, in a fully-serial implementation where P=1 , the latency of SSC decoding scales as O(Nlog2log2N) . The multiplicative constant is also calculated: we show that the latency of SSC decoding when P=1 is given by (2+o(1))Nlog2log2N . Third, in a semi-parallel implementation, the smallest P that gives the same latency as that of the fully-parallel implementation is P=N1/μ . The tightness of our bound on SSC decoding latency and the applicability of the foregoing results is validated through extensive simulations.},
author = {Hashemi, Seyyed Ali and Mondelli, Marco and Fazeli, Arman and Vardy, Alexander and Cioffi, John and Goldsmith, Andrea},
booktitle = {2021 IEEE International Symposium on Information Theory},
isbn = {978-1-5386-8210-4},
issn = {2157-8095},
location = {Melbourne, Australia},
pages = {2369--2374},
publisher = {Institute of Electrical and Electronics Engineers},
title = {{Parallelism versus latency in simplified successive-cancellation decoding of polar codes}},
doi = {10.1109/ISIT45174.2021.9518153},
year = {2021},
}
@article{10364,
abstract = {This paper characterizes the latency of the simplified successive-cancellation (SSC) decoding scheme for polar codes under hardware resource constraints. In particular, when the number of processing elements P that can perform SSC decoding operations in parallel is limited, as is the case in practice, the latency of SSC decoding is O(N1-1/μ + N/P log2 log2 N/P), where N is the block length of the code and μ is the scaling exponent of the channel. Three direct consequences of this bound are presented. First, in a fully-parallel implementation where P = N/2, the latency of SSC decoding is O(N1-1/μ), which is sublinear in the block length. This recovers a result from our earlier work. Second, in a fully-serial implementation where P = 1, the latency of SSC decoding scales as O(N log2 log2 N). The multiplicative constant is also calculated: we show that the latency of SSC decoding when P = 1 is given by (2 + o(1))N log2 log2 N. Third, in a semi-parallel implementation, the smallest P that gives the same latency as that of the fully-parallel implementation is P = N1/μ. The tightness of our bound on SSC decoding latency and the applicability of the foregoing results is validated through extensive simulations.},
author = {Hashemi, Seyyed Ali and Mondelli, Marco and Fazeli, Arman and Vardy, Alexander and Cioffi, John and Goldsmith, Andrea},
issn = {1558-2248},
journal = {IEEE Transactions on Wireless Communications},
publisher = {Institute of Electrical and Electronics Engineers},
title = {{Parallelism versus latency in simplified successive-cancellation decoding of polar codes}},
doi = {10.1109/TWC.2021.3125626},
year = {2021},
}
@article{10211,
abstract = {We study the problem of recovering an unknown signal $x$ given measurements obtained from a generalized linear model with a Gaussian sensing matrix. Two popular solutions are based on a linear estimator $\hat{x}^L$ and a spectral estimator $\hat{x}^s$. The former is a data-dependent linear combination of the columns of the measurement matrix, and its analysis is quite simple. The latter is the principal eigenvector of a data-dependent matrix, and a recent line of work has studied its performance. In this paper, we show how to optimally combine $\hat{x}^L$ and $\hat{x}^s$. At the heart of our analysis is the exact characterization of the empirical joint distribution of $(x, \hat{x}^L, \hat{x}^s)$ in the high-dimensional limit. This allows us to compute the Bayes-optimal combination of $\hat{x}^L$ and $\hat{x}^s$, given the limiting distribution of the signal $x$. When the distribution of the signal is Gaussian, then the Bayes-optimal combination has the form $\theta \hat{x}^L + \hat{x}^s$ and we derive the optimal combination coefficient. In order to establish the limiting distribution of $(x, \hat{x}^L, \hat{x}^s)$, we design and analyze an approximate message passing algorithm whose iterates give $\hat{x}^L$ and approach $\hat{x}^s$. Numerical simulations demonstrate the improvement of the proposed combination with respect to the two methods considered separately.},
author = {Mondelli, Marco and Thrampoulidis, Christos and Venkataramanan, Ramji},
issn = {1615-3375},
journal = {Foundations of Computational Mathematics},
keywords = {Applied Mathematics, Computational Theory and Mathematics, Computational Mathematics, Analysis},
publisher = {Springer},
title = {{Optimal combination of linear and spectral estimators for generalized linear models}},
doi = {10.1007/s10208-021-09531-x},
year = {2021},
}
@inproceedings{8536,
abstract = {This work analyzes the latency of the simplified successive cancellation (SSC) decoding scheme for polar codes proposed by Alamdar-Yazdi and Kschischang. It is shown that, unlike conventional successive cancellation decoding, where latency is linear in the block length, the latency of SSC decoding is sublinear. More specifically, the latency of SSC decoding is O(N 1−1/µ ), where N is the block length and µ is the scaling exponent of the channel, which captures the speed of convergence of the rate to capacity. Numerical results demonstrate the tightness of the bound and show that most of the latency reduction arises from the parallel decoding of subcodes of rate 0 and 1.},
author = {Mondelli, Marco and Hashemi, Seyyed Ali and Cioffi, John and Goldsmith, Andrea},
booktitle = {IEEE International Symposium on Information Theory - Proceedings},
isbn = {9781728164328},
issn = {2157-8095},
location = {Los Angeles, CA, United States},
publisher = {IEEE},
title = {{Simplified successive cancellation decoding of polar codes has sublinear latency}},
doi = {10.1109/ISIT44484.2020.9174141},
volume = {2020-June},
year = {2020},
}
@article{6748,
abstract = {Fitting a function by using linear combinations of a large number N of `simple' components is one of the most fruitful ideas in statistical learning. This idea lies at the core of a variety of methods, from two-layer neural networks to kernel regression, to boosting. In general, the resulting risk minimization problem is non-convex and is solved by gradient descent or its variants. Unfortunately, little is known about global convergence properties of these approaches.
Here we consider the problem of learning a concave function f on a compact convex domain Ω⊆ℝd, using linear combinations of `bump-like' components (neurons). The parameters to be fitted are the centers of N bumps, and the resulting empirical risk minimization problem is highly non-convex. We prove that, in the limit in which the number of neurons diverges, the evolution of gradient descent converges to a Wasserstein gradient flow in the space of probability distributions over Ω. Further, when the bump width δ tends to 0, this gradient flow has a limit which is a viscous porous medium equation. Remarkably, the cost function optimized by this gradient flow exhibits a special property known as displacement convexity, which implies exponential convergence rates for N→∞, δ→0. Surprisingly, this asymptotic theory appears to capture well the behavior for moderate values of δ,N. Explaining this phenomenon, and understanding the dependence on δ,N in a quantitative manner remains an outstanding challenge.},
author = {Javanmard, Adel and Mondelli, Marco and Montanari, Andrea},
journal = {Annals of Statistics},
number = {6},
pages = {3619--3642},
publisher = {Project Euclid},
title = {{Analysis of a two-layer neural network via displacement convexity}},
doi = {10.1214/20-AOS1945},
volume = {48},
year = {2020},
}
@article{9002,
abstract = { We prove that, for the binary erasure channel (BEC), the polar-coding paradigm gives rise to codes that not only approach the Shannon limit but do so under the best possible scaling of their block length as a function of the gap to capacity. This result exhibits the first known family of binary codes that attain both optimal scaling and quasi-linear complexity of encoding and decoding. Our proof is based on the construction and analysis of binary polar codes with large kernels. When communicating reliably at rates within ε>0 of capacity, the code length n often scales as O(1/εμ), where the constant μ is called the scaling exponent. It is known that the optimal scaling exponent is μ=2, and it is achieved by random linear codes. The scaling exponent of conventional polar codes (based on the 2×2 kernel) on the BEC is μ=3.63. This falls far short of the optimal scaling guaranteed by random codes. Our main contribution is a rigorous proof of the following result: for the BEC, there exist ℓ×ℓ binary kernels, such that polar codes constructed from these kernels achieve scaling exponent μ(ℓ) that tends to the optimal value of 2 as ℓ grows. We furthermore characterize precisely how large ℓ needs to be as a function of the gap between μ(ℓ) and 2. The resulting binary codes maintain the recursive structure of conventional polar codes, and thereby achieve construction complexity O(n) and encoding/decoding complexity O(nlogn).},
author = {Fazeli, Arman and Hassani, Hamed and Mondelli, Marco and Vardy, Alexander},
issn = {1557-9654},
journal = {IEEE Transactions on Information Theory},
number = {9},
pages = {5693--5710},
publisher = {IEEE},
title = {{Binary linear codes with optimal scaling: Polar codes with large kernels}},
doi = {10.1109/TIT.2020.3038806},
volume = {67},
year = {2021},
}
@inproceedings{9198,
abstract = {The optimization of multilayer neural networks typically leads to a solution
with zero training error, yet the landscape can exhibit spurious local minima
and the minima can be disconnected. In this paper, we shed light on this
phenomenon: we show that the combination of stochastic gradient descent (SGD)
and over-parameterization makes the landscape of multilayer neural networks
approximately connected and thus more favorable to optimization. More
specifically, we prove that SGD solutions are connected via a piecewise linear
path, and the increase in loss along this path vanishes as the number of
neurons grows large. This result is a consequence of the fact that the
parameters found by SGD are increasingly dropout stable as the network becomes
wider. We show that, if we remove part of the neurons (and suitably rescale the
remaining ones), the change in loss is independent of the total number of
neurons, and it depends only on how many neurons are left. Our results exhibit
a mild dependence on the input dimension: they are dimension-free for two-layer
networks and depend linearly on the dimension for multilayer networks. We
validate our theoretical findings with numerical experiments for different
architectures and classification tasks.},
author = {Shevchenko, Alexander and Mondelli, Marco},
booktitle = {Proceedings of the 37th International Conference on Machine Learning},
pages = {8773--8784},
publisher = {Proceedings of Machine Learning Research},
title = {{Landscape connectivity and dropout stability of SGD solutions for over-parameterized neural networks}},
volume = {119},
year = {2020},
}
@inproceedings{9221,
abstract = {Recent works have shown that gradient descent can find a global minimum for over-parameterized neural networks where the widths of all the hidden layers scale polynomially with $N$ ($N$ being the number of training samples). In this paper, we prove that, for deep networks, a single layer of width $N$ following the input layer suffices to ensure a similar guarantee. In particular, all the remaining layers are allowed to have constant widths, and form a pyramidal topology. We show an application of our result to the widely used LeCun’s initialization and obtain an over-parameterization requirement for the single wide layer of order $N^2$.
},
author = {Nguyen, Quynh and Mondelli, Marco},
booktitle = {34th Conference on Neural Information Processing Systems},
location = {Vancouver, Canada},
pages = {11961--11972},
publisher = {Curran Associates},
title = {{Global convergence of deep networks with one wide layer followed by pyramidal topology}},
volume = {33},
year = {2020},
}
@article{7007,
abstract = {We consider the primitive relay channel, where the source sends a message to the relay and to the destination, and the relay helps the communication by transmitting an additional message to the destination via a separate channel. Two well-known coding techniques have been introduced for this setting: decode-and-forward and compress-and-forward. In decode-and-forward, the relay completely decodes the message and sends some information to the destination; in compress-and-forward, the relay does not decode, and it sends a compressed version of the received signal to the destination using Wyner–Ziv coding. In this paper, we present a novel coding paradigm that provides an improved achievable rate for the primitive relay channel. The idea is to combine compress-and-forward and decode-and-forward via a chaining construction. We transmit over pairs of blocks: in the first block, we use compress-and-forward; and, in the second block, we use decode-and-forward. More specifically, in the first block, the relay does not decode, it compresses the received signal via Wyner–Ziv, and it sends only part of the compression to the destination. In the second block, the relay completely decodes the message, it sends some information to the destination, and it also sends the remaining part of the compression coming from the first block. By doing so, we are able to strictly outperform both compress-and-forward and decode-and-forward. Note that the proposed coding scheme can be implemented with polar codes. As such, it has the typical attractive properties of polar coding schemes, namely, quasi-linear encoding and decoding complexity, and error probability that decays at super-polynomial speed. As a running example, we take into account the special case of the erasure relay channel, and we provide a comparison between the rates achievable by our proposed scheme and the existing upper and lower bounds.},
author = {Mondelli, Marco and Hassani, S. Hamed and Urbanke, Rüdiger},
issn = {1999-4893},
journal = {Algorithms},
number = {10},
publisher = {MDPI},
title = {{A new coding paradigm for the primitive relay channel}},
doi = {10.3390/a12100218},
volume = {12},
year = {2019},
}
@article{6662,
abstract = {In phase retrieval, we want to recover an unknown signal 𝑥∈ℂ𝑑 from n quadratic measurements of the form 𝑦𝑖=|⟨𝑎𝑖,𝑥⟩|2+𝑤𝑖, where 𝑎𝑖∈ℂ𝑑 are known sensing vectors and 𝑤𝑖 is measurement noise. We ask the following weak recovery question: What is the minimum number of measurements n needed to produce an estimator 𝑥^(𝑦) that is positively correlated with the signal 𝑥? We consider the case of Gaussian vectors 𝑎𝑎𝑖. We prove that—in the high-dimensional limit—a sharp phase transition takes place, and we locate the threshold in the regime of vanishingly small noise. For 𝑛≤𝑑−𝑜(𝑑), no estimator can do significantly better than random and achieve a strictly positive correlation. For 𝑛≥𝑑+𝑜(𝑑), a simple spectral estimator achieves a positive correlation. Surprisingly, numerical simulations with the same spectral estimator demonstrate promising performance with realistic sensing matrices. Spectral methods are used to initialize non-convex optimization algorithms in phase retrieval, and our approach can boost the performance in this setting as well. Our impossibility result is based on classical information-theoretic arguments. The spectral algorithm computes the leading eigenvector of a weighted empirical covariance matrix. We obtain a sharp characterization of the spectral properties of this random matrix using tools from free probability and generalizing a recent result by Lu and Li. Both the upper bound and lower bound generalize beyond phase retrieval to measurements 𝑦𝑖 produced according to a generalized linear model. As a by-product of our analysis, we compare the threshold of the proposed spectral method with that of a message passing algorithm.},
author = {Mondelli, Marco and Montanari, Andrea},
issn = {1615-3383},
journal = {Foundations of Computational Mathematics},
number = {3},
pages = {703--773},
publisher = {Springer},
title = {{Fundamental limits of weak recovery with applications to phase retrieval}},
doi = {10.1007/s10208-018-9395-y},
volume = {19},
year = {2019},
}
@article{6663,
abstract = {Consider the problem of constructing a polar code of block length N for a given transmission channel W. Previous approaches require one to compute the reliability of the N synthetic channels and then use only those that are sufficiently reliable. However, we know from two independent works by Schürch and by Bardet et al. that the synthetic channels are partially ordered with respect to degradation. Hence, it is natural to ask whether the partial order can be exploited to reduce the computational burden of the construction problem. We show that, if we take advantage of the partial order, we can construct a polar code by computing the reliability of roughly a fraction 1/ log 3/2 N of the synthetic channels. In particular, we prove that N/ log 3/2 N is a lower bound on the number of synthetic channels to be considered and such a bound is tight up to a multiplicative factor log log N. This set of roughly N/ log 3/2 N synthetic channels is universal, in the sense that it allows one to construct polar codes for any W, and it can be identified by solving a maximum matching problem on a bipartite graph. Our proof technique consists of reducing the construction problem to the problem of computing the maximum cardinality of an antichain for a suitable partially ordered set. As such, this method is general, and it can be used to further improve the complexity of the construction problem, in case a refined partial order on the synthetic channels of polar codes is discovered.},
author = {Mondelli, Marco and Hassani, Hamed and Urbanke, Rudiger},
journal = {IEEE Transactions on Information Theory},
number = {5},
pages = {2782--2791},
publisher = {IEEE},
title = {{Construction of polar codes with sublinear complexity}},
doi = {10.1109/tit.2018.2889667},
volume = {65},
year = {2019},
}
@inproceedings{6747,
abstract = {We establish connections between the problem of learning a two-layer neural network and tensor decomposition. We consider a model with feature vectors x∈ℝd, r hidden units with weights {wi}1≤i≤r and output y∈ℝ, i.e., y=∑ri=1σ(w𝖳ix), with activation functions given by low-degree polynomials. In particular, if σ(x)=a0+a1x+a3x3, we prove that no polynomial-time learning algorithm can outperform the trivial predictor that assigns to each example the response variable 𝔼(y), when d3/2≪r≪d2. Our conclusion holds for a `natural data distribution', namely standard Gaussian feature vectors x, and output distributed according to a two-layer neural network with random isotropic weights, and under a certain complexity-theoretic assumption on tensor decomposition. Roughly speaking, we assume that no polynomial-time algorithm can substantially outperform current methods for tensor decomposition based on the sum-of-squares hierarchy. We also prove generalizations of this statement for higher degree polynomial activations, and non-random weight vectors. Remarkably, several existing algorithms for learning two-layer networks with rigorous guarantees are based on tensor decomposition. Our results support the idea that this is indeed the core computational difficulty in learning such networks, under the stated generative model for the data. As a side result, we show that under this model learning the network requires accurate learning of its weights, a property that does not hold in a more general setting. },
author = {Mondelli, Marco and Montanari, Andrea},
booktitle = {Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics},
location = {Naha, Okinawa, Japan},
pages = {1051--1060},
publisher = {Proceedings of Machine Learning Research},
title = {{On the connection between learning two-layers neural networks and tensor decomposition}},
volume = {89},
year = {2019},
}
@article{6750,
abstract = {Polar codes have gained extensive attention during the past few years and recently they have been selected for the next generation of wireless communications standards (5G). Successive-cancellation-based (SC-based) decoders, such as SC list (SCL) and SC flip (SCF), provide a reasonable error performance for polar codes at the cost of low decoding speed. Fast SC-based decoders, such as Fast-SSC, Fast-SSCL, and Fast-SSCF, identify the special constituent codes in a polar code graph off-line, produce a list of operations, store the list in memory, and feed the list to the decoder to decode the constituent codes in order efficiently, thus increasing the decoding speed. However, the list of operations is dependent on the code rate and as the rate changes, a new list is produced, making fast SC-based decoders not rate-flexible. In this paper, we propose a completely rate-flexible fast SC-based decoder by creating the list of operations directly in hardware, with low implementation complexity. We further propose a hardware architecture implementing the proposed method and show that the area occupation of the rate-flexible fast SC-based decoder in this paper is only 38% of the total area of the memory-based base-line decoder when 5G code rates are supported. },
author = {Hashemi, Seyyed Ali and Condo, Carlo and Mondelli, Marco and Gross, Warren J.},
issn = {1053-587X},
journal = {IEEE Transactions on Signal Processing},
number = {22},
publisher = {IEEE},
title = {{Rate-flexible fast polar decoders}},
doi = {10.1109/TSP.2019.2944738},
volume = {67},
year = {2019},
}
@inproceedings{6664,
abstract = {Reed-Muller (RM) and polar codes are a class of capacity-achieving channel coding schemes with the same factor graph representation. Low-complexity decoding algorithms fall short in providing a good error-correction performance for RM and polar codes. Using the symmetric group of RM and polar codes, the specific decoding algorithm can be carried out on multiple permutations of the factor graph to boost the error-correction performance. However, this approach results in high decoding complexity. In this paper, we first derive the total number of factor graph permutations on which the decoding can be performed. We further propose a successive permutation (SP) scheme which finds the permutations on the fly, thus the decoding always progresses on a single factor graph permutation. We show that SP can be used to improve the error-correction performance of RM and polar codes under successive-cancellation (SC) and SC list (SCL) decoding, while keeping the memory requirements of the decoders unaltered. Our results for RM and polar codes of length 128 and rate 0.5 show that when SP is used and at a target frame error rate of 10 -4 , up to 0.5 dB and 0.1 dB improvement can be achieved for RM and polar codes respectively.},
author = {Hashemi, Seyyed Ali and Doan, Nghia and Mondelli, Marco and Gross, Warren},
booktitle = {2018 IEEE 10th International Symposium on Turbo Codes \& Iterative Information Processing},
location = {Hong Kong, China},
pages = {1--5},
publisher = {IEEE},
title = {{Decoding Reed-Muller and polar codes by successive factor graph permutations}},
doi = {10.1109/istc.2018.8625281},
year = {2018},
}
@inproceedings{6665,
abstract = {We prove that, at least for the binary erasure channel, the polar-coding paradigm gives rise to codes that not only approach the Shannon limit but, in fact, do so under the best possible scaling of their block length as a function of the gap to capacity. This result exhibits the first known family of binary codes that attain both optimal scaling and quasi-linear complexity of encoding and decoding. Specifically, for any fixed δ > 0, we exhibit binary linear codes that ensure reliable communication at rates within ε > 0 of capacity with block length n = O(1/ε 2+δ ), construction complexity Θ(n), and encoding/decoding complexity Θ(n log n).},
author = {Fazeli, Arman and Hassani, Hamed and Mondelli, Marco and Vardy, Alexander},
booktitle = {2018 IEEE Information Theory Workshop},
location = {Guangzhou, China},
pages = {1--5},
publisher = {IEEE},
title = {{Binary linear codes with optimal scaling: Polar codes with large kernels}},
doi = {10.1109/itw.2018.8613428},
year = {2018},
}
@article{6674,
abstract = {Polar codes represent one of the major recent breakthroughs in coding theory and, because of their attractive features, they have been selected for the incoming 5G standard. As such, a lot of attention has been devoted to the development of decoding algorithms with good error performance and efficient hardware implementation. One of the leading candidates in this regard is represented by successive-cancellation list (SCL) decoding. However, its hardware implementation requires a large amount of memory. Recently, a partitioned SCL (PSCL) decoder has been proposed to significantly reduce the memory consumption. In this paper, we consider the paradigm of PSCL decoding from a practical standpoint, and we provide several improvements. First, by changing the target signal-to-noise ratio and consequently modifying the construction of the code, we are able to improve the performance at no additional computational, latency, or memory cost. Second, we bridge the performance gap between SCL and PSCL decoding by introducing a generalized PSCL decoder and a layered PSCL decoder. In this way, we obtain almost the same performance of the SCL decoder with a significantly lower memory requirement, as testified by hardware implementation results. Third, we present an optimal scheme to allocate cyclic redundancy checks. Finally, we provide a lower bound on the list size that guarantees optimal maximum a posteriori performance for the binary erasure channel.},
author = {Hashemi, Seyyed Ali and Mondelli, Marco and Hassani, S. Hamed and Condo, Carlo and Urbanke, Rudiger L. and Gross, Warren J.},
issn = {1558-0857},
journal = {IEEE Transactions on Communications},
number = {9},
pages = {3749--3759},
publisher = {IEEE},
title = {{Decoder partitioning: Towards practical list decoding of polar codes}},
doi = {10.1109/tcomm.2018.2832207},
volume = {66},
year = {2018},
}
@inproceedings{6675,
abstract = {We present a coding paradigm that provides a new achievable rate for the primitive relay channel by combining compress-and-forward and decode-and-forward with a chaining construction. In the primitive relay channel model, the source broadcasts a message to the relay and to the destination; and the relay facilitates this communication by sending an additional message to the destination through a separate channel. Two well-known coding approaches for this setting are decode-and-forward and compress-and-forward: in the former, the relay decodes the message and sends some of the information to the destination; in the latter, the relay does not attempt to decode, but it sends a compressed description of the received sequence to the destination via Wyner-Ziv coding. In our scheme, we transmit over pairs of blocks and we use compress-and-forward for the first block and decode-and-forward for the second. In particular, in the first block, the relay does not attempt to decode and it sends only a part of the compressed description of the received sequence; in the second block, the relay decodes the message and sends this information plus the remaining part of the compressed sequence relative to the first block. As a result, we strictly outperform both compress-and- forward and decode-and-forward. Furthermore, this paradigm can be implemented with a low-complexity polar coding scheme that has the typical attractive features of polar codes, i.e., quasi-linear encoding/decoding complexity and super-polynomial decay of the error probability. Throughout the paper we consider as a running example the special case of the erasure relay channel and we compare the rates achievable by our proposed scheme with the existing upper and lower bounds.},
author = {Mondelli, Marco and Hassani, Hamed and Urbanke, Rudiger},
booktitle = {2018 IEEE International Symposium on Information Theory},
issn = {2157-8117},
location = {Vail, CO, United States},
pages = {351--355},
publisher = {IEEE},
title = {{A new coding paradigm for the primitive relay channel}},
doi = {10.1109/isit.2018.8437479},
year = {2018},
}
@article{6678,
abstract = {We survey coding techniques that enable reliable transmission at rates that approach the capacity of an arbitrary discrete memoryless channel. In particular, we take the point of view of modern coding theory and discuss how recent advances in coding for symmetric channels help provide more efficient solutions for the asymmetric case. We consider, in more detail, three basic coding paradigms. The first one is Gallager's scheme that consists of concatenating a linear code with a non-linear mapping so that the input distribution can be appropriately shaped. We explicitly show that both polar codes and spatially coupled codes can be employed in this scenario. Furthermore, we derive a scaling law between the gap to capacity, the cardinality of the input and output alphabets, and the required size of the mapper. The second one is an integrated scheme in which the code is used both for source coding, in order to create codewords distributed according to the capacity-achieving input distribution, and for channel coding, in order to provide error protection. Such a technique has been recently introduced by Honda and Yamamoto in the context of polar codes, and we show how to apply it also to the design of sparse graph codes. The third paradigm is based on an idea of Böcherer and Mathar, and separates the two tasks of source coding and channel coding by a chaining construction that binds together several codewords. We present conditions for the source code and the channel code, and we describe how to combine any source code with any channel code that fulfill those conditions, in order to provide capacity-achieving schemes for asymmetric channels. In particular, we show that polar codes, spatially coupled codes, and homophonic codes are suitable as basic building blocks of the proposed coding strategy. 
Rather than focusing on the exact details of the schemes, the purpose of this tutorial is to present different coding techniques that can then be implemented with many variants. There is no absolute winner and, in order to understand the most suitable technique for a specific application scenario, we provide a detailed comparison that takes into account several performance metrics.},
author = {Mondelli, Marco and Hassani, Hamed and Urbanke, Rudiger},
issn = {0018-9448},
journal = {IEEE Transactions on Information Theory},
number = {5},
pages = {3371--3393},
publisher = {IEEE},
title = {{How to achieve the capacity of asymmetric channels}},
doi = {10.1109/tit.2018.2789885},
volume = {64},
year = {2018},
}
@inproceedings{6728,
abstract = {Polar codes are a channel coding scheme for the next generation of wireless communications standard (5G). The belief propagation (BP) decoder allows for parallel decoding of polar codes, making it suitable for high throughput applications. However, the error-correction performance of polar codes under BP decoding is far from the requirements of 5G. It has been shown that the error-correction performance of BP can be improved if the decoding is performed on multiple permuted factor graphs of polar codes. However, a different BP decoding scheduling is required for each factor graph permutation which results in the design of a different decoder for each permutation. Moreover, the selection of the different factor graph permutations is at random, which prevents the decoder to achieve a desirable error correction performance with a small number of permutations. In this paper, we first show that the permutations on the factor graph can be mapped into suitable permutations on the codeword positions. As a result, we can make use of a single decoder for all the permutations. In addition, we introduce a method to construct a set of predetermined permutations which can provide the correct codeword if the decoding fails on the original permutation. We show that for the 5G polar code of length 1024, the error-correction performance of the proposed decoder is more than 0.25 dB better than that of the BP decoder with the same number of random permutations at the frame error rate of 10 -4 .},
author = {Doan, Nghia and Hashemi, Seyyed Ali and Mondelli, Marco and Gross, Warren J.},
booktitle = {2018 IEEE Global Communications Conference},
isbn = {9781538647271},
location = {Abu Dhabi, United Arab Emirates},
publisher = {IEEE},
title = {{On the decoding of polar codes on permuted factor graphs}},
doi = {10.1109/glocom.2018.8647308},
year = {2018},
}
@inproceedings{6679,
abstract = {Polar codes represent one of the major recent breakthroughs in coding theory and, because of their attractive features, they have been selected for the incoming 5G standard. As such, a lot of attention has been devoted to the development of decoding algorithms with good error performance and efficient hardware implementation. One of the leading candidates in this regard is represented by successive-cancellation list (SCL) decoding. However, its hardware implementation requires a large amount of memory. Recently, a partitioned SCL (PSCL) decoder has been proposed to significantly reduce the memory consumption [1]. In this paper, we examine the paradigm of PSCL decoding from both theoretical and practical standpoints: (i) by changing the construction of the code, we are able to improve the performance at no additional computational, latency or memory cost, (ii) we present an optimal scheme to allocate cyclic redundancy checks (CRCs), and (iii) we provide an upper bound on the list size that allows MAP performance.},
author = {Hashemi, Seyyed Ali and Mondelli, Marco and Hassani, Hamed and Urbanke, Ruediger and Gross, Warren},
booktitle = {2017 IEEE Global Communications Conference},
location = {Singapore, Singapore},
pages = {1--7},
publisher = {IEEE},
title = {{Partitioned list decoding of polar codes: Analysis and improvement of finite length performance}},
doi = {10.1109/glocom.2017.8254940},
year = {2017},
}
@inproceedings{6729,
abstract = {Consider the problem of constructing a polar code of block length N for the transmission over a given channel W. Typically this requires to compute the reliability of all the N synthetic channels and then to include those that are sufficiently reliable. However, we know from [1], [2] that there is a partial order among the synthetic channels. Hence, it is natural to ask whether we can exploit it to reduce the computational burden of the construction problem. We show that, if we take advantage of the partial order [1], [2], we can construct a polar code by computing the reliability of roughly N/ log 3/2 N synthetic channels. Such a set of synthetic channels is universal, in the sense that it allows one to construct polar codes for any W, and it can be identified by solving a maximum matching problem on a bipartite graph. Our proof technique consists in reducing the construction problem to the problem of computing the maximum cardinality of an antichain for a suitable partially ordered set. As such, this method is general and it can be used to further improve the complexity of the construction problem in case a new partial order on the synthetic channels of polar codes is discovered.},
author = {Mondelli, Marco and Hassani, S. Hamed and Urbanke, Rudiger},
booktitle = {2017 IEEE International Symposium on Information Theory},
isbn = {9781509040964},
issn = {2157-8117},
location = {Aachen, Germany},
pages = {1853--1857},
publisher = {IEEE},
title = {{Construction of polar codes with sublinear complexity}},
doi = {10.1109/isit.2017.8006850},
year = {2017},
}
@article{6730,
abstract = {We introduce a new approach to proving that a sequence of deterministic linear codes achieves capacity on an erasure channel under maximum a posteriori decoding. Rather than relying on the precise structure of the codes, our method exploits code symmetry. In particular, the technique applies to any sequence of linear codes where the blocklengths are strictly increasing, the code rates converge, and the permutation group of each code is doubly transitive. In other words, we show that symmetry alone implies near-optimal performance. An important consequence of this result is that a sequence of Reed-Muller codes with increasing block length and converging rate achieves capacity. This possibility has been suggested previously in the literature but it has only been proven for cases where the limiting code rate is 0 or 1. Moreover, these results extend naturally to all affine-invariant codes and, thus, to extended primitive narrow-sense BCH codes. This also resolves, in the affirmative, the existence question for capacity-achieving sequences of binary cyclic codes. The primary tools used in the proof are the sharp threshold property for symmetric monotone Boolean functions and the area theorem for extrinsic information transfer functions.},
author = {Kudekar, Shrinivas and Kumar, Santhosh and Mondelli, Marco and Pfister, Henry D. and Şaşoğlu, Eren and Urbanke, Rudiger L.},
issn = {1557-9654},
journal = {IEEE Transactions on Information Theory},
number = {7},
pages = {4298--4316},
publisher = {IEEE},
title = {{Reed-Muller codes achieve capacity on erasure channels}},
doi = {10.1109/tit.2017.2673829},
volume = {63},
year = {2017},
}
@inproceedings{6731,
abstract = {We present a rate-compatible polar coding scheme that achieves the capacity of any family of channels. Our solution generalizes the previous results [1], [2] that provide capacity-achieving rate-compatible polar codes for a degraded family of channels. The motivation for our extension comes from the fact that in many practical scenarios, e.g., MIMO systems and non-Gaussian interference, the channels cannot be ordered by degradation. The main technical contribution of this paper consists in removing the degradation condition. To do so, we exploit the ideas coming from the construction of universal polar codes. Our scheme possesses the usual attractive features of polar codes: low complexity code construction, encoding, and decoding; super-polynomial scaling of the error probability with the block length; and absence of error floors. On the negative side, the scaling of the gap to capacity with the block length is slower than in standard polar codes, and we prove an upper bound on the scaling exponent.},
author = {Mondelli, Marco and Hassani, Hamed and Maric, Ivana and Hui, Dennis and Hong, Song-Nam},
booktitle = {2017 IEEE Wireless Communications and Networking Conference Workshops},
isbn = {9781509059089},
location = {San Francisco, CA, USA},
publisher = {IEEE},
title = {{Capacity-achieving rate-compatible polar codes for general channels}},
doi = {10.1109/wcncw.2017.7919107},
year = {2017},
}
@article{6732,
abstract = {Consider the transmission of a polar code of block length N and rate R over a binary memoryless symmetric channel W and let P_e be the block error probability under successive cancellation decoding. In this paper, we develop new bounds that characterize the relationship of the parameters R, N, P_e, and the quality of the channel W quantified by its capacity I(W) and its Bhattacharyya parameter Z(W). In previous work, two main regimes were studied. In the error exponent regime, the channel W and the rate R < I(W) are fixed, and it was proved that the error probability P_e scales roughly as 2^{-√N}. In the scaling exponent approach, the channel W and the error probability P_e are fixed and it was proved that the gap to capacity I(W) - R scales as N^{-1/μ}. Here, μ is called scaling exponent and this scaling exponent depends on the channel W. A heuristic computation for the binary erasure channel (BEC) gives μ = 3.627 and it was shown that, for any channel W, 3.579 ≤ μ ≤ 5.702. Our contributions are as follows. First, we provide the tighter upper bound μ ≤ 4.714 valid for any W. With the same technique, we obtain the upper bound μ ≤ 3.639 for the case of the BEC; this upper bound approaches very closely the heuristically derived value for the scaling exponent of the erasure channel. Second, we develop a trade-off between the gap to capacity I(W) - R and the error probability P_e as the functions of the block length N. In other words, we neither fix the gap to capacity (error exponent regime) nor the error probability (scaling exponent regime), but we do consider a moderate deviations regime in which we study how fast both quantities, as the functions of the block length N, simultaneously go to 0. Third, we prove that polar codes are not affected by error floors. To do so, we fix a polar code of block length N and rate R. Then, we vary the channel W and study the impact of this variation on the error probability.
We show that the error probability P_e scales as the Bhattacharyya parameter Z(W) raised to a power that scales roughly like √N. This agrees with the scaling in the error exponent regime.},
author = {Mondelli, Marco and Hassani, S. Hamed and Urbanke, Rudiger L.},
issn = {1557-9654},
journal = {IEEE Transactions on Information Theory},
number = {12},
pages = {6698--6712},
publisher = {IEEE},
title = {{Unified scaling of polar codes: Error exponent, scaling exponent, moderate deviations, and error floors}},
doi = {10.1109/tit.2016.2616117},
volume = {62},
year = {2016},
}
@inproceedings{6733,
abstract = {The question whether RM codes are capacity-achieving is a long-standing open problem in coding theory that was recently answered in the affirmative for transmission over erasure channels [1], [2]. Remarkably, the proof does not rely on specific properties of RM codes, apart from their symmetry. Indeed, the main technical result consists in showing that any sequence of linear codes, with doubly-transitive permutation groups, achieves capacity on the memoryless erasure channel under bit-MAP decoding. Thus, a natural question is what happens under block-MAP decoding. In [1], [2], by exploiting further symmetries of the code, the bit-MAP threshold was shown to be sharp enough so that the block erasure probability also converges to 0. However, this technique relies heavily on the fact that the transmission is over an erasure channel. We present an alternative approach to strengthen results regarding the bit-MAP threshold to block-MAP thresholds. This approach is based on a careful analysis of the weight distribution of RM codes. In particular, the flavor of the main result is the following: assume that the bit-MAP error probability decays as N -δ , for some δ > 0. Then, the block-MAP error probability also converges to 0. This technique applies to transmission over any binary memoryless symmetric channel. Thus, it can be thought of as a first step in extending the proof that RM codes are capacity-achieving to the general case.},
author = {Kudekar, Shrinivas and Kumar, Santhosh and Mondelli, Marco and Pfister, Henry D. and Urbanke, Rudiger},
booktitle = {2016 IEEE International Symposium on Information Theory},
location = {Barcelona, Spain},
pages = {1755--1759},
publisher = {IEEE},
title = {{Comparing the bit-MAP and block-MAP decoding thresholds of Reed-Muller codes on BMS channels}},
doi = {10.1109/isit.2016.7541600},
year = {2016},
}
@inproceedings{6770,
abstract = {We describe a new method to compare the bit-MAP and block-MAP decoding thresholds of Reed-Muller (RM) codes for transmission over a binary memoryless symmetric channel. The question whether RM codes are capacity-achieving is a long-standing open problem in coding theory and it has recently been answered in the affirmative for transmission over
erasure channels. Remarkably, the proof does not rely on specific properties of RM codes, apart from their symmetry. Indeed, the main technical result consists in showing that any sequence of linear codes, with doubly-transitive permutation groups, achieves capacity on the memoryless erasure channel under bit-MAP decoding. A natural question is what happens under block-MAP decoding. If the minimum distance of the code family is close to linear (e.g., of order N/ log(N)), then one can combine an upper bound on the bit-MAP error probability with a lower bound on the minimum distance to show that the code family is also capacity-achieving under block-MAP decoding. This strategy is successful for BCH codes. Unfortunately, the minimum distance of RM codes scales only as √N, which does not suffice to obtain the desired result. Then, one can exploit further symmetries of RM codes to show that the bit-MAP threshold is sharp enough so that the block erasure probability also tends to 0. However, this technique relies heavily on the fact that the transmission is over an erasure channel.
We present an alternative approach to strengthen results regarding the bit-MAP threshold to block-MAP thresholds. This approach is based on a careful analysis of the weight distribution of RM codes. In particular, the flavor of the main result is the following: assume that the bit-MAP error probability decays as N−δ, for some δ > 0. Then, the block-MAP
error probability also converges to 0. This technique applies to the transmission over any binary memoryless symmetric channel. Thus, it can be thought of as a first step in extending the proof that RM codes are capacity-achieving to the general case.},
author = {Mondelli, Marco and Kudekar, Shrinivas and Kumar, Santhosh and Pfister, Henry D. and Şaşoğlu, Eren and Urbanke, Rüdiger},
booktitle = {24th International Zurich Seminar on Communications},
location = {Zurich, Switzerland},
pages = {50},
publisher = {ETH Zürich},
title = {{Reed-Muller codes: Thresholds and weight distribution}},
doi = {10.3929/ETHZ-A-010646484},
year = {2016},
}
@article{6736,
abstract = {Motivated by the significant performance gains which polar codes experience under successive cancellation list decoding, their scaling exponent is studied as a function of the list size. In particular, the error probability is fixed, and the tradeoff between the block length and back-off from capacity is analyzed. A lower bound is provided on the error probability under MAP decoding with list size L for any binary-input memoryless output-symmetric channel and for any class of linear codes such that their minimum distance is unbounded as the block length grows large. Then, it is shown that under MAP decoding, although the introduction of a list can significantly improve the involved constants, the scaling exponent itself, i.e., the speed at which capacity is approached, stays unaffected for any finite list size. In particular, this result applies to polar codes, since their minimum distance tends to infinity as the block length increases. A similar result is proved for genie-aided successive cancellation decoding when transmission takes place over the binary erasure channel, namely, the scaling exponent remains constant for any fixed number of helps from the genie. Note that since genie-aided successive cancellation decoding might be strictly worse than successive cancellation list decoding, the problem of establishing the scaling exponent of the latter remains open.},
author = {Mondelli, Marco and Hassani, Hamed and Urbanke, Rudiger},
journal = {IEEE Transactions on Information Theory},
number = {9},
pages = {4838--4851},
publisher = {IEEE},
title = {{Scaling exponent of list decoders with applications to polar codes}},
doi = {10.1109/tit.2015.2453315},
volume = {61},
year = {2015},
}
@article{6737,
abstract = {This paper presents polar coding schemes for the two-user discrete memoryless broadcast channel (DM-BC) which achieve Marton's region with both common and private messages. This is the best achievable rate region known to date, and it is tight for all classes of two-user DM-BCs whose capacity regions are known. To accomplish this task, we first construct polar codes for both the superposition as well as binning strategy. By combining these two schemes, we obtain Marton's region with private messages only. Finally, we show how to handle the case of common information. The proposed coding schemes possess the usual advantages of polar codes, i.e., they have low encoding and decoding complexity and a superpolynomial decay rate of the error probability. We follow the lead of Goela, Abbe, and Gastpar, who recently introduced polar codes emulating the superposition and binning schemes. To align the polar indices, for both schemes, their solution involves some degradedness constraints that are assumed to hold between the auxiliary random variables and channel outputs. To remove these constraints, we consider the transmission of k blocks and employ a chaining construction that guarantees the proper alignment of the polarized indices. The techniques described in this paper are quite general, and they can be adopted to many other multiterminal scenarios whenever there polar indices need to be aligned.},
author = {Mondelli, Marco and Hassani, Hamed and Sason, Igal and Urbanke, Rudiger},
journal = {IEEE Transactions on Information Theory},
number = {2},
pages = {783--800},
publisher = {IEEE},
title = {{Achieving Marton’s region for broadcast channels using polar codes}},
doi = {10.1109/tit.2014.2368555},
volume = {61},
year = {2015},
}
@article{6739,
abstract = {We explore the relationship between polar and RM codes and we describe a coding scheme which improves upon the performance of the standard polar code at practical block lengths. Our starting point is the experimental observation that RM codes have a smaller error probability than polar codes under MAP decoding. This motivates us to introduce a family of codes that “interpolates” between RM and polar codes, call this family C_inter = {C_α : α ∈ [0, 1]}, where C_α|α=1 is the original polar code, and C_α|α=0 is an RM code. Based on numerical observations, we remark that the error probability under MAP decoding is an increasing function of α. MAP decoding has in general exponential complexity, but empirically the performance of polar codes at finite block lengths is boosted by moving along the family C_inter even under low-complexity decoding schemes such as, for instance, belief propagation or successive cancellation list decoder. We demonstrate the performance gain via numerical simulations for transmission over the erasure channel as well as the Gaussian channel.},
author = {Mondelli, Marco and Hassani, Hamed and Urbanke, Rudiger},
issn = {0090-6778},
journal = {IEEE Transactions on Communications},
number = {9},
pages = {3084--3091},
publisher = {IEEE},
title = {{From polar to Reed-Muller codes: A technique to improve the finite-length performance}},
doi = {10.1109/tcomm.2014.2345069},
volume = {62},
year = {2014},
}
@inproceedings{6740,
abstract = {We describe coding techniques that achieve the capacity of a discrete memoryless asymmetric channel. To do so, we discuss how recent advances in coding for symmetric channels yield more efficient solutions also for the asymmetric case. In more detail, we consider three basic approaches. The first one is Gallager's scheme that concatenates a linear code with a non-linear mapper, in order to bias the input distribution. We explicitly show that both polar codes and spatially coupled codes can be employed in this scenario. Further, we derive a scaling law between the gap to capacity, the cardinality of channel input and output alphabets, and the required size of the mapper. The second one is an integrated approach in which the coding scheme is used both for source coding, in order to create codewords with the capacity-achieving distribution, and for channel coding, in order to provide error protection. Such a technique has been recently introduced by Honda and Yamamoto in the context of polar codes, and we show how to apply it also to the design of sparse graph codes. The third approach is based on an idea due to Böcherer and Mathar and separates completely the two tasks of source coding and channel coding by “chaining” together several codewords. We prove that we can combine any suitable source code with any suitable channel code in order to provide optimal schemes for asymmetric channels. In particular, polar codes and spatially coupled codes fulfill the required conditions.},
author = {Mondelli, Marco and Urbanke, Rüdiger and Hassani, Hamed},
booktitle = {Proceedings of the 52nd Annual Allerton Conference on Communication, Control, and Computing},
location = {Monticello, IL, United States},
pages = {789--796},
publisher = {IEEE},
title = {{How to achieve the capacity of asymmetric channels}},
doi = {10.1109/allerton.2014.7028535},
year = {2014},
}
@article{6744,
abstract = {With the aim of extending the coverage and improving the performance of impulse radio ultra-wideband (UWB) systems, this paper focuses on developing a novel single differential encoded decode and forward (DF) non-cooperative relaying scheme (NCR). To favor simple receiver structures, differential noncoherent detection is employed which enables effective energy capture without any channel estimation. Putting emphasis on the general case of multi-hop relaying, we illustrate an original algorithm for the joint power allocation and path selection (JPAPS), minimizing an approximate expression of the overall bit error rate (BER). In particular, after deriving a closed-form power allocation strategy, the optimal path selection is reduced to a shortest path problem on a connected graph, which can be solved without any topology information with complexity $O(N^3)$, $N$ being the number of available relays of the network. An approximate scheme is also presented, which reduces the complexity to $O(N^2)$ while showing a negligible performance loss, and for benchmarking purposes, an exhaustive-search based multi-hop DF cooperative strategy is derived. Simulation results for various network setups corroborate the effectiveness of the proposed low-complexity JPAPS algorithm, which favorably compares to existing AF and DF relaying methods.},
author = {Mondelli, Marco and Zhou, Qi and Lottici, Vincenzo and Ma, Xiaoli},
journal = {IEEE Transactions on Wireless Communications},
number = {3},
pages = {1397--1409},
publisher = {IEEE},
title = {{Joint power allocation and path selection for multi-hop noncoherent decode and forward UWB communications}},
doi = {10.1109/twc.2014.020914.130669},
volume = {13},
year = {2014},
}
@article{6768,
abstract = {The paper presents an algorithm that applies a stack filter simulating the Mean Curvature Motion equation via a finite difference scheme.},
author = {Mondelli, Marco},
issn = {2105-1232},
journal = {Image Processing On Line},
pages = {68--111},
publisher = {Image Processing On Line},
title = {{A finite difference scheme for the stack filter simulating the MCM}},
doi = {10.5201/ipol.2013.53},
volume = {3},
year = {2013},
}
@inproceedings{6746,
abstract = {This paper proposes a novel cooperative approach for two-hop amplify-and-forward (A&F) relaying that exploits both the signal forwarded by the relay and the one directly transmitted by the source in impulse-radio ultra-wideband (IR-UWB) systems. Specifically, we focus on a non-coherent setup employing a double-differential encoding scheme at the source node and a single differential demodulation at the relay and destination. The log-likelihood ratio based decision rule is derived at the destination node. A semi-analytical power allocation strategy is presented by evaluating a closed-form expression for the effective signal to noise ratio (SNR) at the destination, which is maximized by exhaustive search. Numerical simulations show that the proposed system outperforms both the direct transmission with single differential encoding and the non-cooperative multi-hop approach in different scenarios.},
author = {Mondelli, Marco and Zhou, Qi and Ma, Xiaoli and Lottici, Vincenzo},
booktitle = {2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
issn = {1520-6149},
location = {Kyoto, Japan},
pages = {2905--2908},
publisher = {IEEE},
title = {{A cooperative approach for amplify-and-forward differential transmitted reference IR-UWB relay systems}},
doi = {10.1109/icassp.2012.6288524},
year = {2012},
}
@article{6749,
abstract = {This article refers to algorithms based on finite difference schemes for computing mean and affine curvature evolutions of digital images, introduced by Alvarez and Morel [L. Alvarez, J.M. Morel, “Formalization and computational aspects of image analysis”, Acta Numerica, pp. 1--59, 1994]. We discuss consistency, stability and convergence. Our analysis focuses on some possible choices of the parameters, choices that generate multiple variants in the implementations. Meaningful visual examples on how the algorithms actually work are provided.},
author = {Mondelli, Marco and Ciomaga, Adina},
issn = {2105-1232},
journal = {Image Processing On Line},
pages = {127--177},
publisher = {Image Processing On Line},
title = {{Finite difference schemes for MCM and AMSS}},
doi = {10.5201/ipol.2011.cm_fds},
volume = {1},
year = {2011},
}
@inproceedings{6767,
abstract = {In the present paper we give a thorough analysis of two finite difference schemes for the Mean Curvature Motion and its affine variant, the Affine Morphological Scale Space, schemes introduced in the Image Processing framework. This analysis brings in a series of parameters that allow us to compute an accurate discrete evolution of curvature motions.
The choice of these parameters is based on intrinsic geometric properties of the evolution equations for linear, radial and elliptical functions. In the last part we present several examples, underlining the main advantages of the algorithms (the removal of pixelization effects and JPEG artifacts) as well as their major drawbacks (absence of contrast invariance and grid dependence). A detailed explanatory report, the ANSI C implementations and an on-line demo can be found at http://www.ipol.im/.},
author = {Mondelli, Marco and Ciomaga, Adina},
booktitle = {Proceedings of the International Student Conference on Pure and Applied Mathematics},
isbn = {978-973-703-602-5},
location = {Iasi, Romania},
pages = {137--156},
publisher = {Editura Universității „Alexandru Ioan Cuza” Iasi},
title = {{On finite difference schemes for curvature motions}},
doi = {10.13140/2.1.1862.4646},
year = {2011},
}