@inproceedings{14180, abstract = {Modern neural network architectures can leverage large amounts of data to generalize well within the training distribution. However, they are less capable of systematic generalization to data drawn from unseen but related distributions, a feat that is hypothesized to require compositional reasoning and reuse of knowledge. In this work, we present Neural Interpreters, an architecture that factorizes inference in a self-attention network as a system of modules, which we call \emph{functions}. Inputs to the model are routed through a sequence of functions in a way that is end-to-end learned. The proposed architecture can flexibly compose computation along width and depth, and lends itself well to capacity extension after training. To demonstrate the versatility of Neural Interpreters, we evaluate it in two distinct settings: image classification and visual abstract reasoning on Raven Progressive Matrices. In the former, we show that Neural Interpreters perform on par with the vision transformer using fewer parameters, while being transferrable to a new task in a sample efficient manner. In the latter, we find that Neural Interpreters are competitive with respect to the state-of-the-art in terms of systematic generalization. }, author = {Rahaman, Nasim and Gondal, Muhammad Waleed and Joshi, Shruti and Gehler, Peter and Bengio, Yoshua and Locatello, Francesco and Schölkopf, Bernhard}, booktitle = {Advances in Neural Information Processing Systems}, isbn = {9781713845393}, location = {Virtual}, pages = {10985--10998}, title = {{Dynamic inference with neural interpreters}}, volume = {34}, year = {2021}, } @article{14117, abstract = {The two fields of machine learning and graphical causality arose and are developed separately. However, there is, now, cross-pollination and increasing interest in both fields to benefit from the advances of the other. In this article, we review fundamental concepts of causal inference and relate them to crucial open problems of machine learning, including transfer and generalization, thereby assaying how causality can contribute to modern machine learning research. This also applies in the opposite direction: we note that most work in causality starts from the premise that the causal variables are given. A central problem for AI and causality is, thus, causal representation learning, that is, the discovery of high-level causal variables from low-level observations. Finally, we delineate some implications of causality for machine learning and propose key research areas at the intersection of both communities.}, author = {Scholkopf, Bernhard and Locatello, Francesco and Bauer, Stefan and Ke, Nan Rosemary and Kalchbrenner, Nal and Goyal, Anirudh and Bengio, Yoshua}, issn = {1558-2256}, journal = {Proceedings of the IEEE}, keywords = {Electrical and Electronic Engineering}, number = {5}, pages = {612--634}, publisher = {Institute of Electrical and Electronics Engineers}, title = {{Toward causal representation learning}}, doi = {10.1109/jproc.2021.3058954}, volume = {109}, year = {2021}, } @inproceedings{14178, abstract = {Learning meaningful representations that disentangle the underlying structure of the data generating process is considered to be of key importance in machine learning. While disentangled representations were found to be useful for diverse tasks such as abstract reasoning and fair classification, their scalability and real-world impact remain questionable. We introduce a new high-resolution dataset with 1M simulated images and over 1,800 annotated real-world images of the same setup. In contrast to previous work, this new dataset exhibits correlations, a complex underlying structure, and allows to evaluate transfer to unseen simulated and real-world settings where the encoder i) remains in distribution or ii) is out of distribution. We propose new architectures in order to scale disentangled representation learning to realistic high-resolution settings and conduct a large-scale empirical study of disentangled representations on this dataset. We observe that disentanglement is a good predictor for out-of-distribution (OOD) task performance.}, author = {Dittadi, Andrea and Träuble, Frederik and Locatello, Francesco and Wüthrich, Manuel and Agrawal, Vaibhav and Winther, Ole and Bauer, Stefan and Schölkopf, Bernhard}, booktitle = {The Ninth International Conference on Learning Representations}, location = {Virtual}, title = {{On the transfer of disentangled representations in realistic settings}}, year = {2021}, } @unpublished{14221, abstract = {The world is structured in countless ways. It may be prudent to enforce corresponding structural properties to a learning algorithm's solution, such as incorporating prior beliefs, natural constraints, or causal structures. Doing so may translate to faster, more accurate, and more flexible models, which may directly relate to real-world impact. In this dissertation, we consider two different research areas that concern structuring a learning algorithm's solution: when the structure is known and when it has to be discovered.}, author = {Locatello, Francesco}, booktitle = {arXiv}, title = {{Enforcing and discovering structure in machine learning}}, doi = {10.48550/arXiv.2111.13693}, year = {2021}, } @unpublished{14278, abstract = {The Birkhoff conjecture says that the boundary of a strictly convex integrable billiard table is necessarily an ellipse. In this article, we consider a stronger notion of integrability, namely, integrability close to the boundary, and prove a local version of this conjecture: a small perturbation of almost every ellipse that preserves integrability near the boundary, is itself an ellipse. We apply this result to study local spectral rigidity of ellipses using the connection between the wave trace of the Laplacian and the dynamics near the boundary and establish rigidity for almost all of them.}, author = {Koval, Illya}, booktitle = {arXiv}, title = {{Local strong Birkhoff conjecture and local spectral rigidity of almost every ellipse}}, doi = {10.48550/ARXIV.2111.12171}, year = {2021}, } @phdthesis{10199, abstract = {The design and verification of concurrent systems remains an open challenge due to the non-determinism that arises from the inter-process communication. In particular, concurrent programs are notoriously difficult both to be written correctly and to be analyzed formally, as complex thread interaction has to be accounted for. The difficulties are further exacerbated when concurrent programs get executed on modern-day hardware, which contains various buffering and caching mechanisms for efficiency reasons. This causes further subtle non-determinism, which can often produce very unintuitive behavior of the concurrent programs. Model checking is at the forefront of tackling the verification problem, where the task is to decide, given as input a concurrent system and a desired property, whether the system satisfies the property. The inherent state-space explosion problem in model checking of concurrent systems causes naïve explicit methods not to scale, thus more inventive methods are required. One such method is stateless model checking (SMC), which explores in memory-efficient manner the program executions rather than the states of the program. State-of-the-art SMC is typically coupled with partial order reduction (POR) techniques, which argue that certain executions provably produce identical system behavior, thus limiting the amount of executions one needs to explore in order to cover all possible behaviors. Another method to tackle the state-space explosion is symbolic model checking, where the considered techniques operate on a succinct implicit representation of the input system rather than explicitly accessing the system. In this thesis we present new techniques for verification of concurrent systems. We present several novel POR methods for SMC of concurrent programs under various models of semantics, some of which account for write-buffering mechanisms. Additionally, we present novel algorithms for symbolic model checking of finite-state concurrent systems, where the desired property of the systems is to ensure a formally defined notion of fairness.}, author = {Toman, Viktor}, issn = {2663-337X}, keywords = {concurrency, verification, model checking}, pages = {166}, publisher = {Institute of Science and Technology Austria}, title = {{Improved verification techniques for concurrent systems}}, doi = {10.15479/at:ista:10199}, year = {2021}, } @article{8429, abstract = {We develop a Bayesian model (BayesRR-RC) that provides robust SNP-heritability estimation, an alternative to marker discovery, and accurate genomic prediction, taking 22 seconds per iteration to estimate 8.4 million SNP-effects and 78 SNP-heritability parameters in the UK Biobank. We find that only ≤10% of the genetic variation captured for height, body mass index, cardiovascular disease, and type 2 diabetes is attributable to proximal regulatory regions within 10kb upstream of genes, while 12-25% is attributed to coding regions, 32–44% to introns, and 22-28% to distal 10-500kb upstream regions. Up to 24% of all cis and coding regions of each chromosome are associated with each trait, with over 3,100 independent exonic and intronic regions and over 5,400 independent regulatory regions having ≥95% probability of contributing ≥0.001% to the genetic variance of these four traits. Our open-source software (GMRM) provides a scalable alternative to current approaches for biobank data.}, author = {Patxot, Marion and Trejo Banos, Daniel and Kousathanas, Athanasios and Orliac, Etienne J and Ojavee, Sven E and Moser, Gerhard and Sidorenko, Julia and Kutalik, Zoltan and Magi, Reedik and Visscher, Peter M and Ronnegard, Lars and Robinson, Matthew Richard}, issn = {2041-1723}, journal = {Nature Communications}, number = {1}, publisher = {Springer Nature}, title = {{Probabilistic inference of the genetic architecture underlying functional enrichment of complex traits}}, doi = {10.1038/s41467-021-27258-9}, volume = {12}, year = {2021}, } @inproceedings{10854, abstract = {Consider a distributed task where the communication network is fixed but the local inputs given to the nodes of the distributed system may change over time. In this work, we explore the following question: if some of the local inputs change, can an existing solution be updated efficiently, in a dynamic and distributed manner? To address this question, we define the batch dynamic CONGEST model in which we are given a bandwidth-limited communication network and a dynamic edge labelling defines the problem input. The task is to maintain a solution to a graph problem on the labelled graph under batch changes. We investigate, when a batch of alpha edge label changes arrive, - how much time as a function of alpha we need to update an existing solution, and - how much information the nodes have to keep in local memory between batches in order to update the solution quickly. Our work lays the foundations for the theory of input-dynamic distributed network algorithms. We give a general picture of the complexity landscape in this model, design both universal algorithms and algorithms for concrete problems, and present a general framework for lower bounds. The diverse time complexity of our model spans from constant time, through time polynomial in alpha, and to alpha time, which we show to be enough for any task.}, author = {Foerster, Klaus-Tycho and Korhonen, Janne and Paz, Ami and Rybicki, Joel and Schmid, Stefan}, booktitle = {Abstract Proceedings of the 2021 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems}, isbn = {9781450380720}, location = {Virtual, Online}, pages = {71--72}, publisher = {Association for Computing Machinery}, title = {{Input-dynamic distributed algorithms for communication networks}}, doi = {10.1145/3410220.3453923}, year = {2021}, } @article{10855, abstract = {Consider a distributed task where the communication network is fixed but the local inputs given to the nodes of the distributed system may change over time. In this work, we explore the following question: if some of the local inputs change, can an existing solution be updated efficiently, in a dynamic and distributed manner? To address this question, we define the batch dynamic \congest model in which we are given a bandwidth-limited communication network and a dynamic edge labelling defines the problem input. The task is to maintain a solution to a graph problem on the labeled graph under batch changes. We investigate, when a batch of α edge label changes arrive, \beginitemize \item how much time as a function of α we need to update an existing solution, and \item how much information the nodes have to keep in local memory between batches in order to update the solution quickly. \enditemize Our work lays the foundations for the theory of input-dynamic distributed network algorithms. We give a general picture of the complexity landscape in this model, design both universal algorithms and algorithms for concrete problems, and present a general framework for lower bounds. In particular, we derive non-trivial upper bounds for two selected, contrasting problems: maintaining a minimum spanning tree and detecting cliques.}, author = {Foerster, Klaus-Tycho and Korhonen, Janne and Paz, Ami and Rybicki, Joel and Schmid, Stefan}, issn = {2476-1249}, journal = {Proceedings of the ACM on Measurement and Analysis of Computing Systems}, keywords = {Computer Networks and Communications, Hardware and Architecture, Safety, Risk, Reliability and Quality, Computer Science (miscellaneous)}, number = {1}, pages = {1--33}, publisher = {Association for Computing Machinery}, title = {{Input-dynamic distributed algorithms for communication networks}}, doi = {10.1145/3447384}, volume = {5}, year = {2021}, } @article{9293, abstract = {We consider planning problems for graphs, Markov Decision Processes (MDPs), and games on graphs in an explicit state space. While graphs represent the most basic planning model, MDPs represent interaction with nature and games on graphs represent interaction with an adversarial environment. We consider two planning problems with k different target sets: (a) the coverage problem asks whether there is a plan for each individual target set; and (b) the sequential target reachability problem asks whether the targets can be reached in a given sequence. For the coverage problem, we present a linear-time algorithm for graphs, and quadratic conditional lower bound for MDPs and games on graphs. For the sequential target problem, we present a linear-time algorithm for graphs, a sub-quadratic algorithm for MDPs, and a quadratic conditional lower bound for games on graphs. Our results with conditional lower bounds, based on the boolean matrix multiplication (BMM) conjecture and strong exponential time hypothesis (SETH), establish (i) model-separation results showing that for the coverage problem MDPs and games on graphs are harder than graphs, and for the sequential reachability problem games on graphs are harder than MDPs and graphs; and (ii) problem-separation results showing that for MDPs the coverage problem is harder than the sequential target problem.}, author = {Chatterjee, Krishnendu and Dvořák, Wolfgang and Henzinger, Monika H and Svozil, Alexander}, issn = {0004-3702}, journal = {Artificial Intelligence}, number = {8}, publisher = {Elsevier}, title = {{Algorithms and conditional lower bounds for planning problems}}, doi = {10.1016/j.artint.2021.103499}, volume = {297}, year = {2021}, }