[{"abstract":[{"lang":"eng","text":"Methods inspired from machine learning have recently attracted great interest in the computational study of quantum many-particle systems. So far, however, it has proven challenging to deal with microscopic models in which the total number of particles is not conserved. To address this issue, we propose a new variant of neural network states, which we term neural coherent states. Taking the Fröhlich impurity model as a case study, we show that neural coherent states can learn the ground state of non-additive systems very well. In particular, we observe substantial improvement over the standard coherent state estimates in the most challenging intermediate coupling regime. Our approach is generic and does not assume specific details of the system, suggesting wide applications."}],"ec_funded":1,"type":"preprint","date_created":"2022-02-17T11:18:57Z","date_updated":"2023-09-07T13:44:16Z","oa_version":"Preprint","author":[{"full_name":"Rzadkowski, Wojciech","first_name":"Wojciech","last_name":"Rzadkowski","id":"48C55298-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-1106-4419"},{"full_name":"Lemeshko, Mikhail","id":"37CB05FA-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-6990-7802","first_name":"Mikhail","last_name":"Lemeshko"},{"full_name":"Mentink, Johan H.","first_name":"Johan H.","last_name":"Mentink"}],"related_material":{"record":[{"id":"10759","status":"public","relation":"dissertation_contains"}]},"publication_status":"submitted","status":"public","title":"Artificial neural network states for non-additive systems","department":[{"_id":"MiLe"}],"acknowledgement":"We acknowledge fruitful discussions with Giacomo Bighin, Giammarco Fabiani, Areg Ghazaryan, Christoph\r\nLampert, and Artem Volosniev at various stages of this work. W.R. is a recipient of a DOC Fellowship of the\r\nAustrian Academy of Sciences and has received funding from the EU Horizon 2020 programme under the Marie\r\nSkłodowska-Curie Grant Agreement No. 665385. M. L. acknowledges support by the European Research Council (ERC) Starting Grant No. 801770 (ANGULON). This work is part of the Shell-NWO/FOM-initiative “Computational sciences for energy research” of Shell and Chemical Sciences, Earth and Life Sciences, Physical Sciences, FOM and STW.","_id":"10762","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2021","month":"05","day":"31","article_processing_charge":"No","language":[{"iso":"eng"}],"doi":"10.48550/arXiv.2105.15193","date_published":"2021-05-31T00:00:00Z","project":[{"_id":"2688CF98-B435-11E9-9278-68D0E5697425","grant_number":"801770","name":"Angulon: physics and applications of a new quasiparticle","call_identifier":"H2020"},{"call_identifier":"H2020","name":"International IST Doctoral Program","_id":"2564DBCA-B435-11E9-9278-68D0E5697425","grant_number":"665385"}],"page":"2105.15193","publication":"arXiv","main_file_link":[{"url":"https://arxiv.org/abs/2105.15193","open_access":"1"}],"citation":{"ama":"Rzadkowski W, Lemeshko M, Mentink JH. Artificial neural network states for non-additive systems. arXiv. doi:10.48550/arXiv.2105.15193","ista":"Rzadkowski W, Lemeshko M, Mentink JH. Artificial neural network states for non-additive systems. arXiv, 10.48550/arXiv.2105.15193.","ieee":"W. Rzadkowski, M. Lemeshko, and J. H. Mentink, “Artificial neural network states for non-additive systems,” arXiv. .","apa":"Rzadkowski, W., Lemeshko, M., & Mentink, J. H. (n.d.). Artificial neural network states for non-additive systems. arXiv. https://doi.org/10.48550/arXiv.2105.15193","mla":"Rzadkowski, Wojciech, et al. “Artificial Neural Network States for Non-Additive Systems.” ArXiv, doi:10.48550/arXiv.2105.15193.","short":"W. Rzadkowski, M. Lemeshko, J.H. Mentink, ArXiv (n.d.).","chicago":"Rzadkowski, Wojciech, Mikhail Lemeshko, and Johan H. Mentink. “Artificial Neural Network States for Non-Additive Systems.” ArXiv, n.d. https://doi.org/10.48550/arXiv.2105.15193."},"external_id":{"arxiv":["2105.15193"]},"oa":1},{"date_published":"2021-05-30T00:00:00Z","citation":{"ama":"Phuong M. Underspecification in deep learning. 2021. doi:10.15479/AT:ISTA:9418","ista":"Phuong M. 2021. Underspecification in deep learning. Institute of Science and Technology Austria.","apa":"Phuong, M. (2021). Underspecification in deep learning. Institute of Science and Technology Austria. https://doi.org/10.15479/AT:ISTA:9418","ieee":"M. Phuong, “Underspecification in deep learning,” Institute of Science and Technology Austria, 2021.","mla":"Phuong, Mary. Underspecification in Deep Learning. Institute of Science and Technology Austria, 2021, doi:10.15479/AT:ISTA:9418.","short":"M. Phuong, Underspecification in Deep Learning, Institute of Science and Technology Austria, 2021.","chicago":"Phuong, Mary. “Underspecification in Deep Learning.” Institute of Science and Technology Austria, 2021. https://doi.org/10.15479/AT:ISTA:9418."},"page":"125","day":"30","has_accepted_license":"1","article_processing_charge":"No","oa_version":"Published Version","file":[{"date_updated":"2021-05-24T11:22:29Z","date_created":"2021-05-24T11:22:29Z","checksum":"4f0abe64114cfed264f9d36e8d1197e3","success":1,"relation":"main_file","file_id":"9419","file_size":2673905,"content_type":"application/pdf","creator":"bphuong","file_name":"mph-thesis-v519-pdfimages.pdf","access_level":"open_access"},{"file_name":"thesis.zip","access_level":"closed","creator":"bphuong","content_type":"application/zip","file_size":92995100,"file_id":"9420","relation":"source_file","date_created":"2021-05-24T11:56:02Z","date_updated":"2021-05-24T11:56:02Z","checksum":"f5699e876bc770a9b0df8345a77720a2"}],"_id":"9418","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","title":"Underspecification in deep learning","ddc":["000"],"status":"public","abstract":[{"text":"Deep learning is best known for its empirical success across a wide range of applications\r\nspanning computer vision, natural language processing and speech. Of equal significance,\r\nthough perhaps less known, are its ramifications for learning theory: deep networks have\r\nbeen observed to perform surprisingly well in the high-capacity regime, aka the overfitting\r\nor underspecified regime. Classically, this regime on the far right of the bias-variance curve\r\nis associated with poor generalisation; however, recent experiments with deep networks\r\nchallenge this view.\r\n\r\nThis thesis is devoted to investigating various aspects of underspecification in deep learning.\r\nFirst, we argue that deep learning models are underspecified on two levels: a) any given\r\ntraining dataset can be fit by many different functions, and b) any given function can be\r\nexpressed by many different parameter configurations. We refer to the second kind of\r\nunderspecification as parameterisation redundancy and we precisely characterise its extent.\r\nSecond, we characterise the implicit criteria (the inductive bias) that guide learning in the\r\nunderspecified regime. Specifically, we consider a nonlinear but tractable classification\r\nsetting, and show that given the choice, neural networks learn classifiers with a large margin.\r\nThird, we consider learning scenarios where the inductive bias is not by itself sufficient to\r\ndeal with underspecification. We then study different ways of ‘tightening the specification’: i)\r\nIn the setting of representation learning with variational autoencoders, we propose a hand-\r\ncrafted regulariser based on mutual information. ii) In the setting of binary classification, we\r\nconsider soft-label (real-valued) supervision. We derive a generalisation bound for linear\r\nnetworks supervised in this way and verify that soft labels facilitate fast learning. Finally, we\r\nexplore an application of soft-label supervision to the training of multi-exit models.","lang":"eng"}],"type":"dissertation","alternative_title":["ISTA Thesis"],"doi":"10.15479/AT:ISTA:9418","supervisor":[{"full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert"}],"acknowledged_ssus":[{"_id":"ScienComp"},{"_id":"CampIT"},{"_id":"E-Lib"}],"degree_awarded":"PhD","language":[{"iso":"eng"}],"oa":1,"month":"05","publication_identifier":{"issn":["2663-337X"]},"author":[{"first_name":"Phuong","last_name":"Bui Thi Mai","id":"3EC6EE64-F248-11E8-B48F-1D18A9856A87","full_name":"Bui Thi Mai, Phuong"}],"related_material":{"record":[{"status":"deleted","relation":"part_of_dissertation","id":"7435"},{"status":"public","relation":"part_of_dissertation","id":"7481"},{"id":"9416","relation":"part_of_dissertation","status":"public"},{"relation":"part_of_dissertation","status":"public","id":"7479"}]},"date_created":"2021-05-24T13:06:23Z","date_updated":"2023-09-08T11:11:12Z","year":"2021","publication_status":"published","publisher":"Institute of Science and Technology Austria","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"file_date_updated":"2021-05-24T11:56:02Z"},{"alternative_title":["PMLR"],"type":"conference","abstract":[{"lang":"eng","text":"The focus of disentanglement approaches has been on identifying independent factors of variation in data. However, the causal variables underlying real-world observations are often not statistically independent. In this work, we bridge the gap to real-world scenarios by analyzing the behavior of the most prominent disentanglement approaches on correlated data in a large-scale empirical study (including 4260 models). We show and quantify that systematically induced correlations in the dataset are being learned and reflected in the latent representations, which has implications for downstream applications of disentanglement such as fairness. We also demonstrate how to resolve these latent correlations, either using weak supervision during\r\ntraining or by post-hoc correcting a pre-trained model with a small number of labels."}],"intvolume":" 139","status":"public","title":"On disentangled representations learned from correlated data","_id":"14177","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Published Version","scopus_import":"1","article_processing_charge":"No","day":"01","page":"10401-10412","citation":{"short":"F. Träuble, E. Creager, N. Kilbertus, F. Locatello, A. Dittadi, A. Goyal, B. Schölkopf, S. Bauer, in:, Proceedings of the 38th International Conference on Machine Learning, ML Research Press, 2021, pp. 10401–10412.","mla":"Träuble, Frederik, et al. “On Disentangled Representations Learned from Correlated Data.” Proceedings of the 38th International Conference on Machine Learning, vol. 139, ML Research Press, 2021, pp. 10401–12.","chicago":"Träuble, Frederik, Elliot Creager, Niki Kilbertus, Francesco Locatello, Andrea Dittadi, Anirudh Goyal, Bernhard Schölkopf, and Stefan Bauer. “On Disentangled Representations Learned from Correlated Data.” In Proceedings of the 38th International Conference on Machine Learning, 139:10401–12. ML Research Press, 2021.","ama":"Träuble F, Creager E, Kilbertus N, et al. On disentangled representations learned from correlated data. In: Proceedings of the 38th International Conference on Machine Learning. Vol 139. ML Research Press; 2021:10401-10412.","ieee":"F. Träuble et al., “On disentangled representations learned from correlated data,” in Proceedings of the 38th International Conference on Machine Learning, Virtual, 2021, vol. 139, pp. 10401–10412.","apa":"Träuble, F., Creager, E., Kilbertus, N., Locatello, F., Dittadi, A., Goyal, A., … Bauer, S. (2021). On disentangled representations learned from correlated data. In Proceedings of the 38th International Conference on Machine Learning (Vol. 139, pp. 10401–10412). Virtual: ML Research Press.","ista":"Träuble F, Creager E, Kilbertus N, Locatello F, Dittadi A, Goyal A, Schölkopf B, Bauer S. 2021. On disentangled representations learned from correlated data. Proceedings of the 38th International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 139, 10401–10412."},"publication":"Proceedings of the 38th International Conference on Machine Learning","date_published":"2021-08-01T00:00:00Z","extern":"1","publisher":"ML Research Press","department":[{"_id":"FrLo"}],"publication_status":"published","year":"2021","volume":139,"date_created":"2023-08-22T14:03:47Z","date_updated":"2023-09-11T10:18:48Z","author":[{"first_name":"Frederik","last_name":"Träuble","full_name":"Träuble, Frederik"},{"full_name":"Creager, Elliot","last_name":"Creager","first_name":"Elliot"},{"full_name":"Kilbertus, Niki","first_name":"Niki","last_name":"Kilbertus"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","orcid":"0000-0002-4850-0683","first_name":"Francesco","last_name":"Locatello","full_name":"Locatello, Francesco"},{"first_name":"Andrea","last_name":"Dittadi","full_name":"Dittadi, Andrea"},{"full_name":"Goyal, Anirudh","last_name":"Goyal","first_name":"Anirudh"},{"full_name":"Schölkopf, Bernhard","first_name":"Bernhard","last_name":"Schölkopf"},{"last_name":"Bauer","first_name":"Stefan","full_name":"Bauer, Stefan"}],"month":"08","quality_controlled":"1","oa":1,"external_id":{"arxiv":["2006.07886"]},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2006.07886"}],"language":[{"iso":"eng"}],"conference":{"name":"ICML: International Conference on Machine Learning","end_date":"2021-07-24","location":"Virtual","start_date":"2021-07-18"}},{"publication_status":"published","publisher":"ML Research Press","department":[{"_id":"FrLo"}],"year":"2021","date_updated":"2023-09-11T10:16:55Z","date_created":"2023-08-22T14:03:04Z","volume":139,"author":[{"full_name":"Yèche, Hugo","last_name":"Yèche","first_name":"Hugo"},{"full_name":"Dresdner, Gideon","first_name":"Gideon","last_name":"Dresdner"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","first_name":"Francesco","full_name":"Locatello, Francesco"},{"full_name":"Hüser, Matthias","first_name":"Matthias","last_name":"Hüser"},{"last_name":"Rätsch","first_name":"Gunnar","full_name":"Rätsch, Gunnar"}],"extern":"1","quality_controlled":"1","external_id":{"arxiv":["2106.05142"]},"main_file_link":[{"url":"https://arxiv.org/abs/2106.05142","open_access":"1"}],"oa":1,"language":[{"iso":"eng"}],"conference":{"start_date":"2021-07-18","location":"Virtual","end_date":"2021-07-24","name":"International Conference on Machine Learning"},"month":"08","title":"Neighborhood contrastive learning applied to online patient monitoring","status":"public","intvolume":" 139","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"14176","oa_version":"Preprint","alternative_title":["PMLR"],"type":"conference","abstract":[{"text":"Intensive care units (ICU) are increasingly looking towards machine learning for methods to provide online monitoring of critically ill patients. In machine learning, online monitoring is often formulated as a supervised learning problem. Recently, contrastive learning approaches have demonstrated promising improvements over competitive supervised benchmarks. These methods rely on well-understood data augmentation techniques developed for image data which do not apply to online monitoring. In this work, we overcome this limitation by\r\nsupplementing time-series data augmentation techniques with a novel contrastive\r\nlearning objective which we call neighborhood contrastive learning (NCL). Our objective explicitly groups together contiguous time segments from each patient while maintaining state-specific information. Our experiments demonstrate a marked improvement over existing work applying contrastive methods to medical time-series.","lang":"eng"}],"page":"11964-11974","publication":"Proceedings of 38th International Conference on Machine Learning","citation":{"ama":"Yèche H, Dresdner G, Locatello F, Hüser M, Rätsch G. Neighborhood contrastive learning applied to online patient monitoring. In: Proceedings of 38th International Conference on Machine Learning. Vol 139. ML Research Press; 2021:11964-11974.","ista":"Yèche H, Dresdner G, Locatello F, Hüser M, Rätsch G. 2021. Neighborhood contrastive learning applied to online patient monitoring. Proceedings of 38th International Conference on Machine Learning. International Conference on Machine Learning, PMLR, vol. 139, 11964–11974.","apa":"Yèche, H., Dresdner, G., Locatello, F., Hüser, M., & Rätsch, G. (2021). Neighborhood contrastive learning applied to online patient monitoring. In Proceedings of 38th International Conference on Machine Learning (Vol. 139, pp. 11964–11974). Virtual: ML Research Press.","ieee":"H. Yèche, G. Dresdner, F. Locatello, M. Hüser, and G. Rätsch, “Neighborhood contrastive learning applied to online patient monitoring,” in Proceedings of 38th International Conference on Machine Learning, Virtual, 2021, vol. 139, pp. 11964–11974.","mla":"Yèche, Hugo, et al. “Neighborhood Contrastive Learning Applied to Online Patient Monitoring.” Proceedings of 38th International Conference on Machine Learning, vol. 139, ML Research Press, 2021, pp. 11964–74.","short":"H. Yèche, G. Dresdner, F. Locatello, M. Hüser, G. Rätsch, in:, Proceedings of 38th International Conference on Machine Learning, ML Research Press, 2021, pp. 11964–11974.","chicago":"Yèche, Hugo, Gideon Dresdner, Francesco Locatello, Matthias Hüser, and Gunnar Rätsch. “Neighborhood Contrastive Learning Applied to Online Patient Monitoring.” In Proceedings of 38th International Conference on Machine Learning, 139:11964–74. ML Research Press, 2021."},"date_published":"2021-08-01T00:00:00Z","scopus_import":"1","day":"01","article_processing_charge":"No"},{"year":"2021","_id":"14182","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","department":[{"_id":"FrLo"}],"intvolume":" 34","title":"Backward-compatible prediction updates: A probabilistic approach","publication_status":"published","status":"public","author":[{"full_name":"Träuble, Frederik","last_name":"Träuble","first_name":"Frederik"},{"full_name":"Kügelgen, Julius von","last_name":"Kügelgen","first_name":"Julius von"},{"last_name":"Kleindessner","first_name":"Matthäus","full_name":"Kleindessner, Matthäus"},{"full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","first_name":"Francesco"},{"full_name":"Schölkopf, Bernhard","first_name":"Bernhard","last_name":"Schölkopf"},{"first_name":"Peter","last_name":"Gehler","full_name":"Gehler, Peter"}],"volume":34,"oa_version":"Preprint","date_created":"2023-08-22T14:05:41Z","date_updated":"2023-09-11T11:31:59Z","type":"conference","abstract":[{"lang":"eng","text":"When machine learning systems meet real world applications, accuracy is only\r\none of several requirements. In this paper, we assay a complementary\r\nperspective originating from the increasing availability of pre-trained and\r\nregularly improving state-of-the-art models. While new improved models develop\r\nat a fast pace, downstream tasks vary more slowly or stay constant. Assume that\r\nwe have a large unlabelled data set for which we want to maintain accurate\r\npredictions. Whenever a new and presumably better ML models becomes available,\r\nwe encounter two problems: (i) given a limited budget, which data points should\r\nbe re-evaluated using the new model?; and (ii) if the new predictions differ\r\nfrom the current ones, should we update? Problem (i) is about compute cost,\r\nwhich matters for very large data sets and models. Problem (ii) is about\r\nmaintaining consistency of the predictions, which can be highly relevant for\r\ndownstream applications; our demand is to avoid negative flips, i.e., changing\r\ncorrect to incorrect predictions. In this paper, we formalize the Prediction\r\nUpdate Problem and present an efficient probabilistic approach as answer to the\r\nabove questions. In extensive experiments on standard classification benchmark\r\ndata sets, we show that our method outperforms alternative strategies along key\r\nmetrics for backward-compatible prediction updates."}],"extern":"1","oa":1,"external_id":{"arxiv":["2107.01057"]},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2107.01057"}],"citation":{"chicago":"Träuble, Frederik, Julius von Kügelgen, Matthäus Kleindessner, Francesco Locatello, Bernhard Schölkopf, and Peter Gehler. “Backward-Compatible Prediction Updates: A Probabilistic Approach.” In 35th Conference on Neural Information Processing Systems, 34:116–28, 2021.","mla":"Träuble, Frederik, et al. “Backward-Compatible Prediction Updates: A Probabilistic Approach.” 35th Conference on Neural Information Processing Systems, vol. 34, 2021, pp. 116–28.","short":"F. Träuble, J. von Kügelgen, M. Kleindessner, F. Locatello, B. Schölkopf, P. Gehler, in:, 35th Conference on Neural Information Processing Systems, 2021, pp. 116–128.","ista":"Träuble F, Kügelgen J von, Kleindessner M, Locatello F, Schölkopf B, Gehler P. 2021. Backward-compatible prediction updates: A probabilistic approach. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 116–128.","ieee":"F. Träuble, J. von Kügelgen, M. Kleindessner, F. Locatello, B. Schölkopf, and P. Gehler, “Backward-compatible prediction updates: A probabilistic approach,” in 35th Conference on Neural Information Processing Systems, Virtual, 2021, vol. 34, pp. 116–128.","apa":"Träuble, F., Kügelgen, J. von, Kleindessner, M., Locatello, F., Schölkopf, B., & Gehler, P. (2021). Backward-compatible prediction updates: A probabilistic approach. In 35th Conference on Neural Information Processing Systems (Vol. 34, pp. 116–128). Virtual.","ama":"Träuble F, Kügelgen J von, Kleindessner M, Locatello F, Schölkopf B, Gehler P. Backward-compatible prediction updates: A probabilistic approach. In: 35th Conference on Neural Information Processing Systems. Vol 34. ; 2021:116-128."},"publication":"35th Conference on Neural Information Processing Systems","page":"116-128","quality_controlled":"1","date_published":"2021-07-02T00:00:00Z","conference":{"end_date":"2021-12-10","location":"Virtual","start_date":"2021-12-07","name":"NeurIPS: Neural Information Processing Systems"},"language":[{"iso":"eng"}],"article_processing_charge":"No","publication_identifier":{"isbn":["9781713845393"]},"day":"02","month":"07"},{"type":"conference","extern":"1","abstract":[{"lang":"eng","text":"Variational Inference makes a trade-off between the capacity of the variational family and the tractability of finding an approximate posterior distribution. Instead, Boosting Variational Inference allows practitioners to obtain increasingly good posterior approximations by spending more compute. The main obstacle to widespread adoption of Boosting Variational Inference is the amount of resources necessary to improve over a strong Variational Inference baseline. In our work, we trace this limitation back to the global curvature of the KL-divergence. We characterize how the global curvature impacts time and memory consumption, address the problem with the notion of local curvature, and provide a novel approximate backtracking algorithm for estimating local curvature. We give new theoretical convergence rates for our algorithms and provide experimental validation on synthetic and real-world datasets."}],"publication_status":"published","status":"public","title":"Boosting variational inference with locally adaptive step-sizes","department":[{"_id":"FrLo"}],"publisher":"International Joint Conferences on Artificial Intelligence","_id":"14181","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2021","date_updated":"2023-09-11T11:14:30Z","date_created":"2023-08-22T14:05:14Z","oa_version":"Published Version","author":[{"full_name":"Dresdner, Gideon","first_name":"Gideon","last_name":"Dresdner"},{"last_name":"Shekhar","first_name":"Saurav","full_name":"Shekhar, Saurav"},{"full_name":"Pedregosa, Fabian","first_name":"Fabian","last_name":"Pedregosa"},{"last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco"},{"first_name":"Gunnar","last_name":"Rätsch","full_name":"Rätsch, Gunnar"}],"day":"19","month":"05","publication_identifier":{"eisbn":["9780999241196"]},"article_processing_charge":"No","quality_controlled":"1","page":"2337-2343","publication":"Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2105.09240"}],"external_id":{"arxiv":["2105.09240"]},"citation":{"ista":"Dresdner G, Shekhar S, Pedregosa F, Locatello F, Rätsch G. 2021. Boosting variational inference with locally adaptive step-sizes. Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence. IJCAI: International Joint Conference on Artificial Intelligence, 2337–2343.","ieee":"G. Dresdner, S. Shekhar, F. Pedregosa, F. Locatello, and G. Rätsch, “Boosting variational inference with locally adaptive step-sizes,” in Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, Montreal, Canada, 2021, pp. 2337–2343.","apa":"Dresdner, G., Shekhar, S., Pedregosa, F., Locatello, F., & Rätsch, G. (2021). Boosting variational inference with locally adaptive step-sizes. In Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence (pp. 2337–2343). Montreal, Canada: International Joint Conferences on Artificial Intelligence. https://doi.org/10.24963/ijcai.2021/322","ama":"Dresdner G, Shekhar S, Pedregosa F, Locatello F, Rätsch G. Boosting variational inference with locally adaptive step-sizes. In: Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence. International Joint Conferences on Artificial Intelligence; 2021:2337-2343. doi:10.24963/ijcai.2021/322","chicago":"Dresdner, Gideon, Saurav Shekhar, Fabian Pedregosa, Francesco Locatello, and Gunnar Rätsch. “Boosting Variational Inference with Locally Adaptive Step-Sizes.” In Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, 2337–43. International Joint Conferences on Artificial Intelligence, 2021. https://doi.org/10.24963/ijcai.2021/322.","mla":"Dresdner, Gideon, et al. “Boosting Variational Inference with Locally Adaptive Step-Sizes.” Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, International Joint Conferences on Artificial Intelligence, 2021, pp. 2337–43, doi:10.24963/ijcai.2021/322.","short":"G. Dresdner, S. Shekhar, F. Pedregosa, F. Locatello, G. Rätsch, in:, Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, International Joint Conferences on Artificial Intelligence, 2021, pp. 2337–2343."},"oa":1,"language":[{"iso":"eng"}],"conference":{"end_date":"2021-08-27","start_date":"2021-08-19","location":"Montreal, Canada","name":"IJCAI: International Joint Conference on Artificial Intelligence"},"date_published":"2021-05-19T00:00:00Z","doi":"10.24963/ijcai.2021/322"},{"author":[{"full_name":"Kügelgen, Julius von","first_name":"Julius von","last_name":"Kügelgen"},{"first_name":"Yash","last_name":"Sharma","full_name":"Sharma, Yash"},{"full_name":"Gresele, Luigi","last_name":"Gresele","first_name":"Luigi"},{"full_name":"Brendel, Wieland","first_name":"Wieland","last_name":"Brendel"},{"first_name":"Bernhard","last_name":"Schölkopf","full_name":"Schölkopf, Bernhard"},{"full_name":"Besserve, Michel","last_name":"Besserve","first_name":"Michel"},{"full_name":"Locatello, Francesco","last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"}],"date_created":"2023-08-22T14:04:36Z","date_updated":"2023-09-11T10:33:19Z","volume":34,"oa_version":"Preprint","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"14179","year":"2021","title":"Self-supervised learning with data augmentations provably isolates content from style","status":"public","publication_status":"published","intvolume":" 34","department":[{"_id":"FrLo"}],"abstract":[{"text":"Self-supervised representation learning has shown remarkable success in a number of domains. A common practice is to perform data augmentation via hand-crafted transformations intended to leave the semantics of the data invariant. We seek to understand the empirical success of this approach from a theoretical perspective. We formulate the augmentation process as a latent variable model by postulating a partition of the latent representation into a content component, which is assumed invariant to augmentation, and a style component, which is allowed to change. Unlike prior work on disentanglement and independent component analysis, we allow for both nontrivial statistical and causal dependencies in the latent space. We study the identifiability of the latent representation based on pairs of views of the observations and prove sufficient conditions that allow us to identify the invariant content partition up to an invertible mapping in both generative and discriminative settings. We find numerical simulations with dependent latent variables are consistent with our theory. Lastly, we introduce Causal3DIdent, a dataset of high-dimensional, visually complex images with rich causal dependencies, which we use to study the effect of data augmentations performed in practice.","lang":"eng"}],"extern":"1","type":"conference","conference":{"end_date":"2021-12-10","location":"Virtual","start_date":"2021-12-07","name":"NeurIPS: Neural Information Processing Systems"},"date_published":"2021-06-08T00:00:00Z","language":[{"iso":"eng"}],"publication":"Advances in Neural Information Processing Systems","oa":1,"external_id":{"arxiv":["2106.04619"]},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2106.04619"}],"citation":{"mla":"Kügelgen, Julius von, et al. “Self-Supervised Learning with Data Augmentations Provably Isolates Content from Style.” Advances in Neural Information Processing Systems, vol. 34, 2021, pp. 16451–67.","short":"J. von Kügelgen, Y. Sharma, L. Gresele, W. Brendel, B. Schölkopf, M. Besserve, F. Locatello, in:, Advances in Neural Information Processing Systems, 2021, pp. 16451–16467.","chicago":"Kügelgen, Julius von, Yash Sharma, Luigi Gresele, Wieland Brendel, Bernhard Schölkopf, Michel Besserve, and Francesco Locatello. “Self-Supervised Learning with Data Augmentations Provably Isolates Content from Style.” In Advances in Neural Information Processing Systems, 34:16451–67, 2021.","ama":"Kügelgen J von, Sharma Y, Gresele L, et al. Self-supervised learning with data augmentations provably isolates content from style. In: Advances in Neural Information Processing Systems. Vol 34. ; 2021:16451-16467.","ista":"Kügelgen J von, Sharma Y, Gresele L, Brendel W, Schölkopf B, Besserve M, Locatello F. 2021. Self-supervised learning with data augmentations provably isolates content from style. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 16451–16467.","apa":"Kügelgen, J. von, Sharma, Y., Gresele, L., Brendel, W., Schölkopf, B., Besserve, M., & Locatello, F. (2021). Self-supervised learning with data augmentations provably isolates content from style. In Advances in Neural Information Processing Systems (Vol. 34, pp. 16451–16467). Virtual.","ieee":"J. von Kügelgen et al., “Self-supervised learning with data augmentations provably isolates content from style,” in Advances in Neural Information Processing Systems, Virtual, 2021, vol. 34, pp. 16451–16467."},"quality_controlled":"1","page":"16451-16467","day":"08","month":"06","article_processing_charge":"No","publication_identifier":{"isbn":["9781713845393"]}},{"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"14180","year":"2021","title":"Dynamic inference with neural interpreters","status":"public","publication_status":"published","intvolume":" 34","department":[{"_id":"FrLo"}],"author":[{"last_name":"Rahaman","first_name":"Nasim","full_name":"Rahaman, Nasim"},{"first_name":"Muhammad Waleed","last_name":"Gondal","full_name":"Gondal, Muhammad Waleed"},{"last_name":"Joshi","first_name":"Shruti","full_name":"Joshi, Shruti"},{"full_name":"Gehler, Peter","last_name":"Gehler","first_name":"Peter"},{"full_name":"Bengio, Yoshua","first_name":"Yoshua","last_name":"Bengio"},{"last_name":"Locatello","first_name":"Francesco","orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","full_name":"Locatello, Francesco"},{"first_name":"Bernhard","last_name":"Schölkopf","full_name":"Schölkopf, Bernhard"}],"date_created":"2023-08-22T14:04:55Z","date_updated":"2023-09-11T11:33:46Z","volume":34,"oa_version":"Preprint","type":"conference","abstract":[{"lang":"eng","text":"Modern neural network architectures can leverage large amounts of data to generalize well within the training distribution. However, they are less capable of systematic generalization to data drawn from unseen but related distributions, a feat that is hypothesized to require compositional reasoning and reuse of knowledge. In this work, we present Neural Interpreters, an architecture that factorizes inference in a self-attention network as a system of modules, which we call \\emph{functions}. Inputs to the model are routed through a sequence of functions in a way that is end-to-end learned. The proposed architecture can flexibly compose computation along width and depth, and lends itself well to capacity extension after training. To demonstrate the versatility of Neural Interpreters, we evaluate it in two distinct settings: image classification and visual abstract reasoning on Raven Progressive Matrices. In the former, we show that Neural Interpreters perform on par with the vision transformer using fewer parameters, while being transferrable to a new task in a sample efficient manner. In the latter, we find that Neural Interpreters are competitive with respect to the state-of-the-art in terms of systematic generalization. "}],"extern":"1","publication":"Advances in Neural Information Processing Systems","external_id":{"arxiv":["2110.06399"]},"main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2110.06399"}],"oa":1,"citation":{"ista":"Rahaman N, Gondal MW, Joshi S, Gehler P, Bengio Y, Locatello F, Schölkopf B. 2021. Dynamic inference with neural interpreters. Advances in Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 10985–10998.","apa":"Rahaman, N., Gondal, M. W., Joshi, S., Gehler, P., Bengio, Y., Locatello, F., & Schölkopf, B. (2021). Dynamic inference with neural interpreters. In Advances in Neural Information Processing Systems (Vol. 34, pp. 10985–10998). Virtual.","ieee":"N. Rahaman et al., “Dynamic inference with neural interpreters,” in Advances in Neural Information Processing Systems, Virtual, 2021, vol. 34, pp. 10985–10998.","ama":"Rahaman N, Gondal MW, Joshi S, et al. Dynamic inference with neural interpreters. In: Advances in Neural Information Processing Systems. Vol 34. ; 2021:10985-10998.","chicago":"Rahaman, Nasim, Muhammad Waleed Gondal, Shruti Joshi, Peter Gehler, Yoshua Bengio, Francesco Locatello, and Bernhard Schölkopf. “Dynamic Inference with Neural Interpreters.” In Advances in Neural Information Processing Systems, 34:10985–98, 2021.","mla":"Rahaman, Nasim, et al. “Dynamic Inference with Neural Interpreters.” Advances in Neural Information Processing Systems, vol. 34, 2021, pp. 10985–98.","short":"N. Rahaman, M.W. Gondal, S. Joshi, P. Gehler, Y. Bengio, F. Locatello, B. Schölkopf, in:, Advances in Neural Information Processing Systems, 2021, pp. 10985–10998."},"quality_controlled":"1","page":"10985-10998","conference":{"location":"Virtual","start_date":"2021-12-07","end_date":"2021-12-10","name":"NeurIPS: Neural Information Processing Systems"},"date_published":"2021-10-12T00:00:00Z","language":[{"iso":"eng"}],"month":"10","day":"12","article_processing_charge":"No","publication_identifier":{"isbn":["9781713845393"]}},{"doi":"10.1109/jproc.2021.3058954","language":[{"iso":"eng"}],"main_file_link":[{"url":"https://doi.org/10.1109/JPROC.2021.3058954","open_access":"1"}],"external_id":{"arxiv":["2102.11107"]},"oa":1,"quality_controlled":"1","month":"05","publication_identifier":{"eissn":["1558-2256"],"issn":["0018-9219"]},"author":[{"full_name":"Scholkopf, Bernhard","last_name":"Scholkopf","first_name":"Bernhard"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","first_name":"Francesco","full_name":"Locatello, Francesco"},{"full_name":"Bauer, Stefan","first_name":"Stefan","last_name":"Bauer"},{"full_name":"Ke, Nan Rosemary","first_name":"Nan Rosemary","last_name":"Ke"},{"last_name":"Kalchbrenner","first_name":"Nal","full_name":"Kalchbrenner, Nal"},{"full_name":"Goyal, Anirudh","last_name":"Goyal","first_name":"Anirudh"},{"last_name":"Bengio","first_name":"Yoshua","full_name":"Bengio, Yoshua"}],"date_updated":"2023-09-11T11:43:35Z","date_created":"2023-08-21T12:19:30Z","volume":109,"year":"2021","publication_status":"published","publisher":"Institute of Electrical and Electronics Engineers","department":[{"_id":"FrLo"}],"extern":"1","date_published":"2021-05-01T00:00:00Z","publication":"Proceedings of the IEEE","citation":{"ama":"Scholkopf B, Locatello F, Bauer S, et al. Toward causal representation learning. Proceedings of the IEEE. 2021;109(5):612-634. doi:10.1109/jproc.2021.3058954","ieee":"B. Scholkopf et al., “Toward causal representation learning,” Proceedings of the IEEE, vol. 109, no. 5. Institute of Electrical and Electronics Engineers, pp. 612–634, 2021.","apa":"Scholkopf, B., Locatello, F., Bauer, S., Ke, N. R., Kalchbrenner, N., Goyal, A., & Bengio, Y. (2021). Toward causal representation learning. Proceedings of the IEEE. Institute of Electrical and Electronics Engineers. https://doi.org/10.1109/jproc.2021.3058954","ista":"Scholkopf B, Locatello F, Bauer S, Ke NR, Kalchbrenner N, Goyal A, Bengio Y. 2021. Toward causal representation learning. Proceedings of the IEEE. 109(5), 612–634.","short":"B. Scholkopf, F. Locatello, S. Bauer, N.R. Ke, N. Kalchbrenner, A. Goyal, Y. Bengio, Proceedings of the IEEE 109 (2021) 612–634.","mla":"Scholkopf, Bernhard, et al. “Toward Causal Representation Learning.” Proceedings of the IEEE, vol. 109, no. 5, Institute of Electrical and Electronics Engineers, 2021, pp. 612–34, doi:10.1109/jproc.2021.3058954.","chicago":"Scholkopf, Bernhard, Francesco Locatello, Stefan Bauer, Nan Rosemary Ke, Nal Kalchbrenner, Anirudh Goyal, and Yoshua Bengio. “Toward Causal Representation Learning.” Proceedings of the IEEE. Institute of Electrical and Electronics Engineers, 2021. https://doi.org/10.1109/jproc.2021.3058954."},"article_type":"original","page":"612-634","day":"01","article_processing_charge":"No","scopus_import":"1","keyword":["Electrical and Electronic Engineering"],"oa_version":"Published Version","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"14117","status":"public","title":"Toward causal representation learning","intvolume":" 109","abstract":[{"lang":"eng","text":"The two fields of machine learning and graphical causality arose and are developed separately. However, there is, now, cross-pollination and increasing interest in both fields to benefit from the advances of the other. In this article, we review fundamental concepts of causal inference and relate them to crucial open problems of machine learning, including transfer and generalization, thereby assaying how causality can contribute to modern machine learning research. This also applies in the opposite direction: we note that most work in causality starts from the premise that the causal variables are given. A central problem for AI and causality is, thus, causal representation learning, that is, the discovery of high-level causal variables from low-level observations. Finally, we delineate some implications of causality for machine learning and propose key research areas at the intersection of both communities."}],"issue":"5","type":"journal_article"},{"type":"conference","extern":"1","abstract":[{"lang":"eng","text":"Learning meaningful representations that disentangle the underlying structure of the data generating process is considered to be of key importance in machine learning. While disentangled representations were found to be useful for diverse tasks such as abstract reasoning and fair classification, their scalability and real-world impact remain questionable. We introduce a new high-resolution dataset with 1M simulated images and over 1,800 annotated real-world images of the same setup. In contrast to previous work, this new dataset exhibits correlations, a complex underlying structure, and allows to evaluate transfer to unseen simulated and real-world settings where the encoder i) remains in distribution or ii) is out of distribution. We propose new architectures in order to scale disentangled representation learning to realistic high-resolution settings and conduct a large-scale empirical study of disentangled representations on this dataset. We observe that disentanglement is a good predictor for out-of-distribution (OOD) task performance."}],"department":[{"_id":"FrLo"}],"publication_status":"published","status":"public","title":"On the transfer of disentangled representations in realistic settings","year":"2021","_id":"14178","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Preprint","date_updated":"2023-09-11T10:55:30Z","date_created":"2023-08-22T14:04:16Z","author":[{"first_name":"Andrea","last_name":"Dittadi","full_name":"Dittadi, Andrea"},{"last_name":"Träuble","first_name":"Frederik","full_name":"Träuble, Frederik"},{"orcid":"0000-0002-4850-0683","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","first_name":"Francesco","full_name":"Locatello, Francesco"},{"last_name":"Wüthrich","first_name":"Manuel","full_name":"Wüthrich, Manuel"},{"full_name":"Agrawal, Vaibhav","last_name":"Agrawal","first_name":"Vaibhav"},{"full_name":"Winther, Ole","last_name":"Winther","first_name":"Ole"},{"full_name":"Bauer, Stefan","last_name":"Bauer","first_name":"Stefan"},{"full_name":"Schölkopf, Bernhard","last_name":"Schölkopf","first_name":"Bernhard"}],"article_processing_charge":"No","month":"05","day":"04","quality_controlled":"1","citation":{"ama":"Dittadi A, Träuble F, Locatello F, et al. On the transfer of disentangled representations in realistic settings. In: The Ninth International Conference on Learning Representations. ; 2021.","ista":"Dittadi A, Träuble F, Locatello F, Wüthrich M, Agrawal V, Winther O, Bauer S, Schölkopf B. 2021. On the transfer of disentangled representations in realistic settings. The Ninth International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","ieee":"A. Dittadi et al., “On the transfer of disentangled representations in realistic settings,” in The Ninth International Conference on Learning Representations, Virtual, 2021.","apa":"Dittadi, A., Träuble, F., Locatello, F., Wüthrich, M., Agrawal, V., Winther, O., … Schölkopf, B. (2021). On the transfer of disentangled representations in realistic settings. In The Ninth International Conference on Learning Representations. Virtual.","mla":"Dittadi, Andrea, et al. “On the Transfer of Disentangled Representations in Realistic Settings.” The Ninth International Conference on Learning Representations, 2021.","short":"A. Dittadi, F. Träuble, F. Locatello, M. Wüthrich, V. Agrawal, O. Winther, S. Bauer, B. Schölkopf, in:, The Ninth International Conference on Learning Representations, 2021.","chicago":"Dittadi, Andrea, Frederik Träuble, Francesco Locatello, Manuel Wüthrich, Vaibhav Agrawal, Ole Winther, Stefan Bauer, and Bernhard Schölkopf. “On the Transfer of Disentangled Representations in Realistic Settings.” In The Ninth International Conference on Learning Representations, 2021."},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2010.14407"}],"external_id":{"arxiv":["2010.14407"]},"oa":1,"publication":"The Ninth International Conference on Learning Representations","language":[{"iso":"eng"}],"date_published":"2021-05-04T00:00:00Z","conference":{"name":"ICLR: International Conference on Learning Representations","end_date":"2021-05-07","location":"Virtual","start_date":"2021-05-03"}}]