[{"_id":"10762","status":"public","project":[{"_id":"2688CF98-B435-11E9-9278-68D0E5697425","call_identifier":"H2020","name":"Angulon: physics and applications of a new quasiparticle","grant_number":"801770"},{"grant_number":"665385","name":"International IST Doctoral Program","call_identifier":"H2020","_id":"2564DBCA-B435-11E9-9278-68D0E5697425"}],"type":"preprint","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","citation":{"short":"W. Rzadkowski, M. Lemeshko, J.H. Mentink, ArXiv (n.d.).","ieee":"W. Rzadkowski, M. Lemeshko, and J. H. Mentink, “Artificial neural network states for non-additive systems,” arXiv. .","apa":"Rzadkowski, W., Lemeshko, M., & Mentink, J. H. (n.d.). Artificial neural network states for non-additive systems. arXiv. https://doi.org/10.48550/arXiv.2105.15193","ama":"Rzadkowski W, Lemeshko M, Mentink JH. Artificial neural network states for non-additive systems. arXiv. doi:10.48550/arXiv.2105.15193","mla":"Rzadkowski, Wojciech, et al. “Artificial Neural Network States for Non-Additive Systems.” ArXiv, doi:10.48550/arXiv.2105.15193.","ista":"Rzadkowski W, Lemeshko M, Mentink JH. Artificial neural network states for non-additive systems. arXiv, 10.48550/arXiv.2105.15193.","chicago":"Rzadkowski, Wojciech, Mikhail Lemeshko, and Johan H. Mentink. “Artificial Neural Network States for Non-Additive Systems.” ArXiv, n.d. https://doi.org/10.48550/arXiv.2105.15193."},"date_updated":"2023-09-07T13:44:16Z","title":"Artificial neural network states for non-additive systems","department":[{"_id":"MiLe"}],"article_processing_charge":"No","external_id":{"arxiv":["2105.15193"]},"author":[{"last_name":"Rzadkowski","full_name":"Rzadkowski, Wojciech","orcid":"0000-0002-1106-4419","first_name":"Wojciech","id":"48C55298-F248-11E8-B48F-1D18A9856A87"},{"id":"37CB05FA-F248-11E8-B48F-1D18A9856A87","first_name":"Mikhail","orcid":"0000-0002-6990-7802","full_name":"Lemeshko, Mikhail","last_name":"Lemeshko"},{"first_name":"Johan H.","last_name":"Mentink","full_name":"Mentink, Johan H."}],"acknowledgement":"We acknowledge fruitful discussions with Giacomo Bighin, Giammarco Fabiani, Areg Ghazaryan, Christoph\r\nLampert, and Artem Volosniev at various stages of this work. W.R. is a recipient of a DOC Fellowship of the\r\nAustrian Academy of Sciences and has received funding from the EU Horizon 2020 programme under the Marie\r\nSkłodowska-Curie Grant Agreement No. 665385. M. L. acknowledges support by the European Research Council (ERC) Starting Grant No. 801770 (ANGULON). This work is part of the Shell-NWO/FOM-initiative “Computational sciences for energy research” of Shell and Chemical Sciences, Earth and Life Sciences, Physical Sciences, FOM and STW.","oa_version":"Preprint","abstract":[{"text":"Methods inspired from machine learning have recently attracted great interest in the computational study of quantum many-particle systems. So far, however, it has proven challenging to deal with microscopic models in which the total number of particles is not conserved. To address this issue, we propose a new variant of neural network states, which we term neural coherent states. Taking the Fröhlich impurity model as a case study, we show that neural coherent states can learn the ground state of non-additive systems very well. In particular, we observe substantial improvement over the standard coherent state estimates in the most challenging intermediate coupling regime. Our approach is generic and does not assume specific details of the system, suggesting wide applications.","lang":"eng"}],"month":"05","oa":1,"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2105.15193"}],"publication":"arXiv","language":[{"iso":"eng"}],"day":"31","publication_status":"submitted","year":"2021","ec_funded":1,"date_created":"2022-02-17T11:18:57Z","date_published":"2021-05-31T00:00:00Z","doi":"10.48550/arXiv.2105.15193","related_material":{"record":[{"relation":"dissertation_contains","id":"10759","status":"public"}]},"page":"2105.15193"},{"related_material":{"record":[{"id":"7435","status":"deleted","relation":"part_of_dissertation"},{"status":"public","id":"7481","relation":"part_of_dissertation"},{"relation":"part_of_dissertation","id":"9416","status":"public"},{"status":"public","id":"7479","relation":"part_of_dissertation"}]},"degree_awarded":"PhD","publication_status":"published","publication_identifier":{"issn":["2663-337X"]},"language":[{"iso":"eng"}],"file":[{"date_updated":"2021-05-24T11:22:29Z","file_size":2673905,"creator":"bphuong","date_created":"2021-05-24T11:22:29Z","file_name":"mph-thesis-v519-pdfimages.pdf","content_type":"application/pdf","access_level":"open_access","relation":"main_file","checksum":"4f0abe64114cfed264f9d36e8d1197e3","file_id":"9419","success":1},{"date_updated":"2021-05-24T11:56:02Z","file_size":92995100,"creator":"bphuong","date_created":"2021-05-24T11:56:02Z","file_name":"thesis.zip","content_type":"application/zip","access_level":"closed","relation":"source_file","checksum":"f5699e876bc770a9b0df8345a77720a2","file_id":"9420"}],"alternative_title":["ISTA Thesis"],"month":"05","abstract":[{"text":"Deep learning is best known for its empirical success across a wide range of applications\r\nspanning computer vision, natural language processing and speech. Of equal significance,\r\nthough perhaps less known, are its ramifications for learning theory: deep networks have\r\nbeen observed to perform surprisingly well in the high-capacity regime, aka the overfitting\r\nor underspecified regime. Classically, this regime on the far right of the bias-variance curve\r\nis associated with poor generalisation; however, recent experiments with deep networks\r\nchallenge this view.\r\n\r\nThis thesis is devoted to investigating various aspects of underspecification in deep learning.\r\nFirst, we argue that deep learning models are underspecified on two levels: a) any given\r\ntraining dataset can be fit by many different functions, and b) any given function can be\r\nexpressed by many different parameter configurations. We refer to the second kind of\r\nunderspecification as parameterisation redundancy and we precisely characterise its extent.\r\nSecond, we characterise the implicit criteria (the inductive bias) that guide learning in the\r\nunderspecified regime. Specifically, we consider a nonlinear but tractable classification\r\nsetting, and show that given the choice, neural networks learn classifiers with a large margin.\r\nThird, we consider learning scenarios where the inductive bias is not by itself sufficient to\r\ndeal with underspecification. We then study different ways of ‘tightening the specification’: i)\r\nIn the setting of representation learning with variational autoencoders, we propose a hand-\r\ncrafted regulariser based on mutual information. ii) In the setting of binary classification, we\r\nconsider soft-label (real-valued) supervision. We derive a generalisation bound for linear\r\nnetworks supervised in this way and verify that soft labels facilitate fast learning. Finally, we\r\nexplore an application of soft-label supervision to the training of multi-exit models.","lang":"eng"}],"acknowledged_ssus":[{"_id":"ScienComp"},{"_id":"CampIT"},{"_id":"E-Lib"}],"oa_version":"Published Version","file_date_updated":"2021-05-24T11:56:02Z","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"date_updated":"2023-09-08T11:11:12Z","supervisor":[{"first_name":"Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887"}],"ddc":["000"],"type":"dissertation","status":"public","_id":"9418","page":"125","date_created":"2021-05-24T13:06:23Z","date_published":"2021-05-30T00:00:00Z","doi":"10.15479/AT:ISTA:9418","year":"2021","has_accepted_license":"1","day":"30","oa":1,"publisher":"Institute of Science and Technology Austria","article_processing_charge":"No","author":[{"first_name":"Phuong","id":"3EC6EE64-F248-11E8-B48F-1D18A9856A87","last_name":"Bui Thi Mai","full_name":"Bui Thi Mai, Phuong"}],"title":"Underspecification in deep learning","citation":{"short":"M. Phuong, Underspecification in Deep Learning, Institute of Science and Technology Austria, 2021.","ieee":"M. Phuong, “Underspecification in deep learning,” Institute of Science and Technology Austria, 2021.","ama":"Phuong M. Underspecification in deep learning. 2021. doi:10.15479/AT:ISTA:9418","apa":"Phuong, M. (2021). Underspecification in deep learning. Institute of Science and Technology Austria. https://doi.org/10.15479/AT:ISTA:9418","mla":"Phuong, Mary. Underspecification in Deep Learning. Institute of Science and Technology Austria, 2021, doi:10.15479/AT:ISTA:9418.","ista":"Phuong M. 2021. Underspecification in deep learning. Institute of Science and Technology Austria.","chicago":"Phuong, Mary. “Underspecification in Deep Learning.” Institute of Science and Technology Austria, 2021. https://doi.org/10.15479/AT:ISTA:9418."},"user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1"},{"volume":139,"publication_status":"published","language":[{"iso":"eng"}],"main_file_link":[{"url":"https://arxiv.org/abs/2006.07886","open_access":"1"}],"alternative_title":["PMLR"],"scopus_import":"1","intvolume":" 139","month":"08","abstract":[{"text":"The focus of disentanglement approaches has been on identifying independent factors of variation in data. However, the causal variables underlying real-world observations are often not statistically independent. In this work, we bridge the gap to real-world scenarios by analyzing the behavior of the most prominent disentanglement approaches on correlated data in a large-scale empirical study (including 4260 models). We show and quantify that systematically induced correlations in the dataset are being learned and reflected in the latent representations, which has implications for downstream applications of disentanglement such as fairness. We also demonstrate how to resolve these latent correlations, either using weak supervision during\r\ntraining or by post-hoc correcting a pre-trained model with a small number of labels.","lang":"eng"}],"oa_version":"Published Version","department":[{"_id":"FrLo"}],"date_updated":"2023-09-11T10:18:48Z","extern":"1","conference":{"name":"ICML: International Conference on Machine Learning","start_date":"2021-07-18","location":"Virtual","end_date":"2021-07-24"},"type":"conference","status":"public","_id":"14177","page":"10401-10412","date_created":"2023-08-22T14:03:47Z","date_published":"2021-08-01T00:00:00Z","year":"2021","publication":"Proceedings of the 38th International Conference on Machine Learning","day":"01","oa":1,"quality_controlled":"1","publisher":"ML Research Press","article_processing_charge":"No","external_id":{"arxiv":["2006.07886"]},"author":[{"full_name":"Träuble, Frederik","last_name":"Träuble","first_name":"Frederik"},{"full_name":"Creager, Elliot","last_name":"Creager","first_name":"Elliot"},{"first_name":"Niki","last_name":"Kilbertus","full_name":"Kilbertus, Niki"},{"id":"26cfd52f-2483-11ee-8040-88983bcc06d4","first_name":"Francesco","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco","last_name":"Locatello"},{"last_name":"Dittadi","full_name":"Dittadi, Andrea","first_name":"Andrea"},{"full_name":"Goyal, Anirudh","last_name":"Goyal","first_name":"Anirudh"},{"last_name":"Schölkopf","full_name":"Schölkopf, Bernhard","first_name":"Bernhard"},{"first_name":"Stefan","full_name":"Bauer, Stefan","last_name":"Bauer"}],"title":"On disentangled representations learned from correlated data","citation":{"short":"F. Träuble, E. Creager, N. Kilbertus, F. Locatello, A. Dittadi, A. Goyal, B. Schölkopf, S. Bauer, in:, Proceedings of the 38th International Conference on Machine Learning, ML Research Press, 2021, pp. 10401–10412.","ieee":"F. Träuble et al., “On disentangled representations learned from correlated data,” in Proceedings of the 38th International Conference on Machine Learning, Virtual, 2021, vol. 139, pp. 10401–10412.","ama":"Träuble F, Creager E, Kilbertus N, et al. On disentangled representations learned from correlated data. In: Proceedings of the 38th International Conference on Machine Learning. Vol 139. ML Research Press; 2021:10401-10412.","apa":"Träuble, F., Creager, E., Kilbertus, N., Locatello, F., Dittadi, A., Goyal, A., … Bauer, S. (2021). On disentangled representations learned from correlated data. In Proceedings of the 38th International Conference on Machine Learning (Vol. 139, pp. 10401–10412). Virtual: ML Research Press.","mla":"Träuble, Frederik, et al. “On Disentangled Representations Learned from Correlated Data.” Proceedings of the 38th International Conference on Machine Learning, vol. 139, ML Research Press, 2021, pp. 10401–12.","ista":"Träuble F, Creager E, Kilbertus N, Locatello F, Dittadi A, Goyal A, Schölkopf B, Bauer S. 2021. On disentangled representations learned from correlated data. Proceedings of the 38th International Conference on Machine Learning. ICML: International Conference on Machine Learning, PMLR, vol. 139, 10401–10412.","chicago":"Träuble, Frederik, Elliot Creager, Niki Kilbertus, Francesco Locatello, Andrea Dittadi, Anirudh Goyal, Bernhard Schölkopf, and Stefan Bauer. “On Disentangled Representations Learned from Correlated Data.” In Proceedings of the 38th International Conference on Machine Learning, 139:10401–12. ML Research Press, 2021."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"page":"11964-11974","date_created":"2023-08-22T14:03:04Z","date_published":"2021-08-01T00:00:00Z","year":"2021","publication":"Proceedings of 38th International Conference on Machine Learning","day":"01","oa":1,"quality_controlled":"1","publisher":"ML Research Press","article_processing_charge":"No","external_id":{"arxiv":["2106.05142"]},"author":[{"first_name":"Hugo","last_name":"Yèche","full_name":"Yèche, Hugo"},{"full_name":"Dresdner, Gideon","last_name":"Dresdner","first_name":"Gideon"},{"last_name":"Locatello","full_name":"Locatello, Francesco","orcid":"0000-0002-4850-0683","first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4"},{"first_name":"Matthias","full_name":"Hüser, Matthias","last_name":"Hüser"},{"first_name":"Gunnar","full_name":"Rätsch, Gunnar","last_name":"Rätsch"}],"title":"Neighborhood contrastive learning applied to online patient monitoring","citation":{"chicago":"Yèche, Hugo, Gideon Dresdner, Francesco Locatello, Matthias Hüser, and Gunnar Rätsch. “Neighborhood Contrastive Learning Applied to Online Patient Monitoring.” In Proceedings of 38th International Conference on Machine Learning, 139:11964–74. ML Research Press, 2021.","ista":"Yèche H, Dresdner G, Locatello F, Hüser M, Rätsch G. 2021. Neighborhood contrastive learning applied to online patient monitoring. Proceedings of 38th International Conference on Machine Learning. International Conference on Machine Learning, PMLR, vol. 139, 11964–11974.","mla":"Yèche, Hugo, et al. “Neighborhood Contrastive Learning Applied to Online Patient Monitoring.” Proceedings of 38th International Conference on Machine Learning, vol. 139, ML Research Press, 2021, pp. 11964–74.","apa":"Yèche, H., Dresdner, G., Locatello, F., Hüser, M., & Rätsch, G. (2021). Neighborhood contrastive learning applied to online patient monitoring. In Proceedings of 38th International Conference on Machine Learning (Vol. 139, pp. 11964–11974). Virtual: ML Research Press.","ama":"Yèche H, Dresdner G, Locatello F, Hüser M, Rätsch G. Neighborhood contrastive learning applied to online patient monitoring. In: Proceedings of 38th International Conference on Machine Learning. Vol 139. ML Research Press; 2021:11964-11974.","ieee":"H. Yèche, G. Dresdner, F. Locatello, M. Hüser, and G. Rätsch, “Neighborhood contrastive learning applied to online patient monitoring,” in Proceedings of 38th International Conference on Machine Learning, Virtual, 2021, vol. 139, pp. 11964–11974.","short":"H. Yèche, G. Dresdner, F. Locatello, M. Hüser, G. Rätsch, in:, Proceedings of 38th International Conference on Machine Learning, ML Research Press, 2021, pp. 11964–11974."},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","volume":139,"publication_status":"published","language":[{"iso":"eng"}],"main_file_link":[{"url":"https://arxiv.org/abs/2106.05142","open_access":"1"}],"alternative_title":["PMLR"],"scopus_import":"1","intvolume":" 139","month":"08","abstract":[{"text":"Intensive care units (ICU) are increasingly looking towards machine learning for methods to provide online monitoring of critically ill patients. In machine learning, online monitoring is often formulated as a supervised learning problem. Recently, contrastive learning approaches have demonstrated promising improvements over competitive supervised benchmarks. These methods rely on well-understood data augmentation techniques developed for image data which do not apply to online monitoring. In this work, we overcome this limitation by\r\nsupplementing time-series data augmentation techniques with a novel contrastive\r\nlearning objective which we call neighborhood contrastive learning (NCL). Our objective explicitly groups together contiguous time segments from each patient while maintaining state-specific information. Our experiments demonstrate a marked improvement over existing work applying contrastive methods to medical time-series.","lang":"eng"}],"oa_version":"Preprint","department":[{"_id":"FrLo"}],"date_updated":"2023-09-11T10:16:55Z","extern":"1","conference":{"name":"International Conference on Machine Learning","end_date":"2021-07-24","location":"Virtual","start_date":"2021-07-18"},"type":"conference","status":"public","_id":"14176"},{"department":[{"_id":"FrLo"}],"title":"Backward-compatible prediction updates: A probabilistic approach","author":[{"first_name":"Frederik","full_name":"Träuble, Frederik","last_name":"Träuble"},{"first_name":"Julius von","full_name":"Kügelgen, Julius von","last_name":"Kügelgen"},{"first_name":"Matthäus","full_name":"Kleindessner, Matthäus","last_name":"Kleindessner"},{"first_name":"Francesco","id":"26cfd52f-2483-11ee-8040-88983bcc06d4","last_name":"Locatello","orcid":"0000-0002-4850-0683","full_name":"Locatello, Francesco"},{"last_name":"Schölkopf","full_name":"Schölkopf, Bernhard","first_name":"Bernhard"},{"full_name":"Gehler, Peter","last_name":"Gehler","first_name":"Peter"}],"external_id":{"arxiv":["2107.01057"]},"article_processing_charge":"No","extern":"1","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","date_updated":"2023-09-11T11:31:59Z","citation":{"mla":"Träuble, Frederik, et al. “Backward-Compatible Prediction Updates: A Probabilistic Approach.” 35th Conference on Neural Information Processing Systems, vol. 34, 2021, pp. 116–28.","ama":"Träuble F, Kügelgen J von, Kleindessner M, Locatello F, Schölkopf B, Gehler P. Backward-compatible prediction updates: A probabilistic approach. In: 35th Conference on Neural Information Processing Systems. Vol 34. ; 2021:116-128.","apa":"Träuble, F., Kügelgen, J. von, Kleindessner, M., Locatello, F., Schölkopf, B., & Gehler, P. (2021). Backward-compatible prediction updates: A probabilistic approach. In 35th Conference on Neural Information Processing Systems (Vol. 34, pp. 116–128). Virtual.","short":"F. Träuble, J. von Kügelgen, M. Kleindessner, F. Locatello, B. Schölkopf, P. Gehler, in:, 35th Conference on Neural Information Processing Systems, 2021, pp. 116–128.","ieee":"F. Träuble, J. von Kügelgen, M. Kleindessner, F. Locatello, B. Schölkopf, and P. Gehler, “Backward-compatible prediction updates: A probabilistic approach,” in 35th Conference on Neural Information Processing Systems, Virtual, 2021, vol. 34, pp. 116–128.","chicago":"Träuble, Frederik, Julius von Kügelgen, Matthäus Kleindessner, Francesco Locatello, Bernhard Schölkopf, and Peter Gehler. “Backward-Compatible Prediction Updates: A Probabilistic Approach.” In 35th Conference on Neural Information Processing Systems, 34:116–28, 2021.","ista":"Träuble F, Kügelgen J von, Kleindessner M, Locatello F, Schölkopf B, Gehler P. 2021. Backward-compatible prediction updates: A probabilistic approach. 35th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 34, 116–128."},"status":"public","type":"conference","conference":{"name":"NeurIPS: Neural Information Processing Systems","start_date":"2021-12-07","end_date":"2021-12-10","location":"Virtual"},"_id":"14182","date_published":"2021-07-02T00:00:00Z","volume":34,"date_created":"2023-08-22T14:05:41Z","page":"116-128","day":"02","publication":"35th Conference on Neural Information Processing Systems","language":[{"iso":"eng"}],"publication_identifier":{"isbn":["9781713845393"]},"publication_status":"published","year":"2021","month":"07","intvolume":" 34","quality_controlled":"1","oa":1,"main_file_link":[{"url":"https://arxiv.org/abs/2107.01057","open_access":"1"}],"oa_version":"Preprint","abstract":[{"text":"When machine learning systems meet real world applications, accuracy is only\r\none of several requirements. In this paper, we assay a complementary\r\nperspective originating from the increasing availability of pre-trained and\r\nregularly improving state-of-the-art models. While new improved models develop\r\nat a fast pace, downstream tasks vary more slowly or stay constant. Assume that\r\nwe have a large unlabelled data set for which we want to maintain accurate\r\npredictions. Whenever a new and presumably better ML models becomes available,\r\nwe encounter two problems: (i) given a limited budget, which data points should\r\nbe re-evaluated using the new model?; and (ii) if the new predictions differ\r\nfrom the current ones, should we update? Problem (i) is about compute cost,\r\nwhich matters for very large data sets and models. Problem (ii) is about\r\nmaintaining consistency of the predictions, which can be highly relevant for\r\ndownstream applications; our demand is to avoid negative flips, i.e., changing\r\ncorrect to incorrect predictions. In this paper, we formalize the Prediction\r\nUpdate Problem and present an efficient probabilistic approach as answer to the\r\nabove questions. In extensive experiments on standard classification benchmark\r\ndata sets, we show that our method outperforms alternative strategies along key\r\nmetrics for backward-compatible prediction updates.","lang":"eng"}]}]