[{"abstract":[{"text":"Deep neural networks (DNNs) often have to be compressed, via pruning and/or quantization, before they can be deployed in practical settings. In this work we propose a new compression-aware minimizer dubbed CrAM that modifies the optimization step in a principled way, in order to produce models whose local loss behavior is stable under compression operations such as pruning. Thus, dense models trained via CrAM should be compressible post-training, in a single step, without significant accuracy loss. Experimental results on standard benchmarks, such as residual networks for ImageNet classification and BERT models for language modelling, show that CrAM produces dense models that can be more accurate than the standard SGD/Adam-based baselines, but which are stable under weight pruning: specifically, we can prune models in one-shot to 70-80% sparsity with almost no accuracy loss, and to 90% with reasonable (∼1%) accuracy loss, which is competitive with gradual compression methods. Additionally, CrAM can produce sparse models which perform well for transfer learning, and it also works for semi-structured 2:4 pruning patterns supported by GPU hardware. 
The code for reproducing the results is available at this https URL .","lang":"eng"}],"ec_funded":1,"type":"conference","author":[{"last_name":"Peste","first_name":"Elena-Alexandra","id":"32D78294-F248-11E8-B48F-1D18A9856A87","full_name":"Peste, Elena-Alexandra"},{"first_name":"Adrian","last_name":"Vladu","full_name":"Vladu, Adrian"},{"full_name":"Kurtic, Eldar","last_name":"Kurtic","first_name":"Eldar","id":"47beb3a5-07b5-11eb-9b87-b108ec578218"},{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"},{"full_name":"Alistarh, Dan-Adrian","last_name":"Alistarh","first_name":"Dan-Adrian","orcid":"0000-0003-3650-940X","id":"4A899BFC-F248-11E8-B48F-1D18A9856A87"}],"related_material":{"record":[{"status":"public","relation":"dissertation_contains","id":"13074"}]},"date_updated":"2023-06-01T12:54:45Z","date_created":"2023-05-23T11:36:18Z","oa_version":"Preprint","_id":"13053","year":"2023","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","acknowledgement":"AP, EK, DA received funding from the European Research Council (ERC) under the European\r\nUnion’s Horizon 2020 research and innovation programme (grant agreement No 805223 ScaleML). AV acknowledges the support of the French Agence Nationale de la Recherche (ANR), under grant ANR-21-CE48-0016 (project COMCOPT). 
We further acknowledge the support from the Scientific Service Units (SSU) of ISTA through resources provided by Scientific Computing (SciComp).","publication_status":"accepted","title":"CrAM: A Compression-Aware Minimizer","status":"public","department":[{"_id":"GradSch"},{"_id":"DaAl"},{"_id":"ChLa"}],"month":"05","article_processing_charge":"No","conference":{"start_date":"2023-05-01","location":"Kigali, Rwanda ","end_date":"2023-05-05","name":"ICLR: International Conference on Learning Representations"},"date_published":"2023-05-01T00:00:00Z","acknowledged_ssus":[{"_id":"ScienComp"}],"language":[{"iso":"eng"}],"publication":"11th International Conference on Learning Representations ","external_id":{"arxiv":["2207.14200"]},"main_file_link":[{"open_access":"1","url":"https://openreview.net/pdf?id=_eTZBs-yedr"}],"oa":1,"citation":{"mla":"Peste, Elena-Alexandra, et al. “CrAM: A Compression-Aware Minimizer.” 11th International Conference on Learning Representations .","short":"E.-A. Peste, A. Vladu, E. Kurtic, C. Lampert, D.-A. Alistarh, in:, 11th International Conference on Learning Representations , n.d.","chicago":"Peste, Elena-Alexandra, Adrian Vladu, Eldar Kurtic, Christoph Lampert, and Dan-Adrian Alistarh. “CrAM: A Compression-Aware Minimizer.” In 11th International Conference on Learning Representations , n.d.","ama":"Peste E-A, Vladu A, Kurtic E, Lampert C, Alistarh D-A. CrAM: A Compression-Aware Minimizer. In: 11th International Conference on Learning Representations .","ista":"Peste E-A, Vladu A, Kurtic E, Lampert C, Alistarh D-A. CrAM: A Compression-Aware Minimizer. 11th International Conference on Learning Representations . ICLR: International Conference on Learning Representations.","apa":"Peste, E.-A., Vladu, A., Kurtic, E., Lampert, C., & Alistarh, D.-A. (n.d.). CrAM: A Compression-Aware Minimizer. In 11th International Conference on Learning Representations . Kigali, Rwanda .","ieee":"E.-A. Peste, A. Vladu, E. Kurtic, C. Lampert, and D.-A. 
Alistarh, “CrAM: A Compression-Aware Minimizer,” in 11th International Conference on Learning Representations , Kigali, Rwanda ."},"quality_controlled":"1","project":[{"name":"Elastic Coordination for Scalable Machine Learning","call_identifier":"H2020","grant_number":"805223","_id":"268A44D6-B435-11E9-9278-68D0E5697425"}]},{"publisher":"Springer Nature","department":[{"_id":"ChLa"}],"intvolume":" 14068","title":"On the implementation of baselines and lightweight conditional model extrapolation (LIMES) under class-prior shift","status":"public","publication_status":"published","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"14410","year":"2023","oa_version":"None","volume":14068,"date_updated":"2023-10-09T06:48:02Z","date_created":"2023-10-08T22:01:18Z","author":[{"last_name":"Tomaszewska","first_name":"Paulina","full_name":"Tomaszewska, Paulina"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"alternative_title":["LNCS"],"type":"conference","abstract":[{"text":"This paper focuses on the implementation details of the baseline methods and a recent lightweight conditional model extrapolation algorithm LIMES [5] for streaming data under class-prior shift. LIMES achieves superior performance over the baseline methods, especially concerning the minimum-across-day accuracy, which is important for the users of the system. In this work, the key measures to facilitate reproducibility and enhance the credibility of the results are described.","lang":"eng"}],"page":"67-73","quality_controlled":"1","citation":{"ama":"Tomaszewska P, Lampert C. On the implementation of baselines and lightweight conditional model extrapolation (LIMES) under class-prior shift. In: International Workshop on Reproducible Research in Pattern Recognition. Vol 14068. Springer Nature; 2023:67-73. doi:10.1007/978-3-031-40773-4_6","apa":"Tomaszewska, P., & Lampert, C. (2023). 
On the implementation of baselines and lightweight conditional model extrapolation (LIMES) under class-prior shift. In International Workshop on Reproducible Research in Pattern Recognition (Vol. 14068, pp. 67–73). Montreal, Canada: Springer Nature. https://doi.org/10.1007/978-3-031-40773-4_6","ieee":"P. Tomaszewska and C. Lampert, “On the implementation of baselines and lightweight conditional model extrapolation (LIMES) under class-prior shift,” in International Workshop on Reproducible Research in Pattern Recognition, Montreal, Canada, 2023, vol. 14068, pp. 67–73.","ista":"Tomaszewska P, Lampert C. 2023. On the implementation of baselines and lightweight conditional model extrapolation (LIMES) under class-prior shift. International Workshop on Reproducible Research in Pattern Recognition. RRPR: Reproducible Research in Pattern Recognition, LNCS, vol. 14068, 67–73.","short":"P. Tomaszewska, C. Lampert, in:, International Workshop on Reproducible Research in Pattern Recognition, Springer Nature, 2023, pp. 67–73.","mla":"Tomaszewska, Paulina, and Christoph Lampert. “On the Implementation of Baselines and Lightweight Conditional Model Extrapolation (LIMES) under Class-Prior Shift.” International Workshop on Reproducible Research in Pattern Recognition, vol. 14068, Springer Nature, 2023, pp. 67–73, doi:10.1007/978-3-031-40773-4_6.","chicago":"Tomaszewska, Paulina, and Christoph Lampert. “On the Implementation of Baselines and Lightweight Conditional Model Extrapolation (LIMES) under Class-Prior Shift.” In International Workshop on Reproducible Research in Pattern Recognition, 14068:67–73. Springer Nature, 2023. 
https://doi.org/10.1007/978-3-031-40773-4_6."},"publication":"International Workshop on Reproducible Research in Pattern Recognition","language":[{"iso":"eng"}],"doi":"10.1007/978-3-031-40773-4_6","date_published":"2023-08-20T00:00:00Z","conference":{"name":"RRPR: Reproducible Research in Pattern Recognition","end_date":"2022-08-21","location":"Montreal, Canada","start_date":"2022-08-21"},"scopus_import":"1","publication_identifier":{"issn":["0302-9743"],"isbn":["9783031407727"],"eissn":["1611-3349"]},"article_processing_charge":"No","month":"08","day":"20"},{"month":"12","day":"15","article_processing_charge":"No","quality_controlled":"1","project":[{"_id":"059876FA-7A3F-11EA-A408-12923DDC885E","name":"Prix Lopez-Loretta 2019 - Marco Mondelli"}],"publication":"37th Annual Conference on Neural Information Processing Systems","oa":1,"main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2305.13165"}],"citation":{"ama":"Súkeník P, Mondelli M, Lampert C. Deep neural collapse is provably optimal for the deep unconstrained features model. In: 37th Annual Conference on Neural Information Processing Systems.","ista":"Súkeník P, Mondelli M, Lampert C. Deep neural collapse is provably optimal for the deep unconstrained features model. 37th Annual Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems, NeurIPS, .","apa":"Súkeník, P., Mondelli, M., & Lampert, C. (n.d.). Deep neural collapse is provably optimal for the deep unconstrained features model. In 37th Annual Conference on Neural Information Processing Systems. New Orleans, LA, United States.","ieee":"P. Súkeník, M. Mondelli, and C. Lampert, “Deep neural collapse is provably optimal for the deep unconstrained features model,” in 37th Annual Conference on Neural Information Processing Systems, New Orleans, LA, United States.","mla":"Súkeník, Peter, et al. 
“Deep Neural Collapse Is Provably Optimal for the Deep Unconstrained Features Model.” 37th Annual Conference on Neural Information Processing Systems.","short":"P. Súkeník, M. Mondelli, C. Lampert, in:, 37th Annual Conference on Neural Information Processing Systems, n.d.","chicago":"Súkeník, Peter, Marco Mondelli, and Christoph Lampert. “Deep Neural Collapse Is Provably Optimal for the Deep Unconstrained Features Model.” In 37th Annual Conference on Neural Information Processing Systems, n.d."},"external_id":{"arxiv":["2305.13165"]},"language":[{"iso":"eng"}],"conference":{"location":"New Orleans, LA, United States","start_date":"2023-12-10","end_date":"2023-12-16","name":"NeurIPS: Neural Information Processing Systems"},"date_published":"2023-12-15T00:00:00Z","alternative_title":["NeurIPS"],"type":"conference","abstract":[{"text":"Neural collapse (NC) refers to the surprising structure of the last layer of deep neural networks in the terminal phase of gradient descent training. Recently, an increasing amount of experimental evidence has pointed to the propagation of NC to earlier layers of neural networks. However, while the NC in the last layer is well studied theoretically, much less is known about its multi-layered counterpart - deep neural collapse (DNC). In particular, existing work focuses either on linear layers or only on the last two layers at the price of an extra assumption. Our paper fills this gap by generalizing the established analytical framework for NC - the unconstrained features model - to multiple non-linear layers. Our key technical contribution is to show that, in a deep unconstrained features model, the unique global optimum for binary classification exhibits all the properties typical of DNC. This explains the existing experimental evidence of DNC. 
We also empirically show that (i) by optimizing deep unconstrained features models via gradient descent, the resulting solution agrees well with our theory, and (ii) trained networks recover the unconstrained features suitable for the occurrence of DNC, thus supporting the validity of this modeling principle.","lang":"eng"}],"status":"public","title":"Deep neural collapse is provably optimal for the deep unconstrained features model","publication_status":"inpress","department":[{"_id":"MaMo"},{"_id":"ChLa"}],"acknowledgement":"M. M. is partially supported by the 2019 Lopez-Loreta Prize. The authors would like to thank Eugenia Iofinova, Bernd Prach and Simone Bombari for valuable feedback on the manuscript.","_id":"14921","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2023","date_updated":"2024-02-06T07:53:26Z","date_created":"2024-02-02T11:17:41Z","oa_version":"Preprint","author":[{"first_name":"Peter","last_name":"Súkeník","id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","full_name":"Súkeník, Peter"},{"full_name":"Mondelli, Marco","id":"27EB676C-8706-11E9-9510-7717E6697425","orcid":"0000-0002-3242-7020","first_name":"Marco","last_name":"Mondelli"},{"full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887"}]},{"language":[{"iso":"eng"}],"date_published":"2023-11-10T00:00:00Z","doi":"10.48550/ARXIV.2311.06103","publication":"arXiv","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2311.06103"}],"external_id":{"arxiv":["2311.06103"]},"citation":{"chicago":"Prach, Bernd, and Christoph Lampert. “1-Lipschitz Neural Networks Are More Expressive with N-Activations.” ArXiv, n.d. https://doi.org/10.48550/ARXIV.2311.06103.","short":"B. Prach, C. Lampert, ArXiv (n.d.).","mla":"Prach, Bernd, and Christoph Lampert. “1-Lipschitz Neural Networks Are More Expressive with N-Activations.” ArXiv, 2311.06103, doi:10.48550/ARXIV.2311.06103.","ieee":"B. 
Prach and C. Lampert, “1-Lipschitz neural networks are more expressive with N-activations,” arXiv. .","apa":"Prach, B., & Lampert, C. (n.d.). 1-Lipschitz neural networks are more expressive with N-activations. arXiv. https://doi.org/10.48550/ARXIV.2311.06103","ista":"Prach B, Lampert C. 1-Lipschitz neural networks are more expressive with N-activations. arXiv, 2311.06103.","ama":"Prach B, Lampert C. 1-Lipschitz neural networks are more expressive with N-activations. arXiv. doi:10.48550/ARXIV.2311.06103"},"oa":1,"month":"11","day":"10","article_processing_charge":"No","date_created":"2024-02-28T17:59:32Z","date_updated":"2024-03-04T07:02:39Z","oa_version":"Preprint","author":[{"full_name":"Prach, Bernd","id":"2D561D42-C427-11E9-89B4-9C1AE6697425","last_name":"Prach","first_name":"Bernd"},{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"}],"publication_status":"submitted","status":"public","title":"1-Lipschitz neural networks are more expressive with N-activations","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"15039","year":"2023","abstract":[{"text":"A crucial property for achieving secure, trustworthy and interpretable deep learning systems is their robustness: small changes to a system's inputs should not result in large changes to its outputs. Mathematically, this means one strives for networks with a small Lipschitz constant. Several recent works have focused on how to construct such Lipschitz networks, typically by imposing constraints on the weight matrices. In this work, we study an orthogonal aspect, namely the role of the activation function. We show that commonly used activation functions, such as MaxMin, as well as all piece-wise linear ones with two segments unnecessarily restrict the class of representable functions, even in the simplest one-dimensional setting. 
We furthermore introduce the new N-activation function that is provably more expressive than currently popular activation functions. We provide code at this https URL.","lang":"eng"}],"article_number":"2311.06103","type":"preprint"},{"month":"10","day":"12","article_processing_charge":"No","has_accepted_license":"1","language":[{"iso":"eng"}],"doi":"10.48550/arXiv.2210.06434","date_published":"2022-10-12T00:00:00Z","publication":"arXiv","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"oa":1,"citation":{"chicago":"Scott, Jonathan A, Michelle X Yeo, and Christoph Lampert. “Cross-Client Label Propagation for Transductive Federated Learning.” ArXiv, n.d. https://doi.org/10.48550/arXiv.2210.06434.","short":"J.A. Scott, M.X. Yeo, C. Lampert, ArXiv (n.d.).","mla":"Scott, Jonathan A., et al. “Cross-Client Label Propagation for Transductive Federated Learning.” ArXiv, 2210.06434, doi:10.48550/arXiv.2210.06434.","apa":"Scott, J. A., Yeo, M. X., & Lampert, C. (n.d.). Cross-client Label Propagation for transductive federated learning. arXiv. https://doi.org/10.48550/arXiv.2210.06434","ieee":"J. A. Scott, M. X. Yeo, and C. Lampert, “Cross-client Label Propagation for transductive federated learning,” arXiv. .","ista":"Scott JA, Yeo MX, Lampert C. Cross-client Label Propagation for transductive federated learning. arXiv, 2210.06434.","ama":"Scott JA, Yeo MX, Lampert C. Cross-client Label Propagation for transductive federated learning. arXiv. doi:10.48550/arXiv.2210.06434"},"external_id":{"arxiv":["2210.06434"]},"file_date_updated":"2023-02-20T08:21:35Z","abstract":[{"text":"We present Cross-Client Label Propagation (XCLP), a new method for transductive federated learning. 
XCLP estimates a data graph jointly from the data of multiple clients and computes labels for the unlabeled data by propagating label information across the graph. To avoid clients having to share their data with anyone, XCLP employs two cryptographically secure protocols: secure Hamming distance computation and secure summation. We demonstrate two distinct applications of XCLP within federated learning. In the first, we use it in a one-shot way to predict labels for unseen test points. In the second, we use it to repeatedly pseudo-label unlabeled training data in a federated semi-supervised setting. Experiments on both real federated and standard benchmark datasets show that in both applications XCLP achieves higher classification accuracy than alternative approaches.","lang":"eng"}],"article_number":"2210.06434","type":"preprint","date_created":"2023-02-20T08:21:50Z","date_updated":"2023-02-21T08:20:18Z","file":[{"file_name":"2210.06434.pdf","access_level":"open_access","file_size":291893,"content_type":"application/pdf","creator":"chl","relation":"main_file","file_id":"12661","date_created":"2023-02-20T08:21:35Z","date_updated":"2023-02-20T08:21:35Z","checksum":"7ab20543fd4393f14fb857ce2e4f03c6","success":1}],"oa_version":"Preprint","author":[{"first_name":"Jonathan A","last_name":"Scott","id":"e499926b-f6e0-11ea-865d-9c63db0031e8","full_name":"Scott, Jonathan A"},{"full_name":"Yeo, Michelle X","last_name":"Yeo","first_name":"Michelle X","id":"2D82B818-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"}],"publication_status":"submitted","title":"Cross-client Label Propagation for transductive federated 
learning","status":"public","ddc":["004"],"department":[{"_id":"ChLa"}],"year":"2022","_id":"12660","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"author":[{"last_name":"Súkeník","first_name":"Peter","id":"d64d6a8d-eb8e-11eb-b029-96fd216dec3c","full_name":"Súkeník, Peter"},{"full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887"}],"date_created":"2023-02-20T08:23:06Z","date_updated":"2023-02-21T08:24:55Z","oa_version":"Preprint","year":"2022","_id":"12662","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","title":"Generalization in Multi-objective machine learning","status":"public","ddc":["004"],"publication_status":"submitted","department":[{"_id":"ChLa"}],"abstract":[{"text":"Modern machine learning tasks often require considering not just one but multiple objectives. For example, besides the prediction quality, this could be the efficiency, robustness or fairness of the learned models, or any of their combinations. Multi-objective learning offers a natural framework for handling such problems without having to commit to early trade-offs. Surprisingly, statistical learning theory so far offers almost no insight into the generalization properties of multi-objective learning. In this work, we make first steps to fill this gap: we establish foundational generalization bounds for the multi-objective setting as well as generalization and excess bounds for learning with scalarizations. We also provide the first theoretical analysis of the relation between the Pareto-optimal sets of the true objectives and the Pareto-optimal sets of their empirical approximations from training data. 
In particular, we show a surprising asymmetry: all Pareto-optimal solutions can be approximated by empirically Pareto-optimal ones, but not vice versa.","lang":"eng"}],"article_number":"2208.13499","type":"preprint","doi":"10.48550/arXiv.2208.13499","date_published":"2022-08-29T00:00:00Z","language":[{"iso":"eng"}],"publication":"arXiv","external_id":{"arxiv":["2208.13499"]},"citation":{"ama":"Súkeník P, Lampert C. Generalization in Multi-objective machine learning. arXiv. doi:10.48550/arXiv.2208.13499","apa":"Súkeník, P., & Lampert, C. (n.d.). Generalization in Multi-objective machine learning. arXiv. https://doi.org/10.48550/arXiv.2208.13499","ieee":"P. Súkeník and C. Lampert, “Generalization in Multi-objective machine learning,” arXiv. .","ista":"Súkeník P, Lampert C. Generalization in Multi-objective machine learning. arXiv, 2208.13499.","short":"P. Súkeník, C. Lampert, ArXiv (n.d.).","mla":"Súkeník, Peter, and Christoph Lampert. “Generalization in Multi-Objective Machine Learning.” ArXiv, 2208.13499, doi:10.48550/arXiv.2208.13499.","chicago":"Súkeník, Peter, and Christoph Lampert. “Generalization in Multi-Objective Machine Learning.” ArXiv, n.d. https://doi.org/10.48550/arXiv.2208.13499."},"main_file_link":[{"open_access":"1","url":" https://doi.org/10.48550/arXiv.2208.13499"}],"oa":1,"month":"08","day":"29","has_accepted_license":"1","article_processing_charge":"No"},{"file_date_updated":"2023-02-23T10:30:04Z","year":"2022","acknowledgement":"The authors would like to thank Bernd Prach, Elias Frantar, Alexandra Peste, Mahdi Nikdan, and Peter Súkeník for their helpful feedback. This research was supported by the Scientific Service Units (SSU) of IST Austria through resources provided by Scientific Computing (SciComp). This publication was made possible by an ETH AI Center postdoctoral fellowship granted to Nikola Konstantinov. Eugenia Iofinova was supported in part by the FWF DK VGSCO, grant agreement number W1260-N35. 
","department":[{"_id":"ChLa"}],"publisher":"ML Research Press","publication_status":"published","related_material":{"link":[{"url":"https://github.com/ISTAustria-CVML/FLEA","relation":"software","description":"source code"}]},"author":[{"id":"f9a17499-f6e0-11ea-865d-fdf9a3f77117","orcid":"0000-0002-7778-3221","first_name":"Eugenia B","last_name":"Iofinova","full_name":"Iofinova, Eugenia B"},{"full_name":"Konstantinov, Nikola H","last_name":"Konstantinov","first_name":"Nikola H","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"}],"date_created":"2023-02-02T20:29:57Z","date_updated":"2023-02-23T10:30:54Z","publication_identifier":{"issn":["2835-8856"]},"month":"12","tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"external_id":{"arxiv":["2106.11732"]},"oa":1,"main_file_link":[{"open_access":"1","url":"https://openreview.net/forum?id=XsPopigZXV"}],"project":[{"grant_number":" W1260-N35","_id":"9B9290DE-BA93-11EA-9121-9846C619BF3A","name":"Vienna Graduate School on Computational Optimization"}],"quality_controlled":"1","language":[{"iso":"eng"}],"acknowledged_ssus":[{"_id":"ScienComp"}],"type":"journal_article","abstract":[{"lang":"eng","text":"Fairness-aware learning aims at constructing classifiers that not only make accurate predictions, but also do not discriminate against specific groups. It is a fast-growing area of\r\nmachine learning with far-reaching societal impact. However, existing fair learning methods\r\nare vulnerable to accidental or malicious artifacts in the training data, which can cause\r\nthem to unknowingly produce unfair classifiers. 
In this work we address the problem of\r\nfair learning from unreliable training data in the robust multisource setting, where the\r\navailable training data comes from multiple sources, a fraction of which might not be representative of the true data distribution. We introduce FLEA, a filtering-based algorithm\r\nthat identifies and suppresses those data sources that would have a negative impact on\r\nfairness or accuracy if they were used for training. As such, FLEA is not a replacement of\r\nprior fairness-aware learning methods but rather an augmentation that makes any of them\r\nrobust against unreliable training data. We show the effectiveness of our approach by a\r\ndiverse range of experiments on multiple datasets. Additionally, we prove formally that\r\n–given enough data– FLEA protects the learner against corruptions as long as the fraction of\r\naffected data sources is less than half. Our source code and documentation are available at\r\nhttps://github.com/ISTAustria-CVML/FLEA."}],"_id":"12495","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","status":"public","title":"FLEA: Provably robust fair multisource learning from unreliable training data","ddc":["000"],"file":[{"relation":"main_file","file_id":"12673","date_updated":"2023-02-23T10:30:04Z","date_created":"2023-02-23T10:30:04Z","checksum":"97c8a8470759cab597abb973ca137a3b","success":1,"file_name":"2022_TMLR_Iofinova.pdf","access_level":"open_access","file_size":1948063,"content_type":"application/pdf","creator":"dernst"}],"oa_version":"Published Version","article_processing_charge":"No","has_accepted_license":"1","day":"22","citation":{"chicago":"Iofinova, Eugenia B, Nikola H Konstantinov, and Christoph Lampert. “FLEA: Provably Robust Fair Multisource Learning from Unreliable Training Data.” Transactions on Machine Learning Research. ML Research Press, 2022.","mla":"Iofinova, Eugenia B., et al. 
“FLEA: Provably Robust Fair Multisource Learning from Unreliable Training Data.” Transactions on Machine Learning Research, ML Research Press, 2022.","short":"E.B. Iofinova, N.H. Konstantinov, C. Lampert, Transactions on Machine Learning Research (2022).","ista":"Iofinova EB, Konstantinov NH, Lampert C. 2022. FLEA: Provably robust fair multisource learning from unreliable training data. Transactions on Machine Learning Research.","apa":"Iofinova, E. B., Konstantinov, N. H., & Lampert, C. (2022). FLEA: Provably robust fair multisource learning from unreliable training data. Transactions on Machine Learning Research. ML Research Press.","ieee":"E. B. Iofinova, N. H. Konstantinov, and C. Lampert, “FLEA: Provably robust fair multisource learning from unreliable training data,” Transactions on Machine Learning Research. ML Research Press, 2022.","ama":"Iofinova EB, Konstantinov NH, Lampert C. FLEA: Provably robust fair multisource learning from unreliable training data. Transactions on Machine Learning Research. 2022."},"publication":"Transactions on Machine Learning Research","article_type":"original","date_published":"2022-12-22T00:00:00Z"},{"date_published":"2022-10-23T00:00:00Z","page":"350-365","publication":"Computer Vision – ECCV 2022","citation":{"ama":"Prach B, Lampert C. Almost-orthogonal layers for efficient general-purpose Lipschitz networks. In: Computer Vision – ECCV 2022. Vol 13681. Springer Nature; 2022:350-365. doi:10.1007/978-3-031-19803-8_21","ista":"Prach B, Lampert C. 2022. Almost-orthogonal layers for efficient general-purpose Lipschitz networks. Computer Vision – ECCV 2022. ECCV: European Conference on Computer Vision, LNCS, vol. 13681, 350–365.","apa":"Prach, B., & Lampert, C. (2022). Almost-orthogonal layers for efficient general-purpose Lipschitz networks. In Computer Vision – ECCV 2022 (Vol. 13681, pp. 350–365). Tel Aviv, Israel: Springer Nature. https://doi.org/10.1007/978-3-031-19803-8_21","ieee":"B. Prach and C. 
Lampert, “Almost-orthogonal layers for efficient general-purpose Lipschitz networks,” in Computer Vision – ECCV 2022, Tel Aviv, Israel, 2022, vol. 13681, pp. 350–365.","mla":"Prach, Bernd, and Christoph Lampert. “Almost-Orthogonal Layers for Efficient General-Purpose Lipschitz Networks.” Computer Vision – ECCV 2022, vol. 13681, Springer Nature, 2022, pp. 350–65, doi:10.1007/978-3-031-19803-8_21.","short":"B. Prach, C. Lampert, in:, Computer Vision – ECCV 2022, Springer Nature, 2022, pp. 350–365.","chicago":"Prach, Bernd, and Christoph Lampert. “Almost-Orthogonal Layers for Efficient General-Purpose Lipschitz Networks.” In Computer Vision – ECCV 2022, 13681:350–65. Springer Nature, 2022. https://doi.org/10.1007/978-3-031-19803-8_21."},"day":"23","article_processing_charge":"No","scopus_import":"1","oa_version":"Preprint","title":"Almost-orthogonal layers for efficient general-purpose Lipschitz networks","status":"public","intvolume":" 13681","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"11839","abstract":[{"lang":"eng","text":"It is a highly desirable property for deep networks to be robust against\r\nsmall input changes. One popular way to achieve this property is by designing\r\nnetworks with a small Lipschitz constant. In this work, we propose a new\r\ntechnique for constructing such Lipschitz networks that has a number of\r\ndesirable properties: it can be applied to any linear network layer\r\n(fully-connected or convolutional), it provides formal guarantees on the\r\nLipschitz constant, it is easy to implement and efficient to run, and it can be\r\ncombined with any training objective and optimization method. In fact, our\r\ntechnique is the first one in the literature that achieves all of these\r\nproperties simultaneously. Our main contribution is a rescaling-based weight\r\nmatrix parametrization that guarantees each network layer to have a Lipschitz\r\nconstant of at most 1 and results in the learned weight matrices to be close to\r\northogonal. 
Hence we call such layers almost-orthogonal Lipschitz (AOL).\r\nExperiments and ablation studies in the context of image classification with\r\ncertified robust accuracy confirm that AOL layers achieve results that are on\r\npar with most existing methods. Yet, they are simpler to implement and more\r\nbroadly applicable, because they do not require computationally expensive\r\nmatrix orthogonalization or inversion steps as part of the network\r\narchitecture. We provide code at https://github.com/berndprach/AOL."}],"alternative_title":["LNCS"],"type":"conference","language":[{"iso":"eng"}],"conference":{"end_date":"2022-10-27","start_date":"2022-10-23","location":"Tel Aviv, Israel","name":"ECCV: European Conference on Computer Vision"},"doi":"10.1007/978-3-031-19803-8_21","quality_controlled":"1","external_id":{"arxiv":["2208.03160"]},"main_file_link":[{"url":" https://doi.org/10.48550/arXiv.2208.03160","open_access":"1"}],"oa":1,"month":"10","publication_identifier":{"eisbn":["9783031198038"],"isbn":["9783031198021"]},"date_created":"2022-08-12T15:09:47Z","date_updated":"2023-05-03T08:00:46Z","volume":13681,"author":[{"full_name":"Prach, Bernd","first_name":"Bernd","last_name":"Prach","id":"2D561D42-C427-11E9-89B4-9C1AE6697425"},{"full_name":"Lampert, Christoph","last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"publication_status":"published","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"publisher":"Springer Nature","year":"2022"},{"abstract":[{"lang":"eng","text":"The digitalization of almost all aspects of our everyday lives has led to unprecedented amounts of data being freely available on the Internet. In particular social media platforms provide rich sources of user-generated data, though typically in unstructured form, and with high diversity, such as written in many different languages. 
Automatically identifying meaningful information in such big data resources and extracting it efficiently is one of the ongoing challenges of our time. A common step for this is sentiment analysis, which forms the foundation for tasks such as opinion mining or trend prediction. Unfortunately, publicly available tools for this task are almost exclusively available for English-language texts. Consequently, a large fraction of the Internet users, who do not communicate in English, are ignored in automatized studies, a phenomenon called rare-language discrimination. In this work we propose a technique to overcome this problem by a truly multi-lingual model, which can be trained automatically without linguistic knowledge or even the ability to read the many target languages. The main step is to combine self-annotation, specifically the use of emoticons as a proxy for labels, with multi-lingual sentence representations. To evaluate our method we curated several large datasets from data obtained via the free Twitter streaming API. The results show that our proposed multi-lingual training is able to achieve sentiment predictions at the same quality level for rare languages as for frequent ones, and in particular clearly better than what mono-lingual training achieves on the same data. 
"}],"type":"conference","author":[{"full_name":"Lampert, Jasmin","last_name":"Lampert","first_name":"Jasmin"},{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0002-4561-241X","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"}],"oa_version":"None","date_created":"2022-02-10T14:08:23Z","date_updated":"2023-08-02T14:27:50Z","_id":"10752","year":"2022","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","publisher":"IEEE","department":[{"_id":"ChLa"}],"title":"Overcoming rare-language discrimination in multi-lingual sentiment analysis","status":"public","publication_status":"published","publication_identifier":{"isbn":["9781665439022"]},"article_processing_charge":"No","day":"13","month":"01","doi":"10.1109/bigdata52589.2021.9672003","date_published":"2022-01-13T00:00:00Z","conference":{"location":"Orlando, FL, United States","start_date":"2021-12-15","end_date":"2021-12-18","name":"Big Data: International Conference on Big Data"},"language":[{"iso":"eng"}],"external_id":{"isi":["000800559505036"]},"citation":{"ista":"Lampert J, Lampert C. 2022. Overcoming rare-language discrimination in multi-lingual sentiment analysis. 2021 IEEE International Conference on Big Data. Big Data: International Conference on Big Data, 5185–5192.","apa":"Lampert, J., & Lampert, C. (2022). Overcoming rare-language discrimination in multi-lingual sentiment analysis. In 2021 IEEE International Conference on Big Data (pp. 5185–5192). Orlando, FL, United States: IEEE. https://doi.org/10.1109/bigdata52589.2021.9672003","ieee":"J. Lampert and C. Lampert, “Overcoming rare-language discrimination in multi-lingual sentiment analysis,” in 2021 IEEE International Conference on Big Data, Orlando, FL, United States, 2022, pp. 5185–5192.","ama":"Lampert J, Lampert C. Overcoming rare-language discrimination in multi-lingual sentiment analysis. In: 2021 IEEE International Conference on Big Data. IEEE; 2022:5185-5192. 
doi:10.1109/bigdata52589.2021.9672003","chicago":"Lampert, Jasmin, and Christoph Lampert. “Overcoming Rare-Language Discrimination in Multi-Lingual Sentiment Analysis.” In 2021 IEEE International Conference on Big Data, 5185–92. IEEE, 2022. https://doi.org/10.1109/bigdata52589.2021.9672003.","mla":"Lampert, Jasmin, and Christoph Lampert. “Overcoming Rare-Language Discrimination in Multi-Lingual Sentiment Analysis.” 2021 IEEE International Conference on Big Data, IEEE, 2022, pp. 5185–92, doi:10.1109/bigdata52589.2021.9672003.","short":"J. Lampert, C. Lampert, in:, 2021 IEEE International Conference on Big Data, IEEE, 2022, pp. 5185–5192."},"publication":"2021 IEEE International Conference on Big Data","page":"5185-5192","isi":1,"quality_controlled":"1"},{"oa_version":"Preprint","title":"Lightweight conditional model extrapolation for streaming data under class-prior shift","status":"public","intvolume":" 2022","_id":"12161","user_id":"4359f0d1-fa6c-11eb-b949-802e58b17ae8","abstract":[{"lang":"eng","text":"We introduce LIMES, a new method for learning with non-stationary streaming data, inspired by the recent success of meta-learning. The main idea is not to attempt to learn a single classifier that would have to work well across all occurring data distributions, nor many separate classifiers, but to exploit a hybrid strategy: we learn a single set of model parameters from which a specific classifier for any specific data distribution is derived via classifier adaptation. Assuming a multiclass classification setting with class-prior shift, the adaptation step can be performed analytically with only the classifier’s bias terms being affected. Another contribution of our work is an extrapolation step that predicts suitable adaptation parameters for future time steps based on the previous data. 
In combination, we obtain a lightweight procedure for learning from streaming data with varying class distribution that adds no trainable parameters and almost no memory or computational overhead compared to training a single model. Experiments on a set of exemplary tasks using Twitter data show that LIMES achieves higher accuracy than alternative approaches, especially with respect to the relevant real-world metric of lowest within-day accuracy."}],"type":"conference","date_published":"2022-11-29T00:00:00Z","page":"2128-2134","publication":"26th International Conference on Pattern Recognition","citation":{"mla":"Tomaszewska, Paulina, and Christoph Lampert. “Lightweight Conditional Model Extrapolation for Streaming Data under Class-Prior Shift.” 26th International Conference on Pattern Recognition, vol. 2022, Institute of Electrical and Electronics Engineers, 2022, pp. 2128–34, doi:10.1109/icpr56361.2022.9956195.","short":"P. Tomaszewska, C. Lampert, in:, 26th International Conference on Pattern Recognition, Institute of Electrical and Electronics Engineers, 2022, pp. 2128–2134.","chicago":"Tomaszewska, Paulina, and Christoph Lampert. “Lightweight Conditional Model Extrapolation for Streaming Data under Class-Prior Shift.” In 26th International Conference on Pattern Recognition, 2022:2128–34. Institute of Electrical and Electronics Engineers, 2022. https://doi.org/10.1109/icpr56361.2022.9956195.","ama":"Tomaszewska P, Lampert C. Lightweight conditional model extrapolation for streaming data under class-prior shift. In: 26th International Conference on Pattern Recognition. Vol 2022. Institute of Electrical and Electronics Engineers; 2022:2128-2134. doi:10.1109/icpr56361.2022.9956195","ista":"Tomaszewska P, Lampert C. 2022. Lightweight conditional model extrapolation for streaming data under class-prior shift. 26th International Conference on Pattern Recognition. ICPR: International Conference on Pattern Recognition vol. 2022, 2128–2134.","ieee":"P. 
Tomaszewska and C. Lampert, “Lightweight conditional model extrapolation for streaming data under class-prior shift,” in 26th International Conference on Pattern Recognition, Montreal, Canada, 2022, vol. 2022, pp. 2128–2134.","apa":"Tomaszewska, P., & Lampert, C. (2022). Lightweight conditional model extrapolation for streaming data under class-prior shift. In 26th International Conference on Pattern Recognition (Vol. 2022, pp. 2128–2134). Montreal, Canada: Institute of Electrical and Electronics Engineers. https://doi.org/10.1109/icpr56361.2022.9956195"},"day":"29","article_processing_charge":"No","scopus_import":"1","date_updated":"2023-08-04T09:06:34Z","date_created":"2023-01-12T12:09:38Z","volume":2022,"author":[{"first_name":"Paulina","last_name":"Tomaszewska","full_name":"Tomaszewska, Paulina"},{"full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert"}],"publication_status":"published","department":[{"_id":"ChLa"}],"publisher":"Institute of Electrical and Electronics Engineers","year":"2022","language":[{"iso":"eng"}],"conference":{"start_date":"2022-08-21","location":"Montreal, Canada","end_date":"2022-08-25","name":"ICPR: International Conference on Pattern Recognition"},"doi":"10.1109/icpr56361.2022.9956195","isi":1,"quality_controlled":"1","external_id":{"arxiv":["2206.05181"],"isi":["000897707602018"]},"main_file_link":[{"url":"https://doi.org/10.48550/arXiv.2206.05181","open_access":"1"}],"oa":1,"month":"11","publication_identifier":{"eisbn":["9781665490627"],"eissn":["2831-7475"]}},{"acknowledgement":"The authors thank Eugenia Iofinova and Bernd Prach for providing feedback on early versions of this paper. 
This publication was made possible by an ETH AI Center postdoctoral fellowship to Nikola Konstantinov.","year":"2022","publication_status":"published","publisher":"ML Research Press","department":[{"_id":"ChLa"}],"author":[{"id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87","last_name":"Konstantinov","first_name":"Nikola H","full_name":"Konstantinov, Nikola H"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-4561-241X","full_name":"Lampert, Christoph"}],"related_material":{"record":[{"status":"public","relation":"dissertation_contains","id":"10799"},{"id":"13241","relation":"shorter_version","status":"public"}]},"date_updated":"2023-09-26T10:44:37Z","date_created":"2022-02-28T14:05:42Z","volume":23,"file_date_updated":"2022-07-12T15:08:28Z","external_id":{"arxiv":["2102.06004"]},"tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"oa":1,"quality_controlled":"1","language":[{"iso":"eng"}],"month":"05","publication_identifier":{"issn":["1532-4435"],"eissn":["1533-7928"]},"_id":"10802","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","status":"public","ddc":["004"],"title":"Fairness-aware PAC learning from corrupted data","intvolume":" 23","oa_version":"Published Version","file":[{"success":1,"checksum":"9cac897b54a0ddf3a553a2c33e88cfda","date_updated":"2022-07-12T15:08:28Z","date_created":"2022-07-12T15:08:28Z","file_id":"11570","relation":"main_file","creator":"kschuh","content_type":"application/pdf","file_size":551862,"access_level":"open_access","file_name":"2022_JournalMachineLearningResearch_Konstantinov.pdf"}],"type":"journal_article","abstract":[{"text":"Addressing fairness concerns about machine learning models is a crucial step towards their long-term adoption in real-world automated systems. 
While many approaches have been developed for training fair models from data, little is known about the robustness of these methods to data corruption. In this work we consider fairness-aware learning under worst-case data manipulations. We show that an adversary can in some situations force any learner to return an overly biased classifier, regardless of the sample size and with or without degrading\r\naccuracy, and that the strength of the excess bias increases for learning problems with underrepresented protected groups in the data. We also prove that our hardness results are tight up to constant factors. To this end, we study two natural learning algorithms that optimize for both accuracy and fairness and show that these algorithms enjoy guarantees that are order-optimal in terms of the corruption ratio and the protected groups frequencies in the large data\r\nlimit.","lang":"eng"}],"publication":"Journal of Machine Learning Research","citation":{"ama":"Konstantinov NH, Lampert C. Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. 2022;23:1-60.","ieee":"N. H. Konstantinov and C. Lampert, “Fairness-aware PAC learning from corrupted data,” Journal of Machine Learning Research, vol. 23. ML Research Press, pp. 1–60, 2022.","apa":"Konstantinov, N. H., & Lampert, C. (2022). Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. ML Research Press.","ista":"Konstantinov NH, Lampert C. 2022. Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. 23, 1–60.","short":"N.H. Konstantinov, C. Lampert, Journal of Machine Learning Research 23 (2022) 1–60.","mla":"Konstantinov, Nikola H., and Christoph Lampert. “Fairness-Aware PAC Learning from Corrupted Data.” Journal of Machine Learning Research, vol. 23, ML Research Press, 2022, pp. 1–60.","chicago":"Konstantinov, Nikola H, and Christoph Lampert. 
“Fairness-Aware PAC Learning from Corrupted Data.” Journal of Machine Learning Research. ML Research Press, 2022."},"article_type":"original","page":"1-60","date_published":"2022-05-01T00:00:00Z","scopus_import":"1","keyword":["Fairness","robustness","data poisoning","trustworthy machine learning","PAC learning"],"day":"01","has_accepted_license":"1","article_processing_charge":"No"},{"type":"conference","abstract":[{"lang":"eng","text":"Addressing fairness concerns about machine learning models is a crucial step towards their long-term adoption in real-world automated systems. Many approaches for training fair models from data have been developed and an implicit assumption about such algorithms is that they are able to recover a fair model, despite potential historical biases in the data. In this work we show a number of impossibility results that indicate that there is no learning algorithm that can recover a fair model when a proportion of the dataset is subject to arbitrary manipulations. Specifically, we prove that there are situations in which an adversary can force any learner to return a biased classifier, with or without degrading accuracy, and that the strength of this bias increases for learning problems with underrepresented protected groups in the data. Our results emphasize on the importance of studying further data corruption models of various strength and of establishing stricter data collection practices for fairness-aware learning."}],"status":"public","title":"On the impossibility of fairness-aware learning from corrupted data","intvolume":" 171","_id":"13241","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","oa_version":"Preprint","scopus_import":"1","day":"01","article_processing_charge":"No","page":"59-83","publication":"Proceedings of Machine Learning Research","citation":{"apa":"Konstantinov, N. H., & Lampert, C. (2022). On the impossibility of fairness-aware learning from corrupted data. In Proceedings of Machine Learning Research (Vol. 
171, pp. 59–83). ML Research Press.","ieee":"N. H. Konstantinov and C. Lampert, “On the impossibility of fairness-aware learning from corrupted data,” in Proceedings of Machine Learning Research, 2022, vol. 171, pp. 59–83.","ista":"Konstantinov NH, Lampert C. 2022. On the impossibility of fairness-aware learning from corrupted data. Proceedings of Machine Learning Research. vol. 171, 59–83.","ama":"Konstantinov NH, Lampert C. On the impossibility of fairness-aware learning from corrupted data. In: Proceedings of Machine Learning Research. Vol 171. ML Research Press; 2022:59-83.","chicago":"Konstantinov, Nikola H, and Christoph Lampert. “On the Impossibility of Fairness-Aware Learning from Corrupted Data.” In Proceedings of Machine Learning Research, 171:59–83. ML Research Press, 2022.","short":"N.H. Konstantinov, C. Lampert, in:, Proceedings of Machine Learning Research, ML Research Press, 2022, pp. 59–83.","mla":"Konstantinov, Nikola H., and Christoph Lampert. “On the Impossibility of Fairness-Aware Learning from Corrupted Data.” Proceedings of Machine Learning Research, vol. 171, ML Research Press, 2022, pp. 59–83."},"date_published":"2022-12-01T00:00:00Z","publication_status":"published","publisher":"ML Research Press","department":[{"_id":"ChLa"}],"year":"2022","acknowledgement":"This paper is a shortened, workshop version of Konstantinov and Lampert (2021),\r\nhttps://arxiv.org/abs/2102.06004. 
For further results, including an analysis of algorithms achieving the lower bounds from this paper, we refer to the full version.","date_updated":"2023-09-26T10:44:37Z","date_created":"2023-07-16T22:01:13Z","volume":171,"author":[{"first_name":"Nikola H","last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87","full_name":"Konstantinov, Nikola H"},{"full_name":"Lampert, Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph"}],"related_material":{"record":[{"relation":"extended_version","status":"public","id":"10802"}]},"month":"12","publication_identifier":{"eissn":["2640-3498"]},"quality_controlled":"1","external_id":{"arxiv":["2102.06004"]},"main_file_link":[{"url":"https://arxiv.org/abs/2102.06004","open_access":"1"}],"oa":1,"language":[{"iso":"eng"}]},{"page":"246-259","publication":"42nd German Conference on Pattern Recognition","citation":{"mla":"Volhejn, Vaclav, and Christoph Lampert. “Does SGD Implicitly Optimize for Smoothness?” 42nd German Conference on Pattern Recognition, vol. 12544, Springer, 2021, pp. 246–59, doi:10.1007/978-3-030-71278-5_18.","short":"V. Volhejn, C. Lampert, in:, 42nd German Conference on Pattern Recognition, Springer, 2021, pp. 246–259.","chicago":"Volhejn, Vaclav, and Christoph Lampert. “Does SGD Implicitly Optimize for Smoothness?” In 42nd German Conference on Pattern Recognition, 12544:246–59. LNCS. Springer, 2021. https://doi.org/10.1007/978-3-030-71278-5_18.","ama":"Volhejn V, Lampert C. Does SGD implicitly optimize for smoothness? In: 42nd German Conference on Pattern Recognition. Vol 12544. LNCS. Springer; 2021:246-259. doi:10.1007/978-3-030-71278-5_18","ista":"Volhejn V, Lampert C. 2021. Does SGD implicitly optimize for smoothness? 42nd German Conference on Pattern Recognition. DAGM GCPR: German Conference on Pattern Recognition LNCS vol. 12544, 246–259.","apa":"Volhejn, V., & Lampert, C. (2021). 
Does SGD implicitly optimize for smoothness? In 42nd German Conference on Pattern Recognition (Vol. 12544, pp. 246–259). Tübingen, Germany: Springer. https://doi.org/10.1007/978-3-030-71278-5_18","ieee":"V. Volhejn and C. Lampert, “Does SGD implicitly optimize for smoothness?,” in 42nd German Conference on Pattern Recognition, Tübingen, Germany, 2021, vol. 12544, pp. 246–259."},"date_published":"2021-03-17T00:00:00Z","series_title":"LNCS","scopus_import":"1","day":"17","has_accepted_license":"1","article_processing_charge":"No","ddc":["510"],"status":"public","title":"Does SGD implicitly optimize for smoothness?","intvolume":" 12544","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"9210","oa_version":"Submitted Version","file":[{"date_updated":"2022-08-12T07:27:58Z","date_created":"2022-08-12T07:27:58Z","success":1,"checksum":"3e3628ab1cf658d82524963f808004ea","file_id":"11820","relation":"main_file","creator":"dernst","file_size":420234,"content_type":"application/pdf","file_name":"2020_GCPR_submitted_Volhejn.pdf","access_level":"open_access"}],"type":"conference","abstract":[{"lang":"eng","text":"Modern neural networks can easily fit their training set perfectly. Surprisingly, despite being “overfit” in this way, they tend to generalize well to future data, thereby defying the classic bias–variance trade-off of machine learning theory. Of the many possible explanations, a prevalent one is that training by stochastic gradient descent (SGD) imposes an implicit bias that leads it to learn simple functions, and these simple functions generalize well. However, the specifics of this implicit bias are not well understood.\r\nIn this work, we explore the smoothness conjecture which states that SGD is implicitly biased towards learning functions that are smooth. We propose several measures to formalize the intuitive notion of smoothness, and we conduct experiments to determine whether SGD indeed implicitly optimizes for these measures. 
Our findings rule out the possibility that smoothness measures based on first-order derivatives are being implicitly enforced. They are supportive, though, of the smoothness conjecture for measures based on second-order derivatives."}],"quality_controlled":"1","oa":1,"language":[{"iso":"eng"}],"conference":{"name":"DAGM GCPR: German Conference on Pattern Recognition ","location":"Tübingen, Germany","start_date":"2020-09-28","end_date":"2020-10-01"},"doi":"10.1007/978-3-030-71278-5_18","month":"03","publication_identifier":{"isbn":["9783030712778"],"eissn":["1611-3349"],"issn":["0302-9743"]},"publication_status":"published","publisher":"Springer","department":[{"_id":"ChLa"}],"year":"2021","date_created":"2021-03-01T09:01:16Z","date_updated":"2022-08-12T07:28:47Z","volume":12544,"author":[{"id":"d5235fb4-7a6d-11eb-b254-f25d12d631a8","first_name":"Vaclav","last_name":"Volhejn","full_name":"Volhejn, Vaclav"},{"full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887"}],"file_date_updated":"2022-08-12T07:27:58Z"},{"day":"01","month":"05","has_accepted_license":"1","article_processing_charge":"No","scopus_import":"1","language":[{"iso":"eng"}],"conference":{"location":"Virtual","start_date":"2021-05-03","end_date":"2021-05-07","name":" ICLR: International Conference on Learning Representations"},"date_published":"2021-05-01T00:00:00Z","quality_controlled":"1","publication":"9th International Conference on Learning Representations","citation":{"mla":"Phuong, Mary, and Christoph Lampert. “The Inductive Bias of ReLU Networks on Orthogonally Separable Data.” 9th International Conference on Learning Representations, 2021.","short":"M. Phuong, C. Lampert, in:, 9th International Conference on Learning Representations, 2021.","chicago":"Phuong, Mary, and Christoph Lampert. 
“The Inductive Bias of ReLU Networks on Orthogonally Separable Data.” In 9th International Conference on Learning Representations, 2021.","ama":"Phuong M, Lampert C. The inductive bias of ReLU networks on orthogonally separable data. In: 9th International Conference on Learning Representations. ; 2021.","ista":"Phuong M, Lampert C. 2021. The inductive bias of ReLU networks on orthogonally separable data. 9th International Conference on Learning Representations. ICLR: International Conference on Learning Representations.","ieee":"M. Phuong and C. Lampert, “The inductive bias of ReLU networks on orthogonally separable data,” in 9th International Conference on Learning Representations, Virtual, 2021.","apa":"Phuong, M., & Lampert, C. (2021). The inductive bias of ReLU networks on orthogonally separable data. In 9th International Conference on Learning Representations. Virtual."},"oa":1,"main_file_link":[{"url":"https://openreview.net/pdf?id=krz7T0xU9Z_","open_access":"1"}],"abstract":[{"text":"We study the inductive bias of two-layer ReLU networks trained by gradient flow. We identify a class of easy-to-learn (`orthogonally separable') datasets, and characterise the solution that ReLU networks trained on such datasets converge to. Irrespective of network width, the solution turns out to be a combination of two max-margin classifiers: one corresponding to the positive data subset and one corresponding to the negative data subset. The proof is based on the recently introduced concept of extremal sectors, for which we prove a number of properties in the context of orthogonal separability. 
In particular, we prove stationarity of activation patterns from some time onwards, which enables a reduction of the ReLU network to an ensemble of linear subnetworks.","lang":"eng"}],"file_date_updated":"2021-05-24T11:15:57Z","type":"conference","date_updated":"2023-09-07T13:29:50Z","date_created":"2021-05-24T11:16:46Z","file":[{"relation":"main_file","file_id":"9417","date_updated":"2021-05-24T11:15:57Z","date_created":"2021-05-24T11:15:57Z","checksum":"f34ff17017527db5ba6927f817bdd125","file_name":"iclr2021_conference.pdf","access_level":"open_access","content_type":"application/pdf","file_size":502356,"creator":"bphuong"}],"oa_version":"Published Version","author":[{"first_name":"Phuong","last_name":"Bui Thi Mai","id":"3EC6EE64-F248-11E8-B48F-1D18A9856A87","full_name":"Bui Thi Mai, Phuong"},{"full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887"}],"related_material":{"record":[{"status":"public","relation":"dissertation_contains","id":"9418"}]},"publication_status":"published","title":"The inductive bias of ReLU networks on orthogonally separable data","ddc":["000"],"status":"public","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"_id":"9416","year":"2021","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87"},{"date_published":"2021-06-07T00:00:00Z","doi":"10.48550/arXiv.2102.05996","language":[{"iso":"eng"}],"main_file_link":[{"url":"https://arxiv.org/abs/2102.05996","open_access":"1"}],"oa":1,"citation":{"ama":"Konstantinov NH, Lampert C. Fairness through regularization for learning to rank. arXiv. doi:10.48550/arXiv.2102.05996","ieee":"N. H. Konstantinov and C. Lampert, “Fairness through regularization for learning to rank,” arXiv. .","apa":"Konstantinov, N. H., & Lampert, C. (n.d.). Fairness through regularization for learning to rank. arXiv. https://doi.org/10.48550/arXiv.2102.05996","ista":"Konstantinov NH, Lampert C. 
Fairness through regularization for learning to rank. arXiv, 2102.05996.","short":"N.H. Konstantinov, C. Lampert, ArXiv (n.d.).","mla":"Konstantinov, Nikola H., and Christoph Lampert. “Fairness through Regularization for Learning to Rank.” ArXiv, 2102.05996, doi:10.48550/arXiv.2102.05996.","chicago":"Konstantinov, Nikola H, and Christoph Lampert. “Fairness through Regularization for Learning to Rank.” ArXiv, n.d. https://doi.org/10.48550/arXiv.2102.05996."},"external_id":{"arxiv":["2102.05996"]},"publication":"arXiv","article_processing_charge":"No","month":"06","day":"07","related_material":{"record":[{"id":"10799","relation":"dissertation_contains","status":"public"}]},"author":[{"full_name":"Konstantinov, Nikola H","first_name":"Nikola H","last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Lampert, Christoph","first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-4561-241X"}],"oa_version":"Preprint","date_updated":"2023-09-07T13:42:08Z","date_created":"2022-02-28T14:13:59Z","_id":"10803","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2021","department":[{"_id":"ChLa"}],"status":"public","title":"Fairness through regularization for learning to rank","publication_status":"submitted","abstract":[{"lang":"eng","text":"Given the abundance of applications of ranking in recent years, addressing fairness concerns around automated ranking systems becomes necessary for increasing the trust among end-users. Previous work on fair ranking has mostly focused on application-specific fairness notions, often tailored to online advertising, and it rarely considers learning as part of the process. In this work, we show how to transfer numerous fairness notions from binary classification to a learning to rank setting. Our formalism allows us to design methods for incorporating fairness objectives with provable generalization guarantees. 
An extensive experimental evaluation shows that our method can improve ranking fairness substantially with no or only little loss of model quality."}],"type":"preprint","article_number":"2102.05996"},{"abstract":[{"text":"The goal of zero-shot learning is to construct a classifier that can identify object classes for which no training examples are available. When training data for some of the object classes is available but not for others, the name generalized zero-shot learning is commonly used.\r\nIn a wider sense, the phrase zero-shot is also used to describe other machine learning-based approaches that require no training data from the problem of interest, such as zero-shot action recognition or zero-shot machine translation.","lang":"eng"}],"type":"book_chapter","place":"Cham","author":[{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert","full_name":"Lampert, Christoph"}],"edition":"2","date_updated":"2024-02-19T10:59:04Z","date_created":"2024-02-14T14:05:32Z","oa_version":"None","_id":"14987","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","year":"2021","title":"Zero-Shot Learning","publication_status":"published","status":"public","publisher":"Springer","department":[{"_id":"ChLa"}],"editor":[{"full_name":"Ikeuchi, Katsushi","first_name":"Katsushi","last_name":"Ikeuchi"}],"day":"13","month":"10","publication_identifier":{"eisbn":["9783030634162"],"isbn":["9783030634155"]},"article_processing_charge":"No","date_published":"2021-10-13T00:00:00Z","doi":"10.1007/978-3-030-63416-2_874","language":[{"iso":"eng"}],"publication":"Computer Vision","citation":{"ieee":"C. Lampert, “Zero-Shot Learning,” in Computer Vision, 2nd ed., K. Ikeuchi, Ed. Cham: Springer, 2021, pp. 1395–1397.","apa":"Lampert, C. (2021). Zero-Shot Learning. In K. Ikeuchi (Ed.), Computer Vision (2nd ed., pp. 1395–1397). Cham: Springer. https://doi.org/10.1007/978-3-030-63416-2_874","ista":"Lampert C. 2021.Zero-Shot Learning. 
In: Computer Vision. , 1395–1397.","ama":"Lampert C. Zero-Shot Learning. In: Ikeuchi K, ed. Computer Vision. 2nd ed. Cham: Springer; 2021:1395-1397. doi:10.1007/978-3-030-63416-2_874","chicago":"Lampert, Christoph. “Zero-Shot Learning.” In Computer Vision, edited by Katsushi Ikeuchi, 2nd ed., 1395–97. Cham: Springer, 2021. https://doi.org/10.1007/978-3-030-63416-2_874.","short":"C. Lampert, in:, K. Ikeuchi (Ed.), Computer Vision, 2nd ed., Springer, Cham, 2021, pp. 1395–1397.","mla":"Lampert, Christoph. “Zero-Shot Learning.” Computer Vision, edited by Katsushi Ikeuchi, 2nd ed., Springer, 2021, pp. 1395–97, doi:10.1007/978-3-030-63416-2_874."},"quality_controlled":"1","page":"1395-1397"},{"license":"https://creativecommons.org/licenses/by-sa/4.0/","abstract":[{"text":"We present a generative model of images that explicitly reasons over the set\r\nof objects they show. Our model learns a structured latent representation that\r\nseparates objects from each other and from the background; unlike prior works,\r\nit explicitly represents the 2D position and depth of each object, as well as\r\nan embedding of its segmentation mask and appearance. The model can be trained\r\nfrom images alone in a purely unsupervised fashion without the need for object\r\nmasks or depth information. 
Moreover, it always generates complete objects,\r\neven though a significant fraction of training images contain occlusions.\r\nFinally, we show that our model can infer decompositions of novel images into\r\ntheir constituent objects, including accurate prediction of depth ordering and\r\nsegmentation of occluded parts.","lang":"eng"}],"article_number":"2004.00642","type":"preprint","date_created":"2020-06-29T23:55:23Z","date_updated":"2021-01-12T08:16:44Z","oa_version":"Preprint","author":[{"first_name":"Titas","last_name":"Anciukevicius","full_name":"Anciukevicius, Titas"},{"full_name":"Lampert, Christoph","last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"},{"last_name":"Henderson","first_name":"Paul M","orcid":"0000-0002-5198-7445","id":"13C09E74-18D9-11E9-8878-32CFE5697425","full_name":"Henderson, Paul M"}],"title":"Object-centric image generation with factored depths, locations, and appearances","status":"public","ddc":["004"],"publication_status":"submitted","department":[{"_id":"ChLa"}],"_id":"8063","year":"2020","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"01","month":"04","article_processing_charge":"No","language":[{"iso":"eng"}],"date_published":"2020-04-01T00:00:00Z","publication":"arXiv","tmp":{"short":"CC BY-SA (4.0)","image":"/images/cc_by_sa.png","name":"Creative Commons Attribution-ShareAlike 4.0 International Public License (CC BY-SA 4.0)","legal_code_url":"https://creativecommons.org/licenses/by-sa/4.0/legalcode"},"external_id":{"arxiv":["2004.00642"]},"oa":1,"citation":{"apa":"Anciukevicius, T., Lampert, C., & Henderson, P. M. (n.d.). Object-centric image generation with factored depths, locations, and appearances. arXiv.","ieee":"T. Anciukevicius, C. Lampert, and P. M. Henderson, “Object-centric image generation with factored depths, locations, and appearances,” arXiv. .","ista":"Anciukevicius T, Lampert C, Henderson PM. 
Object-centric image generation with factored depths, locations, and appearances. arXiv, 2004.00642.","ama":"Anciukevicius T, Lampert C, Henderson PM. Object-centric image generation with factored depths, locations, and appearances. arXiv.","chicago":"Anciukevicius, Titas, Christoph Lampert, and Paul M Henderson. “Object-Centric Image Generation with Factored Depths, Locations, and Appearances.” ArXiv, n.d.","short":"T. Anciukevicius, C. Lampert, P.M. Henderson, ArXiv (n.d.).","mla":"Anciukevicius, Titas, et al. “Object-Centric Image Generation with Factored Depths, Locations, and Appearances.” ArXiv, 2004.00642."},"main_file_link":[{"url":"https://arxiv.org/abs/2004.00642","open_access":"1"}]},{"article_processing_charge":"No","day":"07","date_published":"2020-07-07T00:00:00Z","citation":{"short":"P.M. Henderson, C. Lampert, in:, 34th Conference on Neural Information Processing Systems, Curran Associates, 2020, pp. 3106–3117.","mla":"Henderson, Paul M., and Christoph Lampert. “Unsupervised Object-Centric Video Generation and Decomposition in 3D.” 34th Conference on Neural Information Processing Systems, vol. 33, Curran Associates, 2020, pp. 3106–3117.","chicago":"Henderson, Paul M, and Christoph Lampert. “Unsupervised Object-Centric Video Generation and Decomposition in 3D.” In 34th Conference on Neural Information Processing Systems, 33:3106–3117. Curran Associates, 2020.","ama":"Henderson PM, Lampert C. Unsupervised object-centric video generation and decomposition in 3D. In: 34th Conference on Neural Information Processing Systems. Vol 33. Curran Associates; 2020:3106–3117.","apa":"Henderson, P. M., & Lampert, C. (2020). Unsupervised object-centric video generation and decomposition in 3D. In 34th Conference on Neural Information Processing Systems (Vol. 33, pp. 3106–3117). Vancouver, Canada: Curran Associates.","ieee":"P. M. Henderson and C. 
Lampert, “Unsupervised object-centric video generation and decomposition in 3D,” in 34th Conference on Neural Information Processing Systems, Vancouver, Canada, 2020, vol. 33, pp. 3106–3117.","ista":"Henderson PM, Lampert C. 2020. Unsupervised object-centric video generation and decomposition in 3D. 34th Conference on Neural Information Processing Systems. NeurIPS: Neural Information Processing Systems vol. 33, 3106–3117."},"publication":"34th Conference on Neural Information Processing Systems","page":"3106–3117","abstract":[{"lang":"eng","text":"A natural approach to generative modeling of videos is to represent them as a composition of moving objects. Recent works model a set of 2D sprites over a slowly-varying background, but without considering the underlying 3D scene that\r\ngives rise to them. We instead propose to model a video as the view seen while moving through a scene with multiple 3D objects and a 3D background. Our model is trained from monocular videos without any supervision, yet learns to\r\ngenerate coherent 3D scenes containing several moving objects. We conduct detailed experiments on two datasets, going beyond the visual complexity supported by state-of-the-art generative approaches. 
We evaluate our method on\r\ndepth-prediction and 3D object detection---tasks which cannot be addressed by those earlier works---and show it out-performs them even on 2D instance segmentation and tracking."}],"type":"conference","oa_version":"Preprint","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"8188","intvolume":" 33","status":"public","title":"Unsupervised object-centric video generation and decomposition in 3D","publication_identifier":{"isbn":["9781713829546"]},"month":"07","conference":{"name":"NeurIPS: Neural Information Processing Systems","location":"Vancouver, Canada","start_date":"2020-12-06","end_date":"2020-12-12"},"language":[{"iso":"eng"}],"acknowledged_ssus":[{"_id":"ScienComp"}],"oa":1,"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2007.06705"}],"external_id":{"arxiv":["2007.06705"]},"quality_controlled":"1","author":[{"full_name":"Henderson, Paul M","first_name":"Paul M","last_name":"Henderson","id":"13C09E74-18D9-11E9-8878-32CFE5697425","orcid":"0000-0002-5198-7445"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"volume":33,"date_updated":"2023-04-25T09:49:58Z","date_created":"2020-07-31T16:59:19Z","acknowledgement":"This research was supported by the Scientific Service Units (SSU) of IST Austria through resources\r\nprovided by Scientific Computing (SciComp). 
PH is employed part-time by Blackford Analysis, but\r\nthey did not support this project in any way.","year":"2020","publisher":"Curran Associates","department":[{"_id":"ChLa"}],"publication_status":"published"},{"year":"2020","publication_status":"published","department":[{"_id":"ChLa"}],"publisher":"IEEE","author":[{"last_name":"Royer","first_name":"Amélie","orcid":"0000-0002-8407-0705","id":"3811D890-F248-11E8-B48F-1D18A9856A87","full_name":"Royer, Amélie"},{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"}],"related_material":{"record":[{"id":"8331","status":"deleted","relation":"dissertation_contains"},{"relation":"dissertation_contains","status":"public","id":"8390"}]},"date_updated":"2023-09-07T13:16:17Z","date_created":"2020-06-07T22:00:53Z","article_number":"1716-1725","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2004.12623"}],"oa":1,"external_id":{"arxiv":["2004.12623"]},"quality_controlled":"1","conference":{"end_date":"2020-03-05","location":" Snowmass Village, CO, United States","start_date":"2020-03-01","name":"WACV: Winter Conference on Applications of Computer Vision"},"doi":"10.1109/WACV45572.2020.9093288","language":[{"iso":"eng"}],"month":"03","publication_identifier":{"isbn":["9781728165530"]},"user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","_id":"7936","status":"public","title":"Localizing grouped instances for efficient detection in low-resource scenarios","oa_version":"Preprint","type":"conference","abstract":[{"lang":"eng","text":"State-of-the-art detection systems are generally evaluated on their ability to exhaustively retrieve objects densely distributed in the image, across a wide variety of appearances and semantic categories. 
Orthogonal to this, many real-life object detection applications, for example in remote sensing, instead require dealing with large images that contain only a few small objects of a single class, scattered heterogeneously across the space. In addition, they are often subject to strict computational constraints, such as limited battery capacity and computing power. To tackle these more practical scenarios, we propose a novel flexible detection scheme that efficiently adapts to variable object sizes and densities: We rely on a sequence of detection stages, each of which has the ability to predict groups of objects as well as individuals. Similar to a detection cascade, this multi-stage architecture spares computational effort by discarding large irrelevant regions of the image early during the detection process. The ability to group objects provides further computational and memory savings, as it allows working with lower image resolutions in early stages, where groups are more easily detected than individuals, as they are more salient. We report experimental results on two aerial image datasets, and show that the proposed method is as accurate yet computationally more efficient than standard single-shot detectors, consistently across three different backbone architectures."}],"publication":"IEEE Winter Conference on Applications of Computer Vision","citation":{"apa":"Royer, A., & Lampert, C. (2020). Localizing grouped instances for efficient detection in low-resource scenarios. In IEEE Winter Conference on Applications of Computer Vision. Snowmass Village, CO, United States: IEEE. https://doi.org/10.1109/WACV45572.2020.9093288","ieee":"A. Royer and C. Lampert, “Localizing grouped instances for efficient detection in low-resource scenarios,” in IEEE Winter Conference on Applications of Computer Vision, Snowmass Village, CO, United States, 2020.","ista":"Royer A, Lampert C. 2020. Localizing grouped instances for efficient detection in low-resource scenarios. 
IEEE Winter Conference on Applications of Computer Vision. WACV: Winter Conference on Applications of Computer Vision, 1716–1725.","ama":"Royer A, Lampert C. Localizing grouped instances for efficient detection in low-resource scenarios. In: IEEE Winter Conference on Applications of Computer Vision. IEEE; 2020. doi:10.1109/WACV45572.2020.9093288","chicago":"Royer, Amélie, and Christoph Lampert. “Localizing Grouped Instances for Efficient Detection in Low-Resource Scenarios.” In IEEE Winter Conference on Applications of Computer Vision. IEEE, 2020. https://doi.org/10.1109/WACV45572.2020.9093288.","short":"A. Royer, C. Lampert, in:, IEEE Winter Conference on Applications of Computer Vision, IEEE, 2020.","mla":"Royer, Amélie, and Christoph Lampert. “Localizing Grouped Instances for Efficient Detection in Low-Resource Scenarios.” IEEE Winter Conference on Applications of Computer Vision, 1716–1725, IEEE, 2020, doi:10.1109/WACV45572.2020.9093288."},"date_published":"2020-03-01T00:00:00Z","scopus_import":1,"day":"01","article_processing_charge":"No"},{"day":"01","article_processing_charge":"No","scopus_import":"1","date_published":"2020-03-01T00:00:00Z","publication":"2020 IEEE Winter Conference on Applications of Computer Vision","citation":{"mla":"Royer, Amélie, and Christoph Lampert. “A Flexible Selection Scheme for Minimum-Effort Transfer Learning.” 2020 IEEE Winter Conference on Applications of Computer Vision, 2180–2189, IEEE, 2020, doi:10.1109/WACV45572.2020.9093635.","short":"A. Royer, C. Lampert, in:, 2020 IEEE Winter Conference on Applications of Computer Vision, IEEE, 2020.","chicago":"Royer, Amélie, and Christoph Lampert. “A Flexible Selection Scheme for Minimum-Effort Transfer Learning.” In 2020 IEEE Winter Conference on Applications of Computer Vision. IEEE, 2020. https://doi.org/10.1109/WACV45572.2020.9093635.","ama":"Royer A, Lampert C. A flexible selection scheme for minimum-effort transfer learning. 
In: 2020 IEEE Winter Conference on Applications of Computer Vision. IEEE; 2020. doi:10.1109/WACV45572.2020.9093635","ista":"Royer A, Lampert C. 2020. A flexible selection scheme for minimum-effort transfer learning. 2020 IEEE Winter Conference on Applications of Computer Vision. WACV: Winter Conference on Applications of Computer Vision, 2180–2189.","ieee":"A. Royer and C. Lampert, “A flexible selection scheme for minimum-effort transfer learning,” in 2020 IEEE Winter Conference on Applications of Computer Vision, Snowmass Village, CO, United States, 2020.","apa":"Royer, A., & Lampert, C. (2020). A flexible selection scheme for minimum-effort transfer learning. In 2020 IEEE Winter Conference on Applications of Computer Vision. Snowmass Village, CO, United States: IEEE. https://doi.org/10.1109/WACV45572.2020.9093635"},"abstract":[{"lang":"eng","text":"Fine-tuning is a popular way of exploiting knowledge contained in a pre-trained convolutional network for a new visual recognition task. However, the orthogonal setting of transferring knowledge from a pretrained network to a visually different yet semantically close source is rarely considered: This commonly happens with real-life data, which is not necessarily as clean as the training source (noise, geometric transformations, different modalities, etc.). To tackle such scenarios, we introduce a new, generalized form of fine-tuning, called flex-tuning, in which any individual unit (e.g. layer) of a network can be tuned, and the most promising one is chosen automatically. In order to make the method appealing for practical use, we propose two lightweight and faster selection procedures that prove to be good approximations in practice. We study these selection criteria empirically across a variety of domain shifts and data scarcity scenarios, and show that fine-tuning individual units, despite its simplicity, yields very good results as an adaptation technique. 
As it turns out, in contrast to common practice, rather than the last fully-connected unit it is best to tune an intermediate or early one in many domain-shift scenarios, which is accurately detected by flex-tuning."}],"type":"conference","oa_version":"Preprint","status":"public","title":"A flexible selection scheme for minimum-effort transfer learning","_id":"7937","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","month":"03","publication_identifier":{"isbn":["9781728165530"]},"language":[{"iso":"eng"}],"conference":{"start_date":"2020-03-01","location":"Snowmass Village, CO, United States","end_date":"2020-03-05","name":"WACV: Winter Conference on Applications of Computer Vision"},"doi":"10.1109/WACV45572.2020.9093635","quality_controlled":"1","main_file_link":[{"open_access":"1","url":"http://arxiv.org/abs/2008.11995"}],"oa":1,"external_id":{"arxiv":["2008.11995"]},"article_number":"2180-2189","date_created":"2020-06-07T22:00:53Z","date_updated":"2023-09-07T13:16:17Z","author":[{"full_name":"Royer, Amélie","id":"3811D890-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0002-8407-0705","first_name":"Amélie","last_name":"Royer"},{"id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert","full_name":"Lampert, Christoph"}],"related_material":{"record":[{"id":"8331","status":"deleted","relation":"dissertation_contains"},{"relation":"dissertation_contains","status":"public","id":"8390"}]},"publication_status":"published","department":[{"_id":"ChLa"}],"publisher":"IEEE","year":"2020"}]