[{"file_date_updated":"2022-07-12T15:08:28Z","date_created":"2022-02-28T14:05:42Z","date_updated":"2023-09-26T10:44:37Z","volume":23,"author":[{"full_name":"Konstantinov, Nikola H","last_name":"Konstantinov","first_name":"Nikola H","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"full_name":"Lampert, Christoph","last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87"}],"related_material":{"record":[{"relation":"dissertation_contains","status":"public","id":"10799"},{"relation":"shorter_version","status":"public","id":"13241"}]},"publication_status":"published","publisher":"ML Research Press","department":[{"_id":"ChLa"}],"year":"2022","acknowledgement":"The authors thank Eugenia Iofinova and Bernd Prach for providing feedback on early versions of this paper. This publication was made possible by an ETH AI Center postdoctoral fellowship to Nikola Konstantinov.","month":"05","publication_identifier":{"issn":["1532-4435"],"eissn":["1533-7928"]},"language":[{"iso":"eng"}],"quality_controlled":"1","external_id":{"arxiv":["2102.06004"]},"tmp":{"name":"Creative Commons Attribution 4.0 International Public License (CC-BY 4.0)","legal_code_url":"https://creativecommons.org/licenses/by/4.0/legalcode","short":"CC BY (4.0)","image":"/images/cc_by.png"},"oa":1,"abstract":[{"lang":"eng","text":"Addressing fairness concerns about machine learning models is a crucial step towards their long-term adoption in real-world automated systems. While many approaches have been developed for training fair models from data, little is known about the robustness of these methods to data corruption. In this work we consider fairness-aware learning under worst-case data manipulations. 
We show that an adversary can in some situations force any learner to return an overly biased classifier, regardless of the sample size and with or without degrading\r\naccuracy, and that the strength of the excess bias increases for learning problems with underrepresented protected groups in the data. We also prove that our hardness results are tight up to constant factors. To this end, we study two natural learning algorithms that optimize for both accuracy and fairness and show that these algorithms enjoy guarantees that are order-optimal in terms of the corruption ratio and the protected groups frequencies in the large data\r\nlimit."}],"type":"journal_article","oa_version":"Published Version","file":[{"success":1,"checksum":"9cac897b54a0ddf3a553a2c33e88cfda","date_updated":"2022-07-12T15:08:28Z","date_created":"2022-07-12T15:08:28Z","file_id":"11570","relation":"main_file","creator":"kschuh","file_size":551862,"content_type":"application/pdf","access_level":"open_access","file_name":"2022_JournalMachineLearningResearch_Konstantinov.pdf"}],"status":"public","ddc":["004"],"title":"Fairness-aware PAC learning from corrupted data","intvolume":" 23","_id":"10802","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","day":"01","has_accepted_license":"1","article_processing_charge":"No","keyword":["Fairness","robustness","data poisoning","trustworthy machine learning","PAC learning"],"scopus_import":"1","date_published":"2022-05-01T00:00:00Z","article_type":"original","page":"1-60","publication":"Journal of Machine Learning Research","citation":{"short":"N.H. Konstantinov, C. Lampert, Journal of Machine Learning Research 23 (2022) 1–60.","mla":"Konstantinov, Nikola H., and Christoph Lampert. “Fairness-Aware PAC Learning from Corrupted Data.” Journal of Machine Learning Research, vol. 23, ML Research Press, 2022, pp. 1–60.","chicago":"Konstantinov, Nikola H, and Christoph Lampert. “Fairness-Aware PAC Learning from Corrupted Data.” Journal of Machine Learning Research. 
ML Research Press, 2022.","ama":"Konstantinov NH, Lampert C. Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. 2022;23:1-60.","ieee":"N. H. Konstantinov and C. Lampert, “Fairness-aware PAC learning from corrupted data,” Journal of Machine Learning Research, vol. 23. ML Research Press, pp. 1–60, 2022.","apa":"Konstantinov, N. H., & Lampert, C. (2022). Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. ML Research Press.","ista":"Konstantinov NH, Lampert C. 2022. Fairness-aware PAC learning from corrupted data. Journal of Machine Learning Research. 23, 1–60."}},{"oa_version":"Preprint","_id":"13241","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","intvolume":" 171","title":"On the impossibility of fairness-aware learning from corrupted data","status":"public","abstract":[{"text":"Addressing fairness concerns about machine learning models is a crucial step towards their long-term adoption in real-world automated systems. Many approaches for training fair models from data have been developed and an implicit assumption about such algorithms is that they are able to recover a fair model, despite potential historical biases in the data. In this work we show a number of impossibility results that indicate that there is no learning algorithm that can recover a fair model when a proportion of the dataset is subject to arbitrary manipulations. Specifically, we prove that there are situations in which an adversary can force any learner to return a biased classifier, with or without degrading accuracy, and that the strength of this bias increases for learning problems with underrepresented protected groups in the data. Our results emphasize on the importance of studying further data corruption models of various strength and of establishing stricter data collection practices for fairness-aware learning.","lang":"eng"}],"type":"conference","date_published":"2022-12-01T00:00:00Z","citation":{"ieee":"N. H. 
Konstantinov and C. Lampert, “On the impossibility of fairness-aware learning from corrupted data,” in Proceedings of Machine Learning Research, 2022, vol. 171, pp. 59–83.","apa":"Konstantinov, N. H., & Lampert, C. (2022). On the impossibility of fairness-aware learning from corrupted data. In Proceedings of Machine Learning Research (Vol. 171, pp. 59–83). ML Research Press.","ista":"Konstantinov NH, Lampert C. 2022. On the impossibility of fairness-aware learning from corrupted data. Proceedings of Machine Learning Research. vol. 171, 59–83.","ama":"Konstantinov NH, Lampert C. On the impossibility of fairness-aware learning from corrupted data. In: Proceedings of Machine Learning Research. Vol 171. ML Research Press; 2022:59-83.","chicago":"Konstantinov, Nikola H, and Christoph Lampert. “On the Impossibility of Fairness-Aware Learning from Corrupted Data.” In Proceedings of Machine Learning Research, 171:59–83. ML Research Press, 2022.","short":"N.H. Konstantinov, C. Lampert, in:, Proceedings of Machine Learning Research, ML Research Press, 2022, pp. 59–83.","mla":"Konstantinov, Nikola H., and Christoph Lampert. “On the Impossibility of Fairness-Aware Learning from Corrupted Data.” Proceedings of Machine Learning Research, vol. 171, ML Research Press, 2022, pp. 
59–83."},"publication":"Proceedings of Machine Learning Research","page":"59-83","article_processing_charge":"No","day":"01","scopus_import":"1","related_material":{"record":[{"status":"public","relation":"extended_version","id":"10802"}]},"author":[{"full_name":"Konstantinov, Nikola H","last_name":"Konstantinov","first_name":"Nikola H","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"},{"orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","last_name":"Lampert","first_name":"Christoph","full_name":"Lampert, Christoph"}],"volume":171,"date_updated":"2023-09-26T10:44:37Z","date_created":"2023-07-16T22:01:13Z","acknowledgement":"This paper is a shortened, workshop version of Konstantinov and Lampert (2021),\r\nhttps://arxiv.org/abs/2102.06004. For further results, including an analysis of algorithms achieving the lower bounds from this paper, we refer to the full version.","year":"2022","publisher":"ML Research Press","department":[{"_id":"ChLa"}],"publication_status":"published","language":[{"iso":"eng"}],"external_id":{"arxiv":["2102.06004"]},"oa":1,"main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2102.06004"}],"quality_controlled":"1","publication_identifier":{"eissn":["2640-3498"]},"month":"12"},{"supervisor":[{"last_name":"Lampert","first_name":"Christoph","orcid":"0000-0001-8622-7887","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","full_name":"Lampert, Christoph"}],"degree_awarded":"PhD","language":[{"iso":"eng"}],"doi":"10.15479/at:ista:10799","project":[{"name":"International IST Doctoral Program","call_identifier":"H2020","_id":"2564DBCA-B435-11E9-9278-68D0E5697425","grant_number":"665385"}],"oa":1,"month":"03","publication_identifier":{"issn":["2663-337X"],"isbn":["978-3-99078-015-2"]},"date_updated":"2023-10-17T12:31:54Z","date_created":"2022-02-28T13:03:49Z","author":[{"full_name":"Konstantinov, Nikola H","first_name":"Nikola 
H","last_name":"Konstantinov","id":"4B9D76E4-F248-11E8-B48F-1D18A9856A87"}],"related_material":{"record":[{"status":"public","relation":"part_of_dissertation","id":"8724"},{"id":"10803","relation":"part_of_dissertation","status":"public"},{"id":"10802","relation":"part_of_dissertation","status":"public"},{"id":"6590","status":"public","relation":"part_of_dissertation"}]},"publication_status":"published","department":[{"_id":"GradSch"},{"_id":"ChLa"}],"publisher":"Institute of Science and Technology Austria","year":"2022","file_date_updated":"2022-03-10T12:11:48Z","ec_funded":1,"date_published":"2022-03-08T00:00:00Z","page":"176","citation":{"short":"N.H. Konstantinov, Robustness and Fairness in Machine Learning, Institute of Science and Technology Austria, 2022.","mla":"Konstantinov, Nikola H. Robustness and Fairness in Machine Learning. Institute of Science and Technology Austria, 2022, doi:10.15479/at:ista:10799.","chicago":"Konstantinov, Nikola H. “Robustness and Fairness in Machine Learning.” Institute of Science and Technology Austria, 2022. https://doi.org/10.15479/at:ista:10799.","ama":"Konstantinov NH. Robustness and fairness in machine learning. 2022. doi:10.15479/at:ista:10799","ieee":"N. H. Konstantinov, “Robustness and fairness in machine learning,” Institute of Science and Technology Austria, 2022.","apa":"Konstantinov, N. H. (2022). Robustness and fairness in machine learning. Institute of Science and Technology Austria. https://doi.org/10.15479/at:ista:10799","ista":"Konstantinov NH. 2022. Robustness and fairness in machine learning. 
Institute of Science and Technology Austria."},"day":"08","has_accepted_license":"1","article_processing_charge":"No","keyword":["robustness","fairness","machine learning","PAC learning","adversarial learning"],"oa_version":"Published Version","file":[{"access_level":"open_access","file_name":"thesis.pdf","creator":"nkonstan","content_type":"application/pdf","file_size":4204905,"file_id":"10823","relation":"main_file","success":1,"checksum":"626bc523ae8822d20e635d0e2d95182e","date_updated":"2022-03-06T11:42:54Z","date_created":"2022-03-06T11:42:54Z"},{"creator":"nkonstan","file_size":22841103,"content_type":"application/x-zip-compressed","file_name":"thesis.zip","access_level":"closed","date_updated":"2022-03-10T12:11:48Z","date_created":"2022-03-06T11:42:57Z","checksum":"e2ca2b88350ac8ea1515b948885cbcb1","file_id":"10824","relation":"source_file"}],"ddc":["000"],"title":"Robustness and fairness in machine learning","status":"public","_id":"10799","user_id":"c635000d-4b10-11ee-a964-aac5a93f6ac1","abstract":[{"lang":"eng","text":"Because of the increasing popularity of machine learning methods, it is becoming important to understand the impact of learned components on automated decision-making systems and to guarantee that their consequences are beneficial to society. In other words, it is necessary to ensure that machine learning is sufficiently trustworthy to be used in real-world applications. This thesis studies two properties of machine learning models that are highly desirable for the\r\nsake of reliability: robustness and fairness. In the first part of the thesis we study the robustness of learning algorithms to training data corruption. Previous work has shown that machine learning models are vulnerable to a range\r\nof training set issues, varying from label noise through systematic biases to worst-case data manipulations. 
This is an especially relevant problem from a present perspective, since modern machine learning methods are particularly data hungry and therefore practitioners often have to rely on data collected from various external sources, e.g. from the Internet, from app users or via crowdsourcing. Naturally, such sources vary greatly in the quality and reliability of the\r\ndata they provide. With these considerations in mind, we study the problem of designing machine learning algorithms that are robust to corruptions in data coming from multiple sources. We show that, in contrast to the case of a single dataset with outliers, successful learning within this model is possible both theoretically and practically, even under worst-case data corruptions. The second part of this thesis deals with fairness-aware machine learning. There are multiple areas where machine learning models have shown promising results, but where careful considerations are required, in order to avoid discriminative decisions taken by such learned components. Ensuring fairness can be particularly challenging, because real-world training datasets are expected to contain various forms of historical bias that may affect the learning process. In this thesis we show that data corruption can indeed render the problem of achieving fairness impossible, by tightly characterizing the theoretical limits of fair learning under worst-case data manipulations. 
However, assuming access to clean data, we also show how fairness-aware learning can be made practical in contexts beyond binary classification, in particular in the challenging learning to rank setting."}],"alternative_title":["ISTA Thesis"],"type":"dissertation"},{"file":[{"file_name":"2020_GCPR_submitted_Volhejn.pdf","access_level":"open_access","content_type":"application/pdf","file_size":420234,"creator":"dernst","relation":"main_file","file_id":"11820","date_created":"2022-08-12T07:27:58Z","date_updated":"2022-08-12T07:27:58Z","checksum":"3e3628ab1cf658d82524963f808004ea","success":1}],"oa_version":"Submitted Version","_id":"9210","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","intvolume":" 12544","ddc":["510"],"title":"Does SGD implicitly optimize for smoothness?","status":"public","abstract":[{"lang":"eng","text":"Modern neural networks can easily fit their training set perfectly. Surprisingly, despite being “overfit” in this way, they tend to generalize well to future data, thereby defying the classic bias–variance trade-off of machine learning theory. Of the many possible explanations, a prevalent one is that training by stochastic gradient descent (SGD) imposes an implicit bias that leads it to learn simple functions, and these simple functions generalize well. However, the specifics of this implicit bias are not well understood.\r\nIn this work, we explore the smoothness conjecture which states that SGD is implicitly biased towards learning functions that are smooth. We propose several measures to formalize the intuitive notion of smoothness, and we conduct experiments to determine whether SGD indeed implicitly optimizes for these measures. Our findings rule out the possibility that smoothness measures based on first-order derivatives are being implicitly enforced. 
They are supportive, though, of the smoothness conjecture for measures based on second-order derivatives."}],"type":"conference","date_published":"2021-03-17T00:00:00Z","citation":{"ista":"Volhejn V, Lampert C. 2021. Does SGD implicitly optimize for smoothness? 42nd German Conference on Pattern Recognition. DAGM GCPR: German Conference on Pattern Recognition LNCS vol. 12544, 246–259.","ieee":"V. Volhejn and C. Lampert, “Does SGD implicitly optimize for smoothness?,” in 42nd German Conference on Pattern Recognition, Tübingen, Germany, 2021, vol. 12544, pp. 246–259.","apa":"Volhejn, V., & Lampert, C. (2021). Does SGD implicitly optimize for smoothness? In 42nd German Conference on Pattern Recognition (Vol. 12544, pp. 246–259). Tübingen, Germany: Springer. https://doi.org/10.1007/978-3-030-71278-5_18","ama":"Volhejn V, Lampert C. Does SGD implicitly optimize for smoothness? In: 42nd German Conference on Pattern Recognition. Vol 12544. LNCS. Springer; 2021:246-259. doi:10.1007/978-3-030-71278-5_18","chicago":"Volhejn, Vaclav, and Christoph Lampert. “Does SGD Implicitly Optimize for Smoothness?” In 42nd German Conference on Pattern Recognition, 12544:246–59. LNCS. Springer, 2021. https://doi.org/10.1007/978-3-030-71278-5_18.","mla":"Volhejn, Vaclav, and Christoph Lampert. “Does SGD Implicitly Optimize for Smoothness?” 42nd German Conference on Pattern Recognition, vol. 12544, Springer, 2021, pp. 246–59, doi:10.1007/978-3-030-71278-5_18.","short":"V. Volhejn, C. Lampert, in:, 42nd German Conference on Pattern Recognition, Springer, 2021, pp. 
246–259."},"publication":"42nd German Conference on Pattern Recognition","page":"246-259","has_accepted_license":"1","article_processing_charge":"No","day":"17","scopus_import":"1","series_title":"LNCS","author":[{"full_name":"Volhejn, Vaclav","first_name":"Vaclav","last_name":"Volhejn","id":"d5235fb4-7a6d-11eb-b254-f25d12d631a8"},{"first_name":"Christoph","last_name":"Lampert","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","full_name":"Lampert, Christoph"}],"volume":12544,"date_created":"2021-03-01T09:01:16Z","date_updated":"2022-08-12T07:28:47Z","year":"2021","publisher":"Springer","department":[{"_id":"ChLa"}],"publication_status":"published","file_date_updated":"2022-08-12T07:27:58Z","doi":"10.1007/978-3-030-71278-5_18","conference":{"name":"DAGM GCPR: German Conference on Pattern Recognition ","location":"Tübingen, Germany","start_date":"2020-09-28","end_date":"2020-10-01"},"language":[{"iso":"eng"}],"oa":1,"quality_controlled":"1","publication_identifier":{"issn":["0302-9743"],"isbn":["9783030712778"],"eissn":["1611-3349"]},"month":"03"},{"publication":"9th International Conference on Learning Representations","citation":{"mla":"Phuong, Mary, and Christoph Lampert. “The Inductive Bias of ReLU Networks on Orthogonally Separable Data.” 9th International Conference on Learning Representations, 2021.","short":"M. Phuong, C. Lampert, in:, 9th International Conference on Learning Representations, 2021.","chicago":"Phuong, Mary, and Christoph Lampert. “The Inductive Bias of ReLU Networks on Orthogonally Separable Data.” In 9th International Conference on Learning Representations, 2021.","ama":"Phuong M, Lampert C. The inductive bias of ReLU networks on orthogonally separable data. In: 9th International Conference on Learning Representations. ; 2021.","ista":"Phuong M, Lampert C. 2021. The inductive bias of ReLU networks on orthogonally separable data. 9th International Conference on Learning Representations. 
ICLR: International Conference on Learning Representations.","apa":"Phuong, M., & Lampert, C. (2021). The inductive bias of ReLU networks on orthogonally separable data. In 9th International Conference on Learning Representations. Virtual.","ieee":"M. Phuong and C. Lampert, “The inductive bias of ReLU networks on orthogonally separable data,” in 9th International Conference on Learning Representations, Virtual, 2021."},"main_file_link":[{"open_access":"1","url":"https://openreview.net/pdf?id=krz7T0xU9Z_"}],"oa":1,"quality_controlled":"1","conference":{"location":"Virtual","start_date":"2021-05-03","end_date":"2021-05-07","name":" ICLR: International Conference on Learning Representations"},"date_published":"2021-05-01T00:00:00Z","language":[{"iso":"eng"}],"scopus_import":"1","month":"05","day":"01","has_accepted_license":"1","article_processing_charge":"No","_id":"9416","year":"2021","user_id":"2DF688A6-F248-11E8-B48F-1D18A9856A87","status":"public","title":"The inductive bias of ReLU networks on orthogonally separable data","publication_status":"published","ddc":["000"],"department":[{"_id":"GradSch"},{"_id":"ChLa"}],"author":[{"last_name":"Bui Thi Mai","first_name":"Phuong","id":"3EC6EE64-F248-11E8-B48F-1D18A9856A87","full_name":"Bui Thi Mai, Phuong"},{"full_name":"Lampert, Christoph","id":"40C20FD2-F248-11E8-B48F-1D18A9856A87","orcid":"0000-0001-8622-7887","first_name":"Christoph","last_name":"Lampert"}],"related_material":{"record":[{"status":"public","relation":"dissertation_contains","id":"9418"}]},"date_updated":"2023-09-07T13:29:50Z","date_created":"2021-05-24T11:16:46Z","oa_version":"Published 
Version","file":[{"checksum":"f34ff17017527db5ba6927f817bdd125","date_updated":"2021-05-24T11:15:57Z","date_created":"2021-05-24T11:15:57Z","relation":"main_file","file_id":"9417","content_type":"application/pdf","file_size":502356,"creator":"bphuong","access_level":"open_access","file_name":"iclr2021_conference.pdf"}],"type":"conference","abstract":[{"lang":"eng","text":"We study the inductive bias of two-layer ReLU networks trained by gradient flow. We identify a class of easy-to-learn (`orthogonally separable') datasets, and characterise the solution that ReLU networks trained on such datasets converge to. Irrespective of network width, the solution turns out to be a combination of two max-margin classifiers: one corresponding to the positive data subset and one corresponding to the negative data subset. The proof is based on the recently introduced concept of extremal sectors, for which we prove a number of properties in the context of orthogonal separability. In particular, we prove stationarity of activation patterns from some time onwards, which enables a reduction of the ReLU network to an ensemble of linear subnetworks."}],"file_date_updated":"2021-05-24T11:15:57Z"}]