@comment{
  Bibliography records: journal articles, conference papers and preprints.
  Citation keys are numeric record identifiers and are kept unchanged so that
  existing citations continue to resolve.
  Conventions used below: one field per line; names in "Last, First" form;
  page ranges written with "--"; titles single-braced with only case-sensitive
  words protected; LaTeX specials escaped in typeset fields; non-ASCII
  characters stored as UTF-8.
}

@inproceedings{14974,
  author    = {Zhang, Cheng and Janzing, Dominik and van der Schaar, Mihaela and Locatello, Francesco and Spirtes, Peter and Zhang, Kun and Schölkopf, Bernhard and Uhler, Caroline},
  title     = {Causality in the time of {LLMs}: Round table discussion results of {CLeaR} 2023},
  booktitle = {2nd Conference on Causal Learning and Reasoning},
  venue     = {Tübingen, Germany},
  year      = {2023},
  abstract  = {The field of machine learning and AI has witnessed remarkable breakthroughs with the emergence of LLMs, which have also sparked a lively debate in the causal community. As researchers in this field, we are interested in exploring how LLMs relate to causality research, and how we can leverage the technology to advance it. In the second conference of Causal Learning and Reasoning (CLeaR), 2023, we held a round table discussion to gather and integrate the diverse perspectives of the CLeaR community on this topic. There is a general consensus that LLMs are not yet capable of causal reasoning at the current stage but has a lot of potential with public available information by CLeaR 2023. Enhancing causal machine learning is vital not only for its own sake but also to help LLMs improve their performance, especially regarding trustworthiness. In this document, we present both the summary and the raw outcome of the round table discussion. We acknowledge that with the progress of both fields, the opportunities and impact may rapidly change. We will repeat the same exercise in CLeaR 2024 to document the evolution.},
}

@article{1092,
  author    = {Mohammadi, Fatemeh and Uhler, Caroline and Wang, Charles and Yu, Josephine},
  title     = {Generalized permutohedra from probabilistic graphical models},
  journal   = {SIAM Journal on Discrete Mathematics},
  volume    = {32},
  number    = {1},
  pages     = {64--93},
  publisher = {Society for Industrial and Applied Mathematics},
  year      = {2018},
  doi       = {10.1137/16M107894X},
  abstract  = {A graphical model encodes conditional independence relations via the Markov properties. For an undirected graph these conditional independence relations can be represented by a simple polytope known as the graph associahedron, which can be constructed as a Minkowski sum of standard simplices. We show that there is an analogous polytope for conditional independence relations coming from a regular Gaussian model, and it can be defined using multiinformation or relative entropy. For directed acyclic graphical models we give a construction of this polytope as a Minkowski sum of matroid polytopes. Finally, we apply this geometric insight to construct a new ordering-based search algorithm for causal inference via directed acyclic graphical models.},
}

@article{2015,
  author    = {Raskutti, Garvesh and Uhler, Caroline},
  title     = {Learning directed acyclic graphs based on sparsest permutations},
  journal   = {STAT},
  volume    = {7},
  number    = {1},
  publisher = {Wiley},
  year      = {2018},
  doi       = {10.1002/sta4.183},
  abstract  = {We consider the problem of learning a Bayesian network or directed acyclic graph model from observational data. A number of constraint-based, score-based and hybrid algorithms have been developed for this purpose. Statistical consistency guarantees of these algorithms rely on the faithfulness assumption, which has been shown to be restrictive especially for graphs with cycles in the skeleton. We here propose the sparsest permutation (SP) algorithm, showing that learning Bayesian networks is possible under strictly weaker assumptions than faithfulness. This comes at a computational price, thereby indicating a statistical-computational trade-off for causal inference algorithms. In the Gaussian noiseless setting, we prove that the SP algorithm boils down to finding the permutation of the variables with the sparsest Cholesky decomposition of the inverse covariance matrix, which is equivalent to ℓ0-penalized maximum likelihood estimation. We end with a simulation study showing that in line with the proven stronger consistency guarantees, and the SP algorithm compares favourably to standard causal inference algorithms in terms of accuracy for a given sample size.},
}

@article{698,
  author    = {Wang, Yejun and Nagarajan, Mallika and Uhler, Caroline and Shivashankar, G. V.},
  title     = {Orientation and repositioning of chromosomes correlate with cell geometry dependent gene expression},
  journal   = {Molecular Biology of the Cell},
  volume    = {28},
  number    = {14},
  pages     = {1997--2009},
  publisher = {American Society for Cell Biology},
  year      = {2017},
  doi       = {10.1091/mbc.E16-12-0825},
  issn      = {1059-1524},
  abstract  = {Extracellular matrix signals from the microenvironment regulate gene expression patterns and cell behavior. Using a combination of experiments and geometric models, we demonstrate correlations between cell geometry, three-dimensional (3D) organization of chromosome territories, and gene expression. Fluorescence in situ hybridization experiments showed that micropatterned fibroblasts cultured on anisotropic versus isotropic substrates resulted in repositioning of specific chromosomes, which contained genes that were differentially regulated by cell geometries. Experiments combined with ellipsoid packing models revealed that the mechanosensitivity of chromosomes was correlated with their orientation in the nucleus. Transcription inhibition experiments suggested that the intermingling degree was more sensitive to global changes in transcription than to chromosome radial positioning and its orientations. These results suggested that cell geometry modulated 3D chromosome arrangement, and their neighborhoods correlated with gene expression patterns in a predictable manner. This is central to understanding geometric control of genetic programs involved in cellular homeostasis and the associated diseases.},
}

@article{2016,
  author    = {Martin Del Campo Sanchez, Abraham and Cepeda Humerez, Sarah A. and Uhler, Caroline},
  title     = {Exact goodness-of-fit testing for the {Ising} model},
  journal   = {Scandinavian Journal of Statistics},
  volume    = {44},
  number    = {2},
  pages     = {285--306},
  publisher = {Wiley-Blackwell},
  year      = {2017},
  doi       = {10.1111/sjos.12251},
  issn      = {0303-6898},
  abstract  = {The Ising model is one of the simplest and most famous models of interacting systems. It was originally proposed to model ferromagnetic interactions in statistical physics and is now widely used to model spatial processes in many areas such as ecology, sociology, and genetics, usually without testing its goodness-of-fit. Here, we propose an exact goodness-of-fit test for the finite-lattice Ising model. The theory of Markov bases has been developed in algebraic statistics for exact goodness-of-fit testing using a Monte Carlo approach. However, this beautiful theory has fallen short of its promise for applications, because finding a Markov basis is usually computationally intractable. We develop a Monte Carlo method for exact goodness-of-fit testing for the Ising model which avoids computing a Markov basis and also leads to a better connectivity of the Markov chain and hence to a faster convergence. We show how this method can be applied to analyze the spatial organization of receptors on the cell membrane.},
}

@article{1208,
  author    = {Zwiernik, Piotr and Uhler, Caroline and Richards, Donald},
  title     = {Maximum likelihood estimation for linear {Gaussian} covariance models},
  journal   = {Journal of the Royal Statistical Society. Series B: Statistical Methodology},
  volume    = {79},
  number    = {4},
  pages     = {1269--1292},
  publisher = {Wiley-Blackwell},
  year      = {2017},
  doi       = {10.1111/rssb.12217},
  issn      = {1369-7412},
  abstract  = {We study parameter estimation in linear Gaussian covariance models, which are p-dimensional Gaussian models with linear constraints on the covariance matrix. Maximum likelihood estimation for this class of models leads to a non-convex optimization problem which typically has many local maxima. Using recent results on the asymptotic distribution of extreme eigenvalues of the Wishart distribution, we provide sufficient conditions for any hill climbing method to converge to the global maximum. Although we are primarily interested in the case in which n≫p, the proofs of our results utilize large sample asymptotic theory under the scheme n/p→γ>1. Remarkably, our numerical simulations indicate that our results remain valid for p as small as 2. An important consequence of this analysis is that, for sample sizes n≃14p, maximum likelihood estimation for linear Gaussian covariance models behaves as if it were a convex optimization problem. © 2016 The Royal Statistical Society and Blackwell Publishing Ltd.},
}

@article{1089,
  author    = {Fallat, Shaun and Lauritzen, Steffen and Sadeghi, Kayvan and Uhler, Caroline and Wermuth, Nanny and Zwiernik, Piotr},
  title     = {Total positivity in {Markov} structures},
  journal   = {The Annals of Statistics},
  volume    = {45},
  number    = {3},
  pages     = {1152--1184},
  publisher = {Institute of Mathematical Statistics},
  year      = {2017},
  doi       = {10.1214/16-AOS1478},
  issn      = {0090-5364},
  abstract  = {We discuss properties of distributions that are multivariate totally positive of order two (MTP2) related to conditional independence. In particular, we show that any independence model generated by an MTP2 distribution is a compositional semigraphoid which is upward-stable and singleton-transitive. In addition, we prove that any MTP2 distribution satisfying an appropriate support condition is faithful to its concentration graph. Finally, we analyze factorization properties of MTP2 distributions and discuss ways of constructing MTP2 distributions; in particular we give conditions on the log-linear parameters of a discrete distribution which ensure MTP2 and characterize conditional Gaussian distributions which satisfy MTP2.},
}

@article{1088,
  author    = {Uhler, Caroline and Shivashankar, G. V.},
  title     = {Geometric control and modeling of genome reprogramming},
  journal   = {BioArchitecture},
  volume    = {6},
  number    = {4},
  pages     = {76--84},
  publisher = {Taylor \& Francis},
  year      = {2016},
  doi       = {10.1080/19490992.2016.1201620},
  abstract  = {Cell geometry is tightly coupled to gene expression patterns within the tissue microenvironment. This perspective synthesizes evidence that the 3D organization of chromosomes is a critical intermediate for geometric control of genomic programs. Using a combination of experiments and modeling we outline approaches to decipher the mechano-genomic code that governs cellular homeostasis and reprogramming.},
}

@article{1293,
  author    = {Solus, Liam T. and Uhler, Caroline and Yoshida, Ruriko},
  title     = {Extremal positive semidefinite matrices whose sparsity pattern is given by graphs without {K5} minors},
  journal   = {Linear Algebra and Its Applications},
  volume    = {509},
  pages     = {247--275},
  publisher = {Elsevier},
  year      = {2016},
  doi       = {10.1016/j.laa.2016.07.026},
  abstract  = {For a graph G with p vertices the closed convex cone S⪰0(G) consists of all real positive semidefinite p×p matrices whose sparsity pattern is given by G, that is, those matrices with zeros in the off-diagonal entries corresponding to nonedges of G. The extremal rays of this cone and their associated ranks have applications to matrix completion problems, maximum likelihood estimation in Gaussian graphical models in statistics, and Gauss elimination for sparse matrices. While the maximum rank of an extremal ray in S⪰0(G), known as the sparsity order of G, has been characterized for different classes of graphs, we here study all possible extremal ranks of S⪰0(G). We investigate when the geometry of the (±1)-cut polytope of G yields a polyhedral characterization of the set of extremal ranks of S⪰0(G). For a graph G without K5 minors, we show that appropriately chosen normal vectors to the facets of the (±1)-cut polytope of G specify the off-diagonal entries of extremal matrices in S⪰0(G). We also prove that for appropriately chosen scalars the constant term of the linear equation of each facet-supporting hyperplane is the rank of its corresponding extremal matrix in S⪰0(G). Furthermore, we show that if G is series-parallel then this gives a complete characterization of all possible extremal ranks of S⪰0(G). Consequently, the sparsity order problem for series-parallel graphs can be solved in terms of polyhedral geometry.},
}

@article{1480,
  author    = {Michałek, Mateusz and Sturmfels, Bernd and Uhler, Caroline and Zwiernik, Piotr},
  title     = {Exponential varieties},
  journal   = {Proceedings of the London Mathematical Society},
  volume    = {112},
  number    = {1},
  pages     = {27--56},
  publisher = {Oxford University Press},
  year      = {2016},
  doi       = {10.1112/plms/pdv066},
  abstract  = {Exponential varieties arise from exponential families in statistics. These real algebraic varieties have strong positivity and convexity properties, familiar from toric varieties and their moment maps. Among them are varieties of inverses of symmetric matrices satisfying linear constraints. This class includes Gaussian graphical models. We develop a general theory of exponential varieties. These are derived from hyperbolic polynomials and their integral representations. We compare the multidegrees and ML degrees of the gradient map for hyperbolic polynomials.},
}

@article{2014,
  author    = {Klimova, Anna and Uhler, Caroline and Rudas, Tamás},
  title     = {Faithfulness and learning hypergraphs from discrete distributions},
  journal   = {Computational Statistics \& Data Analysis},
  volume    = {87},
  number    = {7},
  pages     = {57--72},
  publisher = {Elsevier},
  year      = {2015},
  doi       = {10.1016/j.csda.2015.01.017},
  abstract  = {The concepts of faithfulness and strong-faithfulness are important for statistical learning of graphical models. Graphs are not sufficient for describing the association structure of a discrete distribution. Hypergraphs representing hierarchical log-linear models are considered instead, and the concept of parametric (strong-) faithfulness with respect to a hypergraph is introduced. Strong-faithfulness ensures the existence of uniformly consistent parameter estimators and enables building uniformly consistent procedures for a hypergraph search. The strength of association in a discrete distribution can be quantified with various measures, leading to different concepts of strong-faithfulness. Lower and upper bounds for the proportions of distributions that do not satisfy strong-faithfulness are computed for different parameterizations and measures of association.},
}

@article{2011,
  author    = {Yu, Fei and Fienberg, Stephen and Slavković, Aleksandra and Uhler, Caroline},
  title     = {Scalable privacy-preserving data sharing methodology for genome-wide association studies},
  journal   = {Journal of Biomedical Informatics},
  volume    = {50},
  pages     = {133--141},
  publisher = {Elsevier},
  year      = {2014},
  doi       = {10.1016/j.jbi.2014.01.008},
  abstract  = {The protection of privacy of individual-level information in genome-wide association study (GWAS) databases has been a major concern of researchers following the publication of “an attack” on GWAS data by Homer et al. (2008). Traditional statistical methods for confidentiality and privacy protection of statistical databases do not scale well to deal with GWAS data, especially in terms of guarantees regarding protection from linkage to external information. The more recent concept of differential privacy, introduced by the cryptographic community, is an approach that provides a rigorous definition of privacy with meaningful privacy guarantees in the presence of arbitrary external information, although the guarantees may come at a serious price in terms of data utility. Building on such notions, Uhler et al. (2013) proposed new methods to release aggregate GWAS data without compromising an individual’s privacy. We extend the methods developed in Uhler et al. (2013) for releasing differentially-private $\chi^2$-statistics by allowing for arbitrary number of cases and controls, and for releasing differentially-private allelic test statistics. We also provide a new interpretation by assuming the controls’ data are known, which is a realistic assumption because some GWAS use publicly available data as controls. We assess the performance of the proposed methods through a risk-utility analysis on a real data set consisting of DNA samples collected by the Wellcome Trust Case Control Consortium and compare the methods with the differentially-private release mechanism proposed by Johnson and Shmatikov (2013).},
}

@article{2013,
  author    = {Lin, Shaowei and Uhler, Caroline and Sturmfels, Bernd and Bühlmann, Peter},
  title     = {Hypersurfaces and their singularities in partial correlation testing},
  journal   = {Foundations of Computational Mathematics},
  volume    = {14},
  number    = {5},
  pages     = {1079--1116},
  publisher = {Springer},
  year      = {2014},
  doi       = {10.1007/s10208-014-9205-0},
  abstract  = {An asymptotic theory is developed for computing volumes of regions in the parameter space of a directed Gaussian graphical model that are obtained by bounding partial correlations. We study these volumes using the method of real log canonical thresholds from algebraic geometry. Our analysis involves the computation of the singular loci of correlation hypersurfaces. Statistical applications include the strong-faithfulness assumption for the PC algorithm and the quantification of confounder bias in causal inference. A detailed analysis is presented for trees, bow ties, tripartite graphs, and complete graphs.},
}

@unpublished{2017,
  author   = {Uhler, Caroline and Lenkoski, Alex and Richards, Donald},
  title    = {Exact formulas for the normalizing constants of {Wishart} distributions for graphical models},
  note     = {arXiv preprint},
  year     = {2014},
  abstract = {Gaussian graphical models have received considerable attention during the past four decades from the statistical and machine learning communities. In Bayesian treatments of this model, the G-Wishart distribution serves as the conjugate prior for inverse covariance matrices satisfying graphical constraints. While it is straightforward to posit the unnormalized densities, the normalizing constants of these distributions have been known only for graphs that are chordal, or decomposable. Up until now, it was unknown whether the normalizing constant for a general graph could be represented explicitly, and a considerable body of computational literature emerged that attempted to avoid this apparent intractability. We close this question by providing an explicit representation of the G-Wishart normalizing constant for general graphs.},
}

@inproceedings{2047,
  author    = {Yu, Fei and Rybar, Michal and Uhler, Caroline and Fienberg, Stephen},
  title     = {Differentially-private logistic regression for detecting {multiple-SNP} association in {GWAS} databases},
  editor    = {Domingo-Ferrer, Josep},
  booktitle = {Privacy in Statistical Databases},
  series    = {Lecture Notes in Computer Science},
  volume    = {8744},
  pages     = {170--184},
  publisher = {Springer},
  venue     = {Ibiza, Spain},
  year      = {2014},
  doi       = {10.1007/978-3-319-11257-2_14},
  abstract  = {Following the publication of an attack on genome-wide association studies (GWAS) data proposed by Homer et al., considerable attention has been given to developing methods for releasing GWAS data in a privacy-preserving way. Here, we develop an end-to-end differentially private method for solving regression problems with convex penalty functions and selecting the penalty parameters by cross-validation. In particular, we focus on penalized logistic regression with elastic-net regularization, a method widely used to in GWAS analyses to identify disease-causing genes. We show how a differentially private procedure for penalized logistic regression with elastic-net regularization can be applied to the analysis of GWAS data and evaluate our method’s performance.},
}

@unpublished{2012,
  author     = {Iglesias Ham, Mabel and Kerber, Michael and Uhler, Caroline},
  title      = {Sphere packing with limited overlap},
  note       = {arXiv preprint},
  eprint     = {1401.0468},
  eprinttype = {arXiv},
  year       = {2014},
  doi        = {10.48550/arXiv.1401.0468},
  abstract   = {The classical sphere packing problem asks for the best (infinite) arrangement of non-overlapping unit balls which cover as much space as possible. We define a generalized version of the problem, where we allow each ball a limited amount of overlap with other balls. We study two natural choices of overlap measures and obtain the optimal lattice packings in a parameterized family of lattices which contains the FCC, BCC, and integer lattice.},
}

@article{2010,
  author    = {Uhler, Caroline and Raskutti, Garvesh and Bühlmann, Peter and Yu, Bin},
  title     = {Geometry of the faithfulness assumption in causal inference},
  journal   = {The Annals of Statistics},
  volume    = {41},
  number    = {2},
  pages     = {436--463},
  publisher = {Institute of Mathematical Statistics},
  year      = {2013},
  doi       = {10.1214/12-AOS1080},
  abstract  = {Many algorithms for inferring causality rely heavily on the faithfulness assumption. The main justification for imposing this assumption is that the set of unfaithful distributions has Lebesgue measure zero, since it can be seen as a collection of hypersurfaces in a hypercube. However, due to sampling error the faithfulness condition alone is not sufficient for statistical estimation, and strong-faithfulness has been proposed and assumed to achieve uniform or high-dimensional consistency. In contrast to the plain faithfulness assumption, the set of distributions that is not strong-faithful has nonzero Lebesgue measure and in fact, can be surprisingly large as we show in this paper. We study the strong-faithfulness condition from a geometric and combinatorial point of view and give upper and lower bounds on the Lebesgue measure of strong-faithful distributions for various classes of directed acyclic graphs. Our results imply fundamental limitations for the PC-algorithm and potentially also for other algorithms based on partial correlation testing in the Gaussian case.},
}

@article{2009,
  author    = {Uhler, Caroline and Slavković, Aleksandra and Fienberg, Stephen},
  title     = {Privacy-preserving data sharing for genome-wide association studies},
  journal   = {Journal of Privacy and Confidentiality},
  volume    = {5},
  number    = {1},
  pages     = {137--166},
  publisher = {Carnegie Mellon University},
  year      = {2013},
  doi       = {10.29012/jpc.v5i1.629},
  abstract  = {Traditional statistical methods for confidentiality protection of statistical databases do not scale well to deal with GWAS databases especially in terms of guarantees regarding protection from linkage to external information. The more recent concept of differential privacy, introduced by the cryptographic community, is an approach which provides a rigorous definition of privacy with meaningful privacy guarantees in the presence of arbitrary external information, although the guarantees may come at a serious price in terms of data utility. Building on such notions, we propose new methods to release aggregate GWAS data without compromising an individual’s privacy. We present methods for releasing differentially private minor allele frequencies, chi-square statistics and p-values. We compare these approaches on simulated data and on a GWAS study of canine hair length involving 685 dogs. We also propose a privacy-preserving method for finding genome-wide associations based on a differentially-private approach to penalized logistic regression.},
}

@article{2280,
  author    = {Uhler, Caroline and Wright, Stephen},
  title     = {Packing ellipsoids with overlap},
  journal   = {SIAM Review},
  volume    = {55},
  number    = {4},
  pages     = {671--706},
  publisher = {Society for Industrial and Applied Mathematics},
  year      = {2013},
  doi       = {10.1137/120872309},
  abstract  = {The problem of packing ellipsoids of different sizes and shapes into an ellipsoidal container so as to minimize a measure of overlap between ellipsoids is considered. A bilevel optimization formulation is given, together with an algorithm for the general case and a simpler algorithm for the special case in which all ellipsoids are in fact spheres. Convergence results are proved and computational experience is described and illustrated. The motivating application---chromosome organization in the human cell nucleus---is discussed briefly, and some illustrative results are presented.},
}

@article{2959,
  author    = {Uhler, Caroline},
  title     = {Geometry of maximum likelihood estimation in {Gaussian} graphical models},
  journal   = {The Annals of Statistics},
  volume    = {40},
  number    = {1},
  pages     = {238--261},
  publisher = {Institute of Mathematical Statistics},
  year      = {2012},
  doi       = {10.1214/11-AOS957},
  abstract  = {We study maximum likelihood estimation in Gaussian graphical models from a geometric point of view. An algebraic elimination criterion allows us to find exact lower bounds on the number of observations needed to ensure that the maximum likelihood estimator (MLE) exists with probability one. This is applied to bipartite graphs, grids and colored graphs. We also study the ML degree, and we present the first instance of a graph for which the MLE exists with probability one, even when the number of observations equals the treewidth.},
}