@comment{
  Publication list, 2010-2012, one entry per work.
  Layout: one field per line; order is author, title, container, numbering,
  pages, publisher, venue, year, identifiers, abstract.
  Titles are stored in sentence case; only acronyms are brace-protected.
  For conference papers, "venue" holds the conference city; "location" is
  reserved for the publisher's place and is therefore not used here.
  NOTE(review): booktitle/series values that were absent from the original
  export were reconstructed from the DOI, LNCS volume number, venue and year;
  confirm against the publisher record before relying on them.
}

@article{3164,
  author    = {Blaschko, Matthew and Lampert, Christoph},
  title     = {Guest editorial: Special issue on structured prediction and inference},
  journal   = {International Journal of Computer Vision},
  volume    = {99},
  number    = {3},
  pages     = {257--258},
  publisher = {Springer},
  year      = {2012},
  doi       = {10.1007/s11263-012-0530-y},
  abstract  = {Overview of the Special Issue on structured prediction and inference.},
}

@inproceedings{3125,
  author    = {Sharmanska, Viktoriia and Quadrianto, Novi and Lampert, Christoph},
  title     = {Augmented attribute representations},
  booktitle = {Computer Vision -- {ECCV} 2012, Part {V}},
  series    = {Lecture Notes in Computer Science},
  volume    = {7576},
  pages     = {242--255},
  publisher = {Springer},
  venue     = {Florence, Italy},
  year      = {2012},
  doi       = {10.1007/978-3-642-33715-4_18},
  abstract  = {We propose a new learning method to infer a mid-level feature representation that combines the advantage of semantic attribute representations with the higher expressive power of non-semantic features. The idea lies in augmenting an existing attribute-based representation with additional dimensions for which an autoencoder model is coupled with a large-margin principle. This construction allows a smooth transition between the zero-shot regime with no training example, the unsupervised regime with training examples but without class labels, and the supervised regime with training examples and with class labels. The resulting optimization problem can be solved efficiently, because several of the necessity steps have closed-form solutions. Through extensive experiments we show that the augmented representation achieves better results in terms of object categorization accuracy than the semantic representation alone.},
}

@inproceedings{3126,
  author    = {Müller, Andreas and Nowozin, Sebastian and Lampert, Christoph},
  title     = {Information theoretic clustering using minimal spanning trees},
  booktitle = {Pattern Recognition: Joint 34th {DAGM} and 36th {OAGM} Symposium},
  series    = {Lecture Notes in Computer Science},
  volume    = {7476},
  pages     = {205--215},
  publisher = {Springer},
  venue     = {Graz, Austria},
  year      = {2012},
  doi       = {10.1007/978-3-642-32717-9_21},
  abstract  = {In this work we propose a new information-theoretic clustering algorithm that infers cluster memberships by direct optimization of a non-parametric mutual information estimate between data distribution and cluster assignment. Although the optimization objective has a solid theoretical foundation it is hard to optimize. We propose an approximate optimization formulation that leads to an efficient algorithm with low runtime complexity. The algorithm has a single free parameter, the number of clusters to find. We demonstrate superior performance on several synthetic and real datasets.},
}

@article{3248,
  author    = {Lampert, Christoph and Peters, Jan},
  title     = {Real-time detection of colored objects in multiple camera streams with off-the-shelf hardware components},
  journal   = {Journal of Real-Time Image Processing},
  volume    = {7},
  number    = {1},
  pages     = {31--41},
  publisher = {Springer},
  year      = {2012},
  doi       = {10.1007/s11554-010-0168-3},
  issn      = {1861-8219},
  abstract  = {We describe RTblob, a high speed vision system that detects objects in cluttered scenes based on their color and shape at a speed of over 800 frames/s. Because the system is available as open-source software and relies only on off-the-shelf PC hardware components, it can provide the basis for multiple application scenarios. As an illustrative example, we show how RTblob can be used in a robotic table tennis scenario to estimate ball trajectories through 3D space simultaneously from four cameras images at a speed of 200 Hz.},
}

@comment{
  NOTE(review), entry 3124: booktitle is required for this entry type but the
  proceedings title cannot be determined from this file, and the publisher
  value looks like a conference acronym rather than a publisher. Confirm both
  against the original publication before citing.
}

@inproceedings{3124,
  author    = {Korc, Filip and Kolmogorov, Vladimir and Lampert, Christoph},
  title     = {Approximating marginals using discrete energy minimization},
  publisher = {ICML},
  venue     = {Edinburgh, Scotland},
  year      = {2012},
  abstract  = {We consider the problem of inference in a graphical model with binary variables. While in theory it is arguably preferable to compute marginal probabilities, in practice researchers often use MAP inference due to the availability of efficient discrete optimization algorithms. We bridge the gap between the two approaches by introducing the Discrete Marginals technique in which approximate marginals are obtained by minimizing an objective function with unary and pairwise terms over a discretized domain. This allows the use of techniques originally developed for MAP-MRF inference and learning. We explore two ways to set up the objective function - by discretizing the Bethe free energy and by learning it from training data. Experimental results show that for certain types of graphs a learned function can outperform the Bethe approximation. We also establish a link between the Bethe free energy and submodular functions.},
}

@comment{
  NOTE(review), entries 5396 and 5386: the ISSN and DOI pattern suggest
  institutional technical reports; presumably these could be typed as
  techreport with an institution field. Left as misc pending confirmation.
  "pages" holds the total page count here, not a range.
}

@misc{5396,
  author    = {Korc, Filip and Kolmogorov, Vladimir and Lampert, Christoph},
  title     = {Approximating marginals using discrete energy minimization},
  pages     = {13},
  publisher = {IST Austria},
  year      = {2012},
  doi       = {10.15479/AT:IST-2012-0003},
  issn      = {2664-1690},
  abstract  = {We consider the problem of inference in a graphical model with binary variables. While in theory it is arguably preferable to compute marginal probabilities, in practice researchers often use MAP inference due to the availability of efficient discrete optimization algorithms. We bridge the gap between the two approaches by introducing the Discrete Marginals technique in which approximate marginals are obtained by minimizing an objective function with unary and pairwise terms over a discretized domain. This allows the use of techniques originally developed for MAP-MRF inference and learning. We explore two ways to set up the objective function - by discretizing the Bethe free energy and by learning it from training data. Experimental results show that for certain types of graphs a learned function can outperform the Bethe approximation. We also establish a link between the Bethe free energy and submodular functions.},
}

@comment{
  NOTE(review), entry 2915: booktitle (required) and pages are missing and
  cannot be determined from this file; confirm against the original
  publication.
}

@inproceedings{2915,
  author    = {Kroemer, Oliver and Lampert, Christoph and Peters, Jan},
  title     = {Multi-modal learning for dynamic tactile sensing},
  publisher = {Deutsches Zentrum für Luft und Raumfahrt},
  year      = {2012},
}

@inproceedings{3127,
  author    = {Quadrianto, Novi and Lampert, Christoph and Chen, Chao},
  title     = {The most persistent soft-clique in a set of sampled graphs},
  booktitle = {Proceedings of the 29th International Conference on Machine Learning},
  pages     = {211--218},
  publisher = {ML Research Press},
  venue     = {Edinburgh, United Kingdom},
  year      = {2012},
  abstract  = {When searching for characteristic subpatterns in potentially noisy graph data, it appears self-evident that having multiple observations would be better than having just one. However, it turns out that the inconsistencies introduced when different graph instances have different edge sets pose a serious challenge. In this work we address this challenge for the problem of finding maximum weighted cliques. We introduce the concept of most persistent soft-clique. This is subset of vertices, that 1) is almost fully or at least densely connected, 2) occurs in all or almost all graph instances, and 3) has the maximum weight. We present a measure of clique-ness, that essentially counts the number of edge missing to make a subset of vertices into a clique. With this measure, we show that the problem of finding the most persistent soft-clique problem can be cast either as: a) a max-min two person game optimization problem, or b) a min-min soft margin optimization problem. Both formulations lead to the same solution when using a partial Lagrangian method to solve the optimization problems. By experiments on synthetic data and on real social network data, we show that the proposed method is able to reliably find soft cliques in graph data, even if that is distorted by random noise or unreliable observations.},
}

@inproceedings{3337,
  author    = {Wang, Zhikun and Lampert, Christoph and Mülling, Katharina and Schölkopf, Bernhard and Peters, Jan},
  title     = {Learning anticipation policies for robot table tennis},
  booktitle = {2011 {IEEE/RSJ} International Conference on Intelligent Robots and Systems},
  pages     = {332--337},
  publisher = {IEEE},
  venue     = {San Francisco, USA},
  year      = {2011},
  doi       = {10.1109/IROS.2011.6094892},
  abstract  = {Playing table tennis is a difficult task for robots, especially due to their limitations of acceleration. A key bottleneck is the amount of time needed to reach the desired hitting position and velocity of the racket for returning the incoming ball. Here, it often does not suffice to simply extrapolate the ball's trajectory after the opponent returns it but more information is needed. Humans are able to predict the ball's trajectory based on the opponent's moves and, thus, have a considerable advantage. Hence, we propose to incorporate an anticipation system into robot table tennis players, which enables the robot to react earlier while the opponent is performing the striking movement. Based on visual observation of the opponent's racket movement, the robot can predict the aim of the opponent and adjust its movement generation accordingly. The policies for deciding how and when to react are obtained by reinforcement learning. We conduct experiments with an existing robot player to show that the learned reaction policy can significantly improve the performance of the overall system.},
}

@article{3389,
  author    = {Blaschko, Matthew and Shelton, Jacquelyn and Bartels, Andreas and Lampert, Christoph and Gretton, Arthur},
  title     = {Semi supervised kernel canonical correlation analysis with application to human {fMRI}},
  journal   = {Pattern Recognition Letters},
  volume    = {32},
  number    = {11},
  pages     = {1572--1583},
  publisher = {Elsevier},
  year      = {2011},
  doi       = {10.1016/j.patrec.2011.02.011},
  abstract  = {Kernel canonical correlation analysis (KCCA) is a general technique for subspace learning that incorporates principal components analysis (PCA) and Fisher linear discriminant analysis (LDA) as special cases. By finding directions that maximize correlation, KCCA learns representations that are more closely tied to the underlying process that generates the data and can ignore high-variance noise directions. However, for data where acquisition in one or more modalities is expensive or otherwise limited, KCCA may suffer from small sample effects. We propose to use semi-supervised Laplacian regularization to utilize data that are present in only one modality. This approach is able to find highly correlated directions that also lie along the data manifold, resulting in a more robust estimate of correlated subspaces. Functional magnetic resonance imaging (fMRI) acquired data are naturally amenable to subspace techniques as data are well aligned. fMRI data of the human brain are a particularly interesting candidate. In this study we implemented various supervised and semi-supervised versions of KCCA on human fMRI data, with regression to single and multi-variate labels (corresponding to video content subjects viewed during the image acquisition). In each variate condition, the semi-supervised variants of KCCA performed better than the supervised variants, including a supervised variant with Laplacian regularization. We additionally analyze the weights learned by the regression in order to infer brain regions that are important to different types of visual processing.},
}

@article{3382,
  author    = {Kroemer, Oliver and Lampert, Christoph and Peters, Jan},
  title     = {Learning dynamic tactile sensing with robust vision based training},
  journal   = {IEEE Transactions on Robotics},
  volume    = {27},
  number    = {3},
  pages     = {545--557},
  publisher = {IEEE},
  year      = {2011},
  doi       = {10.1109/TRO.2011.2121130},
  abstract  = {Dynamic tactile sensing is a fundamental ability to recognize materials and objects. However, while humans are born with partially developed dynamic tactile sensing and quickly master this skill, today's robots remain in their infancy. The development of such a sense requires not only better sensors but the right algorithms to deal with these sensors' data as well. For example, when classifying a material based on touch, the data are noisy, high-dimensional, and contain irrelevant signals as well as essential ones. Few classification methods from machine learning can deal with such problems. In this paper, we propose an efficient approach to infer suitable lower dimensional representations of the tactile data. In order to classify materials based on only the sense of touch, these representations are autonomously discovered using visual information of the surfaces during training. However, accurately pairing vision and tactile samples in real-robot applications is a difficult problem. The proposed approach, therefore, works with weak pairings between the modalities. Experiments show that the resulting approach is very robust and yields significantly higher classification performance based on only dynamic tactile sensing.},
}

@misc{5386,
  author    = {Chen, Chao and Freedman, Daniel and Lampert, Christoph},
  title     = {Enforcing topological constraints in random field image segmentation},
  pages     = {69},
  publisher = {IST Austria},
  year      = {2011},
  doi       = {10.15479/AT:IST-2011-0002},
  issn      = {2664-1690},
  abstract  = {We introduce TopoCut: a new way to integrate knowledge about topological properties (TPs) into random field image segmentation model. Instead of including TPs as additional constraints during minimization of the energy function, we devise an efficient algorithm for modifying the unary potentials such that the resulting segmentation is guaranteed with the desired properties. Our method is more flexible in the sense that it handles more topology constraints than previous methods, which were only able to enforce pairwise or global connectivity. In particular, our method is very fast, making it for the first time possible to enforce global topological properties in practical image segmentation tasks.},
}

@inproceedings{3336,
  author    = {Chen, Chao and Freedman, Daniel and Lampert, Christoph},
  title     = {Enforcing topological constraints in random field image segmentation},
  booktitle = {CVPR: Computer Vision and Pattern Recognition},
  pages     = {2089--2096},
  publisher = {IEEE},
  venue     = {Colorado Springs, CO, United States},
  year      = {2011},
  doi       = {10.1109/CVPR.2011.5995503},
  isbn      = {978-1-4577-0394-2},
  abstract  = {We introduce TopoCut: a new way to integrate knowledge about topological properties (TPs) into random field image segmentation model. Instead of including TPs as additional constraints during minimization of the energy function, we devise an efficient algorithm for modifying the unary potentials such that the resulting segmentation is guaranteed with the desired properties. Our method is more flexible in the sense that it handles more topology constraints than previous methods, which were only able to enforce pairwise or global connectivity. In particular, our method is very fast, making it for the first time possible to enforce global topological properties in practical image segmentation tasks.},
}

@inproceedings{3163,
  author    = {Lampert, Christoph},
  title     = {Maximum margin multi-label structured prediction},
  booktitle = {NIPS: Neural Information Processing Systems},
  publisher = {Neural Information Processing Systems},
  venue     = {Granada, Spain},
  year      = {2011},
  abstract  = {We study multi-label prediction for structured output sets, a problem that occurs, for example, in object detection in images, secondary structure prediction in computational biology, and graph matching with symmetries. Conventional multilabel classification techniques are typically not applicable in this situation, because they require explicit enumeration of the label set, which is infeasible in case of structured outputs. Relying on techniques originally designed for single-label structured prediction, in particular structured support vector machines, results in reduced prediction accuracy, or leads to infeasible optimization problems. In this work we derive a maximum-margin training formulation for multi-label structured prediction that remains computationally tractable while achieving high prediction accuracy. It also shares most beneficial properties with single-label maximum-margin approaches, in particular formulation as a convex optimization problem, efficient working set training, and PAC-Bayesian generalization bounds.},
}

@misc{3322,
  author    = {Lampert, Christoph},
  title     = {Maximum margin multi label structured prediction},
  booktitle = {NIPS: Neural Information Processing Systems},
  publisher = {Neural Information Processing Systems Foundation},
  year      = {2011},
  abstract  = {We study multi-label prediction for structured output spaces, a problem that occurs, for example, in object detection in images, secondary structure prediction in computational biology, and graph matching with symmetries. Conventional multi-label classification techniques are typically not applicable in this situation, because they require explicit enumeration of the label space, which is infeasible in case of structured outputs. Relying on techniques originally designed for single-label structured prediction, in particular structured support vector machines, results in reduced prediction accuracy, or leads to infeasible optimization problems. In this work we derive a maximum-margin training formulation for multi-label structured prediction that remains computationally tractable while achieving high prediction accuracy. It also shares most beneficial properties with single-label maximum-margin approaches, in particular a formulation as a convex optimization problem, efficient working set training, and PAC-Bayesian generalization bounds.},
}

@article{3320,
  author    = {Nowozin, Sebastian and Lampert, Christoph},
  title     = {Structured learning and prediction in computer vision},
  journal   = {Foundations and Trends in Computer Graphics and Vision},
  volume    = {6},
  number    = {3-4},
  pages     = {185--365},
  publisher = {Now Publishers},
  year      = {2011},
  doi       = {10.1561/0600000033},
  abstract  = {Powerful statistical models that can be learned efficiently from large amounts of data are currently revolutionizing computer vision. These models possess a rich internal structure reflecting task-specific relations and constraints. This monograph introduces the reader to the most popular classes of structured models in computer vision. Our focus is discrete undirected graphical models which we cover in detail together with a description of algorithms for both probabilistic inference and maximum a posteriori inference. We discuss separately recently successful techniques for prediction in general structured models. In the second part of this monograph we describe methods for parameter learning where we distinguish the classic maximum likelihood based methods from the more recent prediction-based parameter learning methods. We highlight developments to enhance current models and discuss kernelized models and latent variable models. To make the monograph more practical and to provide links to further study we provide examples of successful application of many methods in the computer vision literature.},
}

@inproceedings{3319,
  author    = {Quadrianto, Novi and Lampert, Christoph},
  title     = {Learning multi-view neighborhood preserving projections},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning},
  pages     = {425--432},
  publisher = {ML Research Press},
  venue     = {Bellevue, United States},
  year      = {2011},
  abstract  = {We address the problem of metric learning for multi-view data, namely the construction of embedding projections from data in different representations into a shared feature space, such that the Euclidean distance in this space provides a meaningful within-view as well as between-view similarity. Our motivation stems from the problem of cross-media retrieval tasks, where the availability of a joint Euclidean distance function is a pre-requisite to allow fast, in particular hashing-based, nearest neighbor queries. We formulate an objective function that expresses the intuitive concept that matching samples are mapped closely together in the output space, whereas non-matching samples are pushed apart, no matter in which view they are available. The resulting optimization problem is not convex, but it can be decomposed explicitly into a convex and a concave part, thereby allowing efficient optimization using the convex-concave procedure. Experiments on an image retrieval task show that nearest-neighbor based cross-view retrieval is indeed possible, and the proposed technique improves the retrieval accuracy over baseline techniques.},
}

@inproceedings{3794,
  author    = {Lampert, Christoph and Krömer, Oliver},
  title     = {Weakly-paired maximum covariance analysis for multimodal dimensionality reduction and transfer learning},
  booktitle = {Computer Vision -- {ECCV} 2010, Part {II}},
  series    = {Lecture Notes in Computer Science},
  volume    = {6312},
  pages     = {566--579},
  publisher = {Springer},
  venue     = {Heraklion, Crete, Greece},
  year      = {2010},
  doi       = {10.1007/978-3-642-15552-9_41},
  abstract  = {We study the problem of multimodal dimensionality reduction assuming that data samples can be missing at training time, and not all data modalities may be present at application time. Maximum covariance analysis, as a generalization of PCA, has many desirable properties, but its application to practical problems is limited by its need for perfectly paired data. We overcome this limitation by a latent variable approach that allows working with weakly paired data and is still able to efficiently process large datasets using standard numerical routines. The resulting weakly paired maximum covariance analysis often finds better representations than alternative methods, as we show in two exemplary tasks: texture discrimination and transfer learning.},
}

@inproceedings{3793,
  author    = {Nowozin, Sebastian and Gehler, Peter and Lampert, Christoph},
  title     = {On parameter learning in {CRF}-based approaches to object class image segmentation},
  booktitle = {Computer Vision -- {ECCV} 2010, Part {VI}},
  series    = {Lecture Notes in Computer Science},
  volume    = {6316},
  pages     = {98--111},
  publisher = {Springer},
  venue     = {Heraklion, Crete, Greece},
  year      = {2010},
  doi       = {10.1007/978-3-642-15567-3_8},
  abstract  = {Recent progress in per-pixel object class labeling of natural images can be attributed to the use of multiple types of image features and sound statistical learning approaches. Within the latter, Conditional Random Fields (CRF) are prominently used for their ability to represent interactions between random variables. Despite their popularity in computer vision, parameter learning for CRFs has remained difficult, popular approaches being cross-validation and piecewise training. In this work, we propose a simple yet expressive tree-structured CRF based on a recent hierarchical image segmentation method. Our model combines and weights multiple image features within a hierarchical representation and allows simple and efficient globally-optimal learning of $\approx 10^{5}$ parameters. The tractability of our model allows us to pose and answer some of the open questions regarding parameter learning applying to CRF-based approaches. The key findings for learning CRF models are, from the obvious to the surprising, i) multiple image features always help, ii) the limiting dimension with respect to current models is the amount of training data, iii) piecewise training is competitive, iv) current methods for max-margin training fail for models with many parameters.},
}