% Conventions for the entries below: one field per line, braces as delimiters,
% page ranges written with "--", titles single-braced with only
% case-sensitive words protected. Citation keys are kept exactly as exported.

% Workshop paper on visual relationship detection (Box Attention).
@inproceedings{7640,
  author    = {Kolesnikov, Alexander and Kuznetsova, Alina and Lampert, Christoph and Ferrari, Vittorio},
  title     = {Detecting visual relationships using box attention},
  booktitle = {Proceedings of the 2019 International Conference on Computer Vision Workshop},
  publisher = {IEEE},
  location  = {Seoul, South Korea},
  isbn      = {9781728150239},
  year      = {2019},
  doi       = {10.1109/ICCVW.2019.00217},
  abstract  = {We propose a new model for detecting visual relationships, such as ``person riding motorcycle'' or ``bottle on table''. This task is an important step towards comprehensive structured image understanding, going beyond detecting individual objects. Our main novelty is a Box Attention mechanism that allows to model pairwise interactions between objects using standard object detection pipelines. The resulting model is conceptually clean, expressive and relies on well-justified training and prediction procedures. Moreover, unlike previously proposed approaches, our model does not introduce any additional complex components or hyperparameters on top of those already required by the underlying detection model. We conduct an experimental evaluation on two datasets, V-COCO and Open Images, demonstrating strong quantitative and qualitative results.},
}

% PhD thesis. "school" is the field classic BibTeX requires for a thesis;
% "publisher" is kept alongside it for tools that read the exported field.
@phdthesis{197,
  author    = {Kolesnikov, Alexander},
  title     = {Weakly-Supervised Segmentation and Unsupervised Modeling of Natural Images},
  school    = {Institute of Science and Technology Austria},
  publisher = {Institute of Science and Technology Austria},
  issn      = {2663-337X},
  pages     = {113},
  year      = {2018},
  doi       = {10.15479/AT:ISTA:th_1021},
  abstract  = {Modern computer vision systems heavily rely on statistical machine learning models, which typically require large amounts of labeled data to be learned reliably. Moreover, very recently computer vision research widely adopted techniques for representation learning, which further increase the demand for labeled data. However, for many important practical problems there is relatively small amount of labeled data available, so it is problematic to leverage full potential of the representation learning methods. One way to overcome this obstacle is to invest substantial resources into producing large labelled datasets. Unfortunately, this can be prohibitively expensive in practice.
    In this thesis we focus on the alternative way of tackling the aforementioned issue. We concentrate on methods, which make use of weakly-labeled or even unlabeled data. Specifically, the first half of the thesis is dedicated to the semantic image segmentation task. We develop a technique, which achieves competitive segmentation performance and only requires annotations in a form of global image-level labels instead of dense segmentation masks. Subsequently, we present a new methodology, which further improves segmentation performance by leveraging tiny additional feedback from a human annotator. By using our methods practitioners can greatly reduce the amount of data annotation effort, which is required to learn modern image segmentation models. In the second half of the thesis we focus on methods for learning from unlabeled visual data. We study a family of autoregressive models for modeling structure of natural images and discuss potential applications of these models. Moreover, we conduct in-depth study of one of these applications, where we develop the state-of-the-art model for the probabilistic image colorization task.},
}

% Journal article (Genetics 208(3)).
@article{563,
  author    = {Ringbauer, Harald and Kolesnikov, Alexander and Field, David and Barton, Nicholas H.},
  title     = {Estimating barriers to gene flow from distorted isolation-by-distance patterns},
  journal   = {Genetics},
  volume    = {208},
  number    = {3},
  pages     = {1231--1245},
  publisher = {Genetics Society of America},
  year      = {2018},
  doi       = {10.1534/genetics.117.300638},
  abstract  = {In continuous populations with local migration, nearby pairs of individuals have on average more similar genotypes than geographically well separated pairs. A barrier to gene flow distorts this classical pattern of isolation by distance. Genetic similarity is decreased for sample pairs on different sides of the barrier and increased for pairs on the same side near the barrier. Here, we introduce an inference scheme that utilizes this signal to detect and estimate the strength of a linear barrier to gene flow in two-dimensions.
    We use a diffusion approximation to model the effects of a barrier on the geographical spread of ancestry backwards in time. This approach allows us to calculate the chance of recent coalescence and probability of identity by descent. We introduce an inference scheme that fits these theoretical results to the geographical covariance structure of biallelic genetic markers. It can estimate the strength of the barrier as well as several demographic parameters. We investigate the power of our inference scheme to detect barriers by applying it to a wide range of simulated data. We also showcase an example application to an Antirrhinum majus (snapdragon) flower color hybrid zone, where we do not detect any signal of a strong genome wide barrier to gene flow.},
}

% Conference paper; "PixelCNN" is brace-protected so styles keep its casing.
@inproceedings{1000,
  author    = {Kolesnikov, Alexander and Lampert, Christoph},
  title     = {{PixelCNN} models with auxiliary variables for natural image modeling},
  booktitle = {34th International Conference on Machine Learning},
  volume    = {70},
  pages     = {1905--1914},
  publisher = {JMLR},
  location  = {Sydney, Australia},
  isbn      = {978-151085514-4},
  year      = {2017},
  abstract  = {We study probabilistic models of natural images and extend the autoregressive family of PixelCNN models by incorporating latent variables. Subsequently, we describe two new generative image models that exploit different image transformations as latent variables: a quantized grayscale view of the image or a multi-resolution image pyramid. The proposed models tackle two known shortcomings of existing PixelCNN models: 1) their tendency to focus on low-level image details, while largely ignoring high-level image information, such as object shapes, and 2) their computationally costly procedure for image sampling. We experimentally demonstrate benefits of our LatentPixelCNN models, in particular showing that they produce much more realistically looking image samples than previous state-of-the-art probabilistic models.},
}

% NOTE(review): the export carried no booktitle (required for inproceedings).
% The value below is inferred from the DOI prefix 10.1109/CVPR.2017 -- confirm
% against the publisher record. The exported volume value is kept untouched.
@inproceedings{998,
  author    = {Rebuffi, Sylvestre Alvise and Kolesnikov, Alexander and Sperl, Georg and Lampert, Christoph},
  title     = {{iCaRL}: Incremental classifier and representation learning},
  booktitle = {Proceedings of the 2017 {IEEE} Conference on Computer Vision and Pattern Recognition},
  volume    = {2017},
  pages     = {5533--5542},
  publisher = {IEEE},
  location  = {Honolulu, HI, United States},
  isbn      = {978-153860457-1},
  year      = {2017},
  doi       = {10.1109/CVPR.2017.587},
  abstract  = {A major open problem on the road to artificial intelligence is the development of incrementally learning systems that learn about more and more concepts over time from a stream of data. In this work, we introduce a new training strategy, iCaRL, that allows learning in such a class-incremental way: only the training data for a small number of classes has to be present at the same time and new classes can be added progressively. iCaRL learns strong classifiers and a data representation simultaneously. This distinguishes it from earlier works that were fundamentally limited to fixed data representations and therefore incompatible with deep learning architectures. We show by experiments on CIFAR-100 and ImageNet ILSVRC 2012 data that iCaRL can learn many classes incrementally over a long period of time where other strategies quickly fail.},
}

% NOTE(review): booktitle inferred from the DOI (10.5244/C.31 = BMVC 2017,
% following the pattern of entry 1102) -- confirm. The accented given name is
% written as a BibTeX special character so classic BibTeX sorts it correctly.
@inproceedings{911,
  author    = {Royer, Am{\'e}lie and Kolesnikov, Alexander and Lampert, Christoph},
  title     = {Probabilistic image colorization},
  booktitle = {Proceedings of the British Machine Vision Conference 2017},
  pages     = {85.1--85.12},
  publisher = {BMVA Press},
  location  = {London, United Kingdom},
  year      = {2017},
  doi       = {10.5244/C.31.85},
  abstract  = {We develop a probabilistic technique for colorizing grayscale natural images. In light of the intrinsic uncertainty of this task, the proposed probabilistic framework has numerous desirable properties.
    In particular, our model is able to produce multiple plausible and vivid colorizations for a given grayscale image and is one of the first colorization models to provide a proper stochastic sampling scheme. Moreover, our training procedure is supported by a rigorous theoretical framework that does not require any ad hoc heuristics and allows for efficient modeling and learning of the joint pixel color distribution. We demonstrate strong quantitative and qualitative experimental results on the CIFAR-10 dataset and the challenging ILSVRC 2012 dataset.},
}

% BMVC 2016 paper. The exported volume value is kept untouched.
@inproceedings{1102,
  author    = {Kolesnikov, Alexander and Lampert, Christoph},
  title     = {Improving weakly-supervised object localization by micro-annotation},
  booktitle = {Proceedings of the British Machine Vision Conference 2016},
  volume    = {2016-September},
  pages     = {92.1--92.12},
  publisher = {BMVA Press},
  location  = {York, United Kingdom},
  year      = {2016},
  doi       = {10.5244/C.30.92},
  abstract  = {Weakly-supervised object localization methods tend to fail for object classes that consistently co-occur with the same background elements, e.g. trains on tracks. We propose a method to overcome these failures by adding a very small amount of model-specific additional annotation. The main idea is to cluster a deep network's mid-level representations and assign object or distractor labels to each cluster.
    Experiments show substantially improved localization results on the challenging ILSVRC2014 dataset for bounding box detection and the PASCAL VOC2012 dataset for semantic segmentation.},
}

% NOTE(review): booktitle and series inferred from the Springer DOI, volume
% 9908, and the Amsterdam 2016 venue (presumably ECCV 2016 in LNCS) -- confirm.
@inproceedings{1369,
  author    = {Kolesnikov, Alexander and Lampert, Christoph},
  title     = {Seed, expand and constrain: Three principles for weakly-supervised image segmentation},
  booktitle = {Computer Vision -- {ECCV} 2016},
  series    = {Lecture Notes in Computer Science},
  volume    = {9908},
  pages     = {695--711},
  publisher = {Springer},
  location  = {Amsterdam, The Netherlands},
  year      = {2016},
  doi       = {10.1007/978-3-319-46493-0_42},
  abstract  = {We introduce a new loss function for the weakly-supervised training of semantic image segmentation models based on three guiding principles: to seed with weak localization cues, to expand objects based on the information about which classes can occur in an image, and to constrain the segmentations to coincide with object boundaries. We show experimentally that training a deep convolutional neural network using the proposed loss function leads to substantially better segmentations than previous state-of-the-art methods on the challenging PASCAL VOC 2012 dataset. We furthermore give insight into the working mechanism of our method by a detailed experimental study that illustrates how the segmentation quality is affected by each term of the proposed loss function as well as their combinations.},
}

% The export stored the series name in booktitle and "PART 3" in number.
% The series name now lives in "series" and the part is carried by booktitle.
% NOTE(review): proceedings title inferred from the DOI, editors, and the
% Zurich 2014 venue (presumably ECCV 2014) -- confirm.
@inproceedings{2171,
  author    = {Kolesnikov, Alexander and Guillaumin, Matthieu and Ferrari, Vittorio and Lampert, Christoph},
  title     = {Closed-form approximate {CRF} training for scalable image segmentation},
  editor    = {Fleet, David and Pajdla, Tomas and Schiele, Bernt and Tuytelaars, Tinne},
  booktitle = {Computer Vision -- {ECCV} 2014, Part {III}},
  series    = {Lecture Notes in Computer Science},
  volume    = {8691},
  pages     = {550--565},
  publisher = {Springer},
  location  = {Zurich, Switzerland},
  year      = {2014},
  doi       = {10.1007/978-3-319-10578-9_36},
  abstract  = {We present LS-CRF, a new method for training cyclic Conditional Random Fields (CRFs) from large datasets that is inspired by classical closed-form expressions for the maximum likelihood parameters of a generative graphical model with tree topology.
    Training a CRF with LS-CRF requires only solving a set of independent regression problems, each of which can be solved efficiently in closed form or by an iterative solver. This makes LS-CRF orders of magnitude faster than classical CRF training based on probabilistic inference, and at the same time more flexible and easier to implement than other approximate techniques, such as pseudolikelihood or piecewise training. We apply LS-CRF to the task of semantic image segmentation, showing that it achieves on par accuracy to other training techniques at higher speed, thereby allowing efficient CRF training from very large training sets. For example, training a linearly parameterized pairwise CRF on 150,000 images requires less than one hour on a modern workstation.},
}