@inproceedings{13142, abstract = {Reinforcement learning has received much attention for learning controllers of deterministic systems. We consider a learner-verifier framework for stochastic control systems and survey recent methods that formally guarantee a conjunction of reachability and safety properties. Given a property and a lower bound on the probability of the property being satisfied, our framework jointly learns a control policy and a formal certificate to ensure the satisfaction of the property with a desired probability threshold. Both the control policy and the formal certificate are continuous functions from states to reals, which are learned as parameterized neural networks. While in the deterministic case the certificates are invariants and barrier functions for safety, or Lyapunov and ranking functions for liveness, in the stochastic case the certificates are supermartingales. For certificate verification, we use interval arithmetic abstract interpretation to bound the expected values of neural network functions.}, author = {Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde}, booktitle = {Tools and Algorithms for the Construction and Analysis of Systems}, isbn = {9783031308222}, issn = {1611-3349}, location = {Paris, France}, pages = {3--25}, publisher = {Springer Nature}, title = {{A learner-verifier framework for neural network controllers and certificates of stochastic systems}}, doi = {10.1007/978-3-031-30823-9_1}, volume = {13993}, year = {2023}, } @article{12704, abstract = {Adversarial training (i.e., training on adversarially perturbed input data) is a well-studied method for making neural networks robust to potential adversarial attacks during inference. However, the improved robustness does not come for free but rather is accompanied by a decrease in overall model accuracy and performance. Recent work has shown that, in practical robot learning applications, the effects of adversarial training do not pose a fair trade-off but inflict a net loss when measured in holistic robot performance. This work revisits the robustness-accuracy trade-off in robot learning by systematically analyzing whether recent advances in robust training methods and theory, in conjunction with adversarial robot learning, are capable of making adversarial training suitable for real-world robot applications. We evaluate three different robot learning tasks ranging from autonomous driving in a high-fidelity environment amenable to sim-to-real deployment to mobile robot navigation and gesture recognition. Our results demonstrate that, while these techniques make incremental improvements on the trade-off on a relative scale, the negative impact on the nominal accuracy caused by adversarial training still outweighs the improved robustness by an order of magnitude.
We conclude that although progress is happening, further advances in robust learning methods are necessary before they can benefit robot learning tasks in practice.}, author = {Lechner, Mathias and Amini, Alexander and Rus, Daniela and Henzinger, Thomas A}, issn = {2377-3766}, journal = {IEEE Robotics and Automation Letters}, number = {3}, pages = {1595--1602}, publisher = {Institute of Electrical and Electronics Engineers}, title = {{Revisiting the adversarial robustness-accuracy tradeoff in robot learning}}, doi = {10.1109/LRA.2023.3240930}, volume = {8}, year = {2023}, } @inproceedings{14242, abstract = {We study the problem of training and certifying adversarially robust quantized neural networks (QNNs). Quantization is a technique for making neural networks more efficient by running them using low-bit integer arithmetic and is therefore commonly adopted in industry. Recent work has shown that floating-point neural networks that have been verified to be robust can become vulnerable to adversarial attacks after quantization, and certification of the quantized representation is necessary to guarantee robustness. In this work, we present quantization-aware interval bound propagation (QA-IBP), a novel method for training robust QNNs. Inspired by advances in robust learning of non-quantized networks, our training algorithm computes the gradient of an abstract representation of the actual network. Unlike existing approaches, our method can handle the discrete semantics of QNNs. Based on QA-IBP, we also develop a complete verification procedure for verifying the adversarial robustness of QNNs, which is guaranteed to terminate and produce a correct answer. Compared to existing approaches, the key advantage of our verification procedure is that it runs entirely on GPU or other accelerator devices. We demonstrate experimentally that our approach significantly outperforms existing methods and establish a new state of the art for training and certifying the robustness of QNNs.}, author = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A and Rus, Daniela}, booktitle = {Proceedings of the 37th AAAI Conference on Artificial Intelligence}, isbn = {9781577358800}, location = {Washington, DC, United States}, number = {12}, pages = {14964--14973}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Quantization-aware interval bound propagation for training certifiably robust quantized neural networks}}, doi = {10.1609/aaai.v37i12.26747}, volume = {37}, year = {2023}, } @inproceedings{14559, abstract = {We consider the problem of learning control policies in discrete-time stochastic systems that guarantee that the system stabilizes within some specified stabilization region with probability 1. Our approach is based on the novel notion of stabilizing ranking supermartingales (sRSMs) that we introduce in this work. Our sRSMs overcome the limitation of methods proposed in previous works, whose applicability is restricted to systems in which the stabilizing region cannot be left once entered under any control policy. We present a learning procedure that learns a control policy together with an sRSM that formally certifies probability 1 stability, both learned as neural networks. We show that this procedure can also be adapted to formally verifying that, under a given Lipschitz continuous control policy, the stochastic system stabilizes within some stabilizing region with probability 1.
Our experimental evaluation shows that our learning procedure can successfully learn provably stabilizing policies in practice.}, author = {Ansaripour, Matin and Chatterjee, Krishnendu and Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde}, booktitle = {21st International Symposium on Automated Technology for Verification and Analysis}, isbn = {9783031453281}, issn = {1611-3349}, location = {Singapore, Singapore}, pages = {357--379}, publisher = {Springer Nature}, title = {{Learning provably stabilizing neural controllers for discrete-time stochastic systems}}, doi = {10.1007/978-3-031-45329-8_17}, volume = {14215}, year = {2023}, } @inproceedings{14830, abstract = {We study the problem of learning controllers for discrete-time non-linear stochastic dynamical systems with formal reach-avoid guarantees. This work presents the first method for providing formal reach-avoid guarantees, which combine and generalize stability and safety guarantees, with a tolerable probability threshold $p \in [0,1]$ over the infinite time horizon. Our method leverages advances in the machine learning literature and represents formal certificates as neural networks. In particular, we learn a certificate in the form of a reach-avoid supermartingale (RASM), a novel notion that we introduce in this work. Our RASMs provide reachability and avoidance guarantees by imposing constraints on what can be viewed as a stochastic extension of level sets of Lyapunov functions for deterministic systems. Our approach solves several important problems -- it can be used to learn a control policy from scratch, to verify a reach-avoid specification for a fixed control policy, or to fine-tune a pre-trained policy if it does not satisfy the reach-avoid specification. We validate our approach on 3 stochastic non-linear reinforcement learning tasks.}, author = {Zikelic, Dorde and Lechner, Mathias and Henzinger, Thomas A and Chatterjee, Krishnendu}, booktitle = {Proceedings of the 37th AAAI Conference on Artificial Intelligence}, issn = {2374-3468}, location = {Washington, DC, United States}, number = {10}, pages = {11926--11935}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Learning control policies for stochastic systems with reach-avoid guarantees}}, doi = {10.1609/aaai.v37i10.26407}, volume = {37}, year = {2023}, } @inproceedings{15023, abstract = {Reinforcement learning has shown promising results in learning neural network policies for complicated control tasks. However, the lack of formal guarantees about the behavior of such policies remains an impediment to their deployment. We propose a novel method for learning a composition of neural network policies in stochastic environments, along with a formal certificate which guarantees that a specification over the policy's behavior is satisfied with the desired probability. Unlike prior work on verifiable RL, our approach leverages the compositional nature of logical specifications provided in SpectRL to learn over graphs of probabilistic reach-avoid specifications. The formal guarantees are provided by learning neural network policies together with reach-avoid supermartingales (RASMs) for the graph's sub-tasks and then composing them into a global policy.
We also derive a tighter lower bound compared to previous work on the probability of reach-avoidance implied by a RASM, which is required to find a compositional policy with an acceptable probabilistic threshold for complex tasks with multiple edge policies. We implement a prototype of our approach and evaluate it on a Stochastic Nine Rooms environment.}, author = {Zikelic, Dorde and Lechner, Mathias and Verma, Abhinav and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {37th Conference on Neural Information Processing Systems}, location = {New Orleans, LA, United States}, title = {{Compositional policy learning in stochastic control systems with formal guarantees}}, year = {2023}, } @inproceedings{12010, abstract = {World models learn behaviors in a latent imagination space to enhance the sample-efficiency of deep reinforcement learning (RL) algorithms. While learning world models for high-dimensional observations (e.g., pixel inputs) has become practicable on standard RL benchmarks and some games, their effectiveness in real-world robotics applications has not been explored. In this paper, we investigate how such agents generalize to real-world autonomous vehicle control tasks, where advanced model-free deep RL algorithms fail. In particular, we set up a series of time-lap tasks for an F1TENTH racing robot, equipped with a high-dimensional LiDAR sensor, on a set of test tracks with a gradual increase in their complexity. In this continuous-control setting, we show that model-based agents capable of learning in imagination substantially outperform model-free agents with respect to performance, sample efficiency, successful task completion, and generalization. Moreover, we show that the generalization ability of model-based agents strongly depends on the choice of their observation model. We provide extensive empirical evidence for the effectiveness of world models provided with long enough memory horizons in sim2real tasks.}, author = {Brunnbauer, Axel and Berducci, Luigi and Brandstatter, Andreas and Lechner, Mathias and Hasani, Ramin and Rus, Daniela and Grosu, Radu}, booktitle = {2022 International Conference on Robotics and Automation}, isbn = {9781728196817}, issn = {1050-4729}, location = {Philadelphia, PA, United States}, pages = {7513--7520}, publisher = {IEEE}, title = {{Latent imagination facilitates zero-shot transfer in autonomous racing}}, doi = {10.1109/ICRA46639.2022.9811650}, year = {2022}, } @unpublished{11366, abstract = {Adversarial training (i.e., training on adversarially perturbed input data) is a well-studied method for making neural networks robust to potential adversarial attacks during inference. However, the improved robustness does not come for free but rather is accompanied by a decrease in overall model accuracy and performance. Recent work has shown that, in practical robot learning applications, the effects of adversarial training do not pose a fair trade-off but inflict a net loss when measured in holistic robot performance. This work revisits the robustness-accuracy trade-off in robot learning by systematically analyzing whether recent advances in robust training methods and theory, in conjunction with adversarial robot learning, can make adversarial training suitable for real-world robot applications. We evaluate a wide variety of robot learning tasks ranging from autonomous driving in a high-fidelity environment amenable to sim-to-real deployment, to mobile robot gesture recognition.
Our results demonstrate that, while these techniques make incremental improvements on the trade-off on a relative scale, the negative side-effects caused by adversarial training still outweigh the improvements by an order of magnitude. We conclude that more substantial advances in robust learning methods are necessary before they can benefit robot learning tasks in practice.}, author = {Lechner, Mathias and Amini, Alexander and Rus, Daniela and Henzinger, Thomas A}, booktitle = {arXiv}, title = {{Revisiting the adversarial robustness-accuracy tradeoff in robot learning}}, doi = {10.48550/arXiv.2204.07373}, year = {2022}, } @article{12147, abstract = {Continuous-time neural networks are a class of machine learning systems that can tackle representation learning on spatiotemporal decision-making tasks. These models are typically represented by continuous differential equations. However, their expressive power when they are deployed on computers is bottlenecked by numerical differential equation solvers. This limitation has notably slowed down the scaling and understanding of numerous natural physical phenomena such as the dynamics of nervous systems. Ideally, we would circumvent this bottleneck by solving the given dynamical system in closed form. This is known to be intractable in general. Here, we show that it is possible to closely approximate the interaction between neurons and synapses—the building blocks of natural and artificial neural networks—constructed by liquid time-constant networks efficiently in closed form. To this end, we compute a tightly bounded approximation of the solution of an integral appearing in liquid time-constant dynamics that has had no known closed-form solution so far. This closed-form solution impacts the design of continuous-time and continuous-depth neural models. For instance, since time appears explicitly in closed form, the formulation relaxes the need for complex numerical solvers. Consequently, we obtain models that are between one and five orders of magnitude faster in training and inference compared with differential equation-based counterparts. More importantly, in contrast to ordinary differential equation-based continuous networks, closed-form networks can scale remarkably well compared with other deep learning instances. Lastly, as these models are derived from liquid networks, they show good performance in time-series modelling compared with advanced recurrent neural network models.}, author = {Hasani, Ramin and Lechner, Mathias and Amini, Alexander and Liebenwein, Lucas and Ray, Aaron and Tschaikowski, Max and Teschl, Gerald and Rus, Daniela}, issn = {2522-5839}, journal = {Nature Machine Intelligence}, keywords = {Artificial Intelligence, Computer Networks and Communications, Computer Vision and Pattern Recognition, Human-Computer Interaction, Software}, number = {11}, pages = {992--1003}, publisher = {Springer Nature}, title = {{Closed-form continuous-time neural networks}}, doi = {10.1038/s42256-022-00556-7}, volume = {4}, year = {2022}, } @phdthesis{11362, abstract = {Deep learning has enabled breakthroughs in challenging computing problems and has emerged as the standard problem-solving tool for computer vision and natural language processing tasks. One exception to this trend is safety-critical tasks where robustness and resilience requirements contradict the black-box nature of neural networks. To deploy deep learning methods for these tasks, it is vital to provide guarantees on neural network agents' safety and robustness criteria. 
This can be achieved by developing formal verification methods to verify the safety and robustness properties of neural networks. Our goal is to design, develop and assess safety verification methods for neural networks to improve their reliability and trustworthiness in real-world applications. This thesis establishes techniques for the verification of compressed and adversarially trained models as well as the design of novel neural networks for verifiably safe decision-making. First, we establish the problem of verifying quantized neural networks. Quantization is a technique that trades numerical precision for the computational efficiency of running a neural network and is widely adopted in industry. We show that neglecting the reduced precision when verifying a neural network can lead to wrong conclusions about the robustness and safety of the network, highlighting that novel techniques for quantized network verification are necessary. We introduce several bit-exact verification methods explicitly designed for quantized neural networks and experimentally confirm on realistic networks that the network's robustness and other formal properties are affected by the quantization. Furthermore, we perform a case study providing evidence that adversarial training, a standard technique for making neural networks more robust, has detrimental effects on the network's performance. This robustness-accuracy tradeoff has been studied before regarding the accuracy obtained on classification datasets where each data point is independent of all other data points. In contrast, we investigate the tradeoff empirically in robot learning settings where both high accuracy and high robustness are desirable. Our results suggest that the negative side-effects of adversarial training outweigh its robustness benefits in practice. Finally, we consider the problem of verifying safety when running a Bayesian neural network policy in a feedback loop with systems over the infinite time horizon. Bayesian neural networks are probabilistic models for learning uncertainties in the data and are therefore often used in robotic and healthcare applications where data is inherently stochastic. We introduce a method for recalibrating Bayesian neural networks so that they yield probability distributions over safe decisions only. Our method learns a safety certificate that guarantees safety over the infinite time horizon to determine which decisions are safe in every possible state of the system. We demonstrate the effectiveness of our approach on a series of reinforcement learning benchmarks.}, author = {Lechner, Mathias}, isbn = {978-3-99078-017-6}, keywords = {neural networks, verification, machine learning}, pages = {124}, publisher = {Institute of Science and Technology Austria}, title = {{Learning verifiable representations}}, doi = {10.15479/at:ista:11362}, year = {2022}, } @article{12510, abstract = {We introduce a new statistical verification algorithm that formally quantifies the behavioral robustness of any time-continuous process formulated as a continuous-depth model. Our algorithm solves a set of global optimization (Go) problems over a given time horizon to construct a tight enclosure (Tube) of the set of all process executions starting from a ball of initial states. We call our algorithm GoTube. Through its construction, GoTube ensures that the bounding tube is conservative up to a desired probability and up to a desired tightness.
GoTube is implemented in JAX and optimized to scale to complex continuous-depth neural network models. Compared to advanced reachability analysis tools for time-continuous neural networks, GoTube does not accumulate overapproximation errors between time steps and avoids the infamous wrapping effect inherent in symbolic techniques. We show that GoTube substantially outperforms state-of-the-art verification tools in terms of the size of the initial ball, speed, time horizon, task completion, and scalability on a large set of experiments. GoTube is stable and sets the state of the art in its ability to scale to time horizons well beyond what has been previously possible.}, author = {Gruenbacher, Sophie A. and Lechner, Mathias and Hasani, Ramin and Rus, Daniela and Henzinger, Thomas A and Smolka, Scott A. and Grosu, Radu}, isbn = {9781577358350}, issn = {2374-3468}, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, number = {6}, pages = {6755--6764}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{GoTube: Scalable statistical verification of continuous-depth models}}, doi = {10.1609/aaai.v36i6.20631}, volume = {36}, year = {2022}, } @article{12511, abstract = {We consider the problem of formally verifying almost-sure (a.s.) asymptotic stability in discrete-time nonlinear stochastic control systems. While verifying stability in deterministic control systems is extensively studied in the literature, verifying stability in stochastic control systems is an open problem. The few existing works on this topic either consider only specialized forms of stochasticity or make restrictive assumptions on the system, rendering them inapplicable to learning algorithms with neural network policies. In this work, we present an approach for general nonlinear stochastic control problems with two novel aspects: (a) instead of classical stochastic extensions of Lyapunov functions, we use ranking supermartingales (RSMs) to certify a.s. asymptotic stability, and (b) we present a method for learning neural network RSMs. We prove that our approach guarantees a.s. asymptotic stability of the system and provides the first method to obtain bounds on the stabilization time, which stochastic Lyapunov functions do not. Finally, we validate our approach experimentally on a set of nonlinear stochastic reinforcement learning environments with neural network policies.}, author = {Lechner, Mathias and Zikelic, Dorde and Chatterjee, Krishnendu and Henzinger, Thomas A}, isbn = {9781577358350}, issn = {2374-3468}, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, number = {7}, pages = {7326--7336}, publisher = {Association for the Advancement of Artificial Intelligence}, title = {{Stability verification in stochastic control systems via neural network supermartingales}}, doi = {10.1609/aaai.v36i7.20695}, volume = {36}, year = {2022}, } @unpublished{14601, abstract = {In this work, we address the problem of learning provably stable neural network policies for stochastic control systems. While recent work has demonstrated the feasibility of certifying given policies using martingale theory, the problem of how to learn such policies is little explored. Here, we study the effectiveness of jointly learning a policy together with a martingale certificate that proves its stability using a single learning algorithm.
We observe that the joint optimization problem becomes easily stuck in local minima when starting from a randomly initialized policy. Our results suggest that some form of pre-training of the policy is required for the joint optimization to repair and verify the policy successfully.}, author = {Zikelic, Dorde and Lechner, Mathias and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {arXiv}, title = {{Learning stabilizing policies in stochastic control systems}}, doi = {10.48550/arXiv.2205.11991}, year = {2022}, } @unpublished{14600, abstract = {We study the problem of learning controllers for discrete-time non-linear stochastic dynamical systems with formal reach-avoid guarantees. This work presents the first method for providing formal reach-avoid guarantees, which combine and generalize stability and safety guarantees, with a tolerable probability threshold $p\in[0,1]$ over the infinite time horizon. Our method leverages advances in the machine learning literature and represents formal certificates as neural networks. In particular, we learn a certificate in the form of a reach-avoid supermartingale (RASM), a novel notion that we introduce in this work. Our RASMs provide reachability and avoidance guarantees by imposing constraints on what can be viewed as a stochastic extension of level sets of Lyapunov functions for deterministic systems. Our approach solves several important problems -- it can be used to learn a control policy from scratch, to verify a reach-avoid specification for a fixed control policy, or to fine-tune a pre-trained policy if it does not satisfy the reach-avoid specification. We validate our approach on $3$ stochastic non-linear reinforcement learning tasks.}, author = {Zikelic, Dorde and Lechner, Mathias and Henzinger, Thomas A and Chatterjee, Krishnendu}, booktitle = {arXiv}, title = {{Learning control policies for stochastic systems with reach-avoid guarantees}}, doi = {10.48550/arXiv.2210.05308}, year = {2022}, } @inproceedings{10669, abstract = {We show that Neural ODEs, an emerging class of time-continuous neural networks, can be verified by solving a set of global-optimization problems. For this purpose, we introduce Stochastic Lagrangian Reachability (SLR), an abstraction-based technique for constructing a tight Reachtube (an over-approximation of the set of reachable states over a given time horizon), and provide stochastic guarantees in the form of confidence intervals for the Reachtube bounds. SLR inherently avoids the infamous wrapping effect (accumulation of over-approximation errors) by performing local optimization steps to expand safe regions instead of repeatedly forward-propagating them as is done by deterministic reachability methods. To enable fast local optimizations, we introduce a novel forward-mode adjoint sensitivity method to compute gradients without the need for backpropagation. Finally, we establish asymptotic and non-asymptotic convergence rates for SLR.}, author = {Grunbacher, Sophie and Hasani, Ramin and Lechner, Mathias and Cyranka, Jacek and Smolka, Scott A and Grosu, Radu}, booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence}, isbn = {978-1-57735-866-4}, issn = {2374-3468}, location = {Virtual}, number = {13}, pages = {11525--11535}, publisher = {AAAI Press}, title = {{On the verification of neural ODEs with stochastic guarantees}}, volume = {35}, year = {2021}, } @inproceedings{10671, abstract = {We introduce a new class of time-continuous recurrent neural network models.
Instead of declaring a learning system's dynamics by implicit nonlinearities, we construct networks of linear first-order dynamical systems modulated via nonlinear interlinked gates. The resulting models represent dynamical systems with varying (i.e., liquid) time-constants coupled to their hidden state, with outputs being computed by numerical differential equation solvers. These neural networks exhibit stable and bounded behavior, yield superior expressivity within the family of neural ordinary differential equations, and give rise to improved performance on time-series prediction tasks. To demonstrate these properties, we first take a theoretical approach to find bounds over their dynamics and compute their expressive power by the trajectory length measure in a latent trajectory space. We then conduct a series of time-series prediction experiments to demonstrate the approximation capability of Liquid Time-Constant Networks (LTCs) compared to classical and modern RNNs.}, author = {Hasani, Ramin and Lechner, Mathias and Amini, Alexander and Rus, Daniela and Grosu, Radu}, booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence}, isbn = {978-1-57735-866-4}, issn = {2374-3468}, location = {Virtual}, number = {9}, pages = {7657--7666}, publisher = {AAAI Press}, title = {{Liquid time-constant networks}}, volume = {35}, year = {2021}, } @inproceedings{10668, abstract = {Robustness to variations in lighting conditions is a key objective for any deep vision system. To this end, our paper extends the receptive field of convolutional neural networks with two residual components, ubiquitous in the visual processing system of vertebrates: On-center and off-center pathways, with an excitatory center and inhibitory surround; OOCS for short. The On-center pathway is excited by the presence of a light stimulus in its center, but not in its surround, whereas the Off-center pathway is excited by the absence of a light stimulus in its center, but not in its surround. We design OOCS pathways via a difference of Gaussians, with their variance computed analytically from the size of the receptive fields. OOCS pathways complement each other in their response to light stimuli, thereby ensuring a strong edge-detection capability and, as a result, accurate and robust inference under challenging lighting conditions. We provide extensive empirical evidence showing that networks supplied with OOCS pathways gain accuracy and illumination-robustness from the novel edge representation, compared to other baselines.}, author = {Babaiee, Zahra and Hasani, Ramin and Lechner, Mathias and Rus, Daniela and Grosu, Radu}, booktitle = {Proceedings of the 38th International Conference on Machine Learning}, issn = {2640-3498}, location = {Virtual}, pages = {478--489}, publisher = {ML Research Press}, title = {{On-off center-surround receptive fields for accurate and robust image classification}}, volume = {139}, year = {2021}, } @inproceedings{10670, abstract = {Imitation learning enables high-fidelity, vision-based learning of policies within rich, photorealistic environments. However, such techniques often rely on traditional discrete-time neural models and face difficulties in generalizing to domain shifts by failing to account for the causal relationships between the agent and the environment. In this paper, we propose a theoretical and experimental framework for learning causal representations using continuous-time neural networks, specifically over their discrete-time counterparts.
We evaluate our method in the context of visual-control learning of drones over a series of complex tasks, ranging from short- and long-term navigation, to chasing static and dynamic objects through photorealistic environments. Our results demonstrate that causal continuous-time deep models can perform robust navigation tasks, where advanced recurrent models fail. These models learn complex causal control representations directly from raw visual inputs and scale to solve a variety of tasks using imitation learning.}, author = {Vorbach, Charles J and Hasani, Ramin and Amini, Alexander and Lechner, Mathias and Rus, Daniela}, booktitle = {35th Conference on Neural Information Processing Systems}, location = {Virtual}, title = {{Causal navigation by continuous-time neural networks}}, year = {2021}, } @inproceedings{10665, abstract = {Formal verification of neural networks is an active topic of research, and recent advances have significantly increased the size of the networks that verification tools can handle. However, most methods are designed for verification of an idealized model of the actual network, which works over real arithmetic and ignores rounding imprecisions. This idealization is in stark contrast to network quantization, a technique that trades numerical precision for computational efficiency and is therefore often applied in practice. Neglecting rounding errors of such low-bit quantized neural networks has been shown to lead to wrong conclusions about the network's correctness. Thus, the desired approach for verifying quantized neural networks would be one that takes these rounding errors into account. In this paper, we show that verifying the bit-exact implementation of quantized neural networks with bit-vector specifications is PSPACE-hard, even though verifying idealized real-valued networks and satisfiability of bit-vector specifications alone are each in NP. Furthermore, we explore several practical heuristics toward closing the complexity gap between idealized and bit-exact verification. In particular, we propose three techniques for making SMT-based verification of quantized neural networks more scalable. Our experiments demonstrate that our proposed methods allow a speedup of up to three orders of magnitude over existing approaches.}, author = {Henzinger, Thomas A and Lechner, Mathias and Zikelic, Dorde}, booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence}, isbn = {978-1-57735-866-4}, issn = {2374-3468}, location = {Virtual}, number = {5A}, pages = {3787--3795}, publisher = {AAAI Press}, title = {{Scalable verification of quantized neural networks}}, volume = {35}, year = {2021}, } @inproceedings{10667, abstract = {Bayesian neural networks (BNNs) place distributions over the weights of a neural network to model uncertainty in the data and the network's prediction. We consider the problem of verifying safety when running a Bayesian neural network policy in a feedback loop with infinite time horizon systems. Compared to the existing sampling-based approaches, which are inapplicable to the infinite time horizon setting, we train a separate deterministic neural network that serves as an infinite time horizon safety certificate. In particular, we show that the certificate network guarantees the safety of the system over a subset of the BNN weight posterior's support. Our method first computes a safe weight set and then alters the BNN's weight posterior to reject samples outside this set.
Moreover, we show how to extend our approach to a safe-exploration reinforcement learning setting, in order to avoid unsafe trajectories during the training of the policy. We evaluate our approach on a series of reinforcement learning benchmarks, including non-Lyapunovian safety specifications.}, author = {Lechner, Mathias and Žikelić, Đorđe and Chatterjee, Krishnendu and Henzinger, Thomas A}, booktitle = {35th Conference on Neural Information Processing Systems}, location = {Virtual}, title = {{Infinite time horizon safety of Bayesian neural networks}}, doi = {10.48550/arXiv.2111.03165}, year = {2021}, }
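
Editorial note: several entries above (13142, 14242, 10665) rely on the same verification primitive, namely propagating interval bounds through a neural network with interval arithmetic. The following is a minimal Python sketch of interval bound propagation (IBP) for a fully connected ReLU network; BibTeX ignores text between entries, so it is included here as commentary. It illustrates the general technique only, not the implementation of any cited paper: it works over real-valued arithmetic (QA-IBP in entry 14242 additionally models the discrete semantics of quantized networks, which is not captured here), and all weights, dimensions, and names are invented for the example.

import numpy as np

def ibp_affine(l, u, W, b):
    # Propagate the box [l, u] through y = W x + b using the
    # center/radius form; this is exact for a single affine layer.
    c = (l + u) / 2.0
    r = (u - l) / 2.0
    c_out = W @ c + b
    r_out = np.abs(W) @ r
    return c_out - r_out, c_out + r_out

def ibp_relu(l, u):
    # ReLU is monotone, so it maps interval endpoints to endpoints.
    return np.maximum(l, 0.0), np.maximum(u, 0.0)

# Toy example: bound the outputs of a random two-layer network over an
# L-infinity ball of radius 0.1 around an input point x0.
rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(8, 4)), np.zeros(8)
W2, b2 = rng.normal(size=(2, 8)), np.zeros(2)
x0 = np.array([0.5, -0.2, 0.1, 0.3])
l, u = x0 - 0.1, x0 + 0.1
l, u = ibp_relu(*ibp_affine(l, u, W1, b1))
l, u = ibp_affine(l, u, W2, b2)
print("output lower bounds:", l)
print("output upper bounds:", u)

Any property that holds at every point of the resulting output box is guaranteed for every input in the ball; bounds of this kind are what entry 13142 uses to bound expected values of neural network certificates during verification.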