Identifying datapoints that substantially differ from normality is the task of anomaly detection (AD). While AD has gained widespread attention in rich data domains such as images, videos, audio, and text, it has been studied less frequently in the context of reinforcement learning (RL). This is due to the additional layer of complexity that RL introduces through sequential decision making. Developing suitable anomaly detectors for RL is of particular importance in safety-critical scenarios where acting on anomalous data could result in hazardous situations. In this work, we address the question of what AD means in the context of RL. We find that current research trains and evaluates on overly simplistic and unrealistic scenarios which reduce to classic pattern recognition tasks. We link AD in RL to various fields in RL such as lifelong RL and generalization. We discuss their similarities, differences, and how the fields can benefit from each other. Moreover, we identify non-stationarity as one of the key drivers for future research on AD in RL and take a first step towards a more formal treatment of the problem by framing it in terms of the recently introduced block contextual Markov decision process. Finally, we define a list of practical desiderata for future problems.
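To make the block contextual framing above concrete, the sketch below gives a minimal LaTeX rendering of a block contextual MDP (BC-MDP). The notation is adapted from Sodhani et al. (2021) and simplified here; treat it as an illustrative assumption rather than the paper's exact formulation.

```latex
% Illustrative sketch of a block contextual MDP (BC-MDP).
% Notation adapted from Sodhani et al. (2021) and simplified;
% not taken verbatim from the paper above.
\documentclass{article}
\usepackage{amsmath,amssymb}
\begin{document}
A BC-MDP augments a family of MDPs with a context $c \in \mathcal{C}$
that selects the environment's dynamics:
\[
  \mathcal{M}(c) = \bigl(\mathcal{S},\, \mathcal{A},\, \mathcal{X}^{c},\,
  p^{c},\, q^{c},\, r^{c}\bigr),
\]
where $\mathcal{S}$ is a shared latent state space, $\mathcal{A}$ the
action space, $\mathcal{X}^{c}$ a context-dependent observation space,
$p^{c}(s' \mid s, a)$ the transition kernel, $q^{c}(x \mid s)$ the
emission function from latent states to observations, and
$r^{c}(s, a)$ the reward function.
\end{document}
```

Under this framing, an anomaly can be read as an unannounced context shift $c \to c'$ that changes the observation, transition, or reward structure while the agent keeps acting, which is exactly the non-stationarity the abstract identifies as a key driver.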
@inproceedings{muellerAAMAS22,
author = "Robert Müller and Steffen Illium and Thomy Phan and Tom Haider and Claudia Linnhoff-Popien",
title = "Towards Anomaly Detection in Reinforcement Learning",
year = "2022",
abstract = "Identifying datapoints that substantially differ from normality is the task of anomaly detection (AD). While AD has gained widespread attention in rich data domains such as images, videos, audio and text, it has has been studied less frequently in the context of reinforcement learning (RL). This is due to the additional layer of complexity that RL introduces through sequential decision making. Developing suitable anomaly detectors for RL is of particular importance in safety-critical scenarios where acting on anomalous data could result in hazardous situations. In this work, we address the question of what AD means in the context of RL. We found that current research trains and evaluates on overly simplistic and unrealistic scenarios which reduce to classic pattern recognition tasks. We link AD in RL to various fields in RL such as lifelong RL and generalization. We discuss their similarities, differences, and how the fields can benefit from each other. Moreover, we identify non-stationarity to be one of the key drivers for future research on AD in RL and make a first step towards a more formal treatment of the problem by framing it in terms of the recently introduced block contextual Markov decision process. Finally, we define a list of practical desiderata for future problems.",
url = "https://www.ifaamas.org/Proceedings/aamas2022/pdfs/p1799.pdf",
eprint = "https://thomyphan.github.io/files/2022-aamas-bluesky.pdf",
location = "Virtual Event, New Zealand",
publisher = "International Foundation for Autonomous Agents and Multiagent Systems",
booktitle = "BlueSky Ideas of the 21st International Conference on Autonomous Agents and MultiAgent Systems",
pages = "1799--1803",
keywords = "anomaly detection, AI safety, reinforcement learning",
doi = "https://dl.acm.org/doi/10.5555/3535850.3536113"
}
Related Articles
- P. Altmann et al., “CROP: Towards Distributional-Shift Robust Reinforcement Learning using Compact Reshaped Observation Processing”, IJCAI 2023
- P. Altmann et al., “DIRECT: Learning from Sparse and Shifting Rewards using Discriminative Reward Co-Training”, ALA 2023
- F. Ritz et al., “Specification Aware Multi-Agent Reinforcement Learning”, Book of ICAART 2021
- A. Sedlmeier et al., “Uncertainty-Based Out-of-Distribution Classification in Deep Reinforcement Learning”, ICAART 2020
- A. Sedlmeier et al., “Uncertainty-Based Out-of-Distribution Detection in Deep Reinforcement Learning”, ISAAI 2019