The safe application of reinforcement learning (RL) requires generalization from limited training data to unseen scenarios. Yet, fulfilling tasks under changing circumstances is a key challenge in RL. Current state-of-the-art approaches for generalization apply data augmentation techniques to increase the diversity of training data. Even though this prevents overfitting to the training environment(s), it hinders policy optimization. Crafting a suitable observation that contains only crucial information has been shown to be a challenging task in itself. To improve data efficiency and generalization capabilities, we propose Compact Reshaped Observation Processing (CROP) to reduce the state information used for policy optimization. By providing only relevant information, overfitting to a specific training layout is precluded and generalization to unseen environments is improved. We formulate three CROPs that can be applied to fully observable observation and action spaces and provide a methodical foundation. We empirically show the improvements of CROP in a distributionally shifted safety gridworld. We furthermore provide benchmark comparisons to full observability and data augmentation in two differently sized, procedurally generated mazes.
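To give a rough intuition for what "compact reshaped observation processing" can mean in a gridworld setting, the sketch below extracts a fixed-size local window around the agent from the full grid. This is only a hypothetical illustration of the general idea of feeding the policy a compact, layout-independent view instead of the full state; the function name, window size, and padding scheme are assumptions made here and are not the three CROP formulations defined in the paper.

```python
import numpy as np

def crop_observation(grid, agent_pos, radius=2):
    """Illustrative sketch (not the paper's exact CROP formulation):
    extract a (2*radius+1) x (2*radius+1) window centered on the agent."""
    r, c = agent_pos
    # Pad with a sentinel value so windows near the border keep a fixed shape.
    padded = np.pad(grid, radius, mode="constant", constant_values=-1)
    # The agent's padded coordinates are (r + radius, c + radius),
    # so the centered window starts at (r, c) in the padded frame.
    return padded[r:r + 2 * radius + 1, c:c + 2 * radius + 1]

# Hypothetical usage: 0 = free cell, 1 = wall, 2 = goal.
grid = np.zeros((8, 8), dtype=int)
grid[3, 4] = 1
grid[6, 6] = 2
local_view = crop_observation(grid, agent_pos=(5, 5), radius=2)
print(local_view.shape)  # (5, 5) regardless of the full maze size
```

Because the window has the same shape in every maze, a policy trained on such views cannot overfit to one global layout, which is the intuition behind the generalization gains reported in the paper.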
@inproceedings{ altmannIJCAI23,
author = "Philipp Altmann and Leonard Feuchtinger and Fabian Ritz and Jonas Nüßlein and Claudia Linnhof-Popien and Thomy Phan",
title = "CROP: Towards Distributional-Shift Robust Reinforcement Learning Using Compact Reshaped Observation Processing",
year = "2023",
abstract = "The safe application of reinforcement learning (RL) requires generalization from limited training data to unseen scenarios. Yet, fulfilling tasks under changing circumstances is a key challenge in RL. Current state-of-the-art approaches for generalization apply data augmentation techniques to increase the diversity of training data. Even though this prevents overfitting to the training environment(s), it hinders policy optimization. Crafting a suitable observation, only containing crucial information, has been shown to be a challenging task itself. To improve data efficiency and generalization capabilities, we propose Compact Reshaped Observation Processing (CROP) to reduce the state information used for policy optimization. By providing only relevant information, overfitting to a specific training layout is precluded and generalization to unseen environments is improved. We formulate three CROPs that can be applied to fully observable observation- and action-spaces and provide methodical foundation. We empirically show the improvements of CROP in a distributionally shifted safety gridworld. We furthermore provide benchmark comparisons to full observability and data-augmentation in two different-sized procedurally generated mazes.",
url = "https://thomyphan.github.io/files/2023-ijcai-preprint.pdf",
eprint = "https://thomyphan.github.io/files/2023-ijcai-preprint.pdf",
location = "Macao, China",
publisher = "International Joint Conferences on Artificial Intelligence Organization",
booktitle = "Proceedings of the 32nd International Joint Conference on Artificial Intelligence",
pages = "3414--3422",
keywords = "deep learning, reinforcement learning, robustness",
}
Related Articles
- T. Phan et al., “Confidence-Based Curriculum Learning for Multi-Agent Path Finding”, AAMAS 2024
- P. Altmann et al., “Challenges for Reinforcement Learning in Quantum Computing”, QCE 2024
- P. Altmann et al., “DIRECT: Learning from Sparse and Shifting Rewards using Discriminative Reward Co-Training”, ALA 2023
- R. Müller et al., “Towards Anomaly Detection in Reinforcement Learning”, AAMAS BlueSky Ideas 2022
- F. Ritz et al., “Specification Aware Multi-Agent Reinforcement Learning”, Book of ICAART 2021 (journal version)
- F. Ritz et al., “SAT-MARL: Specification Aware Training in Multi-Agent Reinforcement Learning”, ICAART 2021 (conference version)