Mathieu Seurin; Florian Strub; Philippe Preux; Olivier Pietquin
Don't Do What Doesn't Matter: Intrinsic Motivation with Action Usefulness Proceedings Article
In: Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence (IJCAI-21), Montreal, Canada, 2021, ISBN: 9781713836322.
% IJCAI-21 conference paper introducing DoWhaM.
% `pubstate` and `tppubtype` are non-standard fields (presumably bookkeeping of the
% exporting tool -- confirm); standard BibTeX styles ignore unknown fields.
@inproceedings{seurin_2689,
title = {Don't Do What Doesn't Matter: Intrinsic Motivation with Action Usefulness},
author = {Seurin, Mathieu and Strub, Florian and Preux, Philippe and Pietquin, Olivier},
url = {https://www.ijcai.org/proceedings/2021/0406.pdf},
isbn = {9781713836322},
year = {2021},
date = {2021-08-01},
booktitle = {Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence (IJCAI-21)},
address = {Montreal, Canada},
abstract = {Sparse rewards are double-edged training signals in reinforcement learning: easy to design but hard to optimize. Intrinsic motivation guidances have thus been developed toward alleviating the resulting exploration problem. They usually incentivize agents to look for new states through novelty signals. Yet, such methods encourage exhaustive exploration of the state space rather than focusing on the environment's salient interaction opportunities. We propose a new exploration method, called Don't Do What Doesn't Matter (DoWhaM), shifting the emphasis from state novelty to state with relevant actions. While most actions consistently change the state when used, e.g. moving the agent, some actions are only effective in specific states, e.g., opening a door, grabbing an object. DoWhaM detects and rewards actions that seldom affect the environment. We evaluate DoWhaM on the procedurally-generated environment MiniGrid, against state-of-the-art methods. Experiments consistently show that DoWhaM greatly reduces sample complexity, installing the new state-of-the-art in MiniGrid.},
note = {Montreal-themed Virtual Reality, 19th--26th August, 2021. 30th International Joint Conference on Artificial Intelligence (IJCAI-21)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Geoffrey Cideron; Mathieu Seurin; Florian Strub; Olivier Pietquin
HIGhER: Improving instruction following with Hindsight Generation for Experience Replay Proceedings Article
In: 2020 IEEE Symposium Series on Computational Intelligence (SSCI), Canberra, Australia, 2020, ISBN: 978-1-7281-2547-3.
% IEEE SSCI 2020 conference paper introducing HIGhER.
% The acronym is brace-protected so sentence-casing styles keep its mixed case.
% `pubstate` and `tppubtype` are non-standard fields (presumably bookkeeping of the
% exporting tool -- confirm); standard BibTeX styles ignore unknown fields.
@inproceedings{cideron_2687,
title = {{HIGhER}: Improving instruction following with Hindsight Generation for Experience Replay},
author = {Cideron, Geoffrey and Seurin, Mathieu and Strub, Florian and Pietquin, Olivier},
url = {https://ieeexplore.ieee.org/abstract/document/9308603},
isbn = {978-1-7281-2547-3},
year = {2020},
date = {2020-01-01},
booktitle = {2020 IEEE Symposium Series on Computational Intelligence (SSCI)},
address = {Canberra, Australia},
abstract = {Language creates a compact representation of the world and allows the description of unlimited situations and objectives through compositionality. While these characterizations may foster instructing, conditioning or structuring interactive agent behavior, it remains an open-problem to correctly relate language understanding and reinforcement learning in even simple instruction following scenarios. This joint learning problem is alleviated through expert demonstrations, auxiliary losses, or neural inductive biases. In this paper, we propose an orthogonal approach called Hindsight Generation for Experience Replay (HIGhER) that extends the Hindsight Experience Replay approach to the language-conditioned policy setting. Whenever the agent does not fulfill its instruction, HIGhER learns to output a new directive that matches the agent trajectory, and it relabels the episode with a positive reward. To do so, HIGhER learns to map a state into an instruction by using past successful trajectories, which removes the need to have external expert interventions to relabel episodes as in vanilla HER. We show the efficiency of our approach in the BabyAI environment, and demonstrate how it complements other instruction following methods.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Mathieu Seurin; Florian Strub; Philippe Preux; Olivier Pietquin
A Machine of Few Words: Interactive Speaker Recognition with Reinforcement Learning Proceedings Article
In: Interspeech 2020 Proceedings, Shanghai, China, 2020, ISBN: 9781713820697.
% Interspeech 2020 conference paper on Interactive Speaker Recognition (ISR).
% `date` is set to the first conference day given in `note`.
% `pubstate` and `tppubtype` are non-standard fields (presumably bookkeeping of the
% exporting tool -- confirm); standard BibTeX styles ignore unknown fields.
@inproceedings{seurin_2688,
title = {A Machine of Few Words: Interactive Speaker Recognition with Reinforcement Learning},
author = {Seurin, Mathieu and Strub, Florian and Preux, Philippe and Pietquin, Olivier},
url = {http://www.interspeech2020.org/uploadfile/pdf/Thu-2-7-7.pdf},
isbn = {9781713820697},
year = {2020},
date = {2020-10-25},
booktitle = {Interspeech 2020 Proceedings},
address = {Shanghai, China},
abstract = {Speaker recognition is a well known and studied task in the speech processing domain. It has many applications, either for security or speaker adaptation of personal devices. In this paper, we present a new paradigm for automatic speaker recognition that we call Interactive Speaker Recognition (ISR). In this paradigm, the recognition system aims to incrementally build a representation of the speakers by requesting personalized utterances to be spoken in contrast to the standard text-dependent or text-independent schemes. To do so, we cast the speaker recognition task into a sequential decision-making problem that we solve with Reinforcement Learning. Using a standard dataset, we show that our method achieves excellent performance while using little speech signal amounts. This method could also be applied as an utterance selection mechanism for building speech synthesis systems.},
note = {October 25--29, 2020, Shanghai, China},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Timothée Lesort; Mathieu Seurin; Xinrui Li; Natalia Díaz-Rodríguez; David Filliat
Deep unsupervised state representation learning with robotic priors: a robustness analysis Proceedings Article
In: 2019 International Joint Conference on Neural Networks (IJCNN), Budapest, Hungary, 2019, ISBN: 978-1-7281-1985-4.
% IJCNN 2019 conference paper on state representation learning with robotic priors.
% `isbn` holds the electronic ISBN of the proceedings.
% Author names are UTF-8; classic 8-bit BibTeX would need LaTeX accent escapes instead.
% `pubstate` and `tppubtype` are non-standard fields (presumably bookkeeping of the
% exporting tool -- confirm); standard BibTeX styles ignore unknown fields.
@inproceedings{lesort_2669,
title = {Deep unsupervised state representation learning with robotic priors: a robustness analysis},
author = {Lesort, Timothée and Seurin, Mathieu and Li, Xinrui and Díaz-Rodríguez, Natalia and Filliat, David},
url = {https://ieeexplore.ieee.org/document/8852042},
isbn = {978-1-7281-1985-4},
year = {2019},
date = {2019-07-01},
booktitle = {2019 International Joint Conference on Neural Networks (IJCNN)},
address = {Budapest, Hungary},
abstract = {Our understanding of the world depends highly on our capacity to produce intuitive and simplified representations which can be easily used to solve problems. We reproduce this simplification process using a neural network to build a low dimensional state representation of the world from images acquired by a robot. As in Jonschkowski et al. 2015, we learn in an unsupervised way using prior knowledge about the world as loss functions called robotic priors and extend this approach to high dimension richer images to learn a 3D representation of the hand position of a robot from RGB images. We propose a quantitative evaluation metric of the learned representation that uses nearest neighbors in the state space and allows to assess its quality and show both the potential and limitations of robotic priors in realistic environments. We augment image size, add distractors and domain randomization, all crucial components to achieve transfer learning to real robots. Finally, we also contribute a new prior to improve the robustness of the representation. The applications of such low dimensional state representation range from easing reinforcement learning (RL) and knowledge transfer across tasks, to facilitating learning from raw data with more efficient and compact high level representations. The results show that the robotic prior approach is able to extract high level representation as the 3D position of an arm and organize it into a compact and coherent space of states in a challenging dataset.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Mathieu Seurin; Philippe Preux; Olivier Pietquin
"I'm sorry Dave, I'm afraid I can't do that" Deep Q-learning from forbidden action Proceedings Article
In: 2020 International Joint Conference on Neural Networks (IJCNN), Glasgow, UK, 2020, ISBN: 978-1-7281-6926-2.
% IJCNN 2020 conference paper on learning from forbidden actions with DQN.
% `year`/`date` follow the 2020 proceedings named in `booktitle`; the month and day in
% `date` are a placeholder (exact conference dates are not recorded in this entry).
% The quoted phrase uses LaTeX quotes and is brace-protected against style recasing.
% `pubstate` and `tppubtype` are non-standard fields (presumably bookkeeping of the
% exporting tool -- confirm); standard BibTeX styles ignore unknown fields.
@inproceedings{seurin_2686,
title = {{``I'm sorry Dave, I'm afraid I can't do that''} Deep {Q-learning} from forbidden action},
author = {Seurin, Mathieu and Preux, Philippe and Pietquin, Olivier},
url = {https://ieeexplore.ieee.org/abstract/document/9207496},
isbn = {978-1-7281-6926-2},
year = {2020},
date = {2020-01-01},
booktitle = {2020 International Joint Conference on Neural Networks (IJCNN)},
address = {Glasgow, UK},
abstract = {The use of Reinforcement Learning (RL) is still restricted to simulation or to enhance human-operated systems through recommendations. Real-world environments (e.g. industrial robots or power grids) are generally designed with safety constraints in mind implemented in the shape of valid actions masks or contingency controllers. For example, the range of motion and the angles of the motors of a robot can be limited to physical boundaries. Violating constraints thus results in rejected actions or entering in a safe mode driven by an external controller, making RL agents incapable of learning from their mistakes. In this paper, we propose a simple modification of a state-of-the-art deep RL algorithm (DQN), enabling learning from forbidden actions. To do so, the standard Q-learning update is enhanced with an extra safety loss inspired by structured classification. We empirically show that it reduces the number of hit constraints during the learning phase and accelerates convergence to near-optimal policies compared to using standard DQN. Experiments are done on a Visual Grid World Environment and the TextWorld domain.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Florian Strub; Mathieu Seurin; Ethan Perez; Harm de Vries; Jeremie Mary; Philippe Preux; Aaron Courville; Olivier Pietquin
Visual reasoning with multi-hop feature modulation Proceedings Article
In: Computer Vision – ECCV 2018: 15th European Conference, Munich, Germany, September 8–14, 2018, Proceedings, Part I, ISBN: 978-3030012304.
% ECCV 2018 conference paper on multi-hop FiLM generation.
% Authors use "von Last, First" form so the particle in "de Vries" is parsed as part of
% the surname. `date` is the first conference day stated in `booktitle`.
% `pubstate` and `tppubtype` are non-standard fields (presumably bookkeeping of the
% exporting tool -- confirm); standard BibTeX styles ignore unknown fields.
@inproceedings{strub_2685,
title = {Visual reasoning with multi-hop feature modulation},
author = {Strub, Florian and Seurin, Mathieu and Perez, Ethan and de Vries, Harm and Mary, Jeremie and Preux, Philippe and Courville, Aaron and Pietquin, Olivier},
url = {https://openaccess.thecvf.com/content_ECCV_2018/html/Florian_Strub_Visual_Reasoning_with_ECCV_2018_paper.html},
isbn = {978-3030012304},
year = {2018},
date = {2018-09-08},
booktitle = {Computer Vision -- ECCV 2018: 15th European Conference, Munich, Germany, September 8--14, 2018, Proceedings, Part I},
address = {Munich, Germany},
abstract = {Recent breakthroughs in computer vision and natural language processing have spurred interest in challenging multi-modal tasks such as visual question-answering and visual dialogue. For such tasks, one successful approach is to condition image-based convolutional network computation on language via Feature-wise Linear Modulation (FiLM) layers, i.e., per-channel scaling and shifting. By alternating between attending to the language input and generating FiLM layer parameters, this approach is better able to scale to settings with longer input sequences such as dialogue. We demonstrate that multi-hop FiLM generation significantly outperforms prior state-of-the-art on the GuessWhat?! visual dialogue task and matches state-of-the art on the ReferIt object retrieval task, and we provide additional qualitative analysis.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
No posts by this author.