It has come to our attention that Theorem 3.1 of our ICML 2008 paper mirrors an observation made by Ralf Schoknecht in his NIPS 15 paper, Optimality of Reinforcement Learning Algorithms with Linear Function Approximation, as part of the proof of his Lemma 1. Schoknecht's paper preceded ours and should have been cited by our paper. We regret this oversight.
We are now aware of at least three papers that independently arrive
at this result: Schoknecht's NIPS 2002 paper, our ICML 2008 paper, and
a Sutton et al. UAI 2008 paper,
Dyna-Style
Planning with Linear Function Approximation and Prioritized Sweeping,
in Theorem 3.3. While each of these papers makes the same observation early
in the paper, they proceed in different directions and make different
contributions.
To the best of our knowledge, the equivalence between LSTD and model-based
approaches was first discussed by Boyan in his ICML 1999 paper Least-Squares Temporal
Difference Learning, though Boyan did not show equivalence in general.
Schoknecht's paper appears to be the first to establish full equivalence
between the linear model solution and the linear TD solution,
and should be cited as such.
% Schoknecht's NIPS 15 paper; the accompanying text credits the proof of its
% Lemma 1 with the earlier statement of the result.
@inproceedings{Schoknecht2002,
  author    = {Ralf Schoknecht},
  title     = {Optimality of Reinforcement Learning
               Algorithms with Linear Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 15},
  year      = {2002},
  pages     = {1555--1562},
}
% Boyan's ICML 1999 paper; the accompanying text names it as the first
% discussion of the equivalence between LSTD and model-based approaches.
@inproceedings{Boyan1999,
  author    = {Justin A. Boyan},
  title     = {Least-Squares Temporal Difference Learning},
  booktitle = {Proceedings of the Sixteenth International Conference on
               Machine Learning},
  year      = {1999},
  pages     = {49--56},
}
% Parr et al., 2008; presumably the "ICML 2008 paper" whose Theorem 3.1 the
% accompanying text discusses (confirm against the note's authorship).
@inproceedings{Parr+al:2008,
  author    = {Ronald Parr and
               Lihong Li and
               Gavin Taylor and
               Christopher Painter-Wakefield and
               Michael L. Littman},
  title     = {An Analysis of Linear Models, Linear Value-Function
               Approximation, and Feature Selection for Reinforcement Learning},
  booktitle = {Proceedings of the Twenty-Fifth International
               Conference on Machine Learning},
  year      = {2008},
  pages     = {752--759},
}
% Sutton et al., UAI 2008; the accompanying text points to Theorem 3.3 of
% this paper as an independent statement of the same result.
@inproceedings{Sutton+al:2008,
  author    = {Richard S. Sutton and
               Csaba Szepesv{\'a}ri and
               Alborz Geramifard and
               Michael H. Bowling},
  title     = {Dyna-Style Planning with Linear Function Approximation and
               Prioritized Sweeping},
  booktitle = {Proceedings of the 24th Conference in Uncertainty
               in Artificial Intelligence},
  year      = {2008},
  pages     = {528--536},
}