@COMMENT{{This file has been generated by bib2bib 1.88}}
@COMMENT{{Command line: /usr/bin/bib2bib -oc citefile -ob bibfile.bib -c 'tags : "web"' /home/mstoll/research/doc/all-v01.bib}}
@MISC{rivest2001lnsc,
  author = {Francois Rivest and Martin Stolle and Thomas Shultz},
  title = {{LNSC} Cascade-correlation Simulator Applet},
  howpublished = {{WWW}},
  year = {2001},
  tags = {web},
  url = {http://www.psych.mcgill.ca/perpg/fac/shultz/cdp/lnsc_applet.htm}
}
@MASTERSTHESIS{stolle2004thesis,
  author = {Martin Stolle},
  title = {Automated Discovery of Options in Reinforcement Learning},
  school = {McGill University},
  year = {2004},
  month = {February},
  abstract = {AI planning benefits greatly from the use of temporally-extended or
	macro-actions. Macro-actions allow for faster and more efficient
	planning as well as the reuse of knowledge from previous solutions.
	In recent years, a significant amount of research has been devoted
	to incorporating macro-actions in learned controllers, particularly
	in the context of Reinforcement Learning. One general approach is
	the use of options (temporally-extended actions) in Reinforcement
	Learning. While the properties of options are well understood, it
	is not clear how to find new options automatically. In this thesis
	we propose two new algorithms for discovering options and compare
	them to one algorithm from the literature. We also contribute a new
	algorithm for learning with options which improves on the performance
	of two widely used learning algorithms. Extensive experiments are
	used to demonstrate the effectiveness of the proposed algorithms.},
  pdf = {pubs/stolle2004thesis.pdf},
  tags = {web},
  url = {http://www.cs.cmu.edu/~mstoll/publications.shtml}
}
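
For context on the "learning with options" part of this abstract, the fragment below is a minimal Python sketch of the standard SMDP Q-learning update from the options framework (Sutton, Precup & Singh, 1999), i.e. the kind of baseline such work builds on; it is not the thesis's own algorithm, and the names and tabular representation are illustrative assumptions.

def smdp_q_update(Q, s, o, discounted_reward, s_next, k, options, alpha=0.1, gamma=0.9):
    # Q: dict mapping (state, option) -> value; options: iterable of available options.
    # discounted_reward: reward accumulated (already discounted) while option o ran for k steps.
    best_next = max(Q.get((s_next, o2), 0.0) for o2 in options)
    target = discounted_reward + (gamma ** k) * best_next
    Q[(s, o)] = Q.get((s, o), 0.0) + alpha * (target - Q.get((s, o), 0.0))
    return Q
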
@INPROCEEDINGS{stolle2007transfer,
  author = {Martin Stolle and Christopher G. Atkeson},
  title = {Transfer of Policies Based on Trajectory Libraries},
  booktitle = {Proceedings of the International Conference on Intelligent Robots
	and Systems (IROS 2007)},
  year = {2007},
  abstract = {Recently, libraries of trajectory plans have been shown to be a promising
	way of creating policies for difficult problems. However, often it
	is not desirable or even possible to create a new library for every
	task. We present a method for transferring libraries across tasks,
	which allows us to build libraries by learning from demonstration
	on one task and apply them to similar tasks. Representing the libraries
	in a feature-based space is key to supporting transfer. We also search
	through the library to ensure a complete path to the goal is possible.
	Results are shown for the Little Dog task. Little Dog is a quadruped
	robot that has to walk across rough terrain at reasonably fast speeds.},
  pdf = {pubs/stolle2007transfer.pdf},
  tags = {web},
  timestamp = {2007.07.12},
  url = {http://www.cs.cmu.edu/~mstoll/publications.shtml}
}
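
As a reading aid for the abstract above, the following Python fragment sketches the feature-based indexing idea in a minimal way: library entries are stored under a feature vector computed from local context rather than under raw task-specific states, so a library built on one task can be queried from another. The feature function, names, and distance metric are illustrative assumptions, not the paper's representation.

import math

def build_feature_library(states, actions, features):
    # Pair each state's local-feature vector with the action taken there.
    return [(features(s), a) for s, a in zip(states, actions)]

def query_library(library, state, features):
    # Return the stored action whose feature vector is closest to the query state's features.
    target = features(state)
    return min(library, key=lambda entry: math.dist(entry[0], target))[1]
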
@INPROCEEDINGS{stolle2007knowledge,
  author = {Martin Stolle and Christopher G. Atkeson},
  title = {Knowledge Transfer using Local Features},
  booktitle = {Proceedings of the IEEE Symposium on Approximate Dynamic Programming
	and Reinforcement Learning (ADPRL 2007)},
  year = {2007},
  abstract = {We present a method for reducing the effort required to compute policies
	for tasks based on solutions to previously solved tasks. The key
	idea is to use a learned intermediate policy based on local features
	to create an initial policy for the new task. In order to further
	improve this initial policy, we developed a form of generalized policy
	iteration. We achieve a substantial reduction in computation needed
	to find policies when previous experience is available.},
  owner = {mstoll},
  pdf = {pubs/stolle2007knowledge.pdf},
  tags = {web},
  timestamp = {2007.03.29},
  url = {http://www.cs.cmu.edu/~mstoll/publications.shtml}
}
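
The "form of generalized policy iteration" mentioned in the abstract above follows the generic evaluate/improve pattern; the skeleton below is a textbook-style Python sketch of that pattern under assumed helper functions (evaluate, greedy_improve), not the paper's specific algorithm.

def generalized_policy_iteration(policy, evaluate, greedy_improve, max_iters=50):
    # policy: initial policy, e.g. built from local features of a previously solved task.
    # evaluate: returns a (possibly approximate) value function for the current policy.
    # greedy_improve: returns a policy that is greedy with respect to a value function.
    for _ in range(max_iters):
        values = evaluate(policy)
        improved = greedy_improve(values)
        if improved == policy:   # policy is greedy w.r.t. its own values: converged
            break
        policy = improved
    return policy
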
@INPROCEEDINGS{stolle2006policies,
  author = {Martin Stolle and Christopher G. Atkeson},
  title = {Policies Based on Trajectory Libraries},
  booktitle = {Proceedings of the International Conference on Robotics and Automation
	(ICRA 2006)},
  year = {2006},
  abstract = {We present a control approach that uses a library of trajectories
	to establish a global control law or policy. This is an alternative
	to methods for finding global policies based on value functions using
	dynamic programming and also to using plans based on a single desired
	trajectory. Our method has the advantage of providing reasonable
	policies much faster than dynamic programming can provide an initial
	policy. It also has the advantage of providing more robust and global
	policies than following a single desired trajectory. Trajectory libraries
	can be created for robots with many more degrees of freedom than
	what dynamic programming can be applied to as well as for robots
	with dynamic model discontinuities. Results are shown for the ``Labyrinth''
	marble maze, both in simulation as well as a real world version.
	The marble maze is a difficult task which requires both fast control
	as well as planning ahead.},
  pdf = {pubs/stolle2006policies.pdf},
  tags = {web},
  url = {http://www.cs.cmu.edu/~mstoll/publications.shtml}
}
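
A minimal nearest-neighbor reading of the trajectory-library idea in the abstract above: store (state, action) pairs from the planned trajectories and, at run time, execute the action of the closest stored state. This Python sketch is illustrative only; the class name, state representation, and Euclidean metric are assumptions, not the paper's implementation.

import math

class TrajectoryLibraryPolicy:
    def __init__(self):
        self.samples = []  # (state_vector, action) pairs from all library trajectories

    def add_trajectory(self, states, actions):
        self.samples.extend(zip(states, actions))

    def act(self, state):
        # Follow the action stored at the nearest library state (Euclidean distance).
        _, action = min(self.samples, key=lambda sample: math.dist(sample[0], state))
        return action

# Example with two short 2-D trajectories:
policy = TrajectoryLibraryPolicy()
policy.add_trajectory([(0.0, 0.0), (1.0, 0.0)], ["right", "right"])
policy.add_trajectory([(0.0, 1.0), (0.0, 2.0)], ["up", "up"])
print(policy.act((0.9, 0.1)))   # -> "right"
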
@ARTICLE{stolle2002learning,
  author = {Martin Stolle and Doina Precup},
  title = {Learning Options in Reinforcement Learning},
  journal = {Lecture Notes in Computer Science},
  year = {2002},
  volume = {2371},
  pages = {212--223},
  abstract = {Temporally extended actions (e.g., macro actions) have proven very
	useful in speeding up learning, ensuring robustness and building
	prior knowledge into AI systems. The options framework (Precup, 2000;
	Sutton, Precup & Singh, 1999) provides a natural way of incorporating
	such actions into reinforcement learning systems, but leaves open
	the issue of how good options might be identified. In this paper,
	we empirically explore a simple approach to creating options. The
	underlying assumption is that the agent will be asked to perform
	different goal-achievement tasks in an environment that is otherwise
	the same over time. Our approach is based on the intuition that ``bottleneck''
	states, i.e. states that are frequently visited on system trajectories,
	could prove to be useful subgoals (e.g. McGovern & Barto, 2001; Iba,
	1989). We present empirical studies of this approach in two gridworld
	navigation tasks. One of the environments we explored contains bottleneck
	states, and the algorithm indeed finds these states, as expected.
	The second environment is an empty gridworld with no obstacles. Although
	the environment does not contain bottleneck states, our approach
	still finds useful options, which essentially allow the agent to
	travel around the environment more quickly.},
  citeseerurl = {http://citeseer.ist.psu.edu/579862.html},
  pdf = {pubs/stolle2002learning.pdf},
  tags = {web},
  url = {http://www.cs.cmu.edu/~mstoll/publications.shtml}
}
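
The abstract above states the bottleneck heuristic informally; the Python fragment below is a minimal sketch of that idea under stated assumptions (invented names, counting each state at most once per trajectory), not the paper's actual procedure: count how often states appear on a batch of trajectories and take the most frequently visited ones as candidate subgoals for new options.

from collections import Counter

def find_bottleneck_subgoals(trajectories, k=3):
    # trajectories: iterable of state sequences (states must be hashable).
    visits = Counter()
    for trajectory in trajectories:
        visits.update(set(trajectory))   # once per trajectory, so loops don't dominate
    return [state for state, _ in visits.most_common(k)]

# Example: two gridworld paths that both pass through a doorway at (2, 3).
paths = [
    [(0, 0), (1, 0), (2, 1), (2, 3), (3, 3)],
    [(0, 4), (1, 4), (2, 3), (2, 4)],
]
print(find_bottleneck_subgoals(paths, k=1))   # -> [(2, 3)]
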