Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * Learn a batch from experience replay
- */
- for (int i = 0; i < BATCHSIZE; ++i) {
- // Prioritized experience replay: tournament selection - draw PRIOTOURNAMENTSIZE random experiences and keep the one with the highest TDError
- int memId = xorRandInt(memory.size());
- for (int j = 1; j < PRIOTOURNAMENTSIZE; ++j) {
- int r = xorRandInt(memory.size());
- if (memory[r].TDError > memory[memId].TDError) {
- memId = r;
- }
- }
- if (xorRandDouble() < 0.15) {
- // Bypass the prioritized selection: discard the tournament winner and use a single plain random draw instead
- memId = xorRandInt(memory.size());
- }
- const Experience &exp = memory[memId];
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement