policies.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. """UCB policy Checkpoint class implementation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import numpy as np
  7. # module imports
  8. from macop.callbacks.base import Callback
  9. from macop.utils.progress import macop_text, macop_line
  10. class UCBCheckpoint(Callback):
  11. """
  12. UCB checkpoint is used for loading previous Upper Confidence Bound data and start again after loading checkpoint
  13. Need to be the same operators used during previous run (see :class:`~macop.policies.reinforcement.UCBPolicy` for more details)
  14. Attributes:
  15. algo: {:class:`~macop.algorithms.base.Algorithm`} -- main algorithm instance reference
  16. every: {int} -- checkpoint frequency used (based on number of evaluations)
  17. filepath: {str} -- file path where checkpoints will be saved
  18. """
  19. def run(self):
  20. """
  21. Check if necessary to do backup based on `every` variable
  22. """
  23. # get current population
  24. currentEvaluation = self.algo.getGlobalEvaluation()
  25. # backup if necessary
  26. if currentEvaluation % self._every == 0:
  27. logging.info("UCB Checkpoint is done into " + self._filepath)
  28. with open(self._filepath, 'w') as f:
  29. rewardsLine = ''
  30. for i, r in enumerate(self.algo.policy.rewards):
  31. rewardsLine += str(r)
  32. if i != len(self.algo.policy.rewards) - 1:
  33. rewardsLine += ';'
  34. f.write(rewardsLine + '\n')
  35. occurrencesLine = ''
  36. for i, o in enumerate(self.algo.policy.occurences):
  37. occurrencesLine += str(o)
  38. if i != len(self.algo.policy.occurences) - 1:
  39. occurrencesLine += ';'
  40. f.write(occurrencesLine + '\n')
  41. def load(self):
  42. """
  43. Load backup lines as rewards and occurrences for UCB
  44. """
  45. if os.path.exists(self._filepath):
  46. logging.info('Load UCB data')
  47. with open(self._filepath) as f:
  48. lines = f.readlines()
  49. # read data for each line
  50. rewardsLine = lines[0].replace('\n', '')
  51. occurrencesLine = lines[1].replace('\n', '')
  52. self.algo.policy.rewards = [
  53. float(f) for f in rewardsLine.split(';')
  54. ]
  55. self.algo.policy.occurences = [
  56. float(f) for f in occurrencesLine.split(';')
  57. ]
  58. macop_text(
  59. self.algo,
  60. f'Load of available UCB policy data from `{self._filepath}`')
  61. else:
  62. macop_text(self.algo, 'No UCB data found, use default UCB policy')
  63. logging.info("No UCB data found...")
  64. macop_line(self.algo)