123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- """UCB policy Checkpoint class implementation
- """
- # main imports
- import os
- import logging
- import numpy as np
- # module imports
- from macop.callbacks.base import Callback
- from macop.utils.progress import macop_text, macop_line
- class UCBCheckpoint(Callback):
- """
- UCB checkpoint is used for loading previous Upper Confidence Bound data and start again after loading checkpoint
- Need to be the same operators used during previous run (see `macop.policies.reinforcement.UCBPolicy` for more details)
- Attributes:
- algo: {Algorithm} -- main algorithm instance reference
- every: {int} -- checkpoint frequency used (based on number of evaluations)
- filepath: {str} -- file path where checkpoints will be saved
- """
- def run(self):
- """
- Check if necessary to do backup based on `every` variable
- """
- # get current population
- currentEvaluation = self._algo.getGlobalEvaluation()
- # backup if necessary
- if currentEvaluation % self._every == 0:
- logging.info("UCB Checkpoint is done into " + self._filepath)
- with open(self._filepath, 'w') as f:
- rewardsLine = ''
- for i, r in enumerate(self._algo._policy._rewards):
- rewardsLine += str(r)
- if i != len(self._algo._policy._rewards) - 1:
- rewardsLine += ';'
- f.write(rewardsLine + '\n')
- occurrencesLine = ''
- for i, o in enumerate(self._algo._policy._occurences):
- occurrencesLine += str(o)
- if i != len(self._algo._policy._occurences) - 1:
- occurrencesLine += ';'
- f.write(occurrencesLine + '\n')
- def load(self):
- """
- Load backup lines as rewards and occurrences for UCB
- """
- if os.path.exists(self._filepath):
- logging.info('Load UCB data')
- with open(self._filepath) as f:
- lines = f.readlines()
- # read data for each line
- rewardsLine = lines[0].replace('\n', '')
- occurrencesLine = lines[1].replace('\n', '')
- self._algo._policy._rewards = [
- float(f) for f in rewardsLine.split(';')
- ]
- self._algo._policy._occurences = [
- float(f) for f in occurrencesLine.split(';')
- ]
-
- macop_text(self._algo, f'Load of available UCB policy data from `{self._filepath}`')
- else:
- macop_text(self._algo, 'No UCB data found, use default UCB policy')
- logging.info("No UCB data found...")
- macop_line(self._algo)
|