# Source code for macop.operators.policies.UCBPolicy

"""Policy class implementation which is used for selecting operator using Upper Confidence Bound
"""
# main imports
import logging
import random
import math
import numpy as np

# module imports
from .Policy import Policy


class UCBPolicy(Policy):
    """UCB policy class which is used for applying UCB strategy when selecting and applying operator

    Each call to `apply` picks an operator through `select`, applies it to the
    given solution and credits that operator with the fitness improvement rate
    it produced (only strictly positive rates are rewarded).

    Attributes:
        operators: {[Operator]} -- list of selected operators for the algorithm
        C: {float} -- exploration/exploitation tradeoff parameter for UCB
        exp_rate: {float} -- exploration rate (probability to choose randomly next operator)
        rewards: {[float]} -- list of summed rewards obtained for each operator
        occurrences: {[int]} -- number of use (selected) of each operator
    """

    def __init__(self, _operators, _C=100., _exp_rate=0.5):
        """Initialise the policy with zeroed statistics for every operator

        Args:
            _operators: {[Operator]} -- operators the policy can choose from
            _C: {float} -- weight of the exploration term in the UCB score (default 100.)
            _exp_rate: {float} -- probability of picking an operator uniformly at random (default 0.5)
        """
        self.operators = _operators
        self.rewards = [0. for _ in self.operators]
        self.occurrences = [0 for _ in self.operators]
        self.C = _C
        self.exp_rate = _exp_rate
        # NOTE(review): `apply` reads `self.algo`, which is not assigned here;
        # presumably it is attached by the parent Policy / the algorithm
        # before `apply` is called -- confirm.

    def select(self):
        """Select the next operator to use

        Selection proceeds in this order:
            1. with probability `exp_rate`, a uniformly random operator;
            2. otherwise, if some operators were never used, a random one among them;
            3. otherwise, the operator with the highest UCB score.

        Returns:
            {Operator}: the selected operator

        Raises:
            ValueError: if the policy has no operator to choose from
        """
        if not self.operators:
            # fail with an explicit message instead of an IndexError or a
            # `math domain error`, depending on the random draw
            raise ValueError("UCBPolicy requires at least one operator")

        never_used = [i for i, count in enumerate(self.occurrences) if count == 0]

        # random choice following exploration rate
        if np.random.uniform(0, 1) <= self.exp_rate:
            return random.choice(self.operators)

        # give every operator a first try before trusting the UCB scores
        if never_used:
            return self.operators[random.choice(never_used)]

        # every operator has been used at least once, so the total is >= 1
        # and the logarithm is defined
        log_visits = math.log(sum(self.occurrences))

        # NOTE(review): the score uses the summed reward of each operator,
        # not its mean reward -- confirm this is the intended UCB variant.
        # The 0.1 offset in the denominator is kept from the original formula.
        ucb_values = [
            reward + self.C * math.sqrt(log_visits / (count + 0.1))
            for reward, count in zip(self.rewards, self.occurrences)
        ]

        return self.operators[ucb_values.index(max(ucb_values))]

    def apply(self, _solution):
        """Apply the selected operator to create a new solution, compute its fitness and update operator statistics

        Args:
            _solution: {Solution} -- the solution to use for generating new solution

        Returns:
            {Solution} -- new generated solution
        """
        operator = self.select()

        logging.info("---- Applying %s on %s", type(operator).__name__, _solution)

        # apply operator on solution
        new_solution = operator.apply(_solution)

        # compute fitness of new solution
        new_solution.evaluate(self.algo.evaluator)

        # compute fitness improvement rate (positive means "better")
        previous_fitness = _solution.fitness()
        new_fitness = new_solution.fitness()

        if self.algo.maximise:
            improvement = new_fitness - previous_fitness
        else:
            improvement = previous_fitness - new_fitness

        if previous_fitness != 0:
            # normalise by the magnitude so that a negative fitness does not
            # flip the sign of the improvement
            fir = improvement / abs(previous_fitness)
        else:
            # a relative rate is undefined for a zero fitness: fall back to
            # the raw improvement instead of raising ZeroDivisionError
            fir = improvement

        operator_index = self.operators.index(operator)

        # only actual improvements are rewarded; every use is counted
        if fir > 0:
            self.rewards[operator_index] += fir

        self.occurrences[operator_index] += 1

        logging.info("---- Obtaining %s", new_solution)

        return new_solution