123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- """Policy class implementation which is used for selecting operator using Upper Confidence Bound
- """
- # main imports
- import logging
- import random
- import math
- import numpy as np
- # module imports
- from .Policy import Policy
- class UCBPolicy(Policy):
- """UCB policy class which is used for applying UCB strategy when selecting and applying operator
- Attributes:
- operators: {[Operator]} -- list of selected operators for the algorithm
- C: {float} -- tradeoff between EvE parameter for UCB
- exp_rate: {float} -- exploration rate (probability to choose randomly next operator)
- rewards: {[float]} -- list of summed rewards obtained for each operator
- occurrences: {[int]} -- number of use (selected) of each operator
- """
- def __init__(self, _operators, _C=100., _exp_rate=0.5):
- self.operators = _operators
- self.rewards = [0. for o in self.operators]
- self.occurrences = [0 for o in self.operators]
- self.C = _C
- self.exp_rate = _exp_rate
- def select(self):
- """Select randomly the next operator to use
- Returns:
- {Operator}: the selected operator
- """
- indices = [i for i, o in enumerate(self.occurrences) if o == 0]
- # random choice following exploration rate
- if np.random.uniform(0, 1) <= self.exp_rate:
- index = random.choice(range(len(self.operators)))
- return self.operators[index]
- elif len(indices) == 0:
- # if operator have at least be used one time
- ucbValues = []
- nVisits = sum(self.occurrences)
- for i in range(len(self.operators)):
- ucbValue = self.rewards[i] + self.C * math.sqrt(
- math.log(nVisits) / (self.occurrences[i] + 0.1))
- ucbValues.append(ucbValue)
- return self.operators[ucbValues.index(max(ucbValues))]
- else:
- return self.operators[random.choice(indices)]
- def apply(self, _solution):
- """
- Apply specific operator chosen to create new solution, computes its fitness and returns solution
-
- Args:
- _solution: {Solution} -- the solution to use for generating new solution
- Returns:
- {Solution} -- new generated solution
- """
- operator = self.select()
- logging.info("---- Applying %s on %s" %
- (type(operator).__name__, _solution))
- # apply operator on solution
- newSolution = operator.apply(_solution)
- # compute fitness of new solution
- newSolution.evaluate(self.algo.evaluator)
- # compute fitness improvment rate
- if self.algo.maximise:
- fir = (newSolution.fitness() -
- _solution.fitness()) / _solution.fitness()
- else:
- fir = (_solution.fitness() -
- newSolution.fitness()) / _solution.fitness()
- operator_index = self.operators.index(operator)
- if fir > 0:
- self.rewards[operator_index] += fir
- self.occurrences[operator_index] += 1
- logging.info("---- Obtaining %s" % (_solution))
- return newSolution
|