UCBPolicy.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
"""Policy class implementation used for selecting operators with the Upper Confidence Bound (UCB) strategy
"""
  3. # main imports
  4. import logging
  5. import random
  6. import math
  7. import numpy as np
  8. # module imports
  9. from .Policy import Policy
  10. class UCBPolicy(Policy):
  11. """UCB policy class which is used for applying UCB strategy when selecting and applying operator
  12. Attributes:
  13. operators: {[Operator]} -- list of selected operators for the algorithm
  14. C: {float} -- tradeoff between EvE parameter for UCB
  15. exp_rate: {float} -- exploration rate (probability to choose randomly next operator)
  16. rewards: {[float]} -- list of summed rewards obtained for each operator
  17. occurrences: {[int]} -- number of use (selected) of each operator
  18. """
  19. def __init__(self, _operators, _C=100., _exp_rate=0.5):
  20. self.operators = _operators
  21. self.rewards = [0. for o in self.operators]
  22. self.occurrences = [0 for o in self.operators]
  23. self.C = _C
  24. self.exp_rate = _exp_rate
  25. def select(self):
  26. """Select randomly the next operator to use
  27. Returns:
  28. {Operator}: the selected operator
  29. """
  30. indices = [i for i, o in enumerate(self.occurrences) if o == 0]
  31. # random choice following exploration rate
  32. if np.random.uniform(0, 1) <= self.exp_rate:
  33. return self.operators[random.choice(range(len(self.operators)))]
  34. elif len(indices) == 0:
  35. # if operator have at least be used one time
  36. ucbValues = []
  37. nVisits = sum(self.occurrences)
  38. for i in range(len(self.operators)):
  39. ucbValue = self.rewards[i] + self.C * math.sqrt(
  40. math.log(nVisits) / (self.occurrences[i] + 0.1))
  41. ucbValues.append(ucbValue)
  42. return self.operators[ucbValues.index(max(ucbValues))]
  43. else:
  44. return self.operators[random.choice(indices)]
  45. def apply(self, _solution):
  46. """
  47. Apply specific operator chosen to create new solution, computes its fitness and returns solution
  48. Args:
  49. _solution: {Solution} -- the solution to use for generating new solution
  50. Returns:
  51. {Solution} -- new generated solution
  52. """
  53. operator = self.select()
  54. logging.info("---- Applying %s on %s" %
  55. (type(operator).__name__, _solution))
  56. # apply operator on solution
  57. newSolution = operator.apply(_solution)
  58. # compute fitness of new solution
  59. newSolution.evaluate(self.algo.evaluator)
  60. # compute fitness improvment rate
  61. if self.algo.maximise:
  62. fir = (newSolution.fitness() -
  63. _solution.fitness()) / _solution.fitness()
  64. else:
  65. fir = (_solution.fitness() -
  66. newSolution.fitness()) / _solution.fitness()
  67. if fir > 0:
  68. operator_index = self.operators.index(operator)
  69. self.rewards[operator_index] += fir
  70. self.occurrences[operator_index] += 1
  71. logging.info("---- Obtaining %s" % (_solution))
  72. return newSolution