# UCBPolicy.py
"""Policy class implementation used for selecting the next operator with the Upper Confidence Bound (UCB) strategy
"""
  3. # main imports
  4. import logging
  5. import random
  6. import math
  7. import numpy as np
  8. # module imports
  9. from .Policy import Policy
  10. class UCBPolicy(Policy):
  11. """UCB policy class which is used for applying UCB strategy when selecting and applying operator
  12. Attributes:
  13. operators: {[Operator]} -- list of selected operators for the algorithm
  14. C: {float} -- tradeoff between EvE parameter for UCB
  15. exp_rate: {float} -- exploration rate (probability to choose randomly next operator)
  16. rewards: {[float]} -- list of summed rewards obtained for each operator
  17. occurrences: {[int]} -- number of use (selected) of each operator
  18. """
  19. def __init__(self, operators, C=100., exp_rate=0.5):
  20. self._operators = operators
  21. self._rewards = [0. for o in self._operators]
  22. self._occurrences = [0 for o in self._operators]
  23. self._C = C
  24. self._exp_rate = exp_rate
  25. def select(self):
  26. """Select randomly the next operator to use
  27. Returns:
  28. {Operator}: the selected operator
  29. """
  30. indices = [i for i, o in enumerate(self._occurrences) if o == 0]
  31. # random choice following exploration rate
  32. if np.random.uniform(0, 1) <= self._exp_rate:
  33. index = random.choice(range(len(self._operators)))
  34. return self._operators[index]
  35. elif len(indices) == 0:
  36. # if operator have at least be used one time
  37. ucbValues = []
  38. nVisits = sum(self._occurrences)
  39. for i in range(len(self._operators)):
  40. ucbValue = self._rewards[i] + self._C * math.sqrt(
  41. math.log(nVisits) / (self._occurrences[i] + 0.1))
  42. ucbValues.append(ucbValue)
  43. return self._operators[ucbValues.index(max(ucbValues))]
  44. else:
  45. return self._operators[random.choice(indices)]
  46. def apply(self, solution):
  47. """
  48. Apply specific operator chosen to create new solution, computes its fitness and returns solution
  49. Args:
  50. solution: {Solution} -- the solution to use for generating new solution
  51. Returns:
  52. {Solution} -- new generated solution
  53. """
  54. operator = self.select()
  55. logging.info("---- Applying %s on %s" %
  56. (type(operator).__name__, solution))
  57. # apply operator on solution
  58. newSolution = operator.apply(solution)
  59. # compute fitness of new solution
  60. newSolution.evaluate(self._algo._evaluator)
  61. # compute fitness improvment rate
  62. if self._algo._maximise:
  63. fir = (newSolution.fitness() -
  64. solution.fitness()) / solution.fitness()
  65. else:
  66. fir = (solution.fitness() -
  67. newSolution.fitness()) / solution.fitness()
  68. operator_index = self._operators.index(operator)
  69. if fir > 0:
  70. self._rewards[operator_index] += fir
  71. self._occurrences[operator_index] += 1
  72. logging.info("---- Obtaining %s" % (solution))
  73. return newSolution