UCBPolicy.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. """Policy class implementation which is used for selecting operator using Upper Confidence Bound
  2. """
  3. # main imports
  4. import logging
  5. import random
  6. import math
  7. # module imports
  8. from .Policy import Policy
  9. class UCBPolicy(Policy):
  10. """UCB policy class which is used for applying UCB strategy when selecting and applying operator
  11. Attributes:
  12. operators: {[Operator]} -- list of selected operators for the algorithm
  13. C: {float} -- tradeoff between EvE parameter for UCB
  14. rewards: {[float]} -- list of summed rewards obtained for each operator
  15. occurences: {[int]} -- number of use (selected) of each operator
  16. """
  17. def __init__(self, _operators, _C=100.):
  18. self.operators = _operators
  19. self.rewards = [0. for o in self.operators]
  20. self.occurences = [0 for o in self.operators]
  21. self.C = _C
  22. def select(self):
  23. """Select randomly the next operator to use
  24. Returns:
  25. {Operator}: the selected operator
  26. """
  27. indices = [i for i, o in enumerate(self.occurences) if o == 0]
  28. # if operator have at least be used one time
  29. if len(indices) == 0:
  30. ucbValues = []
  31. nVisits = sum(self.occurences)
  32. for i in range(len(self.operators)):
  33. ucbValue = self.rewards[i] + self.C * math.sqrt(
  34. math.log(nVisits) / self.occurences[i])
  35. ucbValues.append(ucbValue)
  36. return self.operators[ucbValues.index(max(ucbValues))]
  37. else:
  38. return self.operators[random.choice(indices)]
  39. def apply(self, _solution):
  40. """
  41. Apply specific operator chosen to create new solution, computes its fitness and returns solution
  42. Args:
  43. _solution: {Solution} -- the solution to use for generating new solution
  44. Returns:
  45. {Solution} -- new generated solution
  46. """
  47. operator = self.select()
  48. logging.info("---- Applying %s on %s" %
  49. (type(operator).__name__, _solution))
  50. # apply operator on solution
  51. newSolution = operator.apply(_solution)
  52. # compute fitness of new solution
  53. newSolution.evaluate(self.algo.evaluator)
  54. # compute fitness improvment rate
  55. if self.algo.maximise:
  56. fir = (newSolution.fitness() -
  57. _solution.fitness()) / _solution.fitness()
  58. else:
  59. fir = (_solution.fitness() -
  60. newSolution.fitness()) / _solution.fitness()
  61. if fir > 0:
  62. operator_index = self.operators.index(operator)
  63. self.rewards[operator_index] += fir
  64. self.occurences[operator_index] += 1
  65. logging.info("---- Obtaining %s" % (_solution))
  66. return newSolution