# ILSSurrogate.py
  1. """Iterated Local Search Algorithm implementation using surrogate as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. # module imports
  9. from macop.algorithms.Algorithm import Algorithm
  10. from .LSSurrogate import LocalSearchSurrogate
  11. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  12. LassoCV, ElasticNet)
  13. from wsao.sao.problems.nd3dproblem import ND3DProblem
  14. from wsao.sao.surrogates.walsh import WalshSurrogate
  15. from wsao.sao.algos.fitter import FitterAlgo
  16. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  17. # quick object for surrogate logging data
  18. class SurrogateAnalysis():
  19. def __init__(self, time, every_ls, r2, evaluations, n_local_search):
  20. self._time = time
  21. self._every_ls = every_ls
  22. self._r2 = r2
  23. self._evaluations = evaluations
  24. self._n_local_search = n_local_search
  25. class ILSSurrogate(Algorithm):
  26. """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using surrogate
  27. Attributes:
  28. initalizer: {function} -- basic function strategy to initialize solution
  29. evaluator: {function} -- basic function in order to obtained fitness (mono or multiple objectives)
  30. operators: {[Operator]} -- list of operator to use when launching algorithm
  31. policy: {Policy} -- Policy class implementation strategy to select operators
  32. validator: {function} -- basic function to check if solution is valid or not under some constraints
  33. maximise: {bool} -- specify kind of optimization problem
  34. currentSolution: {Solution} -- current solution managed for current evaluation
  35. bestSolution: {Solution} -- best solution found so far during running algorithm
  36. ls_iteration: {int} -- number of evaluation for each local search algorithm
  37. surrogate_file: {str} -- Surrogate model file to load (model trained using https://gitlab.com/florianlprt/wsao)
  38. start_train_surrogate: {int} -- number of evaluation expected before start training and use surrogate
  39. surrogate: {Surrogate} -- Surrogate model instance loaded
  40. ls_train_surrogate: {int} -- Specify if we need to retrain our surrogate model (every Local Search)
  41. solutions_file: {str} -- Path where real evaluated solutions are saved in order to train surrogate again
  42. callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
  43. """
  44. def __init__(self,
  45. initalizer,
  46. evaluator,
  47. operators,
  48. policy,
  49. validator,
  50. surrogate_file_path,
  51. start_train_surrogate,
  52. ls_train_surrogate,
  53. solutions_file,
  54. maximise=True,
  55. parent=None):
  56. # set real evaluator as default
  57. super().__init__(initalizer, evaluator, operators, policy,
  58. validator, maximise, parent)
  59. self._n_local_search = 0
  60. self._main_evaluator = evaluator
  61. self._surrogate_file_path = surrogate_file_path
  62. self._start_train_surrogate = start_train_surrogate
  63. self._surrogate_evaluator = None
  64. self._surrogate_analyser = None
  65. self._ls_train_surrogate = ls_train_surrogate
  66. self._solutions_file = solutions_file
  67. def train_surrogate(self):
  68. """Retrain if necessary the whole surrogate fitness approximation function
  69. """
  70. # Following https://gitlab.com/florianlprt/wsao, we re-train the model
  71. # ---------------------------------------------------------------------------
  72. # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
  73. # model=lasso,alpha=1e-5 \
  74. # surrogate=walsh,order=3 \
  75. # algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
  76. # sample=1000,step=10 \
  77. # analysis=fitter,logfile=out_fit.csv
  78. problem = ND3DProblem(size=len(self._bestSolution._data)) # problem size based on best solution size (need to improve...)
  79. model = Lasso(alpha=1e-5)
  80. surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
  81. analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
  82. algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
  83. # dynamic number of samples based on dataset real evaluations
  84. nsamples = None
  85. with open(self._solutions_file, 'r') as f:
  86. nsamples = len(f.readlines()) - 1 # avoid header
  87. training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
  88. print("Start fitting again the surrogate model")
  89. print(f'Using {training_samples} of {nsamples} samples for train dataset')
  90. for r in range(10):
  91. print(f"Iteration n°{r}: for fitting surrogate")
  92. algo.run(samplefile=self._solutions_file, sample=training_samples, step=10)
  93. joblib.dump(algo, self._surrogate_file_path)
  94. def load_surrogate(self):
  95. """Load algorithm with surrogate model and create lambda evaluator function
  96. """
  97. # need to first train surrogate if not exist
  98. if not os.path.exists(self._surrogate_file_path):
  99. self.train_surrogate()
  100. self._surrogate = joblib.load(self._surrogate_file_path)
  101. # update evaluator function
  102. self._surrogate_evaluator = lambda s: self._surrogate.surrogate.predict([s._data])[0]
  103. def add_to_surrogate(self, solution):
  104. # save real evaluated solution into specific file for surrogate
  105. with open(self._solutions_file, 'a') as f:
  106. line = ""
  107. for index, e in enumerate(solution._data):
  108. line += str(e)
  109. if index < len(solution._data) - 1:
  110. line += ","
  111. line += ";"
  112. line += str(solution._score)
  113. f.write(line + "\n")
  114. def run(self, evaluations, ls_evaluations=100):
  115. """
  116. Run the iterated local search algorithm using local search (EvE compromise)
  117. Args:
  118. evaluations: {int} -- number of global evaluations for ILS
  119. ls_evaluations: {int} -- number of Local search evaluations (default: 100)
  120. Returns:
  121. {Solution} -- best solution found
  122. """
  123. # by default use of mother method to initialize variables
  124. super().run(evaluations)
  125. # initialize current solution
  126. self.initRun()
  127. # enable resuming for ILS
  128. self.resume()
  129. # count number of surrogate obtained and restart using real evaluations done
  130. nsamples = None
  131. with open(self._solutions_file, 'r') as f:
  132. nsamples = len(f.readlines()) - 1 # avoid header
  133. if self.getGlobalEvaluation() < nsamples:
  134. print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
  135. self._numberOfEvaluations = nsamples
  136. if self._start_train_surrogate > self.getGlobalEvaluation():
  137. # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
  138. # using randomly generated solutions (in order to cover seearch space)
  139. while self._start_train_surrogate > self.getGlobalEvaluation():
  140. newSolution = self._initializer()
  141. # evaluate new solution
  142. newSolution.evaluate(self._evaluator)
  143. # add it to surrogate pool
  144. self.add_to_surrogate(newSolution)
  145. self.increaseEvaluation()
  146. # train surrogate on real evaluated solutions file
  147. self.train_surrogate()
  148. self.load_surrogate()
  149. # local search algorithm implementation
  150. while not self.stop():
  151. # set current evaluator based on used or not of surrogate function
  152. self._evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
  153. # create new local search instance
  154. # passing global evaluation param from ILS
  155. ls = LocalSearchSurrogate(self._initializer,
  156. self._evaluator,
  157. self._operators,
  158. self._policy,
  159. self._validator,
  160. self._maximise,
  161. parent=self)
  162. # add same callbacks
  163. for callback in self._callbacks:
  164. ls.addCallback(callback)
  165. # create and search solution from local search
  166. newSolution = ls.run(ls_evaluations)
  167. # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
  168. # Update : always add new solution into surrogate pool, not only if solution is better
  169. #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
  170. if self._start_train_surrogate <= self.getGlobalEvaluation():
  171. # if better solution found from local search, retrained the found solution and test again
  172. # without use of surrogate
  173. fitness_score = self._main_evaluator(newSolution)
  174. # self.increaseEvaluation() # dot not add evaluation
  175. newSolution.score = fitness_score
  176. # if solution is really better after real evaluation, then we replace
  177. if self.isBetter(newSolution):
  178. self._bestSolution = newSolution
  179. self.add_to_surrogate(newSolution)
  180. self.progress()
  181. # check using specific dynamic criteria based on r^2
  182. r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
  183. training_surrogate_every = int(r_squared * self._ls_train_surrogate)
  184. print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
  185. # avoid issue when lauching every each local search
  186. if training_surrogate_every <= 0:
  187. training_surrogate_every = 1
  188. # check if necessary or not to train again surrogate
  189. if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
  190. # train again surrogate on real evaluated solutions file
  191. start_training = time.time()
  192. self.train_surrogate()
  193. training_time = time.time() - start_training
  194. self._surrogate_analyser = SurrogateAnalysis(training_time, training_surrogate_every, r_squared, self.getGlobalMaxEvaluation(), self._n_local_search)
  195. # reload new surrogate function
  196. self.load_surrogate()
  197. # increase number of local search done
  198. self._n_local_search += 1
  199. self.information()
  200. logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
  201. self.end()
  202. return self._bestSolution
  203. def addCallback(self, callback):
  204. """Add new callback to algorithm specifying usefull parameters
  205. Args:
  206. callback: {Callback} -- specific Callback instance
  207. """
  208. # specify current main algorithm reference
  209. if self.getParent() is not None:
  210. callback.setAlgo(self.getParent())
  211. else:
  212. callback.setAlgo(self)
  213. # set as new
  214. self._callbacks.append(callback)