ILSPopSurrogate.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. """Iterated Local Search Algorithm implementation using surrogate as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. # module imports
  9. from macop.algorithms.base import Algorithm
  10. from macop.evaluators.base import Evaluator
  11. from .LSSurrogate import LocalSearchSurrogate
  12. from .utils.SurrogateAnalysis import SurrogateAnalysisMono
  13. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  14. LassoCV, ElasticNet)
  15. from wsao.sao.problems.nd3dproblem import ND3DProblem
  16. from wsao.sao.surrogates.walsh import WalshSurrogate
  17. from wsao.sao.algos.fitter import FitterAlgo
  18. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  class SurrogateEvaluator(Evaluator):
      """Evaluator that scores a solution with a surrogate model.

      The wrapped object is read from `self._data['surrogate']`; it has to expose
      a `surrogate` attribute providing a `predict` method.
      """

      def compute(self, solution):
          """Return the surrogate prediction obtained for `solution`.

          Args:
              solution: {Solution} -- solution whose `data` is given to the surrogate

          Returns:
              first element of the prediction computed on the one-sample batch `[solution.data]`
          """
          fitted_algo = self._data['surrogate']
          # `predict` is fed a batch, hence the single-element list
          predictions = fitted_algo.surrogate.predict([solution.data])
          return predictions[0]
  class ILSPopSurrogate(Algorithm):
      """Population based Iterated Local Search using a surrogate as fitness approximation.

      A population of solutions is kept. At each iteration every individual is
      perturbed through the operator policy and refined by a
      `LocalSearchSurrogate`. Once `start_train_surrogate` real evaluations are
      available the local searches score candidates with the surrogate, and each
      local search result is re-scored with the real evaluator before being
      compared with the best solution and with the individual it was derived from.

      Attributes:
          population_size: {int} -- number of solutions kept in the population
          population: {[Solution]} -- current population (entries are `None` until `initRun` fills them)
          _main_evaluator: {Evaluator} -- real (non surrogate) evaluator
          _surrogate_file_path: {str} -- path where the fitted surrogate is dumped/loaded with joblib (model trained using https://gitlab.com/florianlprt/wsao)
          _start_train_surrogate: {int} -- number of real evaluations expected before training and using the surrogate
          _surrogate_evaluator: {SurrogateEvaluator} -- evaluator wrapping the loaded surrogate (`None` until `load_surrogate` is called)
          _surrogate_analyser: {SurrogateAnalysisMono} -- statistics about the last retraining (`None` until one happened)
          _ls_train_surrogate: {int} -- base number of local searches between two retrainings (scaled by the surrogate R^2)
          _solutions_file: {str} -- path where real evaluated solutions are saved in order to train the surrogate again
          _n_local_search: {int} -- number of local searches done so far
      """

      def __init__(self,
                   initalizer,
                   evaluator,
                   operators,
                   policy,
                   validator,
                   population_size,
                   surrogate_file_path,
                   start_train_surrogate,
                   ls_train_surrogate,
                   solutions_file,
                   maximise=True,
                   parent=None):
          """Store the surrogate settings and create an empty population.

          Args:
              initalizer: {function} -- strategy used to create a new solution
              evaluator: {Evaluator} -- real evaluator (kept as `_main_evaluator`)
              operators: {[Operator]} -- operators given to the base algorithm
              policy: {Policy} -- strategy used to select and apply operators
              validator: {function} -- checks whether a solution is valid
              population_size: {int} -- number of solutions of the population
              surrogate_file_path: {str} -- joblib file of the surrogate model
              start_train_surrogate: {int} -- real evaluations required before using the surrogate
              ls_train_surrogate: {int} -- base number of local searches between two retrainings
              solutions_file: {str} -- file storing the real evaluated solutions
              maximise: {bool} -- kind of optimisation problem (default: True)
              parent: {Algorithm} -- parent algorithm, if any (default: None)
          """
          # set real evaluator as default
          super().__init__(initalizer, evaluator, operators, policy,
                           validator, maximise, parent)

          self._n_local_search = 0
          self._main_evaluator = evaluator

          self._surrogate_file_path = surrogate_file_path
          self._start_train_surrogate = start_train_surrogate

          self._surrogate_evaluator = None
          self._surrogate_analyser = None

          self._ls_train_surrogate = ls_train_surrogate
          self._solutions_file = solutions_file

          # default population values: one empty slot per individual
          self.population_size = population_size
          self.population = [None] * population_size

      def _count_real_evaluations(self):
          """Return the number of lines of the solutions file, minus one.

          One line is subtracted because the file is expected to start with a
          header line.
          """
          with open(self._solutions_file, 'r') as f:
              return sum(1 for _ in f) - 1  # avoid header

      def _outperforms(self, candidate, reference):
          """Tell whether `candidate` has a strictly better fitness than `reference`.

          "Better" means greater when `self._maximise` is true, lower otherwise.
          """
          if self._maximise:
              return candidate.fitness > reference.fitness
          return candidate.fitness < reference.fitness

      def train_surrogate(self):
          """Fit the whole surrogate again and dump it to `_surrogate_file_path`.

          The model is fitted on 70% of the samples found in `_solutions_file`.
          """
          # Following https://gitlab.com/florianlprt/wsao, we re-train the model
          # ---------------------------------------------------------------------------
          # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
          #        model=lasso,alpha=1e-5 \
          #        surrogate=walsh,order=3 \
          #        algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
          #        sample=1000,step=10 \
          #        analysis=fitter,logfile=out_fit.csv

          # problem size based on best solution size (need to improve...)
          problem = ND3DProblem(size=len(self._bestSolution.data))
          model = Lasso(alpha=1e-5)
          surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
          analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
          algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)

          # dynamic number of samples based on dataset real evaluations
          nsamples = self._count_real_evaluations()

          # 70% used for learning part at each iteration
          training_samples = int(0.7 * nsamples)

          print("Start fitting again the surrogate model")
          print(f'Using {training_samples} of {nsamples} samples for train dataset')

          for r in range(10):
              print(f"Iteration n°{r}: for fitting surrogate")
              algo.run(samplefile=self._solutions_file, sample=training_samples, step=10)

          joblib.dump(algo, self._surrogate_file_path)

      def load_surrogate(self):
          """Load the dumped surrogate and build the evaluator relying on it.

          The surrogate is trained first when `_surrogate_file_path` does not exist.
          """
          # need to first train surrogate if not exist
          if not os.path.exists(self._surrogate_file_path):
              self.train_surrogate()

          # NOTE: joblib files are pickles, only load a file written by `train_surrogate`
          self._surrogate = joblib.load(self._surrogate_file_path)

          # update evaluator function
          self._surrogate_evaluator = SurrogateEvaluator(data={'surrogate': self._surrogate})

      def add_to_surrogate(self, solution):
          """Append a real evaluated solution to `_solutions_file`.

          The written line is made of the comma separated solution data, a `;`
          and the solution score.

          Args:
              solution: {Solution} -- evaluated solution to save
          """
          data_part = ",".join(str(e) for e in solution._data)

          with open(self._solutions_file, 'a') as f:
              f.write(f"{data_part};{solution._score}\n")

      def initRun(self):
          """Fill the empty population slots and select the initial best solution.

          Every created solution is scored with `self.evaluator` and saved for
          the surrogate training.
          """
          print('Initialisation of population')
          for i, individual in enumerate(self.population):
              if individual is None:
                  solution = self.initialiser()
                  solution.evaluate(self.evaluator)

                  self.population[i] = solution
                  self.add_to_surrogate(solution)

          print('Best solution initialisation')
          # the best individual depends on the kind of problem (first one on ties)
          pick = max if self._maximise else min
          self._bestSolution = pick(self.population, key=lambda s: s.fitness)

      def run(self, evaluations, ls_evaluations=100):
          """
          Run the iterated local search algorithm using local search (EvE compromise)

          Args:
              evaluations: {int} -- number of global evaluations for ILS
              ls_evaluations: {int} -- number of Local search evaluations (default: 100)

          Returns:
              {Solution} -- best solution found
          """
          # by default use of mother method to initialize variables
          super().run(evaluations)

          # enable resuming for ILS
          self.resume()

          # initialize population and best solution
          self.initRun()

          # count number of real evaluations already saved and restart from them
          nsamples = self._count_real_evaluations()

          if self.getGlobalEvaluation() < nsamples:
              print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
              self._numberOfEvaluations = nsamples

          # get `_start_train_surrogate` number of real evaluations and save them into the surrogate
          # dataset file using randomly generated solutions (in order to cover search space)
          while self._start_train_surrogate > self.getGlobalEvaluation():
              newSolution = self.initialiser()

              # evaluate new solution
              newSolution.evaluate(self.evaluator)

              # add it to surrogate pool
              self.add_to_surrogate(newSolution)

              self.increaseEvaluation()

          # train surrogate on real evaluated solutions file
          self.train_surrogate()
          self.load_surrogate()

          # NOTE(review): the nesting below was rebuilt from a listing which lost its
          # indentation; confirm that the retraining check belongs to the per-individual
          # loop and that `progress()` belongs to the real-evaluation branch.
          while not self.stop():

              # set current evaluator based on used or not of surrogate function
              if self._start_train_surrogate <= self.getGlobalEvaluation():
                  self.local_evaluator = self._surrogate_evaluator
              else:
                  self.local_evaluator = self._main_evaluator

              for i, individual in enumerate(self.population):

                  # create new local search instance
                  # passing global evaluation param from ILS
                  ls = LocalSearchSurrogate(self.initialiser,
                                            self.local_evaluator,
                                            self._operators,
                                            self.policy,
                                            self.validator,
                                            self._maximise,
                                            parent=self)

                  # create current new solution using policy
                  ls._currentSolution = self.policy.apply(individual)
                  ls.result = ls._currentSolution
                  print("Inside pop => ", ls._currentSolution)

                  # callbacks of this algorithm are not forwarded to the local search

                  # create and search solution from local search
                  newSolution = ls.run(ls_evaluations)

                  # always add the new solution into the surrogate pool (not only when
                  # it is better), as soon as the surrogate step is reached
                  if self._start_train_surrogate <= self.getGlobalEvaluation():

                      # evaluate again the found solution without use of surrogate
                      # (this real evaluation is not counted)
                      newSolution.fitness = self._main_evaluator.compute(newSolution)

                      # if solution is really better after real evaluation, then we replace
                      if self.isBetter(newSolution):
                          self.result = newSolution

                      # same rule for the individual the local search started from
                      if self._outperforms(newSolution, individual):
                          self.population[i] = newSolution

                      self.add_to_surrogate(newSolution)

                      self.progress()

                  # check using specific dynamic criteria based on r^2
                  r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
                  mae = self._surrogate.analysis.mae(self._surrogate.surrogate)

                  # at least 1, which avoids a modulo by zero (or a negative period) below
                  training_surrogate_every = max(1, int(r_squared * self._ls_train_surrogate))
                  print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
                  print(f"=> MAE of surrogate is of {mae}. Retraining model every {training_surrogate_every} LS")

                  # check if necessary or not to train again surrogate
                  if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():

                      # train again surrogate on real evaluated solutions file
                      start_training = time.time()
                      self.train_surrogate()
                      training_time = time.time() - start_training

                      self._surrogate_analyser = SurrogateAnalysisMono(training_time, training_surrogate_every, r_squared, mae, self.getGlobalMaxEvaluation(), self._n_local_search)

                      # reload new surrogate function
                      self.load_surrogate()

                  # increase number of local search done
                  self._n_local_search += 1

                  self.information()

          logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")

          self.end()
          return self._bestSolution

      def addCallback(self, callback):
          """Add new callback to algorithm specifying useful parameters

          Args:
              callback: {Callback} -- specific Callback instance
          """
          # specify current main algorithm reference: the parent one when it exists
          parent = self.getParent()
          callback.setAlgo(parent if parent is not None else self)

          # set as new
          self._callbacks.append(callback)