ILSPopSurrogate.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. """Iterated Local Search Algorithm implementation using surrogate as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. # module imports
  9. from macop.algorithms.base import Algorithm
  10. from macop.evaluators.base import Evaluator
  11. from macop.operators.base import KindOperator
  12. from macop.policies.reinforcement import UCBPolicy
  13. from macop.callbacks.policies import UCBCheckpoint
  14. from .LSSurrogate import LocalSearchSurrogate
  15. from .utils.SurrogateAnalysis import SurrogateAnalysisMono
  16. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  17. LassoCV, ElasticNet)
  18. from wsao.sao.problems.nd3dproblem import ND3DProblem
  19. from wsao.sao.surrogates.walsh import WalshSurrogate
  20. from wsao.sao.algos.fitter import FitterAlgo
  21. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  22. class LSSurrogateEvaluator(Evaluator):
  23. # use of surrogate in order to evaluate solution
  24. def compute(self, solution):
  25. return self._data['surrogate'].surrogate.predict([solution.data])[0]
  26. class ILSPopSurrogate(Algorithm):
  27. """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using surrogate
  28. Attributes:
  29. initalizer: {function} -- basic function strategy to initialize solution
  30. evaluator: {function} -- basic function in order to obtained fitness (mono or multiple objectives)
  31. operators: {[Operator]} -- list of operator to use when launching algorithm
  32. policy: {Policy} -- Policy class implementation strategy to select operators
  33. validator: {function} -- basic function to check if solution is valid or not under some constraints
  34. maximise: {bool} -- specify kind of optimization problem
  35. currentSolution: {Solution} -- current solution managed for current evaluation
  36. bestSolution: {Solution} -- best solution found so far during running algorithm
  37. ls_iteration: {int} -- number of evaluation for each local search algorithm
  38. population_size: {int} -- size of the population to manage
  39. surrogate_file: {str} -- Surrogate model file to load (model trained using https://gitlab.com/florianlprt/wsao)
  40. start_train_surrogate: {int} -- number of evaluation expected before start training and use surrogate
  41. surrogate: {Surrogate} -- Surrogate model instance loaded
  42. ls_train_surrogate: {int} -- Specify if we need to retrain our surrogate model (every Local Search)
  43. solutions_file: {str} -- Path where real evaluated solutions are saved in order to train surrogate again
  44. callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
  45. """
  46. def __init__(self,
  47. initalizer,
  48. evaluator,
  49. operators,
  50. policy,
  51. validator,
  52. population_size,
  53. surrogate_file_path,
  54. start_train_surrogate,
  55. ls_train_surrogate,
  56. walsh_order,
  57. inter_policy_ls_file,
  58. solutions_file,
  59. maximise=True,
  60. parent=None):
  61. # set real evaluator as default
  62. super().__init__(initalizer, evaluator, operators, policy,
  63. validator, maximise, parent)
  64. self._n_local_search = 0
  65. self._main_evaluator = evaluator
  66. self._surrogate_file_path = surrogate_file_path
  67. self._start_train_surrogate = start_train_surrogate
  68. self._surrogate_evaluator = None
  69. self._surrogate_analyser = None
  70. self._ls_train_surrogate = ls_train_surrogate
  71. self._solutions_file = solutions_file
  72. self._walsh_order = walsh_order
  73. self._inter_policy_ls_file = inter_policy_ls_file
  74. # default population values
  75. self.population_size = population_size
  76. self.population = []
  77. for _ in range(self.population_size):
  78. self.population.append(None)
  79. def train_surrogate(self):
  80. """Retrain if necessary the whole surrogate fitness approximation function
  81. """
  82. # Following https://gitlab.com/florianlprt/wsao, we re-train the model
  83. # ---------------------------------------------------------------------------
  84. # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
  85. # model=lasso,alpha=1e-5 \
  86. # surrogate=walsh,order=3 \
  87. # algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
  88. # sample=1000,step=10 \
  89. # analysis=fitter,logfile=out_fit.csv
  90. problem = ND3DProblem(size=len(self._bestSolution.data)) # problem size based on best solution size (need to improve...)
  91. model = Lasso(alpha=1e-5)
  92. surrogate = WalshSurrogate(order=self._walsh_order, size=problem.size, model=model)
  93. analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
  94. algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
  95. # dynamic number of samples based on dataset real evaluations
  96. nsamples = None
  97. with open(self._solutions_file, 'r') as f:
  98. nsamples = len(f.readlines()) - 1 # avoid header
  99. training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
  100. print("Start fitting again the surrogate model")
  101. print(f'Using {training_samples} of {nsamples} samples for train dataset')
  102. for r in range(10):
  103. print(f"Iteration n°{r}: for fitting surrogate")
  104. algo.run(samplefile=self._solutions_file, sample=training_samples, step=10)
  105. joblib.dump(algo, self._surrogate_file_path)
  106. def load_surrogate(self):
  107. """Load algorithm with surrogate model and create lambda evaluator function
  108. """
  109. # need to first train surrogate if not exist
  110. if not os.path.exists(self._surrogate_file_path):
  111. self.train_surrogate()
  112. self._surrogate = joblib.load(self._surrogate_file_path)
  113. # update evaluator function
  114. self._surrogate_evaluator = LSSurrogateEvaluator(data={'surrogate': self._surrogate})
  115. def add_to_surrogate(self, solution):
  116. # save real evaluated solution into specific file for surrogate
  117. with open(self._solutions_file, 'a') as f:
  118. line = ""
  119. for index, e in enumerate(solution._data):
  120. line += str(e)
  121. if index < len(solution._data) - 1:
  122. line += ","
  123. line += ";"
  124. line += str(solution._score)
  125. f.write(line + "\n")
  126. def initRun(self):
  127. fitness_scores = []
  128. print('Initialisation of @population')
  129. for i in range(len(self.population)):
  130. print(f' - solution [{(i+1)}] of {len(self.population)}')
  131. if self.population[i] is None:
  132. solution = self.initialiser()
  133. solution.evaluate(self.evaluator)
  134. self.population[i] = solution
  135. self.add_to_surrogate(solution)
  136. self.increaseEvaluation()
  137. fitness_scores.append(self.population[i].fitness)
  138. print('Best solution @initialisation')
  139. self._bestSolution = self.population[fitness_scores.index(max(fitness_scores))]
  140. def run(self, evaluations, ls_evaluations=100):
  141. """
  142. Run the iterated local search algorithm using local search (EvE compromise)
  143. Args:
  144. evaluations: {int} -- number of global evaluations for ILS
  145. ls_evaluations: {int} -- number of Local search evaluations (default: 100)
  146. Returns:
  147. {Solution} -- best solution found
  148. """
  149. # by default use of mother method to initialize variables
  150. super().run(evaluations)
  151. # enable resuming for ILS
  152. self.resume()
  153. # initialize current solution
  154. self.initRun()
  155. # count number of surrogate obtained and restart using real evaluations done
  156. nsamples = None
  157. with open(self._solutions_file, 'r') as f:
  158. nsamples = len(f.readlines()) - 1 # avoid header
  159. if self.getGlobalEvaluation() < nsamples:
  160. print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
  161. self._numberOfEvaluations = nsamples
  162. if self._start_train_surrogate > self.getGlobalEvaluation():
  163. # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
  164. # using randomly generated solutions (in order to cover seearch space)
  165. while self._start_train_surrogate > self.getGlobalEvaluation():
  166. newSolution = self.initialiser()
  167. # evaluate new solution
  168. newSolution.evaluate(self.evaluator)
  169. # add it to surrogate pool
  170. self.add_to_surrogate(newSolution)
  171. self.increaseEvaluation()
  172. # train surrogate on real evaluated solutions file
  173. self.train_surrogate()
  174. self.load_surrogate()
  175. # local search algorithm implementation
  176. while not self.stop():
  177. # set current evaluator based on used or not of surrogate function
  178. self.evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
  179. for i in range(len(self.population)):
  180. # pass only Mutators operators for local search
  181. selected_operators = [ op for op in self._operators if op._kind == KindOperator.MUTATOR ]
  182. ls_policy = UCBPolicy(selected_operators, C=100, exp_rate=0.1)
  183. # create new local search instance
  184. # passing global evaluation param from ILS
  185. ls = LocalSearchSurrogate(self.initialiser,
  186. self.evaluator,
  187. selected_operators,
  188. ls_policy,
  189. self.validator,
  190. self._maximise,
  191. parent=None,
  192. verbose=False)
  193. ls.addCallback(UCBCheckpoint(every=1, filepath=self._inter_policy_ls_file))
  194. # create current new solution using policy and custom algorithm init
  195. ls._currentSolution = self.policy.apply(self.population[i])
  196. ls.result = ls._currentSolution
  197. # add same callbacks
  198. #for callback in self._callbacks:
  199. # ls.addCallback(callback)
  200. # create and search solution from local search
  201. newSolution = ls.run(ls_evaluations)
  202. # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
  203. # Update : always add new solution into surrogate pool, not only if solution is better
  204. #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
  205. if self._start_train_surrogate <= self.getGlobalEvaluation():
  206. # if better solution found from local search, retrained the found solution and test again
  207. # without use of surrogate
  208. fitness_score = self._main_evaluator.compute(newSolution)
  209. # self.increaseEvaluation() # dot not add evaluation
  210. newSolution.fitness = fitness_score
  211. # if solution is really better after real evaluation, then we replace
  212. if self.isBetter(newSolution):
  213. self.result = newSolution
  214. # update population
  215. if self.population[i].fitness < newSolution.fitness:
  216. self.population[i] = newSolution
  217. self.add_to_surrogate(newSolution)
  218. self.progress()
  219. print(f'Best solution found so far: {self.result.fitness}')
  220. # check using specific dynamic criteria based on r^2
  221. r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
  222. mae = self._surrogate.analysis.mae(self._surrogate.surrogate)
  223. training_surrogate_every = int(r_squared * self._ls_train_surrogate)
  224. print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
  225. print(f"=> MAE of surrogate is of {mae}. Retraining model every {training_surrogate_every} LS")
  226. # avoid issue when lauching every each local search
  227. if training_surrogate_every <= 0:
  228. training_surrogate_every = 1
  229. # check if necessary or not to train again surrogate
  230. if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
  231. # train again surrogate on real evaluated solutions file
  232. start_training = time.time()
  233. self.train_surrogate()
  234. training_time = time.time() - start_training
  235. self._surrogate_analyser = SurrogateAnalysisMono(training_time, training_surrogate_every, r_squared, mae, self.getGlobalMaxEvaluation(), self._n_local_search)
  236. # reload new surrogate function
  237. self.load_surrogate()
  238. # increase number of local search done
  239. self._n_local_search += 1
  240. self.information()
  241. logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
  242. self.end()
  243. return self._bestSolution
  244. def addCallback(self, callback):
  245. """Add new callback to algorithm specifying usefull parameters
  246. Args:
  247. callback: {Callback} -- specific Callback instance
  248. """
  249. # specify current main algorithm reference
  250. if self.getParent() is not None:
  251. callback.setAlgo(self.getParent())
  252. else:
  253. callback.setAlgo(self)
  254. # set as new
  255. self._callbacks.append(callback)