ILSPopSurrogate.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. """Iterated Local Search Algorithm implementation using surrogate as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. import pandas as pd
  9. from sklearn.utils import shuffle
  10. # module imports
  11. from macop.algorithms.base import Algorithm
  12. from macop.evaluators.base import Evaluator
  13. from macop.operators.base import KindOperator
  14. from macop.policies.reinforcement import UCBPolicy
  15. from macop.callbacks.policies import UCBCheckpoint
  16. from .LSSurrogate import LocalSearchSurrogate
  17. from .utils.SurrogateAnalysis import SurrogateAnalysisMono
  18. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  19. LassoCV, ElasticNet)
  20. from wsao.sao.problems.nd3dproblem import ND3DProblem
  21. from wsao.sao.surrogates.walsh import WalshSurrogate
  22. from wsao.sao.algos.fitter import FitterAlgo
  23. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  24. class LSSurrogateEvaluator(Evaluator):
  25. # use of surrogate in order to evaluate solution
  26. def compute(self, solution):
  27. return self._data['surrogate'].surrogate.predict([solution.data])[0]
  28. class ILSPopSurrogate(Algorithm):
  29. """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using surrogate
  30. Attributes:
  31. initalizer: {function} -- basic function strategy to initialize solution
  32. evaluator: {function} -- basic function in order to obtained fitness (mono or multiple objectives)
  33. operators: {[Operator]} -- list of operator to use when launching algorithm
  34. policy: {Policy} -- Policy class implementation strategy to select operators
  35. validator: {function} -- basic function to check if solution is valid or not under some constraints
  36. maximise: {bool} -- specify kind of optimization problem
  37. currentSolution: {Solution} -- current solution managed for current evaluation
  38. bestSolution: {Solution} -- best solution found so far during running algorithm
  39. ls_iteration: {int} -- number of evaluation for each local search algorithm
  40. population_size: {int} -- size of the population to manage
  41. surrogate_file: {str} -- Surrogate model file to load (model trained using https://gitlab.com/florianlprt/wsao)
  42. start_train_surrogate: {int} -- number of evaluation expected before start training and use surrogate
  43. surrogate: {Surrogate} -- Surrogate model instance loaded
  44. ls_train_surrogate: {int} -- Specify if we need to retrain our surrogate model (every Local Search)
  45. solutions_file: {str} -- Path where real evaluated solutions are saved in order to train surrogate again
  46. callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
  47. """
  48. def __init__(self,
  49. initalizer,
  50. evaluator,
  51. operators,
  52. policy,
  53. validator,
  54. population_size,
  55. surrogate_file_path,
  56. start_train_surrogate,
  57. ls_train_surrogate,
  58. walsh_order,
  59. inter_policy_ls_file,
  60. solutions_file,
  61. maximise=True,
  62. parent=None):
  63. # set real evaluator as default
  64. super().__init__(initalizer, evaluator, operators, policy,
  65. validator, maximise, parent)
  66. self._n_local_search = 0
  67. self._main_evaluator = evaluator
  68. self._surrogate_file_path = surrogate_file_path
  69. self._start_train_surrogate = start_train_surrogate
  70. self._surrogate_evaluator = None
  71. self._surrogate_analyser = None
  72. self._ls_train_surrogate = ls_train_surrogate
  73. self._solutions_file = solutions_file
  74. self._walsh_order = walsh_order
  75. self._inter_policy_ls_file = inter_policy_ls_file
  76. # default population values
  77. self.population_size = population_size
  78. self.population = []
  79. for _ in range(self.population_size):
  80. self.population.append(None)
  81. def train_surrogate(self):
  82. """Retrain if necessary the whole surrogate fitness approximation function
  83. """
  84. # Following https://gitlab.com/florianlprt/wsao, we re-train the model
  85. # ---------------------------------------------------------------------------
  86. # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
  87. # model=lasso,alpha=1e-5 \
  88. # surrogate=walsh,order=3 \
  89. # algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
  90. # sample=1000,step=10 \
  91. # analysis=fitter,logfile=out_fit.csv
  92. problem = ND3DProblem(size=len(self._bestSolution.data)) # problem size based on best solution size (need to improve...)
  93. model = Lasso(alpha=1e-5)
  94. surrogate = WalshSurrogate(order=self._walsh_order, size=problem.size, model=model)
  95. analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
  96. algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
  97. # data set
  98. df = pd.read_csv(self._solutions_file, sep=';')
  99. # learning set and test set based on max last 1000 samples
  100. max_samples = 1000
  101. if df.x.count() < max_samples:
  102. max_samples = df.x.count()
  103. ntraining_samples = max_samples * 0.80
  104. # extract reduced dataset if necessary
  105. reduced_df = df.tail(max_samples)
  106. reduced_df = shuffle(reduced_df)
  107. # shuffle dataset
  108. learn = reduced_df.iloc[0:ntraining_samples]
  109. test = reduced_df.drop[learn.index]
  110. print("Start fitting again the surrogate model")
  111. print(f'Using {ntraining_samples} samples of {max_samples} for train dataset')
  112. for r in range(10):
  113. print(f"Iteration n°{r}: for fitting surrogate")
  114. algo.run_samples(learn=learn, test=test, step=10)
  115. joblib.dump(algo, self._surrogate_file_path)
  116. def load_surrogate(self):
  117. """Load algorithm with surrogate model and create lambda evaluator function
  118. """
  119. # need to first train surrogate if not exist
  120. if not os.path.exists(self._surrogate_file_path):
  121. self.train_surrogate()
  122. self._surrogate = joblib.load(self._surrogate_file_path)
  123. # update evaluator function
  124. self._surrogate_evaluator = LSSurrogateEvaluator(data={'surrogate': self._surrogate})
  125. def add_to_surrogate(self, solution):
  126. # save real evaluated solution into specific file for surrogate
  127. with open(self._solutions_file, 'a') as f:
  128. line = ""
  129. for index, e in enumerate(solution._data):
  130. line += str(e)
  131. if index < len(solution._data) - 1:
  132. line += ","
  133. line += ";"
  134. line += str(solution._score)
  135. f.write(line + "\n")
  136. def initRun(self):
  137. fitness_scores = []
  138. print('Initialisation of @population')
  139. for i in range(len(self.population)):
  140. print(f' - solution [{(i+1)}] of {len(self.population)}')
  141. if self.population[i] is None:
  142. solution = self.initialiser()
  143. solution.evaluate(self.evaluator)
  144. self.population[i] = solution
  145. self.add_to_surrogate(solution)
  146. self.increaseEvaluation()
  147. fitness_scores.append(self.population[i].fitness)
  148. print('Best solution @initialisation')
  149. self._bestSolution = self.population[fitness_scores.index(max(fitness_scores))]
  150. def run(self, evaluations, ls_evaluations=100):
  151. """
  152. Run the iterated local search algorithm using local search (EvE compromise)
  153. Args:
  154. evaluations: {int} -- number of global evaluations for ILS
  155. ls_evaluations: {int} -- number of Local search evaluations (default: 100)
  156. Returns:
  157. {Solution} -- best solution found
  158. """
  159. # by default use of mother method to initialize variables
  160. super().run(evaluations)
  161. # enable resuming for ILS
  162. self.resume()
  163. # initialize current solution
  164. self.initRun()
  165. # count number of surrogate obtained and restart using real evaluations done
  166. nsamples = None
  167. with open(self._solutions_file, 'r') as f:
  168. nsamples = len(f.readlines()) - 1 # avoid header
  169. if self.getGlobalEvaluation() < nsamples:
  170. print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
  171. self._numberOfEvaluations = nsamples
  172. if self._start_train_surrogate > self.getGlobalEvaluation():
  173. # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
  174. # using randomly generated solutions (in order to cover seearch space)
  175. while self._start_train_surrogate > self.getGlobalEvaluation():
  176. newSolution = self.initialiser()
  177. # evaluate new solution
  178. newSolution.evaluate(self.evaluator)
  179. # add it to surrogate pool
  180. self.add_to_surrogate(newSolution)
  181. self.increaseEvaluation()
  182. # train surrogate on real evaluated solutions file
  183. self.train_surrogate()
  184. self.load_surrogate()
  185. # local search algorithm implementation
  186. while not self.stop():
  187. # set current evaluator based on used or not of surrogate function
  188. self.evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
  189. for i in range(len(self.population)):
  190. # pass only Mutators operators for local search
  191. selected_operators = [ op for op in self._operators if op._kind == KindOperator.MUTATOR ]
  192. ls_policy = UCBPolicy(selected_operators, C=100, exp_rate=0.1)
  193. # create new local search instance
  194. # passing global evaluation param from ILS
  195. ls = LocalSearchSurrogate(self.initialiser,
  196. self.evaluator,
  197. selected_operators,
  198. ls_policy,
  199. self.validator,
  200. self._maximise,
  201. parent=None,
  202. verbose=False)
  203. ls.addCallback(UCBCheckpoint(every=1, filepath=self._inter_policy_ls_file))
  204. # create current new solution using policy and custom algorithm init
  205. ls._currentSolution = self.policy.apply(self.population[i])
  206. ls.result = ls._currentSolution
  207. # add same callbacks
  208. #for callback in self._callbacks:
  209. # ls.addCallback(callback)
  210. # create and search solution from local search
  211. newSolution = ls.run(ls_evaluations)
  212. # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
  213. # Update : always add new solution into surrogate pool, not only if solution is better
  214. #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
  215. if self._start_train_surrogate <= self.getGlobalEvaluation():
  216. # if better solution found from local search, retrained the found solution and test again
  217. # without use of surrogate
  218. fitness_score = self._main_evaluator.compute(newSolution)
  219. # self.increaseEvaluation() # dot not add evaluation
  220. newSolution.fitness = fitness_score
  221. # if solution is really better after real evaluation, then we replace
  222. if self.isBetter(newSolution):
  223. self.result = newSolution
  224. # update population
  225. if self.population[i].fitness < newSolution.fitness:
  226. self.population[i] = newSolution
  227. self.add_to_surrogate(newSolution)
  228. self.progress()
  229. self.increaseEvaluation()
  230. print(f'=================================================================')
  231. print(f'Best solution found so far: {self.result.fitness}')
  232. # check using specific dynamic criteria based on r^2
  233. r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
  234. mae = self._surrogate.analysis.mae(self._surrogate.surrogate)
  235. training_surrogate_every = int(r_squared * self._ls_train_surrogate)
  236. print(f"=> R² of surrogate is of {r_squared}.")
  237. print(f"=> MAE of surrogate is of {mae}.")
  238. print(f'=> Retraining model every {training_surrogate_every} LS ({self._n_local_search % training_surrogate_every} of {training_surrogate_every})')
  239. # avoid issue when lauching every each local search
  240. if training_surrogate_every <= 0:
  241. training_surrogate_every = 1
  242. # check if necessary or not to train again surrogate
  243. if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
  244. # train again surrogate on real evaluated solutions file
  245. start_training = time.time()
  246. self.train_surrogate()
  247. training_time = time.time() - start_training
  248. self._surrogate_analyser = SurrogateAnalysisMono(training_time, training_surrogate_every, r_squared, mae, self.getGlobalMaxEvaluation(), self._n_local_search)
  249. # reload new surrogate function
  250. self.load_surrogate()
  251. # increase number of local search done
  252. self._n_local_search += 1
  253. self.information()
  254. logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
  255. self.end()
  256. return self._bestSolution
  257. def addCallback(self, callback):
  258. """Add new callback to algorithm specifying usefull parameters
  259. Args:
  260. callback: {Callback} -- specific Callback instance
  261. """
  262. # specify current main algorithm reference
  263. if self.getParent() is not None:
  264. callback.setAlgo(self.getParent())
  265. else:
  266. callback.setAlgo(self)
  267. # set as new
  268. self._callbacks.append(callback)