ILSPopSurrogate.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. """Iterated Local Search Algorithm implementation using surrogate as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. import pandas as pd
  9. from sklearn.utils import shuffle
  10. # module imports
  11. from macop.algorithms.base import Algorithm
  12. from macop.evaluators.base import Evaluator
  13. from macop.operators.base import KindOperator
  14. from macop.policies.reinforcement import UCBPolicy
  15. from macop.callbacks.policies import UCBCheckpoint
  16. from .LSSurrogate import LocalSearchSurrogate
  17. from .utils.SurrogateAnalysis import SurrogateAnalysisMono
  18. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  19. LassoCV, ElasticNet)
  20. from wsao.sao.problems.nd3dproblem import ND3DProblem
  21. from wsao.sao.surrogates.walsh import WalshSurrogate
  22. from wsao.sao.algos.fitter import FitterAlgo
  23. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  24. class LSSurrogateEvaluator(Evaluator):
  25. # use of surrogate in order to evaluate solution
  26. def compute(self, solution):
  27. return self._data['surrogate'].surrogate.predict([solution.data])[0]
  28. class ILSPopSurrogate(Algorithm):
  29. """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using surrogate
  30. Attributes:
  31. initalizer: {function} -- basic function strategy to initialize solution
  32. evaluator: {function} -- basic function in order to obtained fitness (mono or multiple objectives)
  33. operators: {[Operator]} -- list of operator to use when launching algorithm
  34. policy: {Policy} -- Policy class implementation strategy to select operators
  35. validator: {function} -- basic function to check if solution is valid or not under some constraints
  36. maximise: {bool} -- specify kind of optimization problem
  37. currentSolution: {Solution} -- current solution managed for current evaluation
  38. bestSolution: {Solution} -- best solution found so far during running algorithm
  39. ls_iteration: {int} -- number of evaluation for each local search algorithm
  40. population_size: {int} -- size of the population to manage
  41. surrogate_file: {str} -- Surrogate model file to load (model trained using https://gitlab.com/florianlprt/wsao)
  42. start_train_surrogate: {int} -- number of evaluation expected before start training and use surrogate
  43. surrogate: {Surrogate} -- Surrogate model instance loaded
  44. ls_train_surrogate: {int} -- Specify if we need to retrain our surrogate model (every Local Search)
  45. solutions_file: {str} -- Path where real evaluated solutions are saved in order to train surrogate again
  46. callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
  47. """
  48. def __init__(self,
  49. initalizer,
  50. evaluator,
  51. operators,
  52. policy,
  53. validator,
  54. population_size,
  55. surrogate_file_path,
  56. start_train_surrogate,
  57. ls_train_surrogate,
  58. walsh_order,
  59. inter_policy_ls_file,
  60. solutions_file,
  61. maximise=True,
  62. parent=None):
  63. # set real evaluator as default
  64. super().__init__(initalizer, evaluator, operators, policy,
  65. validator, maximise, parent)
  66. self._n_local_search = 0
  67. self._ls_local_search = 0
  68. self._main_evaluator = evaluator
  69. self._surrogate_file_path = surrogate_file_path
  70. self._start_train_surrogate = start_train_surrogate
  71. self._surrogate_evaluator = None
  72. self._surrogate_analyser = None
  73. self._ls_train_surrogate = ls_train_surrogate
  74. self._solutions_file = solutions_file
  75. self._walsh_order = walsh_order
  76. self._inter_policy_ls_file = inter_policy_ls_file
  77. # default population values
  78. self.population_size = population_size
  79. self.population = []
  80. for _ in range(self.population_size):
  81. self.population.append(None)
  82. def train_surrogate(self):
  83. """Retrain if necessary the whole surrogate fitness approximation function
  84. """
  85. # Following https://gitlab.com/florianlprt/wsao, we re-train the model
  86. # ---------------------------------------------------------------------------
  87. # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
  88. # model=lasso,alpha=1e-5 \
  89. # surrogate=walsh,order=3 \
  90. # algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
  91. # sample=1000,step=10 \
  92. # analysis=fitter,logfile=out_fit.csv
  93. problem = ND3DProblem(size=len(self._bestSolution.data)) # problem size based on best solution size (need to improve...)
  94. model = Lasso(alpha=1e-5)
  95. surrogate = WalshSurrogate(order=self._walsh_order, size=problem.size, model=model)
  96. analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
  97. algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
  98. # data set
  99. df = pd.read_csv(self._solutions_file, sep=';')
  100. # learning set and test set based on max last 1000 samples
  101. max_samples = 1000
  102. if df.x.count() < max_samples:
  103. max_samples = df.x.count()
  104. ntraining_samples = int(max_samples * 0.80)
  105. # extract reduced dataset if necessary
  106. reduced_df = df.tail(max_samples)
  107. reduced_df = shuffle(reduced_df)
  108. # shuffle dataset
  109. learn = reduced_df.tail(ntraining_samples)
  110. test = reduced_df.drop(learn.index)
  111. print("Start fitting again the surrogate model")
  112. print(f'Using {ntraining_samples} samples of {max_samples} for train dataset')
  113. for r in range(10):
  114. print(f"Iteration n°{r}: for fitting surrogate")
  115. algo.run_samples(learn=learn, test=test, step=10)
  116. joblib.dump(algo, self._surrogate_file_path)
  117. def load_surrogate(self):
  118. """Load algorithm with surrogate model and create lambda evaluator function
  119. """
  120. # need to first train surrogate if not exist
  121. if not os.path.exists(self._surrogate_file_path):
  122. self.train_surrogate()
  123. self._surrogate = joblib.load(self._surrogate_file_path)
  124. # update evaluator function
  125. self._surrogate_evaluator = LSSurrogateEvaluator(data={'surrogate': self._surrogate})
  126. def add_to_surrogate(self, solution):
  127. # save real evaluated solution into specific file for surrogate
  128. with open(self._solutions_file, 'a') as f:
  129. line = ""
  130. for index, e in enumerate(solution._data):
  131. line += str(e)
  132. if index < len(solution._data) - 1:
  133. line += ","
  134. line += ";"
  135. line += str(solution._score)
  136. f.write(line + "\n")
  137. def initRun(self):
  138. fitness_scores = []
  139. print('Initialisation of @population')
  140. for i in range(len(self.population)):
  141. print(f' - solution [{(i+1)}] of {len(self.population)}')
  142. if self.population[i] is None:
  143. solution = self.initialiser()
  144. solution.evaluate(self.evaluator)
  145. self.population[i] = solution
  146. self.add_to_surrogate(solution)
  147. self.increaseEvaluation()
  148. fitness_scores.append(self.population[i].fitness)
  149. print('Best solution @initialisation')
  150. self._bestSolution = self.population[fitness_scores.index(max(fitness_scores))]
  151. def run(self, evaluations, ls_evaluations=100):
  152. """
  153. Run the iterated local search algorithm using local search (EvE compromise)
  154. Args:
  155. evaluations: {int} -- number of global evaluations for ILS
  156. ls_evaluations: {int} -- number of Local search evaluations (default: 100)
  157. Returns:
  158. {Solution} -- best solution found
  159. """
  160. # by default use of mother method to initialize variables
  161. super().run(evaluations)
  162. # enable resuming for ILS
  163. self.resume()
  164. # initialize current solution
  165. self.initRun()
  166. # count number of surrogate obtained and restart using real evaluations done
  167. nsamples = None
  168. with open(self._solutions_file, 'r') as f:
  169. nsamples = len(f.readlines()) - 1 # avoid header
  170. if self.getGlobalEvaluation() < nsamples:
  171. print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
  172. self._numberOfEvaluations = nsamples
  173. if self._start_train_surrogate > self.getGlobalEvaluation():
  174. # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
  175. # using randomly generated solutions (in order to cover seearch space)
  176. while self._start_train_surrogate > self.getGlobalEvaluation():
  177. newSolution = self.initialiser()
  178. # evaluate new solution
  179. newSolution.evaluate(self.evaluator)
  180. # add it to surrogate pool
  181. self.add_to_surrogate(newSolution)
  182. self.increaseEvaluation()
  183. # train surrogate on real evaluated solutions file
  184. self.train_surrogate()
  185. self.load_surrogate()
  186. # local search algorithm implementation
  187. while not self.stop():
  188. # set current evaluator based on used or not of surrogate function
  189. self.evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
  190. for i in range(len(self.population)):
  191. # pass only Mutators operators for local search
  192. selected_operators = [ op for op in self._operators if op._kind == KindOperator.MUTATOR ]
  193. ls_policy = UCBPolicy(selected_operators, C=100, exp_rate=0.1)
  194. # create new local search instance
  195. # passing global evaluation param from ILS
  196. ls = LocalSearchSurrogate(self.initialiser,
  197. self.evaluator,
  198. selected_operators,
  199. ls_policy,
  200. self.validator,
  201. self._maximise,
  202. parent=None,
  203. verbose=False)
  204. ls.addCallback(UCBCheckpoint(every=1, filepath=self._inter_policy_ls_file))
  205. # create current new solution using policy and custom algorithm init
  206. ls._currentSolution = self.policy.apply(self.population[i])
  207. ls.result = ls._currentSolution
  208. # add same callbacks
  209. #for callback in self._callbacks:
  210. # ls.addCallback(callback)
  211. # create and search solution from local search
  212. newSolution = ls.run(ls_evaluations)
  213. # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
  214. # Update : always add new solution into surrogate pool, not only if solution is better
  215. #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
  216. if self._start_train_surrogate <= self.getGlobalEvaluation():
  217. # if better solution found from local search, retrained the found solution and test again
  218. # without use of surrogate
  219. fitness_score = self._main_evaluator.compute(newSolution)
  220. # self.increaseEvaluation() # dot not add evaluation
  221. newSolution.fitness = fitness_score
  222. # if solution is really better after real evaluation, then we replace
  223. if self.isBetter(newSolution):
  224. self.result = newSolution
  225. # update population
  226. if self.population[i].fitness < newSolution.fitness:
  227. self.population[i] = newSolution
  228. self.add_to_surrogate(newSolution)
  229. self.progress()
  230. self.increaseEvaluation()
  231. print(f'=================================================================')
  232. print(f'Best solution found so far: {self.result.fitness}')
  233. # check using specific dynamic criteria based on r^2
  234. r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
  235. mae = self._surrogate.analysis.mae(self._surrogate.surrogate)
  236. training_surrogate_every = int(r_squared * self._ls_train_surrogate)
  237. print(f"=> R² of surrogate is of {r_squared}.")
  238. print(f"=> MAE of surrogate is of {mae}.")
  239. print(f'=> Retraining model every {training_surrogate_every} LS ({self._ls_local_search % training_surrogate_every} of {training_surrogate_every})')
  240. # avoid issue when lauching every each local search
  241. if training_surrogate_every <= 0:
  242. training_surrogate_every = 1
  243. # increase number of local search done
  244. self._n_local_search += 1
  245. self._ls_local_search += 1
  246. # check if necessary or not to train again surrogate
  247. if self._ls_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
  248. # train again surrogate on real evaluated solutions file
  249. start_training = time.time()
  250. self.train_surrogate()
  251. training_time = time.time() - start_training
  252. self._surrogate_analyser = SurrogateAnalysisMono(training_time, training_surrogate_every, r_squared, mae, self.getGlobalMaxEvaluation(), self._n_local_search)
  253. # reload new surrogate function
  254. self.load_surrogate()
  255. # reinit ls search
  256. self._ls_local_search = 0
  257. self.information()
  258. logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
  259. self.end()
  260. return self._bestSolution
  261. def addCallback(self, callback):
  262. """Add new callback to algorithm specifying usefull parameters
  263. Args:
  264. callback: {Callback} -- specific Callback instance
  265. """
  266. # specify current main algorithm reference
  267. if self.getParent() is not None:
  268. callback.setAlgo(self.getParent())
  269. else:
  270. callback.setAlgo(self)
  271. # set as new
  272. self._callbacks.append(callback)