ILSMultiSurrogate.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. """Iterated Local Search Algorithm implementation using multiple-surrogate (weighted sum surrogate) as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. import math
  9. import numpy as np
  10. import pandas as pd
  11. # module imports
  12. from macop.algorithms.Algorithm import Algorithm
  13. from .LSSurrogate import LocalSearchSurrogate
  14. from .utils.SurrogateAnalysis import SurrogateAnalysis
  15. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  16. LassoCV, ElasticNet)
  17. from wsao.sao.problems.nd3dproblem import ND3DProblem
  18. from wsao.sao.surrogates.walsh import WalshSurrogate
  19. from wsao.sao.algos.fitter import FitterAlgo
  20. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  21. class ILSMultiSurrogate(Algorithm):
  22. """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using multiple-surrogate
  23. Attributes:
  24. initalizer: {function} -- basic function strategy to initialize solution
  25. evaluator: {function} -- basic function in order to obtained fitness (mono or multiple objectives)
  26. operators: {[Operator]} -- list of operator to use when launching algorithm
  27. policy: {Policy} -- Policy class implementation strategy to select operators
  28. validator: {function} -- basic function to check if solution is valid or not under some constraints
  29. maximise: {bool} -- specify kind of optimization problem
  30. currentSolution: {Solution} -- current solution managed for current evaluation
  31. bestSolution: {Solution} -- best solution found so far during running algorithm
  32. ls_iteration: {int} -- number of evaluation for each local search algorithm
  33. surrogates_file: {str} -- Surrogates model folder to load (models trained using https://gitlab.com/florianlprt/wsao)
  34. start_train_surrogates: {int} -- number of evaluation expected before start training and use surrogate
  35. surrogates: [{Surrogate}] -- Surrogates model instance loaded
  36. ls_train_surrogates: {int} -- Specify if we need to retrain our surrogate model (every Local Search)
  37. k_division: {int} -- number of expected division for current features problem
  38. k_dynamic: {bool} -- specify if indices are changed for each time we train a new surrogate model
  39. solutions_file: {str} -- Path where real evaluated solutions are saved in order to train surrogate again
  40. callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
  41. """
  42. def __init__(self,
  43. initalizer,
  44. evaluator,
  45. operators,
  46. policy,
  47. validator,
  48. surrogate_file_path,
  49. start_train_surrogate,
  50. ls_train_surrogate,
  51. k_division,
  52. solutions_file,
  53. k_dynamic=False,
  54. maximise=True,
  55. parent=None):
  56. # set real evaluator as default
  57. super().__init__(initalizer, evaluator, operators, policy,
  58. validator, maximise, parent)
  59. self._n_local_search = 0
  60. self._main_evaluator = evaluator
  61. self._surrogate_file_path = surrogate_file_path
  62. self._start_train_surrogate = start_train_surrogate
  63. self._surrogate_evaluator = None
  64. self._surrogate_analyser = None
  65. self._ls_train_surrogate = ls_train_surrogate
  66. self._solutions_file = solutions_file
  67. self._k_division = k_division
  68. self._k_dynamic = k_dynamic
  69. def init_k_split_indices(self):
  70. a = list(range(self._bestSolution._size))
  71. n_elements = int(math.ceil(self._bestSolution._size / self._k_division)) # use of ceil to avoid loss of data
  72. splitted_indices = [a[x:x+n_elements] for x in range(0, len(a), n_elements)]
  73. return splitted_indices
  74. def train_surrogates(self):
  75. """Retrain if necessary the whole surrogate fitness approximation function
  76. """
  77. # Following https://gitlab.com/florianlprt/wsao, we re-train the model
  78. # ---------------------------------------------------------------------------
  79. # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
  80. # model=lasso,alpha=1e-5 \
  81. # surrogate=walsh,order=3 \
  82. # algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
  83. # sample=1000,step=10 \
  84. # analysis=fitter,logfile=out_fit.csv
  85. # TODO : pass run samples directly using train and test
  86. # TODO : use of multiprocessing commands for each surrogate
  87. # TODO : save each surrogate model into specific folder
  88. # 1. Data sets preparation (train and test)
  89. # dynamic number of samples based on dataset real evaluations
  90. nsamples = None
  91. with open(self._solutions_file, 'r') as f:
  92. nsamples = len(f.readlines()) - 1 # avoid header
  93. training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
  94. df = pd.read_csv(self._solutions_file, sep=';')
  95. # learning set and test set
  96. learn = df.sample(nsamples)
  97. test = df.drop(learn.index)
  98. print(f'Training all surrogate models using {training_samples} of {nsamples} samples for train dataset')
  99. # 2. for each sub space indices, learn new surrogate
  100. if not os.path.exists(self._surrogate_file_path):
  101. os.makedirs(self._surrogate_file_path)
  102. for i, indices in enumerate(self._k_indices):
  103. current_learn = learn[learn.iloc[indices]]
  104. problem = ND3DProblem(size=len(indices)) # problem size based on best solution size (need to improve...)
  105. model = Lasso(alpha=1e-5)
  106. surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
  107. analysis = FitterAnalysis(logfile=f"train_surrogate_{i}.log", problem=problem)
  108. algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
  109. print(f"Start fitting again the surrogate model n°{i}")
  110. for r in range(10):
  111. print(f"Iteration n°{r}: for fitting surrogate n°{i}")
  112. algo.run_samples(learn=current_learn, test=test, step=10)
  113. # keep well ordered surrogate into file manager
  114. str_index = str(i)
  115. while len(str_index) < 6:
  116. str_index = "0" + str_index
  117. joblib.dump(algo, os.path.join(self._surrogate_file_path, 'surrogate_{str_indec}'))
  118. def load_surrogates(self):
  119. """Load algorithm with surrogate model and create lambda evaluator function
  120. """
  121. # need to first train surrogate if not exist
  122. if not os.path.exists(self._surrogate_file_path):
  123. self.train_surrogates()
  124. self._surrogates = []
  125. surrogates_path = sorted(os.listdir(self._surrogate_file_path))
  126. for surrogate_p in surrogates_path:
  127. model_path = os.path.join(self._surrogate_file_path, surrogate_p)
  128. surrogate_model = joblib.load(model_path)
  129. self._surrogates.append(surrogate_model)
  130. def surrogate_evaluator(self, solution):
  131. """Compute mean of each surrogate model using targeted indices
  132. Args:
  133. solution: {Solution} -- current solution to evaluate using multi-surrogate evaluation
  134. Return:
  135. mean: {float} -- mean score of surrogate models
  136. """
  137. scores = []
  138. solution_data = np.array(solution._data)
  139. # for each indices set, get trained surrogate model and made prediction score
  140. for i, indices in enumerate(self._k_indices):
  141. current_data = solution_data[indices]
  142. current_score = self._surrogates[i].surrogate.predict([current_data])[0]
  143. scores.append(current_score)
  144. return sum(scores) / len(scores)
  145. def surrogates_coefficient_of_determination(self):
  146. """Compute r² for each sub surrogate model
  147. Return:
  148. r_squared: {float} -- mean score of r_squred obtained from surrogate models
  149. """
  150. r_squared_scores = []
  151. # for each indices set, get r^2 surrogate model and made prediction score
  152. for i, _ in enumerate(self._k_indices):
  153. r_squared = self._surrogates[i].analysis.coefficient_of_determination(self._surrogates[i].surrogate)
  154. r_squared_scores.append(r_squared)
  155. return sum(r_squared_scores) / len(r_squared_scores)
  156. def add_to_surrogate(self, solution):
  157. # save real evaluated solution into specific file for surrogate
  158. with open(self._solutions_file, 'a') as f:
  159. line = ""
  160. for index, e in enumerate(solution._data):
  161. line += str(e)
  162. if index < len(solution._data) - 1:
  163. line += ","
  164. line += ";"
  165. line += str(solution._score)
  166. f.write(line + "\n")
  167. def run(self, evaluations, ls_evaluations=100):
  168. """
  169. Run the iterated local search algorithm using local search (EvE compromise)
  170. Args:
  171. evaluations: {int} -- number of global evaluations for ILS
  172. ls_evaluations: {int} -- number of Local search evaluations (default: 100)
  173. Returns:
  174. {Solution} -- best solution found
  175. """
  176. # by default use of mother method to initialize variables
  177. super().run(evaluations)
  178. # initialize current solution
  179. self.initRun()
  180. # based on best solution found, initialize k pool indices
  181. self._k_indices = self.init_k_split_indices()
  182. # enable resuming for ILS
  183. self.resume()
  184. # count number of surrogate obtained and restart using real evaluations done
  185. nsamples = None
  186. with open(self._solutions_file, 'r') as f:
  187. nsamples = len(f.readlines()) - 1 # avoid header
  188. if self.getGlobalEvaluation() < nsamples:
  189. print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
  190. self._numberOfEvaluations = nsamples
  191. if self._start_train_surrogate > self.getGlobalEvaluation():
  192. # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
  193. # using randomly generated solutions (in order to cover seearch space)
  194. while self._start_train_surrogate > self.getGlobalEvaluation():
  195. newSolution = self._initializer()
  196. # evaluate new solution
  197. newSolution.evaluate(self._evaluator)
  198. # add it to surrogate pool
  199. self.add_to_surrogate(newSolution)
  200. self.increaseEvaluation()
  201. # train surrogate on real evaluated solutions file
  202. self.train_surrogates()
  203. self.load_surrogates()
  204. # local search algorithm implementation
  205. while not self.stop():
  206. # set current evaluator based on used or not of surrogate function
  207. self._evaluator = self.surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
  208. # create new local search instance
  209. # passing global evaluation param from ILS
  210. ls = LocalSearchSurrogate(self._initializer,
  211. self._evaluator,
  212. self._operators,
  213. self._policy,
  214. self._validator,
  215. self._maximise,
  216. parent=self)
  217. # add same callbacks
  218. for callback in self._callbacks:
  219. ls.addCallback(callback)
  220. # create and search solution from local search
  221. newSolution = ls.run(ls_evaluations)
  222. # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
  223. # Update : always add new solution into surrogate pool, not only if solution is better
  224. #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
  225. if self._start_train_surrogate <= self.getGlobalEvaluation():
  226. # if better solution found from local search, retrained the found solution and test again
  227. # without use of surrogate
  228. fitness_score = self._main_evaluator(newSolution)
  229. # self.increaseEvaluation() # dot not add evaluation
  230. newSolution.score = fitness_score
  231. # if solution is really better after real evaluation, then we replace
  232. if self.isBetter(newSolution):
  233. self._bestSolution = newSolution
  234. self.add_to_surrogate(newSolution)
  235. self.progress()
  236. # check using specific dynamic criteria based on r^2
  237. r_squared = self.surrogates_coefficient_of_determination()
  238. training_surrogate_every = int(r_squared * self._ls_train_surrogate)
  239. print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
  240. # avoid issue when lauching every each local search
  241. if training_surrogate_every <= 0:
  242. training_surrogate_every = 1
  243. # check if necessary or not to train again surrogate
  244. if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
  245. # reinitialization of k_indices for the new training
  246. if self._k_dynamic:
  247. print(f"Reinitialization of k_indices using `k={self._k_division} `for the new training")
  248. self.init_k_split_indices()
  249. # train again surrogate on real evaluated solutions file
  250. start_training = time.time()
  251. self.train_surrogates()
  252. training_time = time.time() - start_training
  253. self._surrogate_analyser = SurrogateAnalysis(training_time, training_surrogate_every, r_squared, self.getGlobalMaxEvaluation(), self._n_local_search)
  254. # reload new surrogate function
  255. self.load_surrogates()
  256. # increase number of local search done
  257. self._n_local_search += 1
  258. self.information()
  259. logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
  260. self.end()
  261. return self._bestSolution
  262. def addCallback(self, callback):
  263. """Add new callback to algorithm specifying usefull parameters
  264. Args:
  265. callback: {Callback} -- specific Callback instance
  266. """
  267. # specify current main algorithm reference
  268. if self.getParent() is not None:
  269. callback.setAlgo(self.getParent())
  270. else:
  271. callback.setAlgo(self)
  272. # set as new
  273. self._callbacks.append(callback)