ILSMultiSurrogate.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. """Iterated Local Search Algorithm implementation using multiple-surrogate (weighted sum surrogate) as fitness approximation
  2. """
  3. # main imports
  4. import os
  5. import logging
  6. import joblib
  7. import time
  8. import math
  9. import numpy as np
  10. import pandas as pd
  11. import random
  12. # module imports
  13. from macop.algorithms.Algorithm import Algorithm
  14. from .LSSurrogate import LocalSearchSurrogate
  15. from .utils.SurrogateAnalysis import SurrogateAnalysis
  16. from sklearn.linear_model import (LinearRegression, Lasso, Lars, LassoLars,
  17. LassoCV, ElasticNet)
  18. from wsao.sao.problems.nd3dproblem import ND3DProblem
  19. from wsao.sao.surrogates.walsh import WalshSurrogate
  20. from wsao.sao.algos.fitter import FitterAlgo
  21. from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
  22. class ILSMultiSurrogate(Algorithm):
  23. """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using multiple-surrogate
  24. Attributes:
  25. initalizer: {function} -- basic function strategy to initialize solution
  26. evaluator: {function} -- basic function in order to obtained fitness (mono or multiple objectives)
  27. operators: {[Operator]} -- list of operator to use when launching algorithm
  28. policy: {Policy} -- Policy class implementation strategy to select operators
  29. validator: {function} -- basic function to check if solution is valid or not under some constraints
  30. maximise: {bool} -- specify kind of optimization problem
  31. currentSolution: {Solution} -- current solution managed for current evaluation
  32. bestSolution: {Solution} -- best solution found so far during running algorithm
  33. ls_iteration: {int} -- number of evaluation for each local search algorithm
  34. surrogates_file: {str} -- Surrogates model folder to load (models trained using https://gitlab.com/florianlprt/wsao)
  35. start_train_surrogates: {int} -- number of evaluation expected before start training and use surrogate
  36. surrogates: [{Surrogate}] -- Surrogates model instance loaded
  37. ls_train_surrogates: {int} -- Specify if we need to retrain our surrogate model (every Local Search)
  38. k_division: {int} -- number of expected division for current features problem
  39. k_dynamic: {bool} -- specify if indices are changed for each time we train a new surrogate model
  40. k_random: {bool} -- random initialization of k_indices for each surrogate features model data
  41. solutions_file: {str} -- Path where real evaluated solutions are saved in order to train surrogate again
  42. callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
  43. """
  44. def __init__(self,
  45. initalizer,
  46. evaluator,
  47. operators,
  48. policy,
  49. validator,
  50. surrogates_file_path,
  51. start_train_surrogates,
  52. ls_train_surrogates,
  53. k_division,
  54. solutions_file,
  55. k_random=True,
  56. k_dynamic=False,
  57. maximise=True,
  58. parent=None):
  59. # set real evaluator as default
  60. super().__init__(initalizer, evaluator, operators, policy,
  61. validator, maximise, parent)
  62. self._n_local_search = 0
  63. self._main_evaluator = evaluator
  64. self._surrogates_file_path = surrogates_file_path
  65. self._start_train_surrogates = start_train_surrogates
  66. self._surrogate_evaluator = None
  67. self._surrogate_analyser = None
  68. self._ls_train_surrogates = ls_train_surrogates
  69. self._solutions_file = solutions_file
  70. self._k_division = k_division
  71. self._k_dynamic = k_dynamic
  72. self._k_random = k_random
  73. self._k_indices = None
  74. self._surrogates = None
  75. def init_k_split_indices(self):
  76. """Initialize k_indices for the new training of surrogate
  77. Returns:
  78. k_indices: [description]
  79. """
  80. a = list(range(self._bestSolution._size))
  81. n_elements = int(math.ceil(self._bestSolution._size / self._k_division)) # use of ceil to avoid loss of data
  82. if self._k_random:
  83. random.shuffle(a) # random subset
  84. splitted_indices = [a[x:x+n_elements] for x in range(0, len(a), n_elements)]
  85. return splitted_indices
  86. def train_surrogates(self):
  87. """Retrain if necessary the whole surrogate fitness approximation function
  88. """
  89. # Following https://gitlab.com/florianlprt/wsao, we re-train the model
  90. # ---------------------------------------------------------------------------
  91. # cli_restart.py problem=nd3d,size=30,filename="data/statistics_extended_svdn" \
  92. # model=lasso,alpha=1e-5 \
  93. # surrogate=walsh,order=3 \
  94. # algo=fitter,algo_restarts=10,samplefile=stats_extended.csv \
  95. # sample=1000,step=10 \
  96. # analysis=fitter,logfile=out_fit.csv
  97. # TODO : pass run samples directly using train and test
  98. # TODO : use of multiprocessing commands for each surrogate
  99. # TODO : save each surrogate model into specific folder
  100. # 1. Data sets preparation (train and test)
  101. # dynamic number of samples based on dataset real evaluations
  102. nsamples = None
  103. with open(self._solutions_file, 'r') as f:
  104. nsamples = len(f.readlines()) - 1 # avoid header
  105. training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
  106. df = pd.read_csv(self._solutions_file, sep=';')
  107. # learning set and test set
  108. learn = df.sample(training_samples)
  109. test = df.drop(learn.index)
  110. print(f'Training all surrogate models using {training_samples} of {nsamples} samples for train dataset')
  111. # 2. for each sub space indices, learn new surrogate
  112. if not os.path.exists(self._surrogates_file_path):
  113. os.makedirs(self._surrogates_file_path)
  114. for i, indices in enumerate(self._k_indices):
  115. current_learn = learn.copy()
  116. current_learn.x = current_learn.x.apply(lambda x: ','.join(list(map(str, np.fromstring(x, dtype=int, sep=',')[indices]))))
  117. current_test = test.copy()
  118. current_test.x = current_test.x.apply(lambda x: ','.join(list(map(str, np.fromstring(x, dtype=int, sep=',')[indices]))))
  119. problem = ND3DProblem(size=len(indices)) # problem size based on best solution size (need to improve...)
  120. model = Lasso(alpha=1e-5)
  121. surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
  122. analysis = FitterAnalysis(logfile=f"train_surrogate_{i}.log", problem=problem)
  123. algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
  124. print(f"Start fitting again the surrogate model n°{i}")
  125. for r in range(10):
  126. print(f"Iteration n°{r}: for fitting surrogate n°{i}")
  127. algo.run_samples(learn=current_learn, test=current_test, step=10)
  128. # keep well ordered surrogate into file manager
  129. str_index = str(i)
  130. while len(str_index) < 6:
  131. str_index = "0" + str_index
  132. joblib.dump(algo, os.path.join(self._surrogates_file_path, f'surrogate_{str_index}'))
  133. def load_surrogates(self):
  134. """Load algorithm with surrogate model and create lambda evaluator function
  135. """
  136. # need to first train surrogate if not exist
  137. if not os.path.exists(self._surrogates_file_path):
  138. self.train_surrogates()
  139. self._surrogates = []
  140. surrogates_path = sorted(os.listdir(self._surrogates_file_path))
  141. for surrogate_p in surrogates_path:
  142. model_path = os.path.join(self._surrogates_file_path, surrogate_p)
  143. surrogate_model = joblib.load(model_path)
  144. self._surrogates.append(surrogate_model)
  145. def surrogate_evaluator(self, solution):
  146. """Compute mean of each surrogate model using targeted indices
  147. Args:
  148. solution: {Solution} -- current solution to evaluate using multi-surrogate evaluation
  149. Return:
  150. mean: {float} -- mean score of surrogate models
  151. """
  152. scores = []
  153. solution_data = np.array(solution._data)
  154. # for each indices set, get trained surrogate model and made prediction score
  155. for i, indices in enumerate(self._k_indices):
  156. current_data = solution_data[indices]
  157. current_score = self._surrogates[i].surrogate.predict([current_data])[0]
  158. scores.append(current_score)
  159. return sum(scores) / len(scores)
  160. def surrogates_coefficient_of_determination(self):
  161. """Compute r² for each sub surrogate model
  162. Return:
  163. r_squared: {float} -- mean score of r_squred obtained from surrogate models
  164. """
  165. r_squared_scores = []
  166. # for each indices set, get r^2 surrogate model and made prediction score
  167. for i, _ in enumerate(self._k_indices):
  168. r_squared = self._surrogates[i].analysis.coefficient_of_determination(self._surrogates[i].surrogate)
  169. r_squared_scores.append(r_squared)
  170. print(r_squared_scores)
  171. return sum(r_squared_scores) / len(r_squared_scores)
  172. def add_to_surrogate(self, solution):
  173. # save real evaluated solution into specific file for surrogate
  174. with open(self._solutions_file, 'a') as f:
  175. line = ""
  176. for index, e in enumerate(solution._data):
  177. line += str(e)
  178. if index < len(solution._data) - 1:
  179. line += ","
  180. line += ";"
  181. line += str(solution._score)
  182. f.write(line + "\n")
  183. def run(self, evaluations, ls_evaluations=100):
  184. """
  185. Run the iterated local search algorithm using local search (EvE compromise)
  186. Args:
  187. evaluations: {int} -- number of global evaluations for ILS
  188. ls_evaluations: {int} -- number of Local search evaluations (default: 100)
  189. Returns:
  190. {Solution} -- best solution found
  191. """
  192. # by default use of mother method to initialize variables
  193. super().run(evaluations)
  194. # initialize current solution
  195. self.initRun()
  196. # based on best solution found, initialize k pool indices
  197. if self._k_indices == None:
  198. self._k_indices = self.init_k_split_indices()
  199. # enable resuming for ILS
  200. self.resume()
  201. # count number of surrogate obtained and restart using real evaluations done
  202. nsamples = None
  203. with open(self._solutions_file, 'r') as f:
  204. nsamples = len(f.readlines()) - 1 # avoid header
  205. if self.getGlobalEvaluation() < nsamples:
  206. print(f'Restart using {nsamples} of {self._start_train_surrogates} real evaluations obtained')
  207. self._numberOfEvaluations = nsamples
  208. if self._start_train_surrogates > self.getGlobalEvaluation():
  209. # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
  210. # using randomly generated solutions (in order to cover seearch space)
  211. while self._start_train_surrogates > self.getGlobalEvaluation():
  212. newSolution = self._initializer()
  213. # evaluate new solution
  214. newSolution.evaluate(self._evaluator)
  215. # add it to surrogate pool
  216. self.add_to_surrogate(newSolution)
  217. self.increaseEvaluation()
  218. # train surrogate on real evaluated solutions file
  219. self.train_surrogates()
  220. self.load_surrogates()
  221. # local search algorithm implementation
  222. while not self.stop():
  223. # set current evaluator based on used or not of surrogate function
  224. self._evaluator = self.surrogate_evaluator if self._start_train_surrogates <= self.getGlobalEvaluation() else self._main_evaluator
  225. # create new local search instance
  226. # passing global evaluation param from ILS
  227. ls = LocalSearchSurrogate(self._initializer,
  228. self._evaluator,
  229. self._operators,
  230. self._policy,
  231. self._validator,
  232. self._maximise,
  233. parent=self)
  234. # add same callbacks
  235. for callback in self._callbacks:
  236. ls.addCallback(callback)
  237. # create and search solution from local search
  238. newSolution = ls.run(ls_evaluations)
  239. # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
  240. # Update : always add new solution into surrogate pool, not only if solution is better
  241. #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
  242. if self._start_train_surrogates <= self.getGlobalEvaluation():
  243. # if better solution found from local search, retrained the found solution and test again
  244. # without use of surrogate
  245. fitness_score = self._main_evaluator(newSolution)
  246. # self.increaseEvaluation() # dot not add evaluation
  247. newSolution.score = fitness_score
  248. # if solution is really better after real evaluation, then we replace
  249. if self.isBetter(newSolution):
  250. self._bestSolution = newSolution
  251. self.add_to_surrogate(newSolution)
  252. self.progress()
  253. # check using specific dynamic criteria based on r^2
  254. r_squared = self.surrogates_coefficient_of_determination()
  255. training_surrogate_every = int(r_squared * self._ls_train_surrogates)
  256. print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
  257. # avoid issue when lauching every each local search
  258. if training_surrogate_every <= 0:
  259. training_surrogate_every = 1
  260. # check if necessary or not to train again surrogate
  261. if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogates <= self.getGlobalEvaluation():
  262. # reinitialization of k_indices for the new training
  263. if self._k_dynamic:
  264. print(f"Reinitialization of k_indices using `k={self._k_division} `for the new training")
  265. self.init_k_split_indices()
  266. # train again surrogate on real evaluated solutions file
  267. start_training = time.time()
  268. self.train_surrogates()
  269. training_time = time.time() - start_training
  270. self._surrogate_analyser = SurrogateAnalysis(training_time, training_surrogate_every, r_squared, self.getGlobalMaxEvaluation(), self._n_local_search)
  271. # reload new surrogate function
  272. self.load_surrogates()
  273. # increase number of local search done
  274. self._n_local_search += 1
  275. self.information()
  276. logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
  277. self.end()
  278. return self._bestSolution
  279. def addCallback(self, callback):
  280. """Add new callback to algorithm specifying usefull parameters
  281. Args:
  282. callback: {Callback} -- specific Callback instance
  283. """
  284. # specify current main algorithm reference
  285. if self.getParent() is not None:
  286. callback.setAlgo(self.getParent())
  287. else:
  288. callback.setAlgo(self)
  289. # set as new
  290. self._callbacks.append(callback)