Parcourir la source

Fix use of Surrogate checkpoint

Jérôme BUISINE il y a 3 ans
Parent
commit
9448323ab9

+ 16 - 14
find_best_attributes_surrogate_dl.py

@@ -79,7 +79,7 @@ def build_input(df):
 def validator(solution):
 def validator(solution):
 
 
     # at least 5 attributes
     # at least 5 attributes
-    if list(solution.data).count(1) < 5:
+    if list(solution._data).count(1) < 5:
         return False
         return False
 
 
     return True
     return True
@@ -204,7 +204,7 @@ def main():
         # get indices of filters data to use (filters selection from solution)
         # get indices of filters data to use (filters selection from solution)
         indices = []
         indices = []
 
 
-        for index, value in enumerate(solution.data): 
+        for index, value in enumerate(solution._data): 
             if value == 1: 
             if value == 1: 
                 indices.append(index) 
                 indices.append(index) 
 
 
@@ -257,6 +257,7 @@ def main():
 
 
     backup_file_path = os.path.join(backup_model_folder, p_output + '.csv')
     backup_file_path = os.path.join(backup_model_folder, p_output + '.csv')
     ucb_backup_file_path = os.path.join(backup_model_folder, p_output + '_ucbPolicy.csv')
     ucb_backup_file_path = os.path.join(backup_model_folder, p_output + '_ucbPolicy.csv')
+    surrogate_backup_file_path = os.path.join(cfg.output_surrogates_data_folder, p_output + '_train.csv')
 
 
     # prepare optimization algorithm (only use of mutation as only ILS are used here, and local search need only local permutation)
     # prepare optimization algorithm (only use of mutation as only ILS are used here, and local search need only local permutation)
     operators = [SimpleBinaryMutation(), SimpleMutation()]
     operators = [SimpleBinaryMutation(), SimpleMutation()]
@@ -273,19 +274,20 @@ def main():
             f.write('x;y\n')
             f.write('x;y\n')
 
 
     # custom ILS for surrogate use
     # custom ILS for surrogate use
-    algo = ILSSurrogate(_initalizer=init, 
-                        _evaluator=evaluate, # same evaluator by defadefaultult, as we will use the surrogate function
-                        _operators=operators, 
-                        _policy=policy, 
-                        _validator=validator,
-                        _surrogate_file_path=surrogate_output_model,
-                        _start_train_surrogate=p_start, # start learning and using surrogate after 1000 real evaluation
-                        _solutions_file=surrogate_output_data,
-                        _ls_train_surrogate=p_every_ls,
-                        _maximise=True)
+    algo = ILSSurrogate(initalizer=init, 
+                        evaluator=evaluate, # same evaluator by default, as we will use the surrogate function
+                        operators=operators, 
+                        policy=policy, 
+                        validator=validator,
+                        surrogate_file_path=surrogate_output_model,
+                        start_train_surrogate=p_start, # start learning and using surrogate after 1000 real evaluation
+                        solutions_file=surrogate_output_data,
+                        ls_train_surrogate=p_every_ls,
+                        maximise=True)
     
     
-    algo.addCallback(BasicCheckpoint(_every=1, _filepath=backup_file_path))
-    algo.addCallback(UCBCheckpoint(_every=1, _filepath=ucb_backup_file_path))
+    algo.addCallback(BasicCheckpoint(every=1, filepath=backup_file_path))
+    algo.addCallback(UCBCheckpoint(every=1, filepath=ucb_backup_file_path))
+    algo.addCallback(SurrogateCheckpoint(every=p_ls_iteration, filepath=surrogate_backup_file_path)) # try every LS like this
 
 
     bestSol = algo.run(p_ils_iteration, p_ls_iteration)
     bestSol = algo.run(p_ils_iteration, p_ls_iteration)
 
 

+ 26 - 22
find_best_attributes_surrogate_openML.py

@@ -14,10 +14,6 @@ from sklearn.model_selection import GridSearchCV
 from sklearn.linear_model import LogisticRegression
 from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
 
 
-from keras.layers import Dense, Dropout, LSTM, Embedding, GRU, BatchNormalization
-from keras.preprocessing.sequence import pad_sequences
-from keras.models import Sequential
-
 import joblib
 import joblib
 import sklearn
 import sklearn
 import sklearn.svm as svm
 import sklearn.svm as svm
@@ -44,6 +40,7 @@ from macop.operators.policies.UCBPolicy import UCBPolicy
 
 
 from macop.callbacks.BasicCheckpoint import BasicCheckpoint
 from macop.callbacks.BasicCheckpoint import BasicCheckpoint
 from macop.callbacks.UCBCheckpoint import UCBCheckpoint
 from macop.callbacks.UCBCheckpoint import UCBCheckpoint
+from optimization.callbacks.SurrogateCheckpoint import SurrogateCheckpoint
 
 
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.ensemble import RandomForestClassifier
 
 
@@ -52,14 +49,14 @@ from sklearn.ensemble import RandomForestClassifier
 def validator(solution):
 def validator(solution):
 
 
     # at least 5 attributes
     # at least 5 attributes
-    if list(solution.data).count(1) < 5:
+    if list(solution._data).count(1) < 2:
         return False
         return False
 
 
     return True
     return True
 
 
 def train_model(X_train, y_train):
 def train_model(X_train, y_train):
 
 
-    print ('Creating model...')
+    #print ('Creating model...')
     # here use of SVM with grid search CV
     # here use of SVM with grid search CV
     Cs = [0.001, 0.01, 0.1, 1, 10, 100]
     Cs = [0.001, 0.01, 0.1, 1, 10, 100]
     gammas = [0.001, 0.01, 0.1,10, 100]
     gammas = [0.001, 0.01, 0.1,10, 100]
@@ -67,7 +64,7 @@ def train_model(X_train, y_train):
 
 
     svc = svm.SVC(probability=True, class_weight='balanced')
     svc = svm.SVC(probability=True, class_weight='balanced')
     #clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
     #clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
-    clf = GridSearchCV(svc, param_grid, cv=4, verbose=1, n_jobs=-1)
+    clf = GridSearchCV(svc, param_grid, cv=4, verbose=0, n_jobs=-1)
 
 
     clf.fit(X_train, y_train)
     clf.fit(X_train, y_train)
 
 
@@ -119,7 +116,7 @@ def main():
     args = parser.parse_args()
     args = parser.parse_args()
 
 
     p_data_file = args.data
     p_data_file = args.data
-    p_every_ls     = args.every_ls
+    p_every_ls   = args.every_ls
     p_ils_iteration = args.ils
     p_ils_iteration = args.ils
     p_ls_iteration  = args.ls
     p_ls_iteration  = args.ls
     p_output = args.output
     p_output = args.output
@@ -145,11 +142,11 @@ def main():
         # get indices of filters data to use (filters selection from solution)
         # get indices of filters data to use (filters selection from solution)
         indices = []
         indices = []
 
 
-        for index, value in enumerate(solution.data): 
+        for index, value in enumerate(solution._data): 
             if value == 1: 
             if value == 1: 
                 indices.append(index) 
                 indices.append(index) 
 
 
-        print(f'Training SVM with {len(indices)} from {len(solution.data)} available features')
+        print(f'Training SVM with {len(indices)} from {len(solution._data)} available features')
 
 
         # keep only selected filters from solution
         # keep only selected filters from solution
         x_train_filters = X_train[:, indices]
         x_train_filters = X_train[:, indices]
@@ -187,6 +184,7 @@ def main():
 
 
     backup_file_path = os.path.join(backup_model_folder, p_output + '.csv')
     backup_file_path = os.path.join(backup_model_folder, p_output + '.csv')
     ucb_backup_file_path = os.path.join(backup_model_folder, p_output + '_ucbPolicy.csv')
     ucb_backup_file_path = os.path.join(backup_model_folder, p_output + '_ucbPolicy.csv')
+    surrogate_backup_file_path = os.path.join(cfg.output_surrogates_data_folder, p_output + '_train.csv')
 
 
     # prepare optimization algorithm (only use of mutation as only ILS are used here, and local search need only local permutation)
     # prepare optimization algorithm (only use of mutation as only ILS are used here, and local search need only local permutation)
     operators = [SimpleBinaryMutation(), SimpleMutation()]
     operators = [SimpleBinaryMutation(), SimpleMutation()]
@@ -205,22 +203,28 @@ def main():
 
 
     # custom start surrogate variable based on problem size
     # custom start surrogate variable based on problem size
     p_start = int(0.5 * problem_size)
     p_start = int(0.5 * problem_size)
+
+    # fixed limit
+    if p_start < 50:
+        p_start = 50
+
     print(f'Starting using surrogate after {p_start} reals training')
     print(f'Starting using surrogate after {p_start} reals training')
 
 
     # custom ILS for surrogate use
     # custom ILS for surrogate use
-    algo = ILSSurrogate(_initalizer=init, 
-                        _evaluator=evaluate, # same evaluator by defadefaultult, as we will use the surrogate function
-                        _operators=operators, 
-                        _policy=policy, 
-                        _validator=validator,
-                        _surrogate_file_path=surrogate_output_model,
-                        _start_train_surrogate=p_start, # start learning and using surrogate after 1000 real evaluation
-                        _solutions_file=surrogate_output_data,
-                        _ls_train_surrogate=p_every_ls, # retrain surrogate every 5 iteration
-                        _maximise=True)
+    algo = ILSSurrogate(initalizer=init, 
+                        evaluator=evaluate, # same evaluator by default, as we will use the surrogate function
+                        operators=operators, 
+                        policy=policy, 
+                        validator=validator,
+                        surrogate_file_path=surrogate_output_model,
+                        start_train_surrogate=p_start, # start learning and using surrogate after 1000 real evaluation
+                        solutions_file=surrogate_output_data,
+                        ls_train_surrogate=p_every_ls, # retrain surrogate every 5 iteration
+                        maximise=True)
     
     
-    algo.addCallback(BasicCheckpoint(_every=1, _filepath=backup_file_path))
-    algo.addCallback(UCBCheckpoint(_every=1, _filepath=ucb_backup_file_path))
+    algo.addCallback(BasicCheckpoint(every=1, filepath=backup_file_path))
+    algo.addCallback(UCBCheckpoint(every=1, filepath=ucb_backup_file_path))
+    algo.addCallback(SurrogateCheckpoint(every=p_ls_iteration, filepath=surrogate_backup_file_path)) # try every LS like this
 
 
     bestSol = algo.run(p_ils_iteration, p_ls_iteration)
     bestSol = algo.run(p_ils_iteration, p_ls_iteration)
 
 

+ 88 - 70
optimization/ILSSurrogate.py

@@ -5,6 +5,7 @@
 import os
 import os
 import logging
 import logging
 import joblib
 import joblib
+import time
 
 
 # module imports
 # module imports
 from macop.algorithms.Algorithm import Algorithm
 from macop.algorithms.Algorithm import Algorithm
@@ -18,6 +19,18 @@ from wsao.sao.surrogates.walsh import WalshSurrogate
 from wsao.sao.algos.fitter import FitterAlgo
 from wsao.sao.algos.fitter import FitterAlgo
 from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
 from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
 
 
+
+# quick object for surrogate logging data
+class SurrogateAnalysis():
+
+    def __init__(self, time, every_ls, r2, evaluations, n_local_search):
+        self._time = time
+        self._every_ls = every_ls
+        self._r2 = r2
+        self._evaluations = evaluations
+        self._n_local_search = n_local_search
+
+
 class ILSSurrogate(Algorithm):
 class ILSSurrogate(Algorithm):
     """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using surrogate
     """Iterated Local Search used to avoid local optima and increave EvE (Exploration vs Exploitation) compromise using surrogate
 
 
@@ -40,34 +53,36 @@ class ILSSurrogate(Algorithm):
         callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
         callbacks: {[Callback]} -- list of Callback class implementation to do some instructions every number of evaluations and `load` when initializing algorithm
     """
     """
     def __init__(self,
     def __init__(self,
-                 _initalizer,
-                 _evaluator,
-                 _operators,
-                 _policy,
-                 _validator,
-                 _surrogate_file_path,
-                 _start_train_surrogate,
-                 _ls_train_surrogate,
-                 _solutions_file,
-                 _maximise=True,
-                 _parent=None):
+                 initalizer,
+                 evaluator,
+                 operators,
+                 policy,
+                 validator,
+                 surrogate_file_path,
+                 start_train_surrogate,
+                 ls_train_surrogate,
+                 solutions_file,
+                 maximise=True,
+                 parent=None):
 
 
         # set real evaluator as default
         # set real evaluator as default
-        super().__init__(_initalizer, _evaluator, _operators, _policy,
-                _validator, _maximise, _parent)
+        super().__init__(initalizer, evaluator, operators, policy,
+                validator, maximise, parent)
 
 
-        self.n_local_search = 0
+        self._n_local_search = 0
+        self._main_evaluator = evaluator
 
 
-        self.surrogate_file_path = _surrogate_file_path
-        self.start_train_surrogate = _start_train_surrogate
+        self._surrogate_file_path = surrogate_file_path
+        self._start_train_surrogate = start_train_surrogate
 
 
-        self.surrogate_evaluator = None
+        self._surrogate_evaluator = None
+        self._surrogate_analyser = None
 
 
-        self.ls_train_surrogate = _ls_train_surrogate
-        self.solutions_file = _solutions_file
+        self._ls_train_surrogate = ls_train_surrogate
+        self._solutions_file = solutions_file
 
 
     def train_surrogate(self):
     def train_surrogate(self):
-        """etrain if necessary the whole surrogate fitness approximation function
+        """Retrain if necessary the whole surrogate fitness approximation function
         """
         """
         # Following https://gitlab.com/florianlprt/wsao, we re-train the model
         # Following https://gitlab.com/florianlprt/wsao, we re-train the model
         # ---------------------------------------------------------------------------
         # ---------------------------------------------------------------------------
@@ -78,7 +93,7 @@ class ILSSurrogate(Algorithm):
         #        sample=1000,step=10 \
         #        sample=1000,step=10 \
         #        analysis=fitter,logfile=out_fit.csv
         #        analysis=fitter,logfile=out_fit.csv
 
 
-        problem = ND3DProblem(size=len(self.bestSolution.data)) # problem size based on best solution size (need to improve...)
+        problem = ND3DProblem(size=len(self._bestSolution._data)) # problem size based on best solution size (need to improve...)
         model = Lasso(alpha=1e-5)
         model = Lasso(alpha=1e-5)
         surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
         surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
         analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
         analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
@@ -86,7 +101,7 @@ class ILSSurrogate(Algorithm):
 
 
         # dynamic number of samples based on dataset real evaluations
         # dynamic number of samples based on dataset real evaluations
         nsamples = None
         nsamples = None
-        with open(self.solutions_file, 'r') as f:
+        with open(self._solutions_file, 'r') as f:
             nsamples = len(f.readlines()) - 1 # avoid header
             nsamples = len(f.readlines()) - 1 # avoid header
 
 
         training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
         training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
@@ -94,10 +109,10 @@ class ILSSurrogate(Algorithm):
         print("Start fitting again the surrogate model")
         print("Start fitting again the surrogate model")
         print(f'Using {training_samples} of {nsamples} samples for train dataset')
         print(f'Using {training_samples} of {nsamples} samples for train dataset')
         for r in range(10):
         for r in range(10):
-            print("Iteration n°{0}: for fitting surrogate".format(r))
-            algo.run(samplefile=self.solutions_file, sample=training_samples, step=10)
+            print(f"Iteration n°{r}: for fitting surrogate")
+            algo.run(samplefile=self._solutions_file, sample=training_samples, step=10)
 
 
-        joblib.dump(algo, self.surrogate_file_path)
+        joblib.dump(algo, self._surrogate_file_path)
 
 
 
 
     def load_surrogate(self):
     def load_surrogate(self):
@@ -105,47 +120,47 @@ class ILSSurrogate(Algorithm):
         """
         """
 
 
         # need to first train surrogate if not exist
         # need to first train surrogate if not exist
-        if not os.path.exists(self.surrogate_file_path):
+        if not os.path.exists(self._surrogate_file_path):
             self.train_surrogate()
             self.train_surrogate()
 
 
-        self.surrogate = joblib.load(self.surrogate_file_path)
+        self._surrogate = joblib.load(self._surrogate_file_path)
 
 
         # update evaluator function
         # update evaluator function
-        self.surrogate_evaluator = lambda s: self.surrogate.surrogate.predict([s.data])[0]
+        self._surrogate_evaluator = lambda s: self._surrogate.surrogate.predict([s._data])[0]
 
 
     def add_to_surrogate(self, solution):
     def add_to_surrogate(self, solution):
 
 
         # save real evaluated solution into specific file for surrogate
         # save real evaluated solution into specific file for surrogate
-        with open(self.solutions_file, 'a') as f:
+        with open(self._solutions_file, 'a') as f:
 
 
             line = ""
             line = ""
 
 
-            for index, e in enumerate(solution.data):
+            for index, e in enumerate(solution._data):
 
 
                 line += str(e)
                 line += str(e)
                 
                 
-                if index < len(solution.data) - 1:
+                if index < len(solution._data) - 1:
                     line += ","
                     line += ","
 
 
             line += ";"
             line += ";"
-            line += str(solution.score)
+            line += str(solution._score)
 
 
             f.write(line + "\n")
             f.write(line + "\n")
 
 
-    def run(self, _evaluations, _ls_evaluations=100):
+    def run(self, evaluations, ls_evaluations=100):
         """
         """
         Run the iterated local search algorithm using local search (EvE compromise)
         Run the iterated local search algorithm using local search (EvE compromise)
 
 
         Args:
         Args:
-            _evaluations: {int} -- number of global evaluations for ILS
-            _ls_evaluations: {int} -- number of Local search evaluations (default: 100)
+            evaluations: {int} -- number of global evaluations for ILS
+            ls_evaluations: {int} -- number of Local search evaluations (default: 100)
 
 
         Returns:
         Returns:
             {Solution} -- best solution found
             {Solution} -- best solution found
         """
         """
 
 
         # by default use of mother method to initialize variables
         # by default use of mother method to initialize variables
-        super().run(_evaluations)
+        super().run(evaluations)
 
 
         # initialize current solution
         # initialize current solution
         self.initRun()
         self.initRun()
@@ -155,23 +170,23 @@ class ILSSurrogate(Algorithm):
 
 
         # count number of surrogate obtained and restart using real evaluations done
         # count number of surrogate obtained and restart using real evaluations done
         nsamples = None
         nsamples = None
-        with open(self.solutions_file, 'r') as f:
+        with open(self._solutions_file, 'r') as f:
             nsamples = len(f.readlines()) - 1 # avoid header
             nsamples = len(f.readlines()) - 1 # avoid header
 
 
         if self.getGlobalEvaluation() < nsamples:
         if self.getGlobalEvaluation() < nsamples:
-            print(f'Restart using {nsamples} of {self.start_train_surrogate} real evaluations obtained')
-            self.numberOfEvaluations = nsamples
+            print(f'Restart using {nsamples} of {self._start_train_surrogate} real evaluations obtained')
+            self._numberOfEvaluations = nsamples
 
 
-        if self.start_train_surrogate > self.getGlobalEvaluation():
+        if self._start_train_surrogate > self.getGlobalEvaluation():
         
         
             # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
             # get `self.start_train_surrogate` number of real evaluations and save it into surrogate dataset file
             # using randomly generated solutions (in order to cover search space)
             # using randomly generated solutions (in order to cover search space)
-            while self.start_train_surrogate > self.getGlobalEvaluation():
+            while self._start_train_surrogate > self.getGlobalEvaluation():
                 
                 
-                newSolution = self.initializer()
+                newSolution = self._initializer()
 
 
                 # evaluate new solution
                 # evaluate new solution
-                newSolution.evaluate(self.evaluator)
+                newSolution.evaluate(self._evaluator)
 
 
                 # add it to surrogate pool
                 # add it to surrogate pool
                 self.add_to_surrogate(newSolution)
                 self.add_to_surrogate(newSolution)
@@ -184,50 +199,50 @@ class ILSSurrogate(Algorithm):
 
 
         # local search algorithm implementation
         # local search algorithm implementation
         while not self.stop():
         while not self.stop():
-            
+
             # set current evaluator based on used or not of surrogate function
             # set current evaluator based on used or not of surrogate function
-            current_evaluator = self.surrogate_evaluator if self.start_train_surrogate <= self.getGlobalEvaluation() else self.evaluator
+            self._evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
 
 
             # create new local search instance
             # create new local search instance
             # passing global evaluation param from ILS
             # passing global evaluation param from ILS
-            ls = LocalSearchSurrogate(self.initializer,
-                         current_evaluator,
-                         self.operators,
-                         self.policy,
-                         self.validator,
-                         self.maximise,
-                         _parent=self)
+            ls = LocalSearchSurrogate(self._initializer,
+                         self._evaluator,
+                         self._operators,
+                         self._policy,
+                         self._validator,
+                         self._maximise,
+                         parent=self)
 
 
             # add same callbacks
             # add same callbacks
-            for callback in self.callbacks:
+            for callback in self._callbacks:
                 ls.addCallback(callback)
                 ls.addCallback(callback)
 
 
             # create and search solution from local search
             # create and search solution from local search
-            newSolution = ls.run(_ls_evaluations)
+            newSolution = ls.run(ls_evaluations)
 
 
             # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
             # if better solution than currently, replace it (solution saved in training pool, only if surrogate process is in a second process step)
             # Update : always add new solution into surrogate pool, not only if solution is better
             # Update : always add new solution into surrogate pool, not only if solution is better
             #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
             #if self.isBetter(newSolution) and self.start_train_surrogate < self.getGlobalEvaluation():
-            if self.start_train_surrogate <= self.getGlobalEvaluation():
+            if self._start_train_surrogate <= self.getGlobalEvaluation():
 
 
                 # if better solution found from local search, retrained the found solution and test again
                 # if better solution found from local search, retrained the found solution and test again
                 # without use of surrogate
                 # without use of surrogate
-                fitness_score = self.evaluator(newSolution)
+                fitness_score = self._main_evaluator(newSolution)
                 # self.increaseEvaluation() # do not add evaluation
                 # self.increaseEvaluation() # do not add evaluation
 
 
                 newSolution.score = fitness_score
                 newSolution.score = fitness_score
 
 
                 # if solution is really better after real evaluation, then we replace
                 # if solution is really better after real evaluation, then we replace
                 if self.isBetter(newSolution):
                 if self.isBetter(newSolution):
-                    self.bestSolution = newSolution
+                    self._bestSolution = newSolution
 
 
                 self.add_to_surrogate(newSolution)
                 self.add_to_surrogate(newSolution)
 
 
                 self.progress()
                 self.progress()
 
 
             # check using specific dynamic criteria based on r^2
             # check using specific dynamic criteria based on r^2
-            r_squared = self.surrogate.analysis.coefficient_of_determination(self.surrogate.surrogate)
-            training_surrogate_every = int(r_squared * self.ls_train_surrogate)
+            r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
+            training_surrogate_every = int(r_squared * self._ls_train_surrogate)
             print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
             print(f"=> R^2 of surrogate is of {r_squared}. Retraining model every {training_surrogate_every} LS")
 
 
             # avoid issue when lauching every each local search
             # avoid issue when lauching every each local search
@@ -235,36 +250,39 @@ class ILSSurrogate(Algorithm):
                 training_surrogate_every = 1
                 training_surrogate_every = 1
 
 
             # check if necessary or not to train again surrogate
             # check if necessary or not to train again surrogate
-            if self.n_local_search % training_surrogate_every == 0 and self.start_train_surrogate <= self.getGlobalEvaluation():
+            if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
 
 
                 # train again surrogate on real evaluated solutions file
                 # train again surrogate on real evaluated solutions file
+                start_training = time.time()
                 self.train_surrogate()
                 self.train_surrogate()
+                training_time = time.time() - start_training
+
+                self._surrogate_analyser = SurrogateAnalysis(training_time, training_surrogate_every, r_squared, self.getGlobalMaxEvaluation(), self._n_local_search)
 
 
                 # reload new surrogate function
                 # reload new surrogate function
                 self.load_surrogate()
                 self.load_surrogate()
 
 
             # increase number of local search done
             # increase number of local search done
-            self.n_local_search += 1
+            self._n_local_search += 1
 
 
             self.information()
             self.information()
 
 
-        logging.info("End of %s, best solution found %s" %
-                     (type(self).__name__, self.bestSolution))
+        logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
 
 
         self.end()
         self.end()
-        return self.bestSolution
+        return self._bestSolution
 
 
-    def addCallback(self, _callback):
+    def addCallback(self, callback):
         """Add new callback to algorithm specifying usefull parameters
         """Add new callback to algorithm specifying usefull parameters
 
 
         Args:
         Args:
-            _callback: {Callback} -- specific Callback instance
+            callback: {Callback} -- specific Callback instance
         """
         """
         # specify current main algorithm reference
         # specify current main algorithm reference
-        if self.parent is not None:
-            _callback.setAlgo(self.parent)
+        if self.getParent() is not None:
+            callback.setAlgo(self.getParent())
         else:
         else:
-            _callback.setAlgo(self)
+            callback.setAlgo(self)
 
 
         # set as new
         # set as new
-        self.callbacks.append(_callback)
+        self._callbacks.append(callback)

+ 10 - 12
optimization/LSSurrogate.py

@@ -43,7 +43,7 @@ class LocalSearchSurrogate(Algorithm):
         # initialize current solution
         # initialize current solution
         self.initRun()
         self.initRun()
 
 
-        solutionSize = self.currentSolution.size
+        solutionSize = self._currentSolution._size
 
 
         # local search algorithm implementation
         # local search algorithm implementation
         while not self.stop():
         while not self.stop():
@@ -51,19 +51,18 @@ class LocalSearchSurrogate(Algorithm):
             for _ in range(solutionSize):
             for _ in range(solutionSize):
 
 
                 # update current solution using policy
                 # update current solution using policy
-                newSolution = self.update(self.currentSolution)
+                newSolution = self.update(self._currentSolution)
 
 
                 # if better solution than currently, replace it
                 # if better solution than currently, replace it
                 if self.isBetter(newSolution):
                 if self.isBetter(newSolution):
-                    self.bestSolution = newSolution
+                    self._bestSolution = newSolution
 
 
                 # increase number of evaluations
                 # increase number of evaluations
                 self.increaseEvaluation()
                 self.increaseEvaluation()
 
 
                 self.progress()
                 self.progress()
 
 
-                logging.info("---- Current %s - SCORE %s" %
-                             (newSolution, newSolution.fitness()))
+                logging.info(f"---- Current {newSolution} - SCORE {newSolution.fitness()}")
 
 
                 # add to surrogate pool file if necessary (using ILS parent reference)
                 # add to surrogate pool file if necessary (using ILS parent reference)
                 # if self.parent.start_train_surrogate >= self.getGlobalEvaluation():
                 # if self.parent.start_train_surrogate >= self.getGlobalEvaluation():
@@ -74,12 +73,11 @@ class LocalSearchSurrogate(Algorithm):
                     break
                     break
 
 
             # after applying local search on currentSolution, we switch into new local area using known current bestSolution
             # after applying local search on currentSolution, we switch into new local area using known current bestSolution
-            self.currentSolution = self.bestSolution
+            self._currentSolution = self._bestSolution
 
 
-        logging.info("End of %s, best solution found %s" %
-                     (type(self).__name__, self.bestSolution))
+        logging.info(f"End of {type(self).__name__}, best solution found {self._bestSolution}")
 
 
-        return self.bestSolution
+        return self._bestSolution
 
 
     def addCallback(self, callback):
     def addCallback(self, callback):
         """Add new callback to algorithm specifying usefull parameters
         """Add new callback to algorithm specifying usefull parameters
@@ -88,10 +86,10 @@ class LocalSearchSurrogate(Algorithm):
             callback: {Callback} -- specific Callback instance
             callback: {Callback} -- specific Callback instance
         """
         """
         # specify current main algorithm reference
         # specify current main algorithm reference
-        if self.parent is not None:
-            callback.setAlgo(self.parent)
+        if self._parent is not None:
+            callback.setAlgo(self._parent)
         else:
         else:
             callback.setAlgo(self)
             callback.setAlgo(self)
 
 
         # set as new
         # set as new
-        self.callbacks.append(callback)
+        self._callbacks.append(callback)

+ 67 - 0
optimization/callbacks/SurrogateCheckpoint.py

@@ -0,0 +1,67 @@
+"""Basic Checkpoint class implementation
+"""
+
+# main imports
+import os
+import logging
+import numpy as np
+
+# module imports
+from macop.callbacks.Callback import Callback
+from macop.utils.color import macop_text, macop_line
+
+
class SurrogateCheckpoint(Callback):
    """
    SurrogateCheckpoint is used for logging training data information about surrogate

    Attributes:
        algo: {Algorithm} -- main algorithm instance reference
        every: {int} -- checkpoint frequency used (based on number of evaluations)
        filepath: {str} -- file path where checkpoints will be saved
    """
    def run(self):
        """
        Append one surrogate-analysis line to the checkpoint file every
        `self._every` evaluations.

        Each line is `evaluation;every_ls;time;r2;solution_data;fitness;`
        where `solution_data` is the space-separated binary solution.
        """
        # get current best solution
        solution = self._algo._bestSolution
        surrogate_analyser = self._algo._surrogate_analyser

        # do nothing while the surrogate analyser does not exist yet
        # (i.e. before the first surrogate training has happened)
        if surrogate_analyser is None:
            return

        currentEvaluation = self._algo.getGlobalEvaluation()

        # backup if necessary
        if currentEvaluation % self._every == 0:

            logging.info(f"Surrogate analysis checkpoint is done into {self._filepath}")

            # encode the binary solution as space-separated values
            solutionData = ' '.join(str(v) for v in solution._data)

            line = ';'.join([
                str(currentEvaluation),
                str(surrogate_analyser._every_ls),
                str(surrogate_analyser._time),
                str(surrogate_analyser._r2),
                solutionData,
                str(solution.fitness()),
            ]) + ';\n'

            # mode 'a' creates the file when missing, so no exists-check is needed
            with open(self._filepath, 'a') as f:
                f.write(line)

    def load(self):
        """
        Load nothing there, as we only log surrogate training information
        """

        logging.info("No loading to do with surrogate checkpoint")