Browse Source

new version of surrogate

Jérôme BUISINE 1 year ago
parent
commit
9d71b77f3f

+ 16 - 9
find_best_attributes_surrogate.py

@@ -34,7 +34,7 @@ from macop.operators.discrete.mutators import SimpleMutation
 from macop.operators.discrete.mutators import SimpleBinaryMutation
 from macop.operators.discrete.crossovers import SimpleCrossover
 from macop.operators.discrete.crossovers import RandomSplitCrossover
-from optimization.operators.SimplePopCrossover import SimplePopCrossover
+from optimization.operators.SimplePopCrossover import SimplePopCrossover, RandomPopCrossover
 
 from macop.policies.reinforcement import UCBPolicy
 
@@ -104,7 +104,7 @@ def _get_best_model(X_train, y_train):
 
     svc = svm.SVC(probability=True, class_weight='balanced')
     #clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, scoring=my_accuracy_scorer, n_jobs=-1)
-    clf = GridSearchCV(svc, param_grid, cv=5, verbose=1, n_jobs=4)
+    clf = GridSearchCV(svc, param_grid, cv=5, verbose=0, n_jobs=-1)
 
     clf.fit(X_train, y_train)
 
@@ -117,8 +117,11 @@ def main():
     parser = argparse.ArgumentParser(description="Train and find best filters to use for model")
 
     parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)', required=True)
-    parser.add_argument('--start_surrogate', type=int, help='number of evalution before starting surrogare model', default=100)
+    parser.add_argument('--start_surrogate', type=int, help='number of evalution before starting surrogare model', required=True)
+    parser.add_argument('--train_every', type=int, help='max number of evalution before retraining surrogare model', required=True)
     parser.add_argument('--length', type=int, help='max data length (need to be specify for evaluator)', required=True)
+    parser.add_argument('--pop', type=int, help='pop size', required=True)
+    parser.add_argument('--order', type=int, help='walsh order function', required=True)
     parser.add_argument('--ils', type=int, help='number of total iteration for ils algorithm', required=True)
     parser.add_argument('--ls', type=int, help='number of iteration for Local Search algorithm', required=True)
     parser.add_argument('--output', type=str, help='output surrogate model name')
@@ -127,7 +130,10 @@ def main():
 
     p_data_file = args.data
     p_length    = args.length
+    p_pop       = args.pop
+    p_order     = args.order
     p_start     = args.start_surrogate
+    p_retrain   = args.train_every
     p_ils_iteration = args.ils
     p_ls_iteration  = args.ls
     p_output = args.output
@@ -152,7 +158,6 @@ def main():
 
         # define evaluate function here (need of data information)
         def compute(self, solution):
-
             start = datetime.datetime.now()
 
             # get indices of filters data to use (filters selection from solution)
@@ -178,7 +183,7 @@ def main():
 
             diff = end - start
 
-            print("Real evaluation took: {}, score found: {}".format(divmod(diff.days * 86400 + diff.seconds, 60), test_roc_auc))
+            #print("Real evaluation took: {}, score found: {}".format(divmod(diff.days * 86400 + diff.seconds, 60), test_roc_auc))
 
             return test_roc_auc
 
@@ -201,8 +206,8 @@ def main():
     ucb_backup_file_path = os.path.join(backup_model_folder, p_output + '_ucbPolicy.csv')
 
     # prepare optimization algorithm (only use of mutation as only ILS are used here, and local search need only local permutation)
-    operators = [SimpleBinaryMutation(), SimpleMutation(), SimpleCrossover(), RandomSplitCrossover()]
-    policy = UCBPolicy(operators)
+    operators = [SimpleBinaryMutation(), SimpleMutation(), RandomPopCrossover(), SimplePopCrossover()]
+    policy = UCBPolicy(operators, C=100, exp_rate=0.1)
 
     # define first line if necessary
     if not os.path.exists(surrogate_output_data):
@@ -215,11 +220,13 @@ def main():
                         operators=operators, 
                         policy=policy, 
                         validator=validator,
-                        population_size=20,
+                        population_size=p_pop,
                         surrogate_file_path=surrogate_output_model,
                         start_train_surrogate=p_start, # start learning and using surrogate after `p_start` real evaluations
                         solutions_file=surrogate_output_data,
-                        ls_train_surrogate=5,
+                        walsh_order=p_order,
+                        inter_policy_ls_file=os.path.join(backup_model_folder, p_output + '_ls_ucbPolicy.csv'),
+                        ls_train_surrogate=p_retrain,
                         maximise=True)
     
     algo.addCallback(MultiPopCheckpoint(every=1, filepath=backup_file_path))

+ 33 - 12
optimization/ILSPopSurrogate.py

@@ -10,6 +10,10 @@ import time
 # module imports
 from macop.algorithms.base import Algorithm
 from macop.evaluators.base import Evaluator
+from macop.operators.base import KindOperator
+from macop.policies.reinforcement import UCBPolicy
+
+from macop.callbacks.policies import UCBCheckpoint
 
 from .LSSurrogate import LocalSearchSurrogate
 from .utils.SurrogateAnalysis import SurrogateAnalysisMono
@@ -23,7 +27,7 @@ from wsao.sao.algos.fitter import FitterAlgo
 from wsao.sao.utils.analysis import SamplerAnalysis, FitterAnalysis, OptimizerAnalysis
 
 
-class SurrogateEvaluator(Evaluator):
+class LSSurrogateEvaluator(Evaluator):
 
     # use of surrogate in order to evaluate solution
     def compute(self, solution):
@@ -62,6 +66,8 @@ class ILSPopSurrogate(Algorithm):
                  surrogate_file_path,
                  start_train_surrogate,
                  ls_train_surrogate,
+                 walsh_order,
+                 inter_policy_ls_file,
                  solutions_file,
                  maximise=True,
                  parent=None):
@@ -82,6 +88,9 @@ class ILSPopSurrogate(Algorithm):
         self._ls_train_surrogate = ls_train_surrogate
         self._solutions_file = solutions_file
 
+        self._walsh_order = walsh_order
+        self._inter_policy_ls_file = inter_policy_ls_file
+
         # default population values
         self.population_size = population_size
         self.population = []
@@ -103,7 +112,7 @@ class ILSPopSurrogate(Algorithm):
 
         problem = ND3DProblem(size=len(self._bestSolution.data)) # problem size based on best solution size (need to improve...)
         model = Lasso(alpha=1e-5)
-        surrogate = WalshSurrogate(order=2, size=problem.size, model=model)
+        surrogate = WalshSurrogate(order=self._walsh_order, size=problem.size, model=model)
         analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
         algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
 
@@ -134,7 +143,7 @@ class ILSPopSurrogate(Algorithm):
         self._surrogate = joblib.load(self._surrogate_file_path)
 
         # update evaluator function
-        self._surrogate_evaluator = SurrogateEvaluator(data={'surrogate': self._surrogate})
+        self._surrogate_evaluator = LSSurrogateEvaluator(data={'surrogate': self._surrogate})
 
     def add_to_surrogate(self, solution):
 
@@ -158,9 +167,10 @@ class ILSPopSurrogate(Algorithm):
     def initRun(self):
 
         fitness_scores = []
-        print('Initialisation of population')
+        print('Initialisation of @population')
         for i in range(len(self.population)):
 
+            print(f'  - solution [{(i+1)}] of {len(self.population)}')
             if self.population[i] is None:
                 solution = self.initialiser()
                 solution.evaluate(self.evaluator)
@@ -168,9 +178,11 @@ class ILSPopSurrogate(Algorithm):
                 self.population[i] = solution
                 self.add_to_surrogate(solution)
 
+            self.increaseEvaluation()
+
             fitness_scores.append(self.population[i].fitness)
 
-        print('Best solution initialisation')
+        print('Best solution @initialisation')
         self._bestSolution = self.population[fitness_scores.index(max(fitness_scores))]
 
 
@@ -228,24 +240,30 @@ class ILSPopSurrogate(Algorithm):
         while not self.stop():
 
             # set current evaluator depending on whether the surrogate function is used
-            self.local_evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
+            self.evaluator = self._surrogate_evaluator if self._start_train_surrogate <= self.getGlobalEvaluation() else self._main_evaluator
 
             for i in range(len(self.population)):
 
+                # pass only Mutators operators for local search
+                selected_operators = [ op for op in self._operators if op._kind == KindOperator.MUTATOR ]
+
+                ls_policy = UCBPolicy(selected_operators, C=100, exp_rate=0.1)
                 # create new local search instance
                 # passing global evaluation param from ILS
                 ls = LocalSearchSurrogate(self.initialiser,
-                            self.local_evaluator,
-                            self._operators,
-                            self.policy,
+                            self.evaluator,
+                            selected_operators,
+                            ls_policy,
                             self.validator,
                             self._maximise,
-                            parent=self)
+                            parent=None,
+                            verbose=False)
+
+                ls.addCallback(UCBCheckpoint(every=1, filepath=self._inter_policy_ls_file))
 
-                # create current new solution using policy
+                # create current new solution using policy and custom algorithm init
                 ls._currentSolution = self.policy.apply(self.population[i])
                 ls.result = ls._currentSolution
-                print("Inside pop => ", ls._currentSolution)
 
                 # add same callbacks
                 #for callback in self._callbacks:
@@ -270,6 +288,7 @@ class ILSPopSurrogate(Algorithm):
                     if self.isBetter(newSolution):
                         self.result = newSolution
 
+                    # update population
                     if self.population[i].fitness < newSolution.fitness:
                         self.population[i] = newSolution
 
@@ -277,6 +296,8 @@ class ILSPopSurrogate(Algorithm):
 
                     self.progress()
 
+                print(f'Best solution found so far: {self.result.fitness}')
+
                 # check using specific dynamic criteria based on r^2
                 r_squared = self._surrogate.analysis.coefficient_of_determination(self._surrogate.surrogate)
                 mae = self._surrogate.analysis.mae(self._surrogate.surrogate)

+ 7 - 3
optimization/LSSurrogate.py

@@ -42,9 +42,11 @@ class LocalSearchSurrogate(Algorithm):
 
         # initialize current solution
         # self.initRun()
-        print("Inside LS => ", self._currentSolution)
 
-        solutionSize = self._currentSolution._size
+        for callback in self._callbacks:
+            callback.load()
+
+        solutionSize = self._currentSolution.size
 
         # local search algorithm implementation
         while not self.stop():
@@ -61,7 +63,9 @@ class LocalSearchSurrogate(Algorithm):
                 # increase number of evaluations
                 self.increaseEvaluation()
 
-                self.progress()
+                # self.progress()
+                for callback in self._callbacks:
+                    callback.run()
 
                 logging.info(f"---- Current {newSolution} - SCORE {newSolution.fitness}")
 

+ 63 - 3
optimization/operators/SimplePopCrossover.py

@@ -15,30 +15,90 @@ class SimplePopCrossover(Crossover):
         """
 
         size = solution1._size
-        population = self._algo.population
 
         # copy data of solution
         firstData = solution1.data.copy()
 
+        population = self._algo.population if self._algo.population is not None else self._algo.getParent().population
+
         # copy of solution2 as output solution
         valid = False
         copy_solution = None
 
         # use of different random population solution
         ncounter = 0
+
         while not valid:
 
-            chosen_solution = population[random.randint(0, len(population))]
+            chosen_solution = population[random.randint(0, len(population) - 1)]
             
-            if chosen_solution.data != firstData or ncounter > 10:
+            if not list(chosen_solution.data) == list(firstData) or ncounter > 10:
                 valid = True
                 copy_solution = chosen_solution.clone()
 
             # add security
             ncounter += 1
 
+        # default empty solution
+        if copy_solution is None:
+            copy_solution = self._algo.initialiser()
+
+        # split index fixed at the middle of the solution
         splitIndex = int(size / 2)
 
+        if random.uniform(0, 1) > 0.5:
+            copy_solution.data[splitIndex:] = firstData[splitIndex:]
+        else:
+            copy_solution.data[:splitIndex] = firstData[:splitIndex]
+
+        return copy_solution
+
+
+class RandomPopCrossover(Crossover):
+
+    def apply(self, solution1, solution2=None):
+        """Create new solution based on a randomly chosen population solution and the solution passed as parameter
+
+        Args:
+            solution1: {:class:`~macop.solutions.base.Solution`} -- the first solution to use for generating new solution
+            solution2: {:class:`~macop.solutions.base.Solution`} -- the second solution to use for generating new solution (using population)
+
+        Returns:
+            {:class:`~macop.solutions.base.Solution`}: new generated solution
+        """
+
+        size = solution1._size
+
+        # copy data of solution
+        firstData = solution1.data.copy()
+
+        population = self._algo.population if self._algo.population is not None else self._algo.getParent().population
+
+        # copy of solution2 as output solution
+        valid = False
+        copy_solution = None
+
+        # use of different random population solution
+        ncounter = 0
+
+        while not valid:
+
+            chosen_solution = population[random.randint(0, len(population) - 1)]
+            
+            if not list(chosen_solution.data) == list(firstData) or ncounter > 10:
+                valid = True
+                copy_solution = chosen_solution.clone()
+
+            # add security
+            ncounter += 1
+
+        # default empty solution
+        if copy_solution is None:
+            copy_solution = self._algo.initialiser()
+
+        # random split index
+        splitIndex = random.randint(0, len(population) - 1)
+
         if random.uniform(0, 1) > 0.5:
             copy_solution.data[splitIndex:] = firstData[splitIndex:]
         else: