Parcourir la source

Merge branch 'release/v0.3.9'

Jérôme BUISINE il y a 3 ans
Parent
commit
6ff68af8b4
2 fichiers modifiés avec 42 ajouts et 20 suppressions
  1. 31 11
      optimization/ILSPopSurrogate.py
  2. 11 9
      run_surrogate_rendering.sh

+ 31 - 11
optimization/ILSPopSurrogate.py

@@ -6,6 +6,8 @@ import os
 import logging
 import joblib
 import time
+import pandas as pd
+from sklearn.utils import shuffle
 
 # module imports
 from macop.algorithms.base import Algorithm
@@ -77,6 +79,7 @@ class ILSPopSurrogate(Algorithm):
                 validator, maximise, parent)
 
         self._n_local_search = 0
+        self._ls_local_search = 0
         self._main_evaluator = evaluator
 
         self._surrogate_file_path = surrogate_file_path
@@ -116,18 +119,30 @@ class ILSPopSurrogate(Algorithm):
         analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
         algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
 
-        # dynamic number of samples based on dataset real evaluations
-        nsamples = None
-        with open(self._solutions_file, 'r') as f:
-            nsamples = len(f.readlines()) - 1 # avoid header
+        # data set
+        df = pd.read_csv(self._solutions_file, sep=';')
+        
+        # learning set and test set based on max last 1000 samples
+        max_samples = 1000
 
-        training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
+        if df.x.count() < max_samples:
+            max_samples = df.x.count()
+
+        ntraining_samples = int(max_samples * 0.80)
         
+        # extract reduced dataset if necessary
+        reduced_df = df.tail(max_samples)
+        reduced_df = shuffle(reduced_df)
+
+        # split the shuffled dataset into learning and test sets
+        learn = reduced_df.tail(ntraining_samples)
+        test = reduced_df.drop(learn.index)
+
         print("Start fitting again the surrogate model")
-        print(f'Using {training_samples} of {nsamples} samples for train dataset')
+        print(f'Using {ntraining_samples} samples of {max_samples} for train dataset')
         for r in range(10):
             print(f"Iteration n°{r}: for fitting surrogate")
-            algo.run(samplefile=self._solutions_file, sample=training_samples, step=10)
+            algo.run_samples(learn=learn, test=test, step=10)
 
         joblib.dump(algo, self._surrogate_file_path)
 
@@ -307,13 +322,18 @@ class ILSPopSurrogate(Algorithm):
                 training_surrogate_every = int(r_squared * self._ls_train_surrogate)
                 print(f"=> R² of surrogate is of {r_squared}.")
                 print(f"=> MAE of surrogate is of {mae}.")
-                print(f'=> Retraining model every {training_surrogate_every} LS ({self._n_local_search % training_surrogate_every} of {training_surrogate_every})')
+                print(f'=> Retraining model every {training_surrogate_every} LS ({self._ls_local_search % training_surrogate_every} of {training_surrogate_every})')
                 # avoid issue when launching training at every local search (interval must be at least 1)
                 if training_surrogate_every <= 0:
                     training_surrogate_every = 1
 
+
+                # increase number of local search done
+                self._n_local_search += 1
+                self._ls_local_search += 1
+
                 # check if necessary or not to train again surrogate
-                if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
+                if self._ls_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
 
                     # train again surrogate on real evaluated solutions file
                     start_training = time.time()
@@ -325,8 +345,8 @@ class ILSPopSurrogate(Algorithm):
                     # reload new surrogate function
                     self.load_surrogate()
 
-                # increase number of local search done
-                self._n_local_search += 1
+                    # reinit ls search
+                    self._ls_local_search = 0
 
                 self.information()
 

+ 11 - 9
run_surrogate_rendering.sh

@@ -1,28 +1,30 @@
 #! /bin/bash
 
 # default param
-ILS=10000
+ILS=2000
 LS=100
 SS=50
 LENGTH=32 # number of features
 POP=100
 ORDER=2
-TRAIN_EVERY=50
+TRAIN_EVERY=10
 
 
 #output="rendering-attributes-ILS_${ILS}-POP_${POP}-LS_${LS}-SS_${SS}-SO_${ORDER}-SE_${TRAIN_EVERY}"
 DATASET="rnn/data/datasets/features-selection-rendering-scaled/features-selection-rendering-scaled"
 
-for POP in {20,60,100};
+for run in {1,2,3,4,5,6,7,8,9,10};
 do
-    for ORDER in {2,3};
+    for POP in {20,60,100};
     do
-        for LS in {1000,5000,10000};
+        for ORDER in {1,2,3};
         do
-            output="rendering-attributes-POP_${POP}-LS_${LS}-SS_${SS}-SO_${ORDER}-SE_${TRAIN_EVERY}"
-            echo "Run optim attributes using: ${output}"
-            python find_best_attributes_surrogate.py --data ${DATASET} --start_surrogate ${SS} --length 30 --ils ${ILS} --ls ${LS} --pop ${POP} --order ${ORDER} --train_every ${TRAIN_EVERY}  --output ${output}
+            for LS in {1000,5000,10000};
+            do
+                output="rendering-attributes-POP_${POP}-LS_${LS}-SS_${SS}-SO_${ORDER}-SE_${TRAIN_EVERY}-RUN_${run}"
+                echo "Run optim attributes using: ${output}"
+                python find_best_attributes_surrogate.py --data ${DATASET} --start_surrogate ${SS} --length 32 --ils ${ILS} --ls ${LS} --pop ${POP} --order ${ORDER} --train_every ${TRAIN_EVERY}  --output ${output}
+            done
         done
     done
 done
-