4 年前 · 6ff68af8b4
--- a/optimization/ILSPopSurrogate.py
+++ b/optimization/ILSPopSurrogate.py
@@ -6,6 +6,8 @@ import os
 
				 import logging
			
 
				 import joblib
			
 
				 import time
			
 
				+import pandas as pd
			
 
				+from sklearn.utils import shuffle
			
 
				 
			
 
				 # module imports
			
 
				 from macop.algorithms.base import Algorithm
			
@@ -77,6 +79,7 @@ class ILSPopSurrogate(Algorithm):
 
				                 validator, maximise, parent)
			
 
				 
			
 
				         self._n_local_search = 0
			
 
				+        self._ls_local_search = 0
			
 
				         self._main_evaluator = evaluator
			
 
				 
			
 
				         self._surrogate_file_path = surrogate_file_path
			
@@ -116,18 +119,30 @@ class ILSPopSurrogate(Algorithm):
 
				         analysis = FitterAnalysis(logfile="train_surrogate.log", problem=problem)
			
 
				         algo = FitterAlgo(problem=problem, surrogate=surrogate, analysis=analysis, seed=problem.seed)
			
 
				 
			
 
				-        # dynamic number of samples based on dataset real evaluations
			
 
				-        nsamples = None
			
 
				-        with open(self._solutions_file, 'r') as f:
			
 
				-            nsamples = len(f.readlines()) - 1 # avoid header
			
 
				+        # data set
			
 
				+        df = pd.read_csv(self._solutions_file, sep=';')
			
 
				+        
			
 
				+        # learning set and test set are built from at most the last 1000 samples
			
 
				+        max_samples = 1000
			
 
				 
			
 
				-        training_samples = int(0.7 * nsamples) # 70% used for learning part at each iteration
			
 
				+        if df.x.count() < max_samples:
			
 
				+            max_samples = df.x.count()
			
 
				+
			
 
				+        ntraining_samples = int(max_samples * 0.80)
			
 
				         
			
 
				+        # extract reduced dataset (last max_samples rows) and shuffle it
			
 
				+        reduced_df = df.tail(max_samples)
			
 
				+        reduced_df = shuffle(reduced_df)
			
 
				+
			
 
				+        # split dataset into learning set and test set
			
 
				+        learn = reduced_df.tail(ntraining_samples)
			
 
				+        test = reduced_df.drop(learn.index)
			
 
				+
			
 
				         print("Start fitting again the surrogate model")
			
 
				-        print(f'Using {training_samples} of {nsamples} samples for train dataset')
			
 
				+        print(f'Using {ntraining_samples} samples of {max_samples} for train dataset')
			
 
				         for r in range(10):
			
 
				             print(f"Iteration n°{r}: for fitting surrogate")
			
 
				-            algo.run(samplefile=self._solutions_file, sample=training_samples, step=10)
			
 
				+            algo.run_samples(learn=learn, test=test, step=10)
			
 
				 
			
 
				         joblib.dump(algo, self._surrogate_file_path)
			
 
				 
			
@@ -307,13 +322,18 @@ class ILSPopSurrogate(Algorithm):
 
				                 training_surrogate_every = int(r_squared * self._ls_train_surrogate)
			
 
				                 print(f"=> R² of surrogate is of {r_squared}.")
			
 
				                 print(f"=> MAE of surrogate is of {mae}.")
			
 
				-                print(f'=> Retraining model every {training_surrogate_every} LS ({self._n_local_search % training_surrogate_every} of {training_surrogate_every})')
			
 
				+                print(f'=> Retraining model every {training_surrogate_every} LS ({self._ls_local_search % training_surrogate_every} of {training_surrogate_every})')
			
 
				                 # avoid issue when launching training at every local search
			
 
				                 if training_surrogate_every <= 0:
			
 
				                     training_surrogate_every = 1
			
 
				 
			
 
				+
			
 
				+                # increase number of local search done
			
 
				+                self._n_local_search += 1
			
 
				+                self._ls_local_search += 1
			
 
				+
			
 
				                 # check if necessary or not to train again surrogate
			
 
				-                if self._n_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
			
 
				+                if self._ls_local_search % training_surrogate_every == 0 and self._start_train_surrogate <= self.getGlobalEvaluation():
			
 
				 
			
 
				                     # train again surrogate on real evaluated solutions file
			
 
				                     start_training = time.time()
			
@@ -325,8 +345,8 @@ class ILSPopSurrogate(Algorithm):
 
				                     # reload new surrogate function
			
 
				                     self.load_surrogate()
			
 
				 
			
 
				-                # increase number of local search done
			
 
				-                self._n_local_search += 1
			
 
				+                    # reset the local search counter
			
 
				+                    self._ls_local_search = 0
			
 
				 
			
 
				                 self.information()
			
 
				 
			
--- a/run_surrogate_rendering.sh
+++ b/run_surrogate_rendering.sh
@@ -1,28 +1,30 @@
 
				 #! /bin/bash
			
 
				 
			
 
				 # default param
			
 
				-ILS=10000
			
 
				+ILS=2000
			
 
				 LS=100
			
 
				 SS=50
			
 
				 LENGTH=32 # number of features
			
 
				 POP=100
			
 
				 ORDER=2
			
 
				-TRAIN_EVERY=50
			
 
				+TRAIN_EVERY=10
			
 
				 
			
 
				 
			
 
				 #output="rendering-attributes-ILS_${ILS}-POP_${POP}-LS_${LS}-SS_${SS}-SO_${ORDER}-SE_${TRAIN_EVERY}"
			
 
				 DATASET="rnn/data/datasets/features-selection-rendering-scaled/features-selection-rendering-scaled"
			
 
				 
			
 
				-for POP in {20,60,100};
			
 
				+for run in {1,2,3,4,5,6,7,8,9,10};
			
 
				 do
			
 
				-    for ORDER in {2,3};
			
 
				+    for POP in {20,60,100};
			
 
				     do
			
 
				-        for LS in {1000,5000,10000};
			
 
				+        for ORDER in {1,2,3};
			
 
				         do
			
 
				-            output="rendering-attributes-POP_${POP}-LS_${LS}-SS_${SS}-SO_${ORDER}-SE_${TRAIN_EVERY}"
			
 
				-            echo "Run optim attributes using: ${output}"
			
 
				-            python find_best_attributes_surrogate.py --data ${DATASET} --start_surrogate ${SS} --length 30 --ils ${ILS} --ls ${LS} --pop ${POP} --order ${ORDER} --train_every ${TRAIN_EVERY}  --output ${output}
			
 
				+            for LS in {1000,5000,10000};
			
 
				+            do
			
 
				+                output="rendering-attributes-POP_${POP}-LS_${LS}-SS_${SS}-SO_${ORDER}-SE_${TRAIN_EVERY}-RUN_${run}"
			
 
				+                echo "Run optim attributes using: ${output}"
			
 
				+                python find_best_attributes_surrogate.py --data ${DATASET} --start_surrogate ${SS} --length 32 --ils ${ILS} --ls ${LS} --pop ${POP} --order ${ORDER} --train_every ${TRAIN_EVERY}  --output ${output}
			
 
				+            done
			
 
				         done
			
 
				     done
			
 
				 done
			
 
				-