6 jaren geleden · cfe517868e
--- a/data_processing/generateAndTrain_maxwell_custom_optimization.sh
+++ b/data_processing/generateAndTrain_maxwell_custom_optimization.sh
@@ -45,9 +45,8 @@ end=$size
 
				 for nb_zones in {10,12}; do
			
 
				 
			
 
				     for mode in {"svd","svdn","svdne"}; do
			
 
				-        #for model in {"svm_model","ensemble_model",""}; do
			
 
				-        model="svm_model"
			
 
				-
			
 
				+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
			
 
				+      
			
 
				             FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}"
			
 
				             MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}"
			
 
				             CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_${filter}_min_max"
			
@@ -62,8 +61,8 @@ for nb_zones in {10,12}; do
 
				                 python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				                 
			
 
				                 echo "Train ${MODEL_NAME}"
			
 
				-                python find_best_${filter}.py --data ${FILENAME} --choice ${model}
			
 
				+                python find_best_${filter}.py --data ${FILENAME} --choice ${model} &
			
 
				             fi
			
 
				-        #done
			
 
				+        done
			
 
				     done
			
 
				 done
			
--- a/find_best_attributes.py
+++ b/find_best_attributes.py
@@ -5,6 +5,7 @@ import argparse
 
				 import pandas as pd
			
 
				 import numpy as np
			
 
				 import logging
			
 
				+import datetime
			
 
				 
			
 
				 # model imports
			
 
				 from sklearn.model_selection import train_test_split
			
@@ -49,7 +50,7 @@ def validator(solution):
 
				 
			
 
				 # init solution (26 attributes)
			
 
				 def init():
			
 
				-    return BinarySolution([], number_of_values).random(validator)
			
 
				+    return BinarySolution([], 26).random(validator)
			
 
				 
			
 
				 def loadDataset(filename):
			
 
				 
			
@@ -100,6 +101,8 @@ def main():
 
				     p_data_file = args.data
			
 
				     p_choice    = args.choice
			
 
				 
			
 
				+    print(p_data_file)
			
 
				+
			
 
				     # load data from file
			
 
				     x_train, y_train, x_test, y_test = loadDataset(p_data_file)
			
 
				 
			
@@ -112,6 +115,7 @@ def main():
 
				     # define evaluate function here (need of data information)
			
 
				     def evaluate(solution):
			
 
				 
			
 
				+        start = datetime.datetime.now()
			
 
				         # get indices of filters data to use (filters selection from solution)
			
 
				         indices = []
			
 
				 
			
@@ -129,12 +133,19 @@ def main():
 
				         y_test_model = model.predict(x_test_filters)
			
 
				         test_roc_auc = roc_auc_score(y_test, y_test_model)
			
 
				 
			
 
				+        end = datetime.datetime.now()
			
 
				+
			
 
				+        diff = end - start
			
 
				+
			
 
				+        print("Evaluation took :", divmod(diff.days * 86400 + diff.seconds, 60))
			
 
				+
			
 
				         return test_roc_auc
			
 
				 
			
 
				     # prepare optimization algorithm
			
 
				     updators = [SimpleBinaryMutation(), SimpleMutation(), SimpleCrossover()]
			
 
				     policy = RandomPolicy(updators)
			
 
				 
			
 
				+    print("Start running ILS")
			
 
				     algo = ILS(init, evaluate, updators, policy, validator, True)
			
 
				 
			
 
				     bestSol = algo.run(ils_iteration, ls_iteration)
			
@@ -146,9 +157,19 @@ def main():
 
				     if not os.path.exists(cfg.results_information_folder):
			
 
				         os.makedirs(cfg.results_information_folder)
			
 
				 
			
 
				-    filename_path = os.path.join(cfg.results_information_folder, cfg.optimization_result_filename)
			
 
				+    filename_path = os.path.join(cfg.results_information_folder, cfg.optimization_attributes_result_filename)
			
 
				+
			
 
				+    filters_counter = 0
			
 
				+    # count number of filters
			
 
				+    for index, item in enumerate(bestSol.data):
			
 
				+        if index != 0 and index % 2 == 1:
			
 
				+
			
 
				+            # count one filter if at least one attribute of the pair (index - 1, index) is set to 1
			
 
				+            if item == 1 or bestSol.data[index - 1] == 1:
			
 
				+                filters_counter += 1
			
 
				+
			
 
				 
			
 
				-    line_info = p_data_file + ';' + str(ils_iteration) + ';' + str(ls_iteration) + ';' + str(bestSol.data) + ';' + str(list(bestSol.data).count(1)) + ';' + str(bestSol.fitness())
			
 
				+    line_info = p_data_file + ';' + str(ils_iteration) + ';' + str(ls_iteration) + ';' + str(bestSol.data) + ';' + str(list(bestSol.data).count(1)) + ';' + str(filters_counter) + ';' + str(bestSol.fitness())
			
 
				     with open(filename_path, 'a') as f:
			
 
				         f.write(line_info + '\n')
			
 
				     
			
--- a/run/runAll_maxwell_custom_optimization_attributes.sh
+++ b/run/runAll_maxwell_custom_optimization_attributes.sh
@@ -28,7 +28,6 @@ if [ "${erased}" == "Y" ]; then
 
				 
			
 
				     # add of header
			
 
				     echo 'data_file; ils_iteration; ls_iteration; best_solution; nb_attributes; nb_filters; fitness (roc test);' >> ${file_path}
			
 
				-
			
 
				 fi
			
 
				 
			
 
				 size=26