6 years ago · 7681fe9222
--- a/custom_config.py
+++ b/custom_config.py
@@ -11,6 +11,7 @@ logs_folder                        = 'logs'
 
				 
			
 
				 # variables
			
 
				 features_choices_labels         = ['filters_statistics']
			
 
				+optimization_result_filename    = 'optimization_comparisons.csv'
			
 
				 
			
 
				 ## models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
			
 
				 ## normalization_choices           = ['svd', 'svdn', 'svdne']
			
--- a/data_processing/generateAndTrain_maxwell_custom.sh
+++ b/data_processing/generateAndTrain_maxwell_custom.sh
@@ -14,10 +14,19 @@ if [ -z "$2" ]
     exit 1
 fi
 
+# NOTE(review): definition kept on purpose; the resume check later in this script (unchanged, so not shown in this diff) appears to grep "${result_filename}" - confirm
 result_filename="results/models_comparisons.csv"
-VECTOR_SIZE=200
+
+if [ -z "$3" ]
+  then
+    echo "No argument supplied"
+    echo "Need of kind of data to use"
+    exit 1
+fi
+
 size=$1
 feature=$2
+data=$3
 
 # selection of four scenes (only maxwell)
 scenes="A, D, G, H"
@@ -30,9 +39,9 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_min_max"
+            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
 
             echo $FILENAME
 
@@ -41,7 +50,7 @@ for nb_zones in {4,6,8,10,12}; do
 
                 echo "${MODEL_NAME} results already generated..."
             else
-                python generate/generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
                 python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                 #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
			
--- a/data_processing/generateAndTrain_maxwell_custom_center.sh
+++ b/data_processing/generateAndTrain_maxwell_custom_center.sh
@@ -1,52 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-if [ -z "$1" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of vector size"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$2" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of feature information"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-result_filename="results/models_comparisons.csv"
			
 
				-VECTOR_SIZE=200
			
 
				-size=$1
			
 
				-feature=$2
			
 
				-
			
 
				-# selection of four scenes (only maxwell)
			
 
				-scenes="A, D, G, H"
			
 
				-
			
 
				-start=0
			
 
				-end=$size
			
 
				-
			
 
				-for nb_zones in {4,6,8,10,12}; do
			
 
				-
			
 
				-    for mode in {"svd","svdn","svdne"}; do
			
 
				-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
			
 
				-
			
 
				-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
			
 
				-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
			
 
				-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_min_max"
			
 
				-
			
 
				-            echo $FILENAME
			
 
				-
			
 
				-            # only compute if necessary (perhaps server will fall.. Just in case)
			
 
				-            if grep -q "${MODEL_NAME}" "${result_filename}"; then
			
 
				-
			
 
				-                echo "${MODEL_NAME} results already generated..."
			
 
				-            else
			
 
				-                python generate/generate_data_model_random_center.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
			
 
				-
			
 
				-                #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
			
 
				-            fi
			
 
				-        done
			
 
				-    done
			
 
				-done
			
--- a/data_processing/generateAndTrain_maxwell_custom_optimization.sh
+++ b/data_processing/generateAndTrain_maxwell_custom_optimization.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+#
+# Usage: bash data_processing/generateAndTrain_maxwell_custom_optimization.sh <size> <feature> <data>
+# For each (nb_zones, mode, model) combination: generates the dataset with
+# generate/generate_data_model_random_<data>.py, then runs find_best_attributes.py on it.
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of feature information"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No argument supplied"
+    echo "Need of kind of data to use"
+    exit 1
+fi
+
+size=$1
+feature=$2
+data=$3
+
+# results CSV checked by the resume test below (its header is written by run/runAll_maxwell_custom_optimization.sh)
+# NOTE(review): assumes find_best_attributes.py appends its result lines to this same file - confirm cfg.results_information_folder
+result_filename="results/optimization_comparisons.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+start=0
+end=$size
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
+
+            echo $FILENAME
+
+            # only compute if necessary, so an interrupted run (e.g. the server going down) can resume
+            if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                python find_best_attributes.py --data ${FILENAME} --choice ${model}
+            fi
+        done
+    done
+done
			
--- a/data_processing/generateAndTrain_maxwell_custom_split.sh
+++ b/data_processing/generateAndTrain_maxwell_custom_split.sh
@@ -1,52 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-if [ -z "$1" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of vector size"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-if [ -z "$2" ]
			
 
				-  then
			
 
				-    echo "No argument supplied"
			
 
				-    echo "Need of feature information"
			
 
				-    exit 1
			
 
				-fi
			
 
				-
			
 
				-result_filename="results/models_comparisons.csv"
			
 
				-VECTOR_SIZE=200
			
 
				-size=$1
			
 
				-feature=$2
			
 
				-
			
 
				-# selection of four scenes (only maxwell)
			
 
				-scenes="A, D, G, H"
			
 
				-
			
 
				-start=0
			
 
				-end=$size
			
 
				-
			
 
				-for nb_zones in {4,6,8,10,12}; do
			
 
				-
			
 
				-    for mode in {"svd","svdn","svdne"}; do
			
 
				-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
			
 
				-
			
 
				-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
			
 
				-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
			
 
				-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_min_max"
			
 
				-
			
 
				-            echo $FILENAME
			
 
				-
			
 
				-            # only compute if necessary (perhaps server will fall.. Just in case)
			
 
				-            if grep -q "${MODEL_NAME}" "${result_filename}"; then
			
 
				-
			
 
				-                echo "${MODEL_NAME} results already generated..."
			
 
				-            else
			
 
				-                python generate/generate_data_model_random_split.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
			
 
				-
			
 
				-                #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
			
 
				-                python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
			
 
				-            fi
			
 
				-        done
			
 
				-    done
			
 
				-done
			
--- a/find_best_attributes.py
+++ b/find_best_attributes.py
@@ -36,6 +36,8 @@ from optimization.operators.policies.RandomPolicy import RandomPolicy
 
				 # variables and parameters
			
 
				 models_list         = cfg.models_names_list
			
 
				 number_of_values    = 26
			
 
				+ils_iteration       = 100
			
 
				+ls_iteration        = 10
			
 
				 
			
 
				 # default validator
			
 
				 def validator(solution):
			
@@ -136,11 +138,23 @@ def main():
 
				 
			
 
				     algo = ILS(init, evaluate, updators, policy, validator, True)
			
 
				 
			
 
				-    bestSol = algo.run(100, 10)
			
 
				+    bestSol = algo.run(ils_iteration, ls_iteration)
			
 
				 
			
 
				     # print best solution found
			
 
				     print("Found ", bestSol)
			
 
				 
			
 
				+    # save model information into .csv file
			
 
				+    if not os.path.exists(cfg.results_information_folder):
			
 
				+        os.makedirs(cfg.results_information_folder)
			
 
				+
			
 
				+    filename_path = os.path.join(cfg.results_information_folder, cfg.optimization_result_filename)
			
 
				+
			
 
				+    line_info = p_data_file + ';' + str(ils_iteration) + ';' + str(ls_iteration) + ';' + str(bestSol.data) + ';' + str(list(bestSol.data).count(1)) + ';' + str(bestSol.fitness())
			
 
				+    with open(filename_path, 'a') as f:
			
 
				+        f.write(line_info + '\n')
			
 
				+    
			
 
				+    print('Result saved into %s' % filename_path)
			
 
				+
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     main()
			
--- a/generate/generate_data_model_random_all.py
+++ b/generate/generate_data_model_random_all.py
--- a/run/runAll_maxwell.sh
+++ b/run/runAll_maxwell.sh
@@ -1,24 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-# erase "results/models_comparisons.csv" file and write new header
			
 
				-file_path='results/models_comparisons.csv'
			
 
				-
			
 
				-erased=$1
			
 
				-
			
 
				-if [ "${erased}" == "Y" ]; then
			
 
				-    echo "Previous data file erased..."
			
 
				-    rm ${file_path}
			
 
				-    mkdir -p results
			
 
				-    touch ${file_path}
			
 
				-
			
 
				-    # add of header
			
 
				-    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
			
 
				-
			
 
				-fi
			
 
				-
			
 
				-for size in {"4","8","16","26","32","40"}; do
			
 
				-
			
 
				-    for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
			
 
				-        bash data_processing/generateAndTrain_maxwell.sh ${size} ${metric}
			
 
				-    done
			
 
				-done
			
--- a/run/runAll_maxwell_custom.sh
+++ b/run/runAll_maxwell_custom.sh
@@ -2,8 +2,25 @@
 
 # erase "results/models_comparisons.csv" file and write new header
 file_path='results/models_comparisons.csv'
+list="all, center, split"
 
-erased=$1
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need argument from [${list}]"
+    exit 1
+fi
+
+if [[ "$1" =~ ^(all|center|split)$ ]]; then
+    echo "$1 is in the list"
+else
+    echo "$1 is not in the list"
+    # abort: the kind is passed on to generateAndTrain_maxwell_custom.sh, which uses it to build the generator script name
+    exit 1
+fi
+
+data=$1
+erased=$2
 
 if [ "${erased}" == "Y" ]; then
     echo "Previous data file erased..."
@@ -19,4 +36,4 @@ fi
 size=26
 feature="filters_statistics"
 
-bash data_processing/generateAndTrain_maxwell_custom.sh ${size} ${feature}
+bash data_processing/generateAndTrain_maxwell_custom.sh ${size} ${feature} ${data}
			
--- a/run/runAll_maxwell_custom_center.sh
+++ b/run/runAll_maxwell_custom_center.sh
@@ -1,22 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-# erase "results/models_comparisons.csv" file and write new header
			
 
				-file_path='results/models_comparisons.csv'
			
 
				-
			
 
				-erased=$1
			
 
				-
			
 
				-if [ "${erased}" == "Y" ]; then
			
 
				-    echo "Previous data file erased..."
			
 
				-    rm ${file_path}
			
 
				-    mkdir -p results
			
 
				-    touch ${file_path}
			
 
				-
			
 
				-    # add of header
			
 
				-    echo 'model_name; vector_size; start; end; nb_zones; feature; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
			
 
				-
			
 
				-fi
			
 
				-
			
 
				-size=26
			
 
				-feature="filters_statistics"
			
 
				-
			
 
				-bash data_processing/generateAndTrain_maxwell_custom_center.sh ${size} ${feature}
			
--- a/run/runAll_maxwell_custom_optimization.sh
+++ b/run/runAll_maxwell_custom_optimization.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Usage: bash run/runAll_maxwell_custom_optimization.sh <all|center|split> [Y]
+# Runs the attribute-optimization pipeline for the given kind of data; pass Y as
+# second argument to erase the results file and start again from a fresh header.
+
+# results file (erased and given a new header when the second argument is Y)
+file_path='results/optimization_comparisons.csv'
+list="all, center, split"
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need argument from [${list}]"
+    exit 1
+fi
+
+if [[ "$1" =~ ^(all|center|split)$ ]]; then
+    echo "$1 is in the list"
+else
+    echo "$1 is not in the list"
+    # abort: the kind is passed to the script below, which uses it to build the generator script name
+    exit 1
+fi
+
+data=$1
+erased=$2
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    # -f: do not complain when there is no previous results file
+    rm -f ${file_path}
+    mkdir -p results
+    touch ${file_path}
+
+    # write the CSV header
+    echo 'data_file; ils_iteration; ls_iteration; best_solution; nb_filters; fitness (roc test);' >> ${file_path}
+
+fi
+
+size=26
+feature="filters_statistics"
+
+bash data_processing/generateAndTrain_maxwell_custom_optimization.sh ${size} ${feature} ${data}
			
--- a/run/runAll_maxwell_custom_split.sh
+++ b/run/runAll_maxwell_custom_split.sh
@@ -1,22 +0,0 @@
 
				-#! bin/bash
			
 
				-
			
 
				-# erase "results/models_comparisons.csv" file and write new header
			
 
				-file_path='results/models_comparisons.csv'
			
 
				-
			
 
				-erased=$1
			
 
				-
			
 
				-if [ "${erased}" == "Y" ]; then
			
 
				-    echo "Previous data file erased..."
			
 
				-    rm ${file_path}
			
 
				-    mkdir -p results
			
 
				-    touch ${file_path}
			
 
				-
			
 
				-    # add of header
			
 
				-    echo 'model_name; vector_size; start; end; nb_zones; feature; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
			
 
				-
			
 
				-fi
			
 
				-
			
 
				-size=26
			
 
				-feature="filters_statistics"
			
 
				-
			
 
				-bash data_processing/generateAndTrain_maxwell_custom_split.sh ${size} ${feature}