Parcourir la source

Add scripts for optimization data generation

Jérôme BUISINE il y a 4 ans
Parent
commit
7681fe9222

+ 1 - 0
custom_config.py

@@ -11,6 +11,7 @@ logs_folder                        = 'logs'
 
 # variables
 features_choices_labels         = ['filters_statistics']
+optimization_result_filename    = 'optimization_comparisons.csv'
 
 ## models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2","deep_keras"]
 ## normalization_choices           = ['svd', 'svdn', 'svdne']

+ 12 - 6
data_processing/generateAndTrain_maxwell_custom.sh

@@ -14,10 +14,16 @@ if [ -z "$2" ]
     exit 1
 fi
 
-result_filename="results/models_comparisons.csv"
-VECTOR_SIZE=200
+if [ -z "$3" ]
+  then
+    echo "No argument supplied"
+    echo "Need of kind of data to use"
+    exit 1
+fi
+
 size=$1
 feature=$2
+data=$3
 
 # selection of four scenes (only maxwell)
 scenes="A, D, G, H"
@@ -30,9 +36,9 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_min_max"
+            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
 
             echo $FILENAME
 
@@ -41,7 +47,7 @@ for nb_zones in {4,6,8,10,12}; do
 
                 echo "${MODEL_NAME} results already generated..."
             else
-                python generate/generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
                 python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                 #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}

+ 0 - 52
data_processing/generateAndTrain_maxwell_custom_center.sh

@@ -1,52 +0,0 @@
-#! bin/bash
-
-if [ -z "$1" ]
-  then
-    echo "No argument supplied"
-    echo "Need of vector size"
-    exit 1
-fi
-
-if [ -z "$2" ]
-  then
-    echo "No argument supplied"
-    echo "Need of feature information"
-    exit 1
-fi
-
-result_filename="results/models_comparisons.csv"
-VECTOR_SIZE=200
-size=$1
-feature=$2
-
-# selection of four scenes (only maxwell)
-scenes="A, D, G, H"
-
-start=0
-end=$size
-
-for nb_zones in {4,6,8,10,12}; do
-
-    for mode in {"svd","svdn","svdne"}; do
-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
-
-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_min_max"
-
-            echo $FILENAME
-
-            # only compute if necessary (perhaps server will fall.. Just in case)
-            if grep -q "${MODEL_NAME}" "${result_filename}"; then
-
-                echo "${MODEL_NAME} results already generated..."
-            else
-                python generate/generate_data_model_random_center.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
-                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
-
-                #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
-                python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
-            fi
-        done
-    done
-done

+ 55 - 0
data_processing/generateAndTrain_maxwell_custom_optimization.sh

@@ -0,0 +1,55 @@
+#! bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of vector size"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No argument supplied"
+    echo "Need of feature information"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No argument supplied"
+    echo "Need of kind of data to use"
+    exit 1
+fi
+
+size=$1
+feature=$2
+data=$3
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+start=0
+end=$size
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
+
+            echo $FILENAME
+
+            # only compute if necessary (perhaps server will fall.. Just in case)
+            if grep -q "${MODEL_NAME}" "${result_filename}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                python find_best_attributes.py --data ${FILENAME} --choice ${model}
+            fi
+        done
+    done
+done

+ 0 - 52
data_processing/generateAndTrain_maxwell_custom_split.sh

@@ -1,52 +0,0 @@
-#! bin/bash
-
-if [ -z "$1" ]
-  then
-    echo "No argument supplied"
-    echo "Need of vector size"
-    exit 1
-fi
-
-if [ -z "$2" ]
-  then
-    echo "No argument supplied"
-    echo "Need of feature information"
-    exit 1
-fi
-
-result_filename="results/models_comparisons.csv"
-VECTOR_SIZE=200
-size=$1
-feature=$2
-
-# selection of four scenes (only maxwell)
-scenes="A, D, G, H"
-
-start=0
-end=$size
-
-for nb_zones in {4,6,8,10,12}; do
-
-    for mode in {"svd","svdn","svdne"}; do
-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
-
-            FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}"
-            CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${feature}_${mode}_min_max"
-
-            echo $FILENAME
-
-            # only compute if necessary (perhaps server will fall.. Just in case)
-            if grep -q "${MODEL_NAME}" "${result_filename}"; then
-
-                echo "${MODEL_NAME} results already generated..."
-            else
-                python generate/generate_data_model_random_split.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
-                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
-
-                #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
-                python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
-            fi
-        done
-    done
-done

+ 15 - 1
find_best_attributes.py

@@ -36,6 +36,8 @@ from optimization.operators.policies.RandomPolicy import RandomPolicy
 # variables and parameters
 models_list         = cfg.models_names_list
 number_of_values    = 26
+ils_iteration       = 100
+ls_iteration        = 10
 
 # default validator
 def validator(solution):
@@ -136,11 +138,23 @@ def main():
 
     algo = ILS(init, evaluate, updators, policy, validator, True)
 
-    bestSol = algo.run(100, 10)
+    bestSol = algo.run(ils_iteration, ls_iteration)
 
     # print best solution found
     print("Found ", bestSol)
 
+    # save model information into .csv file
+    if not os.path.exists(cfg.results_information_folder):
+        os.makedirs(cfg.results_information_folder)
+
+    filename_path = os.path.join(cfg.results_information_folder, cfg.optimization_result_filename)
+
+    line_info = p_data_file + ';' + str(ils_iteration) + ';' + str(ls_iteration) + ';' + str(bestSol.data) + ';' + str(list(bestSol.data).count(1)) + ';' + str(bestSol.fitness())
+    with open(filename_path, 'a') as f:
+        f.write(line_info + '\n')
+    
+    print('Result saved into %s' % filename_path)
+
 
 if __name__ == "__main__":
     main()

generate/generate_data_model_random.py → generate/generate_data_model_random_all.py


+ 0 - 24
run/runAll_maxwell.sh

@@ -1,24 +0,0 @@
-#! bin/bash
-
-# erase "results/models_comparisons.csv" file and write new header
-file_path='results/models_comparisons.csv'
-
-erased=$1
-
-if [ "${erased}" == "Y" ]; then
-    echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p results
-    touch ${file_path}
-
-    # add of header
-    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
-
-fi
-
-for size in {"4","8","16","26","32","40"}; do
-
-    for metric in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
-        bash data_processing/generateAndTrain_maxwell.sh ${size} ${metric}
-    done
-done

+ 17 - 2
run/runAll_maxwell_custom.sh

@@ -2,8 +2,23 @@
 
 # erase "results/models_comparisons.csv" file and write new header
 file_path='results/models_comparisons.csv'
+list="all, center, split"
 
-erased=$1
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need argument from [${list}]"
+    exit 1
+fi
+
+if [[ "$1" =~ ^(all|center|split)$ ]]; then
+    echo "$1 is in the list"
+else
+    echo "$1 is not in the list"
+fi
+
+data=$1
+erased=$2
 
 if [ "${erased}" == "Y" ]; then
     echo "Previous data file erased..."
@@ -19,4 +34,4 @@ fi
 size=26
 feature="filters_statistics"
 
-bash data_processing/generateAndTrain_maxwell_custom.sh ${size} ${feature}
+bash data_processing/generateAndTrain_maxwell_custom.sh ${size} ${feature} ${data}

+ 0 - 22
run/runAll_maxwell_custom_center.sh

@@ -1,22 +0,0 @@
-#! bin/bash
-
-# erase "results/models_comparisons.csv" file and write new header
-file_path='results/models_comparisons.csv'
-
-erased=$1
-
-if [ "${erased}" == "Y" ]; then
-    echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p results
-    touch ${file_path}
-
-    # add of header
-    echo 'model_name; vector_size; start; end; nb_zones; feature; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
-
-fi
-
-size=26
-feature="filters_statistics"
-
-bash data_processing/generateAndTrain_maxwell_custom_center.sh ${size} ${feature}

+ 37 - 0
run/runAll_maxwell_custom_optimization.sh

@@ -0,0 +1,37 @@
+#! bin/bash
+
+# erase "results/optimization_comparisons.csv" file and write new header
+file_path='results/optimization_comparisons.csv'
+list="all, center, split"
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need argument from [${list}]"
+    exit 1
+fi
+
+if [[ "$1" =~ ^(all|center|split)$ ]]; then
+    echo "$1 is in the list"
+else
+    echo "$1 is not in the list"
+fi
+
+data=$1
+erased=$2
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p results
+    touch ${file_path}
+
+    # add of header
+    echo 'data_file; ils_iteration; ls_iteration; best_solution; nb_filters; fitness (roc test);' >> ${file_path}
+
+fi
+
+size=26
+feature="filters_statistics"
+
+bash data_processing/generateAndTrain_maxwell_custom_optimization.sh ${size} ${feature} ${data}

+ 0 - 22
run/runAll_maxwell_custom_split.sh

@@ -1,22 +0,0 @@
-#! bin/bash
-
-# erase "results/models_comparisons.csv" file and write new header
-file_path='results/models_comparisons.csv'
-
-erased=$1
-
-if [ "${erased}" == "Y" ]; then
-    echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p results
-    touch ${file_path}
-
-    # add of header
-    echo 'model_name; vector_size; start; end; nb_zones; feature; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
-
-fi
-
-size=26
-feature="filters_statistics"
-
-bash data_processing/generateAndTrain_maxwell_custom_split.sh ${size} ${feature}