Browse Source

Update of all bash scripts

Jérôme BUISINE 10 months ago
parent
commit
5923de0e11

+ 39 - 6
data_attributes.py

@@ -23,11 +23,19 @@ import custom_config as cfg
 from modules.utils import data as dt
 
 
-def _get_26_attributes(block):
+def _get_sobel_filtered_img(lab_img, k_size=3):
+    """Return the Sobel gradient magnitude of `lab_img` as a uint8 array.
+
+    `k_size` is forwarded to `cv2.Sobel` as the `ksize` argument for both
+    the x and the y derivative.
+    """
+    # first-order derivatives along x (1, 0) and y (0, 1), computed in float64
+    grad_x = cv2.Sobel(lab_img, cv2.CV_64F, 1, 0, ksize=k_size)
+    grad_y = cv2.Sobel(lab_img, cv2.CV_64F, 0, 1, ksize=k_size)
+
+    # NOTE(review): the float64 magnitude is cast to uint8; values above 255
+    # cannot be represented after the cast -- confirm this is intended
+    return np.array(np.hypot(grad_x, grad_y), 'uint8')
     
-        img_width, img_height = 200, 200
 
-        lab_img = transform.get_LAB_L(block)
+def _get_26_attributes(lab_img):
+    
+        img_width, img_height = 200, 200
         arr = np.array(lab_img)
 
         # compute all filters statistics
@@ -92,12 +100,37 @@ def get_image_features(data_type, block):
     data = []
 
     if 'filters_statistics' in data_type:
-        data = _get_26_attributes(block)
+
+        lab_img = transform.get_LAB_L(block)
+
+        data = _get_26_attributes(lab_img)
 
     if 'filters_statistics_sobel' in data_type:
-        data = _get_26_attributes(block)
-        
 
+        lab_img = transform.get_LAB_L(block)
+
+        data = _get_26_attributes(lab_img)
+
+        # add sobel complexity value to array of data (hence we will have 27 attributes)
+        sobel_std = np.std(_get_sobel_filtered_img(lab_img))
+
+        data.append(sobel_std)
+
+    if 'svd' in data_type:
+
+        lab_img = transform.get_LAB_L(block)
+
+        data = compression.get_SVD_s(lab_img)
+
+    
+    if 'svd_sobel' in data_type:
+
+        lab_img = transform.get_LAB_L(block)
+        data = list(compression.get_SVD_s(lab_img))
+
+        sobel_std = np.std(_get_sobel_filtered_img(lab_img))
+        data.append(sobel_std)
+        
     data = np.array(data) 
 
     return data

+ 24 - 18
data_processing/generateAndTrain_maxwell_custom.sh

@@ -1,36 +1,43 @@
 #! bin/bash
 
-if [ -z "$1" ]
-  then
-    echo "No argument supplied"
-    echo "Need of vector size"
-    exit 1
-fi
+# result file name
+result_file_path="results/models_comparisons.csv"
 
-if [ -z "$2" ]
+# selection of four scenes (only maxwell)
+scenes="A,D,G,H"
+
+# only one model for the moment
+model="rfe_svm_model"
+
+# check feature param
+if [ -z "$1" ]
   then
     echo "No argument supplied"
     echo "Need of feature information"
     exit 1
 fi
 
-if [ -z "$3" ]
+# accept feature param
+feature=$1
+
+if [ -z "$2" ]
   then
     echo "No argument supplied"
     echo "Need of kind of data to use"
     exit 1
 fi
 
-size=$1
-feature=$2
-data=$3
+feature=$1
+data=$2
 
-# selection of four scenes (only maxwell)
-scenes="A, D, G, H"
+# get size depending on feature
+declare -A featuresSize
+featuresSize=( ["filters_statistics"]="26" ["svd"]="200" ["filters_statistics_sobel"]="27" ["svd_sobel"]="201")
+# associative-array keys are plain strings: expand ${feature} explicitly
+size=${featuresSize[$feature]}
 
+# interval of data
 start=0
 end=$size
-model="rfe_svm_model"
 
 for nb_zones in {10,11,12}; do
 
@@ -43,15 +50,14 @@ for nb_zones in {10,11,12}; do
         echo $FILENAME
 
         # only compute if necessary (perhaps server will fall.. Just in case)
-        if grep -q "${MODEL_NAME}" "${result_filename}"; then
+        if grep -q "${MODEL_NAME}" "${result_file_path}"; then
 
             echo "${MODEL_NAME} results already generated..."
         else
             python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
-            #python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+            python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
-            #python prediction/predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
-            #python others/save_model_result_in_md_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
+            python others/save_model_result.py --data ${FILENAME} --model "saved_models/${MODEL_NAME}.joblib" --choice ${model} --feature ${feature} --mode ${mode} --zones ${nb_zones}
         fi
     done
 done

+ 38 - 145
others/save_model_result_in_md_maxwell.py

@@ -15,13 +15,6 @@ from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import StratifiedKFold
 from sklearn.model_selection import train_test_split
 
-from keras.models import Sequential
-from keras.layers import Conv1D, MaxPooling1D
-from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
-from keras.wrappers.scikit_learn import KerasClassifier
-from keras import backend as K
-from keras.models import model_from_json
-
 # image processing imports
 from ipfml import processing
 from PIL import Image
@@ -46,108 +39,33 @@ current_dirpath = os.getcwd()
 
 def main():
 
-    kind_model = 'keras'
-    model_ext = ''
     
-    parser = argparse.ArgumentParser(description="Display SVD data of scene zone")
+    parser = argparse.ArgumentParser(description="Save data results of learned model")
 
-    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
+    parser.add_argument('--data', type=str, help='Interval value to keep from svd', default='"0, 200"')
     parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--choice', type=str, help='Name of the model used', choices=cfg.models_names_list)
+    parser.add_argument('--zones', type=int, help='Number of zones used when learning')
     parser.add_argument('--feature', type=str, help='feature data choice', choices=cfg.features_choices_labels)
     parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
 
+
     args = parser.parse_args()
 
-    p_interval   = list(map(int, args.interval.split(',')))
+    p_data_file = args.data
     p_model_file = args.model
+    p_model_name = args.choice
+    p_zones      = args.zones
     p_feature    = args.feature
     p_mode       = args.mode
 
-
-    # call model and get global result in scenes
-    begin, end = p_interval
-
-    bash_cmd = "bash others/testModelByScene_maxwell.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_feature + "'"
-
-    print(bash_cmd)
-
-    ## call command ##
-    p = subprocess.Popen(bash_cmd, stdout=subprocess.PIPE, shell=True)
-
-    (output, err) = p.communicate()
-
-    ## Wait for result ##
-    p_status = p.wait()
-
-    if not os.path.exists(markdowns_folder):
-        os.makedirs(markdowns_folder)
-
-    # get model name to construct model
-
-    if '.joblib' in p_model_file:
-        kind_model = 'sklearn'
-        model_ext = '.joblib'
-
-    if '.json' in p_model_file:
-        kind_model = 'keras'
-        model_ext = '.json'
-
-    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace(model_ext, '.md'))
-
-    with open(md_model_path, 'w') as f:
-        f.write(output.decode("utf-8"))
-
-        # read each threshold_map information if exists
-        model_map_info_path = os.path.join(threshold_map_folder, p_model_file.replace('saved_models/', ''))
-
-        if not os.path.exists(model_map_info_path):
-            f.write('\n\n No threshold map information')
-        else:
-            maps_files = os.listdir(model_map_info_path)
-
-            # get all map information
-            for t_map_file in maps_files:
-
-                file_path = os.path.join(model_map_info_path, t_map_file)
-                with open(file_path, 'r') as map_file:
-
-                    title_scene =  t_map_file.replace(threshold_map_file_prefix, '')
-                    f.write('\n\n## ' + title_scene + '\n')
-                    content = map_file.readlines()
-
-                    # getting each map line information
-                    for line in content:
-                        f.write(line)
-
-        f.close()
-
-    # Keep model information to compare
-    current_model_name = p_model_file.split('/')[-1].replace(model_ext, '')
-
-    # Prepare writing in .csv file into results folder
-    output_final_file_path = os.path.join(cfg.results_information_folder, final_csv_model_comparisons)
-
-    if not os.path.exists(cfg.results_information_folder):
-        os.makedirs(cfg.results_information_folder)
-
-    output_final_file = open(output_final_file_path, "a")
-
-    print(current_model_name)
-    # reconstruct data filename
-    for name in models_name:
-        if name in current_model_name:
-            data_filename = current_model_name
-            current_data_file_path = os.path.join('data', data_filename)
-
-    print("Current data file ")
-    print(current_data_file_path)
     model_scores = []
 
     ########################
     # 1. Get and prepare data
     ########################
-    dataset_train = pd.read_csv(current_data_file_path + '.train', header=None, sep=";")
-    dataset_test = pd.read_csv(current_data_file_path + '.test', header=None, sep=";")
+    dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
+    dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
 
     # default first shuffle of data
     dataset_train = shuffle(dataset_train)
@@ -183,35 +101,21 @@ def main():
     # 2. Getting model
     #######################
 
-    if kind_model == 'keras':
-        with open(p_model_file, 'r') as f:
-            json_model = json.load(f)
-            model = model_from_json(json_model)
-            model.load_weights(p_model_file.replace('.json', '.h5'))
-
-            model.compile(loss='binary_crossentropy',
-                        optimizer='adam',
-                        features=['accuracy'])
-
-        # reshape all input data
-        x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), end, 1)
-        x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), end, 1)
-
-
-    if kind_model == 'sklearn':
-        model = joblib.load(p_model_file)
+    model = joblib.load(p_model_file)
+    selected_indices = [(i + 1) for i in np.arange(len(model.support_)) if model.support_[i] == True]
+    selected_indices_displayed = [i for i in np.arange(len(model.support_)) if model.support_[i] == True]
+    print(selected_indices)
+    
+    # update dataset values using specific indices
+    x_dataset_train = x_dataset_train.loc[:, selected_indices]
+    x_dataset_test = x_dataset_test.loc[:, selected_indices]
 
     #######################
     # 3. Fit model : use of cross validation to fit model
     #######################
 
-    if kind_model == 'keras':
-        model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
-
-    if kind_model == 'sklearn':
-        model.fit(x_dataset_train, y_dataset_train)
-
-        train_accuracy = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
+    model.estimator_.fit(x_dataset_train, y_dataset_train)
+    train_accuracy = cross_val_score(model.estimator_, x_dataset_train, y_dataset_train, cv=5)
 
     ######################
     # 4. Test : Validation and test dataset from .test dataset
@@ -229,20 +133,12 @@ def main():
 
     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
 
-    if kind_model == 'keras':
-        y_test_model = model.predict_classes(X_test)
-        y_val_model = model.predict_classes(X_val)
-
-        y_train_model = model.predict_classes(x_dataset_train)
-
-        train_accuracy = accuracy_score(y_dataset_train, y_train_model)
-
-    if kind_model == 'sklearn':
-        y_test_model = model.predict(X_test)
-        y_val_model = model.predict(X_val)
-
-        y_train_model = model.predict(x_dataset_train)
+    # update dataset values using specific indices
+    y_test_model = model.estimator_.predict(X_test)
+    y_val_model = model.estimator_.predict(X_val)
+    y_train_model = model.estimator_.predict(x_dataset_train)
 
+    # getting all scores
     val_accuracy = accuracy_score(y_val, y_val_model)
     test_accuracy = accuracy_score(y_test, y_test_model)
 
@@ -258,17 +154,9 @@ def main():
     test_recall = recall_score(y_test, y_test_model)
     test_roc_auc = roc_auc_score(y_test, y_test_model)
 
-    if kind_model == 'keras':
-        # stats of all dataset
-        all_x_data = np.concatenate([x_dataset_train, X_test, X_val])
-        all_y_data = np.concatenate([y_dataset_train, y_test, y_val])
-        all_y_model = model.predict_classes(all_x_data)
-
-    if kind_model == 'sklearn':
-        # stats of all dataset
-        all_x_data = pd.concat([x_dataset_train, X_test, X_val])
-        all_y_data = pd.concat([y_dataset_train, y_test, y_val])
-        all_y_model = model.predict(all_x_data)
+    all_x_data = pd.concat([x_dataset_train, X_test, X_val])
+    all_y_data = pd.concat([y_dataset_train, y_test, y_val])
+    all_y_model = model.estimator_.predict(all_x_data)
 
     all_accuracy = accuracy_score(all_y_data, all_y_model)
     all_f1_score = f1_score(all_y_data, all_y_model)
@@ -308,14 +196,19 @@ def main():
     model_scores.append(all_recall_score)
     model_scores.append(all_roc_auc_score)
 
-    # TODO : improve...
-    # check if it's always the case...
-    nb_zones = current_data_file_path.split('_')[7]
-
-    final_file_line = current_model_name + '; ' + str(end - begin) + '; ' + str(begin) + '; ' + str(end) + '; ' + str(nb_zones) + '; ' + p_feature + '; ' + p_mode
+    # add final line into data
+    final_file_line = p_model_name + ';' + str(selected_indices_displayed) + '; ' + str(p_zones) + '; ' + p_feature + '; ' + p_mode
 
     for s in model_scores:
         final_file_line += '; ' + str(s)
+    
+    # Prepare writing in .csv file into results folder
+    output_final_file_path = os.path.join(cfg.results_information_folder, final_csv_model_comparisons)
+    
+    if not os.path.exists(cfg.results_information_folder):
+        os.makedirs(cfg.results_information_folder)
+
+    output_final_file = open(output_final_file_path, "a")
 
     output_final_file.write(final_file_line + '\n')
 

+ 0 - 93
others/save_model_result_in_md.py

@@ -1,93 +0,0 @@
-# main imports
-import numpy as np
-import sys, os, argparse
-import subprocess
-import time
-
-# models imports
-from sklearn.externals import joblib
-
-# image processing imports
-from PIL import Image
-
-# modules imports
-sys.path.insert(0, '') # trick to enable import of main folder module
-
-import custom_config as cfg
-
-# variables and parameters
-threshold_map_folder      = cfg.threshold_map_folder
-threshold_map_file_prefix = cfg.threshold_map_folder + "_"
-
-markdowns_folder          = cfg.models_information_folder
-zones                     = cfg.zones_indices
-
-current_dirpath = os.getcwd()
-
-def main():
-
-    parser = argparse.ArgumentParser(description="Display SVD data of scene zone")
-
-    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
-    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
-    parser.add_argument('--feature', type=str, help='Feature data choice', choices=cfg.features_choices_labels)
-    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
-
-    args = parser.parse_args()
-    
-    p_interval   = list(map(int, args.interval.split(',')))
-    p_model_file = args.model
-    p_metric     = args.metric
-    p_mode       = args.mode
-
-
-    # call model and get global result in scenes
-
-    begin, end = p_interval
-
-    bash_cmd = "bash others/testModelByScene.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_metric + "'"
-    print(bash_cmd)
-
-    ## call command ##
-    p = subprocess.Popen(bash_cmd, stdout=subprocess.PIPE, shell=True)
-
-    (output, err) = p.communicate()
-
-    ## Wait for result ##
-    p_status = p.wait()
-
-    if not os.path.exists(markdowns_folder):
-        os.makedirs(markdowns_folder)
-
-    # get model name to construct model
-    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace('.joblib', '.md'))
-
-    with open(md_model_path, 'w') as f:
-        f.write(output.decode("utf-8"))
-
-        # read each threshold_map information if exists
-        model_map_info_path = os.path.join(threshold_map_folder, p_model_file.replace('saved_models/', ''))
-
-        if not os.path.exists(model_map_info_path):
-            f.write('\n\n No threshold map information')
-        else:
-            maps_files = os.listdir(model_map_info_path)
-
-            # get all map information
-            for t_map_file in maps_files:
-
-                file_path = os.path.join(model_map_info_path, t_map_file)
-                with open(file_path, 'r') as map_file:
-
-                    title_scene =  t_map_file.replace(threshold_map_file_prefix, '')
-                    f.write('\n\n## ' + title_scene + '\n')
-                    content = map_file.readlines()
-
-                    # getting each map line information
-                    for line in content:
-                        f.write(line)
-
-        f.close()
-
-if __name__== "__main__":
-    main()

+ 0 - 62
others/testModelByScene.sh

@@ -1,62 +0,0 @@
-#! bin/bash
-
-if [ -z "$1" ]
-  then
-    echo "No first argument supplied"
-    echo "Need of begin vector index"
-    exit 1
-fi
-
-if [ -z "$2" ]
-  then
-    echo "No second argument supplied"
-    echo "Need of end vector index"
-    exit 1
-fi
-
-if [ -z "$3" ]
-  then
-    echo "No third argument supplied"
-    echo "Need of model input"
-    exit 1
-fi
-
-if [ -z "$4" ]
-  then
-    echo "No fourth argument supplied"
-    echo "Need of mode file : 'svd', 'svdn', svdne"
-    exit 1
-fi
-
-if [ -z "$5" ]
-  then
-    echo "No fifth argument supplied"
-    echo "Need of feature : 'lab', 'mscn'"
-    exit 1
-fi
-
-INPUT_BEGIN=$1
-INPUT_END=$2
-INPUT_MODEL=$3
-INPUT_MODE=$4
-INPUT_FEATURE=$5
-
-zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
-
-echo "**Model :** ${INPUT_MODEL}"
-echo "**feature :** ${INPUT_FEATURE}"
-echo "**Mode :** ${INPUT_MODE}"
-echo "**Vector range :** [${INPUT_BEGIN}, ${INPUT_END}]"
-echo ""
-echo " # | GLOBAL | NOISY | NOT NOISY"
-echo "---|--------|-------|----------"
-
-for scene in {"A","B","C","D","E","F","G","H","I"}; do
-
-  FILENAME="data/data_${INPUT_MODE}_${INPUT_FEATURE}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
-
-  python generate/generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --feature ${INPUT_FEATURE} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
-
-  python prediction/prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_feature_${INPUT_FEATURE}.prediction" --scene ${scene}
-
-done

+ 0 - 70
others/testModelByScene_maxwell.sh

@@ -1,70 +0,0 @@
-#! bin/bash
-
-if [ -z "$1" ]
-  then
-    echo "No first argument supplied"
-    echo "Need of begin vector index"
-    exit 1
-fi
-
-if [ -z "$2" ]
-  then
-    echo "No second argument supplied"
-    echo "Need of end vector index"
-    exit 1
-fi
-
-if [ -z "$3" ]
-  then
-    echo "No third argument supplied"
-    echo "Need of model input"
-    exit 1
-fi
-
-if [ -z "$4" ]
-  then
-    echo "No fourth argument supplied"
-    echo "Need of mode file : 'svd', 'svdn', svdne"
-    exit 1
-fi
-
-if [ -z "$5" ]
-  then
-    echo "No fifth argument supplied"
-    echo "Need of feature : 'lab', 'mscn'"
-    exit 1
-fi
-
-if [ -z "$6" ]
-  then
-    echo "No sixth argument supplied"
-fi
-
-
-
-INPUT_BEGIN=$1
-INPUT_END=$2
-INPUT_MODEL=$3
-INPUT_MODE=$4
-INPUT_FEATURE=$5
-
-zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
-
-echo "**Model :** ${INPUT_MODEL}"
-echo "**feature :** ${INPUT_FEATURE}"
-echo "**Mode :** ${INPUT_MODE}"
-echo "**Vector range :** [${INPUT_BEGIN}, ${INPUT_END}]"
-echo ""
-echo " # | GLOBAL | NOISY | NOT NOISY"
-echo "---|--------|-------|----------"
-
-# only take maxwell scenes
-for scene in {"A","D","G","H"}; do
-
-  FILENAME="data/data_${INPUT_MODE}_${INPUT_FEATURE}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
-
-  python generate/generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --feature ${INPUT_FEATURE} --scenes "${scene}" --zones "${zones}" --percent 1
-
-  python prediction/prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_feature_${INPUT_FEATURE}.prediction" --scene ${scene}
-
-done

+ 31 - 18
run/runAll_maxwell_custom.sh

@@ -1,35 +1,48 @@
 #! bin/bash
 
 # erase "results/models_comparisons.csv" file and write new header
-list="all, center, split"
+result_file_path="results/models_comparisons.csv"
 
-if [ -z "$1" ]
-  then
-    echo "No argument supplied"
-    echo "Need argument from [${list}]"
+features_list="(filters_statistics|filters_statistics_sobel|svd|svd_sobel)"
+if [[ "$1" =~ ^(filters_statistics|filters_statistics_sobel|svd|svd_sobel)$ ]]; then
+    echo "$1 is in the list"
+else
+    echo "$1 is not in the list, need argument from [${features_list}]"
     exit 1
 fi
 
-if [[ "$1" =~ ^(all|center|split)$ ]]; then
-    echo "$1 is in the list"
+# accept feature param
+feature=$1
+
+# if [ -z "$2" ]
+#   then
+#     echo "No argument supplied"
+#     echo "Need argument from [${list}]"
+#     exit 1
+# fi
+
+# check data param (second argument); stop on an unknown value, like the feature check
+data_list="(all|center|split)"
+if [[ "$2" =~ ^(all|center|split)$ ]]; then
+    echo "$2 is in the list"
 else
-    echo "$1 is not in the list"
+    echo "$2 is not in the list, need argument from [${data_list}]"
+    exit 1
 fi
 
-data=$1
-erased=$2
+# accept data param
+data=$2
+
+# check erased data param
+erased=$3
 
 if [ "${erased}" == "Y" ]; then
     echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p results
-    touch ${file_path}
+    rm ${result_file_path}
+    # if necessary
+    mkdir -p results 
+    touch ${result_file_path}
 
     # add of header
-    echo 'model_name; vector_size; start; end; nb_zones; feature; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+    echo 'model_name; selected_indices; nb_zones; feature; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${result_file_path}
 fi
 
-size=26
-feature="filters_statistics"
-
-bash data_processing/generateAndTrain_maxwell_custom.sh ${size} ${feature} ${data}
+bash data_processing/generateAndTrain_maxwell_custom.sh ${feature} ${data}

+ 52 - 0
simulation/run_maxwell_simulation.sh

@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Usage: run_maxwell_simulation.sh <feature>
+# For every (nb_zones, mode) combination whose model name is listed in
+# ${simulate_models}: generate the dataset, train the model and run the
+# threshold prediction script.
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A,D,G,H"
+
+# model choice
+model="rfe_svm_model"
+
+# check feature param
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need of feature information"
+    exit 1
+fi
+
+if [[ "$1" =~ ^(filters_statistics|filters_statistics_sobel|svd|svd_sobel)$ ]]; then
+    echo "$1 is in the list"
+else
+    echo "$1 is not in the list"
+    exit 1
+fi
+
+# accept feature param
+feature=$1
+
+# vector size associated with each feature
+declare -A featuresSize
+featuresSize=( ["filters_statistics"]="26" ["svd"]="200" ["filters_statistics_sobel"]="27" ["svd_sobel"]="201")
+
+# associative-array keys are plain strings: expand ${feature} explicitly
+size=${featuresSize[$feature]}
+
+for nb_zones in {10,11,12}; do
+    for mode in {"svd","svdn","svdne"}; do
+
+        FILENAME="data/${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_all"
+        MODEL_NAME="${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_all"
+        CUSTOM_MIN_MAX_FILENAME="N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_all_min_max"
+
+        # only run models whose name appears as a whole line in ${simulate_models}
+        if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
+
+            # generate the dataset and train the model
+            python generate/generate_data_model_random.py --output ${FILENAME} --interval "0,${size}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+            python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+            # run the threshold prediction with the saved model
+            python prediction/predict_seuil_expe_maxwell_curve.py --interval "0,${size}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --custom ${CUSTOM_MIN_MAX_FILENAME}
+        fi
+    done
+done

+ 0 - 41
simulation/run_maxwell_simulation_filters_statistics.sh

@@ -1,41 +0,0 @@
-#! bin/bash
-
-# file which contains model names we want to use for simulation
-simulate_models="simulate_models.csv"
-
-# selection of four scenes (only maxwell)
-scenes="A,D,G,H"
-
-size="26"
-declare -A featuresSize
-animals=( ["filters_statistics"]="26" ["svd"]="200" ["filters_statistics_sobel"]="27" ["svd_sobel"]="201")
-
-# for feature in {"lab","mscn","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2","ica_diff","svd_trunc_diff","ipca_diff","svd_reconstruct"}; do
-feature="filters_statistics"
-
-for nb_zones in {4,6,8,10,11,12}; do
-    for mode in {"svd","svdn","svdne"}; do
-        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
-
-            FILENAME="data/${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_all"
-            MODEL_NAME="${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_all"
-            CUSTOM_MIN_MAX_FILENAME="N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_all_min_max"
-
-            #echo $MODEL_NAME
-
-            # only compute if necessary (perhaps server will fall.. Just in case)
-            if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
-
-                #echo "Run simulation for ${MODEL_NAME}..."
-
-                # Use of already generated model
-                # python generate/generate_data_model_random.py --output ${FILENAME} --interval "0,${size}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
-                # python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
-
-                echo python prediction/predict_seuil_expe_maxwell_curve.py --interval "0,${size}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --custom ${CUSTOM_MIN_MAX_FILENAME}
-
-                # python others/save_model_result_in_md_maxwell.py --interval "0,${size}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
-            fi
-        done
-    done
-done

+ 3 - 4
train_model.py

@@ -89,7 +89,6 @@ def main():
     model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
     indices = model.support_
 
-    print(model.n_features_)
     selected_indices = [(i+1) for i in np.arange(len(indices)) if indices[i] == True]
     print(selected_indices)
 
@@ -115,9 +114,9 @@ def main():
 
     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
 
-    if p_choice == 'rfe_svm_model': 
-        X_test = X_test.loc[:, selected_indices]
-        X_val = X_val.loc[:, selected_indices]
+    # update data using indices values
+    X_test = X_test.loc[:, selected_indices]
+    X_val = X_val.loc[:, selected_indices]
 
     y_test_model = model.estimator_.predict(X_test)
     y_val_model = model.estimator_.predict(X_val)