Browse Source

Add modules and other scripts

Jérôme BUISINE 1 year ago
parent
commit
ac0626b314

+ 3 - 0
.gitmodules

@@ -0,0 +1,3 @@
+[submodule "modules"]
+	path = modules
+	url = https://github.com/prise-3d/Thesis-CommonModules.git

+ 0 - 1
README.md

@@ -27,7 +27,6 @@ python generate/generate_all_data.py --feature all
 
 ### Multiple directories and scripts are available:
 
-
 - **dataset/\***: all scene files information (zones of each scene, SVD descriptor files information and so on...).
 - **train_model.py**: script used to run a specific model among those available.
 - **data/\***: folder which will contain all *.train* & *.test* files in order to train model.

+ 0 - 338
data_attributes.py

@@ -28,344 +28,6 @@ def get_svd_data(data_type, block):
     Method which returns the data type expected
     """
 
-    if data_type == 'lab':
-
-        block_file_path = '/tmp/lab_img.png'
-        block.save(block_file_path)
-        data = transform.get_LAB_L_SVD_s(Image.open(block_file_path))
-
-    if data_type == 'mscn':
-
-        img_mscn_revisited = transform.rgb_to_mscn(block)
-
-        # save tmp as img
-        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
-        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
-        img_output.save(mscn_revisited_file_path)
-        img_block = Image.open(mscn_revisited_file_path)
-
-        # extract from temp image
-        data = compression.get_SVD_s(img_block)
-
-    """if data_type == 'mscn':
-
-        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
-        img_mscn = transform.calculate_mscn_coefficients(img_gray, 7)
-        img_mscn_norm = transform.normalize_2D_arr(img_mscn)
-
-        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
-
-        data = compression.get_SVD_s(img_mscn_gray)
-    """
-
-    if data_type == 'low_bits_6':
-
-        low_bits_6 = transform.rgb_to_LAB_L_low_bits(block, 6)
-        data = compression.get_SVD_s(low_bits_6)
-
-    if data_type == 'low_bits_5':
-
-        low_bits_5 = transform.rgb_to_LAB_L_low_bits(block, 5)
-        data = compression.get_SVD_s(low_bits_5)
-
-    if data_type == 'low_bits_4':
-
-        low_bits_4 = transform.rgb_to_LAB_L_low_bits(block, 4)
-        data = compression.get_SVD_s(low_bits_4)
-
-    if data_type == 'low_bits_3':
-
-        low_bits_3 = transform.rgb_to_LAB_L_low_bits(block, 3)
-        data = compression.get_SVD_s(low_bits_3)
-
-    if data_type == 'low_bits_2':
-
-        low_bits_2 = transform.rgb_to_LAB_L_low_bits(block, 2)
-        data = compression.get_SVD_s(low_bits_2)
-
-    if data_type == 'low_bits_4_shifted_2':
-
-        data = compression.get_SVD_s(transform.rgb_to_LAB_L_bits(block, (3, 6)))
-
-    if data_type == 'sub_blocks_stats':
-
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 4), int(height / 4)
-
-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
-
-        data = []
-
-        for sub_b in sub_blocks:
-
-            # by default use the whole lab L canal
-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
-
-            # get information we want from svd
-            data.append(np.mean(l_svd_data))
-            data.append(np.median(l_svd_data))
-            data.append(np.percentile(l_svd_data, 25))
-            data.append(np.percentile(l_svd_data, 75))
-            data.append(np.var(l_svd_data))
-
-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=100)
-            data.append(area_under_curve)
-
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
-
-    if data_type == 'sub_blocks_stats_reduced':
-
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 4), int(height / 4)
-
-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
-
-        data = []
-
-        for sub_b in sub_blocks:
-
-            # by default use the whole lab L canal
-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
-
-            # get information we want from svd
-            data.append(np.mean(l_svd_data))
-            data.append(np.median(l_svd_data))
-            data.append(np.percentile(l_svd_data, 25))
-            data.append(np.percentile(l_svd_data, 75))
-            data.append(np.var(l_svd_data))
-
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
-
-    if data_type == 'sub_blocks_area':
-
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 8), int(height / 8)
-
-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
-
-        data = []
-
-        for sub_b in sub_blocks:
-
-            # by default use the whole lab L canal
-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
-
-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
-            data.append(area_under_curve)
-
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
-
-    if data_type == 'sub_blocks_area_normed':
-
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 8), int(height / 8)
-
-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
-
-        data = []
-
-        for sub_b in sub_blocks:
-
-            # by default use the whole lab L canal
-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
-            l_svd_data = utils.normalize_arr(l_svd_data)
-
-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
-            data.append(area_under_curve)
-
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
-
-    if data_type == 'mscn_var_4':
-
-        data = _get_mscn_variance(block, (100, 100))
-
-    if data_type == 'mscn_var_16':
-
-        data = _get_mscn_variance(block, (50, 50))
-
-    if data_type == 'mscn_var_64':
-
-        data = _get_mscn_variance(block, (25, 25))
-
-    if data_type == 'mscn_var_16_max':
-
-        data = _get_mscn_variance(block, (50, 50))
-        data = np.asarray(data)
-        size = int(len(data) / 4)
-        indices = data.argsort()[-size:][::-1]
-        data = data[indices]
-
-    if data_type == 'mscn_var_64_max':
-
-        data = _get_mscn_variance(block, (25, 25))
-        data = np.asarray(data)
-        size = int(len(data) / 4)
-        indices = data.argsort()[-size:][::-1]
-        data = data[indices]
-
-    if data_type == 'ica_diff':
-        current_image = transform.get_LAB_L(block)
-
-        ica = FastICA(n_components=50)
-        ica.fit(current_image)
-
-        image_ica = ica.fit_transform(current_image)
-        image_restored = ica.inverse_transform(image_ica)
-
-        final_image = utils.normalize_2D_arr(image_restored)
-        final_image = np.array(final_image * 255, 'uint8')
-
-        sv_values = utils.normalize_arr(compression.get_SVD_s(current_image))
-        ica_sv_values = utils.normalize_arr(compression.get_SVD_s(final_image))
-
-        data = abs(np.array(sv_values) - np.array(ica_sv_values))
-
-    if data_type == 'svd_trunc_diff':
-
-        current_image = transform.get_LAB_L(block)
-
-        svd = TruncatedSVD(n_components=30, n_iter=100, random_state=42)
-        transformed_image = svd.fit_transform(current_image)
-        restored_image = svd.inverse_transform(transformed_image)
-
-        reduced_image = (current_image - restored_image)
-
-        U, s, V = compression.get_SVD(reduced_image)
-        data = s
-
-    if data_type == 'ipca_diff':
-
-        current_image = transform.get_LAB_L(block)
-
-        transformer = IncrementalPCA(n_components=20, batch_size=25)
-        transformed_image = transformer.fit_transform(current_image)
-        restored_image = transformer.inverse_transform(transformed_image)
-
-        reduced_image = (current_image - restored_image)
-
-        U, s, V = compression.get_SVD(reduced_image)
-        data = s
-
-    if data_type == 'svd_reconstruct':
-
-        reconstructed_interval = (90, 200)
-        begin, end = reconstructed_interval
-
-        lab_img = transform.get_LAB_L(block)
-        lab_img = np.array(lab_img, 'uint8')
-
-        U, s, V = lin_svd(lab_img, full_matrices=True)
-
-        smat = np.zeros((end-begin, end-begin), dtype=complex)
-        smat[:, :] = np.diag(s[begin:end])
-        output_img = np.dot(U[:, begin:end],  np.dot(smat, V[begin:end, :]))
-
-        output_img = np.array(output_img, 'uint8')
-
-        data = compression.get_SVD_s(output_img)
-
-    if 'sv_std_filters' in data_type:
-
-        # convert into lab by default to apply filters
-        lab_img = transform.get_LAB_L(block)
-        arr = np.array(lab_img)
-        images = []
-        
-        # Apply list of filter on arr
-        images.append(medfilt2d(arr, [3, 3]))
-        images.append(medfilt2d(arr, [5, 5]))
-        images.append(wiener(arr, [3, 3]))
-        images.append(wiener(arr, [5, 5]))
-        
-        # By default computation of current block image
-        s_arr = compression.get_SVD_s(arr)
-        sv_vector = [s_arr]
-
-        # for each new image apply SVD and get SV 
-        for img in images:
-            s = compression.get_SVD_s(img)
-            sv_vector.append(s)
-            
-        sv_array = np.array(sv_vector)
-        
-        _, len = sv_array.shape
-        
-        sv_std = []
-        
-        # normalize each SV vectors and compute standard deviation for each sub vectors
-        for i in range(len):
-            sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
-            sv_std.append(np.std(sv_array[:, i]))
-        
-        indices = []
-
-        if 'lowest' in data_type:
-            indices = utils.get_indices_of_lowest_values(sv_std, 200)
-
-        if 'highest' in data_type:
-            indices = utils.get_indices_of_highest_values(sv_std, 200)
-
-        # data are arranged following std trend computed
-        data = s_arr[indices]
-
-    # with the use of wavelet
-    if 'wave_sv_std_filters' in data_type:
-
-        # convert into lab by default to apply filters
-        lab_img = transform.get_LAB_L(block)
-        arr = np.array(lab_img)
-        images = []
-        
-        # Apply list of filter on arr
-        images.append(medfilt2d(arr, [3, 3]))
-        images.append(medfilt2d(arr, [5, 5]))
-        images.append(medfilt2d(arr, [7, 7]))
-        images.append(wiener(arr, [3, 3]))
-        images.append(wiener(arr, [4, 4]))
-        images.append(wiener(arr, [5, 5]))
-        images.append(w2d(arr, 'haar', 2))
-        images.append(w2d(arr, 'haar', 3))
-        images.append(w2d(arr, 'haar', 4))
-        
-        # By default computation of current block image
-        s_arr = compression.get_SVD_s(arr)
-        sv_vector = [s_arr]
-
-        # for each new image apply SVD and get SV 
-        for img in images:
-            s = compression.get_SVD_s(img)
-            sv_vector.append(s)
-            
-        sv_array = np.array(sv_vector)
-        
-        _, len = sv_array.shape
-        
-        sv_std = []
-        
-        # normalize each SV vectors and compute standard deviation for each sub vectors
-        for i in range(len):
-            sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
-            sv_std.append(np.std(sv_array[:, i]))
-        
-        indices = []
-
-        if 'lowest' in data_type:
-            indices = utils.get_indices_of_lowest_values(sv_std, 200)
-
-        if 'highest' in data_type:
-            indices = utils.get_indices_of_highest_values(sv_std, 200)
-
-        # data are arranged following std trend computed
-        data = s_arr[indices]
-
     if 'filters_statistics' in data_type:
 
         img_width, img_height = 200, 200

+ 1 - 0
modules

@@ -0,0 +1 @@
+Subproject commit d5de038bdccaa58ff2123d5227482dc6c0ea2500

+ 93 - 0
others/save_model_result_in_md.py

@@ -0,0 +1,93 @@
+# main imports
+import numpy as np
+import sys, os, argparse
+import subprocess
+import time
+
+# models imports
+from sklearn.externals import joblib
+
+# image processing imports
+from PIL import Image
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+
+# Module-level settings read from custom_config and used by main() below
+threshold_map_folder      = cfg.threshold_map_folder  # folder searched for per-model threshold map files
+threshold_map_file_prefix = cfg.threshold_map_folder + "_"  # prefix removed from map file names to build section titles
+
+markdowns_folder          = cfg.models_information_folder  # destination folder of the generated markdown reports
+zones                     = cfg.zones_indices  # not referenced by main() in this script
+
+current_dirpath = os.getcwd()  # working directory at import time; not referenced by main() in this script
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Display SVD data of scene zone")
+
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--feature', type=str, help='Feature data choice', choices=cfg.features_choices_labels)
+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
+
+    args = parser.parse_args()
+    
+    p_interval   = list(map(int, args.interval.split(',')))
+    p_model_file = args.model
+    p_metric     = args.metric
+    p_mode       = args.mode
+
+
+    # call model and get global result in scenes
+
+    begin, end = p_interval
+
+    bash_cmd = "bash others/testModelByScene.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_metric + "'"
+    print(bash_cmd)
+
+    ## call command ##
+    p = subprocess.Popen(bash_cmd, stdout=subprocess.PIPE, shell=True)
+
+    (output, err) = p.communicate()
+
+    ## Wait for result ##
+    p_status = p.wait()
+
+    if not os.path.exists(markdowns_folder):
+        os.makedirs(markdowns_folder)
+
+    # get model name to construct model
+    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace('.joblib', '.md'))
+
+    with open(md_model_path, 'w') as f:
+        f.write(output.decode("utf-8"))
+
+        # read each threshold_map information if exists
+        model_map_info_path = os.path.join(threshold_map_folder, p_model_file.replace('saved_models/', ''))
+
+        if not os.path.exists(model_map_info_path):
+            f.write('\n\n No threshold map information')
+        else:
+            maps_files = os.listdir(model_map_info_path)
+
+            # get all map information
+            for t_map_file in maps_files:
+
+                file_path = os.path.join(model_map_info_path, t_map_file)
+                with open(file_path, 'r') as map_file:
+
+                    title_scene =  t_map_file.replace(threshold_map_file_prefix, '')
+                    f.write('\n\n## ' + title_scene + '\n')
+                    content = map_file.readlines()
+
+                    # getting each map line information
+                    for line in content:
+                        f.write(line)
+
+        f.close()
+
+if __name__== "__main__":
+    main()

+ 320 - 0
others/save_model_result_in_md_maxwell.py

@@ -0,0 +1,320 @@
+# main imports
+import numpy as np
+import pandas as pd
+
+import sys, os, argparse
+import subprocess
+import time
+import json
+
+# models imports
+from sklearn.utils import shuffle
+from sklearn.externals import joblib
+from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import train_test_split
+
+from keras.models import Sequential
+from keras.layers import Conv1D, MaxPooling1D
+from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras.wrappers.scikit_learn import KerasClassifier
+from keras import backend as K
+from keras.models import model_from_json
+
+# image processing imports
+from ipfml import processing
+from PIL import Image
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+
+# Module-level settings read from custom_config and used by main() below
+threshold_map_folder        = cfg.threshold_map_folder  # folder searched for per-model threshold map files
+threshold_map_file_prefix   = cfg.threshold_map_folder + "_"  # prefix removed from map file names to build section titles
+
+markdowns_folder            = cfg.models_information_folder  # destination folder of the generated markdown reports
+final_csv_model_comparisons = cfg.csv_model_comparisons_filename  # name of the CSV file receiving one score line per run
+models_name                 = cfg.models_names_list  # known model names looked up in the model file name
+
+zones                       = cfg.zones_indices  # not referenced by main() in this script
+
+current_dirpath = os.getcwd()  # working directory at import time; not referenced by main() in this script
+
+
+def main():
+
+    kind_model = 'keras'
+    model_ext = ''
+    
+    parser = argparse.ArgumentParser(description="Display SVD data of scene zone")
+
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=cfg.metric_choices_labels)
+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
+
+    args = parser.parse_args()
+
+    p_interval   = list(map(int, args.interval.split(',')))
+    p_model_file = args.model
+    p_metric     = args.metric
+    p_mode       = args.mode
+
+
+    # call model and get global result in scenes
+    begin, end = p_interval
+
+    bash_cmd = "bash others/testModelByScene_maxwell.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_metric + "'"
+
+    print(bash_cmd)
+
+    ## call command ##
+    p = subprocess.Popen(bash_cmd, stdout=subprocess.PIPE, shell=True)
+
+    (output, err) = p.communicate()
+
+    ## Wait for result ##
+    p_status = p.wait()
+
+    if not os.path.exists(markdowns_folder):
+        os.makedirs(markdowns_folder)
+
+    # get model name to construct model
+
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+        model_ext = '.joblib'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+        model_ext = '.json'
+
+    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace(model_ext, '.md'))
+
+    with open(md_model_path, 'w') as f:
+        f.write(output.decode("utf-8"))
+
+        # read each threshold_map information if exists
+        model_map_info_path = os.path.join(threshold_map_folder, p_model_file.replace('saved_models/', ''))
+
+        if not os.path.exists(model_map_info_path):
+            f.write('\n\n No threshold map information')
+        else:
+            maps_files = os.listdir(model_map_info_path)
+
+            # get all map information
+            for t_map_file in maps_files:
+
+                file_path = os.path.join(model_map_info_path, t_map_file)
+                with open(file_path, 'r') as map_file:
+
+                    title_scene =  t_map_file.replace(threshold_map_file_prefix, '')
+                    f.write('\n\n## ' + title_scene + '\n')
+                    content = map_file.readlines()
+
+                    # getting each map line information
+                    for line in content:
+                        f.write(line)
+
+        f.close()
+
+    # Keep model information to compare
+    current_model_name = p_model_file.split('/')[-1].replace(model_ext, '')
+
+    # Prepare writing in .csv file into results folder
+    output_final_file_path = os.path.join(cfg.results_information_folder, final_csv_model_comparisons)
+    output_final_file = open(output_final_file_path, "a")
+
+    print(current_model_name)
+    # reconstruct data filename
+    for name in models_name:
+        if name in current_model_name:
+            data_filename = current_model_name
+            current_data_file_path = os.path.join('data', data_filename)
+
+    print("Current data file ")
+    print(current_data_file_path)
+    model_scores = []
+
+    ########################
+    # 1. Get and prepare data
+    ########################
+    dataset_train = pd.read_csv(current_data_file_path + '.train', header=None, sep=";")
+    dataset_test = pd.read_csv(current_data_file_path + '.test', header=None, sep=";")
+
+    # default first shuffle of data
+    dataset_train = shuffle(dataset_train)
+    dataset_test = shuffle(dataset_test)
+
+    # get dataset with equal number of classes occurences
+    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
+    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
+    nb_noisy_train = len(noisy_df_train.index)
+
+    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
+    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
+    nb_noisy_test = len(noisy_df_test.index)
+
+    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+
+    # shuffle data another time
+    final_df_train = shuffle(final_df_train)
+    final_df_test = shuffle(final_df_test)
+
+    final_df_train_size = len(final_df_train.index)
+    final_df_test_size = len(final_df_test.index)
+
+    # use of the whole data set for training
+    x_dataset_train = final_df_train.ix[:,1:]
+    x_dataset_test = final_df_test.ix[:,1:]
+
+    y_dataset_train = final_df_train.ix[:,0]
+    y_dataset_test = final_df_test.ix[:,0]
+
+    #######################
+    # 2. Getting model
+    #######################
+
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        metrics=['accuracy'])
+
+        # reshape all input data
+        x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), end, 1)
+        x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), end, 1)
+
+
+    if kind_model == 'sklearn':
+        model = joblib.load(p_model_file)
+
+    #######################
+    # 3. Fit model : use of cross validation to fit model
+    #######################
+
+    if kind_model == 'keras':
+        model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
+
+    if kind_model == 'sklearn':
+        model.fit(x_dataset_train, y_dataset_train)
+
+        train_accuracy = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
+
+    ######################
+    # 4. Test : Validation and test dataset from .test dataset
+    ######################
+
+    # we need to specify validation size to 20% of whole dataset
+    val_set_size = int(final_df_train_size/3)
+    test_set_size = val_set_size
+
+    total_validation_size = val_set_size + test_set_size
+
+    if final_df_test_size > total_validation_size:
+        x_dataset_test = x_dataset_test[0:total_validation_size]
+        y_dataset_test = y_dataset_test[0:total_validation_size]
+
+    X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
+
+    if kind_model == 'keras':
+        y_test_model = model.predict_classes(X_test)
+        y_val_model = model.predict_classes(X_val)
+
+        y_train_model = model.predict_classes(x_dataset_train)
+
+        train_accuracy = accuracy_score(y_dataset_train, y_train_model)
+
+    if kind_model == 'sklearn':
+        y_test_model = model.predict(X_test)
+        y_val_model = model.predict(X_val)
+
+        y_train_model = model.predict(x_dataset_train)
+
+    val_accuracy = accuracy_score(y_val, y_val_model)
+    test_accuracy = accuracy_score(y_test, y_test_model)
+
+    train_f1 = f1_score(y_dataset_train, y_train_model)
+    train_recall = recall_score(y_dataset_train, y_train_model)
+    train_roc_auc = roc_auc_score(y_dataset_train, y_train_model)
+
+    val_f1 = f1_score(y_val, y_val_model)
+    val_recall = recall_score(y_val, y_val_model)
+    val_roc_auc = roc_auc_score(y_val, y_val_model)
+
+    test_f1 = f1_score(y_test, y_test_model)
+    test_recall = recall_score(y_test, y_test_model)
+    test_roc_auc = roc_auc_score(y_test, y_test_model)
+
+    if kind_model == 'keras':
+        # stats of all dataset
+        all_x_data = np.concatenate([x_dataset_train, X_test, X_val])
+        all_y_data = np.concatenate([y_dataset_train, y_test, y_val])
+        all_y_model = model.predict_classes(all_x_data)
+
+    if kind_model == 'sklearn':
+        # stats of all dataset
+        all_x_data = pd.concat([x_dataset_train, X_test, X_val])
+        all_y_data = pd.concat([y_dataset_train, y_test, y_val])
+        all_y_model = model.predict(all_x_data)
+
+    all_accuracy = accuracy_score(all_y_data, all_y_model)
+    all_f1_score = f1_score(all_y_data, all_y_model)
+    all_recall_score = recall_score(all_y_data, all_y_model)
+    all_roc_auc_score = roc_auc_score(all_y_data, all_y_model)
+
+    # stats of dataset sizes
+    total_samples = final_df_train_size + val_set_size + test_set_size
+
+    model_scores.append(final_df_train_size)
+    model_scores.append(val_set_size)
+    model_scores.append(test_set_size)
+
+    model_scores.append(final_df_train_size / total_samples)
+    model_scores.append(val_set_size / total_samples)
+    model_scores.append(test_set_size / total_samples)
+
+    # add of scores
+    model_scores.append(train_accuracy)
+    model_scores.append(val_accuracy)
+    model_scores.append(test_accuracy)
+    model_scores.append(all_accuracy)
+
+    model_scores.append(train_f1)
+    model_scores.append(train_recall)
+    model_scores.append(train_roc_auc)
+
+    model_scores.append(val_f1)
+    model_scores.append(val_recall)
+    model_scores.append(val_roc_auc)
+
+    model_scores.append(test_f1)
+    model_scores.append(test_recall)
+    model_scores.append(test_roc_auc)
+
+    model_scores.append(all_f1_score)
+    model_scores.append(all_recall_score)
+    model_scores.append(all_roc_auc_score)
+
+    # TODO : improve...
+    # check if it's always the case...
+    nb_zones = current_data_file_path.split('_')[7]
+
+    final_file_line = current_model_name + '; ' + str(end - begin) + '; ' + str(begin) + '; ' + str(end) + '; ' + str(nb_zones) + '; ' + p_metric + '; ' + p_mode
+
+    for s in model_scores:
+        final_file_line += '; ' + str(s)
+
+    output_final_file.write(final_file_line + '\n')
+
+
+if __name__== "__main__":
+    main()

+ 62 - 0
others/testModelByScene.sh

@@ -0,0 +1,62 @@
+#! bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No first argument supplied"
+    echo "Need of begin vector index"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No second argument supplied"
+    echo "Need of end vector index"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No third argument supplied"
+    echo "Need of model input"
+    exit 1
+fi
+
+if [ -z "$4" ]
+  then
+    echo "No fourth argument supplied"
+    echo "Need of mode file : 'svd', 'svdn', svdne"
+    exit 1
+fi
+
+if [ -z "$5" ]
+  then
+    echo "No fifth argument supplied"
+    echo "Need of metric : 'lab', 'mscn'"
+    exit 1
+fi
+
+INPUT_BEGIN=$1
+INPUT_END=$2
+INPUT_MODEL=$3
+INPUT_MODE=$4
+INPUT_METRIC=$5
+
+zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
+
+echo "**Model :** ${INPUT_MODEL}"
+echo "**Metric :** ${INPUT_METRIC}"
+echo "**Mode :** ${INPUT_MODE}"
+echo "**Vector range :** [${INPUT_BEGIN}, ${INPUT_END}]"
+echo ""
+echo " # | GLOBAL | NOISY | NOT NOISY"
+echo "---|--------|-------|----------"
+
+for scene in {"A","B","C","D","E","F","G","H","I"}; do
+
+  FILENAME="data/data_${INPUT_MODE}_${INPUT_METRIC}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
+
+  python generate/generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
+
+  python prediction/prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}
+
+done

+ 70 - 0
others/testModelByScene_maxwell.sh

@@ -0,0 +1,70 @@
+#! bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No first argument supplied"
+    echo "Need of begin vector index"
+    exit 1
+fi
+
+if [ -z "$2" ]
+  then
+    echo "No second argument supplied"
+    echo "Need of end vector index"
+    exit 1
+fi
+
+if [ -z "$3" ]
+  then
+    echo "No third argument supplied"
+    echo "Need of model input"
+    exit 1
+fi
+
+if [ -z "$4" ]
+  then
+    echo "No fourth argument supplied"
+    echo "Need of mode file : 'svd', 'svdn', svdne"
+    exit 1
+fi
+
+if [ -z "$5" ]
+  then
+    echo "No fifth argument supplied"
+    echo "Need of metric : 'lab', 'mscn'"
+    exit 1
+fi
+
+if [ -z "$6" ]
+  then
+    echo "No sixth argument supplied"
+fi
+
+
+
+INPUT_BEGIN=$1
+INPUT_END=$2
+INPUT_MODEL=$3
+INPUT_MODE=$4
+INPUT_METRIC=$5
+
+zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
+
+echo "**Model :** ${INPUT_MODEL}"
+echo "**Metric :** ${INPUT_METRIC}"
+echo "**Mode :** ${INPUT_MODE}"
+echo "**Vector range :** [${INPUT_BEGIN}, ${INPUT_END}]"
+echo ""
+echo " # | GLOBAL | NOISY | NOT NOISY"
+echo "---|--------|-------|----------"
+
+# only take maxwell scenes
+for scene in {"A","D","G","H"}; do
+
+  FILENAME="data/data_${INPUT_MODE}_${INPUT_METRIC}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
+
+  python generate/generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1
+
+  python prediction/prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}
+
+done