il y a 5 ans · ac0626b314
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
 
				+[submodule "modules"]
			
 
				+	path = modules
			
 
				+	url = https://github.com/prise-3d/Thesis-CommonModules.git
			
--- a/README.md
+++ b/README.md
@@ -27,7 +27,6 @@ python generate/generate_all_data.py --feature all
 
				 
			
 
				 ### Multiple directories and scripts are available:
			
 
				 
			
 
				-
			
 
				 - **dataset/\***: all scene files information (zones of each scene, SVD descriptor files information and so on...).
			
 
				 - **train_model.py**: script used to run one of the available models.
			
 
				 - **data/\***: folder that will contain all the *.train* and *.test* files used to train a model.
			
--- a/data_attributes.py
+++ b/data_attributes.py
@@ -28,344 +28,6 @@ def get_svd_data(data_type, block):
 
				     Method which returns the data type expected
			
 
				     """
			
 
				 
			
 
				-    if data_type == 'lab':
			
 
				-
			
 
				-        block_file_path = '/tmp/lab_img.png'
			
 
				-        block.save(block_file_path)
			
 
				-        data = transform.get_LAB_L_SVD_s(Image.open(block_file_path))
			
 
				-
			
 
				-    if data_type == 'mscn':
			
 
				-
			
 
				-        img_mscn_revisited = transform.rgb_to_mscn(block)
			
 
				-
			
 
				-        # save tmp as img
			
 
				-        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
			
 
				-        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
			
 
				-        img_output.save(mscn_revisited_file_path)
			
 
				-        img_block = Image.open(mscn_revisited_file_path)
			
 
				-
			
 
				-        # extract from temp image
			
 
				-        data = compression.get_SVD_s(img_block)
			
 
				-
			
 
				-    """if data_type == 'mscn':
			
 
				-
			
 
				-        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
			
 
				-        img_mscn = transform.calculate_mscn_coefficients(img_gray, 7)
			
 
				-        img_mscn_norm = transform.normalize_2D_arr(img_mscn)
			
 
				-
			
 
				-        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
			
 
				-
			
 
				-        data = compression.get_SVD_s(img_mscn_gray)
			
 
				-    """
			
 
				-
			
 
				-    if data_type == 'low_bits_6':
			
 
				-
			
 
				-        low_bits_6 = transform.rgb_to_LAB_L_low_bits(block, 6)
			
 
				-        data = compression.get_SVD_s(low_bits_6)
			
 
				-
			
 
				-    if data_type == 'low_bits_5':
			
 
				-
			
 
				-        low_bits_5 = transform.rgb_to_LAB_L_low_bits(block, 5)
			
 
				-        data = compression.get_SVD_s(low_bits_5)
			
 
				-
			
 
				-    if data_type == 'low_bits_4':
			
 
				-
			
 
				-        low_bits_4 = transform.rgb_to_LAB_L_low_bits(block, 4)
			
 
				-        data = compression.get_SVD_s(low_bits_4)
			
 
				-
			
 
				-    if data_type == 'low_bits_3':
			
 
				-
			
 
				-        low_bits_3 = transform.rgb_to_LAB_L_low_bits(block, 3)
			
 
				-        data = compression.get_SVD_s(low_bits_3)
			
 
				-
			
 
				-    if data_type == 'low_bits_2':
			
 
				-
			
 
				-        low_bits_2 = transform.rgb_to_LAB_L_low_bits(block, 2)
			
 
				-        data = compression.get_SVD_s(low_bits_2)
			
 
				-
			
 
				-    if data_type == 'low_bits_4_shifted_2':
			
 
				-
			
 
				-        data = compression.get_SVD_s(transform.rgb_to_LAB_L_bits(block, (3, 6)))
			
 
				-
			
 
				-    if data_type == 'sub_blocks_stats':
			
 
				-
			
 
				-        block = np.asarray(block)
			
 
				-        width, height, _= block.shape
			
 
				-        sub_width, sub_height = int(width / 4), int(height / 4)
			
 
				-
			
 
				-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
			
 
				-
			
 
				-        data = []
			
 
				-
			
 
				-        for sub_b in sub_blocks:
			
 
				-
			
 
				-            # by default use the whole lab L canal
			
 
				-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
			
 
				-
			
 
				-            # get information we want from svd
			
 
				-            data.append(np.mean(l_svd_data))
			
 
				-            data.append(np.median(l_svd_data))
			
 
				-            data.append(np.percentile(l_svd_data, 25))
			
 
				-            data.append(np.percentile(l_svd_data, 75))
			
 
				-            data.append(np.var(l_svd_data))
			
 
				-
			
 
				-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=100)
			
 
				-            data.append(area_under_curve)
			
 
				-
			
 
				-        # convert into numpy array after computing all stats
			
 
				-        data = np.asarray(data)
			
 
				-
			
 
				-    if data_type == 'sub_blocks_stats_reduced':
			
 
				-
			
 
				-        block = np.asarray(block)
			
 
				-        width, height, _= block.shape
			
 
				-        sub_width, sub_height = int(width / 4), int(height / 4)
			
 
				-
			
 
				-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
			
 
				-
			
 
				-        data = []
			
 
				-
			
 
				-        for sub_b in sub_blocks:
			
 
				-
			
 
				-            # by default use the whole lab L canal
			
 
				-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
			
 
				-
			
 
				-            # get information we want from svd
			
 
				-            data.append(np.mean(l_svd_data))
			
 
				-            data.append(np.median(l_svd_data))
			
 
				-            data.append(np.percentile(l_svd_data, 25))
			
 
				-            data.append(np.percentile(l_svd_data, 75))
			
 
				-            data.append(np.var(l_svd_data))
			
 
				-
			
 
				-        # convert into numpy array after computing all stats
			
 
				-        data = np.asarray(data)
			
 
				-
			
 
				-    if data_type == 'sub_blocks_area':
			
 
				-
			
 
				-        block = np.asarray(block)
			
 
				-        width, height, _= block.shape
			
 
				-        sub_width, sub_height = int(width / 8), int(height / 8)
			
 
				-
			
 
				-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
			
 
				-
			
 
				-        data = []
			
 
				-
			
 
				-        for sub_b in sub_blocks:
			
 
				-
			
 
				-            # by default use the whole lab L canal
			
 
				-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
			
 
				-
			
 
				-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
			
 
				-            data.append(area_under_curve)
			
 
				-
			
 
				-        # convert into numpy array after computing all stats
			
 
				-        data = np.asarray(data)
			
 
				-
			
 
				-    if data_type == 'sub_blocks_area_normed':
			
 
				-
			
 
				-        block = np.asarray(block)
			
 
				-        width, height, _= block.shape
			
 
				-        sub_width, sub_height = int(width / 8), int(height / 8)
			
 
				-
			
 
				-        sub_blocks = segmentation.divide_in_blocks(block, (sub_width, sub_height))
			
 
				-
			
 
				-        data = []
			
 
				-
			
 
				-        for sub_b in sub_blocks:
			
 
				-
			
 
				-            # by default use the whole lab L canal
			
 
				-            l_svd_data = np.array(transform.get_LAB_L_SVD_s(sub_b))
			
 
				-            l_svd_data = utils.normalize_arr(l_svd_data)
			
 
				-
			
 
				-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
			
 
				-            data.append(area_under_curve)
			
 
				-
			
 
				-        # convert into numpy array after computing all stats
			
 
				-        data = np.asarray(data)
			
 
				-
			
 
				-    if data_type == 'mscn_var_4':
			
 
				-
			
 
				-        data = _get_mscn_variance(block, (100, 100))
			
 
				-
			
 
				-    if data_type == 'mscn_var_16':
			
 
				-
			
 
				-        data = _get_mscn_variance(block, (50, 50))
			
 
				-
			
 
				-    if data_type == 'mscn_var_64':
			
 
				-
			
 
				-        data = _get_mscn_variance(block, (25, 25))
			
 
				-
			
 
				-    if data_type == 'mscn_var_16_max':
			
 
				-
			
 
				-        data = _get_mscn_variance(block, (50, 50))
			
 
				-        data = np.asarray(data)
			
 
				-        size = int(len(data) / 4)
			
 
				-        indices = data.argsort()[-size:][::-1]
			
 
				-        data = data[indices]
			
 
				-
			
 
				-    if data_type == 'mscn_var_64_max':
			
 
				-
			
 
				-        data = _get_mscn_variance(block, (25, 25))
			
 
				-        data = np.asarray(data)
			
 
				-        size = int(len(data) / 4)
			
 
				-        indices = data.argsort()[-size:][::-1]
			
 
				-        data = data[indices]
			
 
				-
			
 
				-    if data_type == 'ica_diff':
			
 
				-        current_image = transform.get_LAB_L(block)
			
 
				-
			
 
				-        ica = FastICA(n_components=50)
			
 
				-        ica.fit(current_image)
			
 
				-
			
 
				-        image_ica = ica.fit_transform(current_image)
			
 
				-        image_restored = ica.inverse_transform(image_ica)
			
 
				-
			
 
				-        final_image = utils.normalize_2D_arr(image_restored)
			
 
				-        final_image = np.array(final_image * 255, 'uint8')
			
 
				-
			
 
				-        sv_values = utils.normalize_arr(compression.get_SVD_s(current_image))
			
 
				-        ica_sv_values = utils.normalize_arr(compression.get_SVD_s(final_image))
			
 
				-
			
 
				-        data = abs(np.array(sv_values) - np.array(ica_sv_values))
			
 
				-
			
 
				-    if data_type == 'svd_trunc_diff':
			
 
				-
			
 
				-        current_image = transform.get_LAB_L(block)
			
 
				-
			
 
				-        svd = TruncatedSVD(n_components=30, n_iter=100, random_state=42)
			
 
				-        transformed_image = svd.fit_transform(current_image)
			
 
				-        restored_image = svd.inverse_transform(transformed_image)
			
 
				-
			
 
				-        reduced_image = (current_image - restored_image)
			
 
				-
			
 
				-        U, s, V = compression.get_SVD(reduced_image)
			
 
				-        data = s
			
 
				-
			
 
				-    if data_type == 'ipca_diff':
			
 
				-
			
 
				-        current_image = transform.get_LAB_L(block)
			
 
				-
			
 
				-        transformer = IncrementalPCA(n_components=20, batch_size=25)
			
 
				-        transformed_image = transformer.fit_transform(current_image)
			
 
				-        restored_image = transformer.inverse_transform(transformed_image)
			
 
				-
			
 
				-        reduced_image = (current_image - restored_image)
			
 
				-
			
 
				-        U, s, V = compression.get_SVD(reduced_image)
			
 
				-        data = s
			
 
				-
			
 
				-    if data_type == 'svd_reconstruct':
			
 
				-
			
 
				-        reconstructed_interval = (90, 200)
			
 
				-        begin, end = reconstructed_interval
			
 
				-
			
 
				-        lab_img = transform.get_LAB_L(block)
			
 
				-        lab_img = np.array(lab_img, 'uint8')
			
 
				-
			
 
				-        U, s, V = lin_svd(lab_img, full_matrices=True)
			
 
				-
			
 
				-        smat = np.zeros((end-begin, end-begin), dtype=complex)
			
 
				-        smat[:, :] = np.diag(s[begin:end])
			
 
				-        output_img = np.dot(U[:, begin:end],  np.dot(smat, V[begin:end, :]))
			
 
				-
			
 
				-        output_img = np.array(output_img, 'uint8')
			
 
				-
			
 
				-        data = compression.get_SVD_s(output_img)
			
 
				-
			
 
				-    if 'sv_std_filters' in data_type:
			
 
				-
			
 
				-        # convert into lab by default to apply filters
			
 
				-        lab_img = transform.get_LAB_L(block)
			
 
				-        arr = np.array(lab_img)
			
 
				-        images = []
			
 
				-        
			
 
				-        # Apply list of filter on arr
			
 
				-        images.append(medfilt2d(arr, [3, 3]))
			
 
				-        images.append(medfilt2d(arr, [5, 5]))
			
 
				-        images.append(wiener(arr, [3, 3]))
			
 
				-        images.append(wiener(arr, [5, 5]))
			
 
				-        
			
 
				-        # By default computation of current block image
			
 
				-        s_arr = compression.get_SVD_s(arr)
			
 
				-        sv_vector = [s_arr]
			
 
				-
			
 
				-        # for each new image apply SVD and get SV 
			
 
				-        for img in images:
			
 
				-            s = compression.get_SVD_s(img)
			
 
				-            sv_vector.append(s)
			
 
				-            
			
 
				-        sv_array = np.array(sv_vector)
			
 
				-        
			
 
				-        _, len = sv_array.shape
			
 
				-        
			
 
				-        sv_std = []
			
 
				-        
			
 
				-        # normalize each SV vectors and compute standard deviation for each sub vectors
			
 
				-        for i in range(len):
			
 
				-            sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
			
 
				-            sv_std.append(np.std(sv_array[:, i]))
			
 
				-        
			
 
				-        indices = []
			
 
				-
			
 
				-        if 'lowest' in data_type:
			
 
				-            indices = utils.get_indices_of_lowest_values(sv_std, 200)
			
 
				-
			
 
				-        if 'highest' in data_type:
			
 
				-            indices = utils.get_indices_of_highest_values(sv_std, 200)
			
 
				-
			
 
				-        # data are arranged following std trend computed
			
 
				-        data = s_arr[indices]
			
 
				-
			
 
				-    # with the use of wavelet
			
 
				-    if 'wave_sv_std_filters' in data_type:
			
 
				-
			
 
				-        # convert into lab by default to apply filters
			
 
				-        lab_img = transform.get_LAB_L(block)
			
 
				-        arr = np.array(lab_img)
			
 
				-        images = []
			
 
				-        
			
 
				-        # Apply list of filter on arr
			
 
				-        images.append(medfilt2d(arr, [3, 3]))
			
 
				-        images.append(medfilt2d(arr, [5, 5]))
			
 
				-        images.append(medfilt2d(arr, [7, 7]))
			
 
				-        images.append(wiener(arr, [3, 3]))
			
 
				-        images.append(wiener(arr, [4, 4]))
			
 
				-        images.append(wiener(arr, [5, 5]))
			
 
				-        images.append(w2d(arr, 'haar', 2))
			
 
				-        images.append(w2d(arr, 'haar', 3))
			
 
				-        images.append(w2d(arr, 'haar', 4))
			
 
				-        
			
 
				-        # By default computation of current block image
			
 
				-        s_arr = compression.get_SVD_s(arr)
			
 
				-        sv_vector = [s_arr]
			
 
				-
			
 
				-        # for each new image apply SVD and get SV 
			
 
				-        for img in images:
			
 
				-            s = compression.get_SVD_s(img)
			
 
				-            sv_vector.append(s)
			
 
				-            
			
 
				-        sv_array = np.array(sv_vector)
			
 
				-        
			
 
				-        _, len = sv_array.shape
			
 
				-        
			
 
				-        sv_std = []
			
 
				-        
			
 
				-        # normalize each SV vectors and compute standard deviation for each sub vectors
			
 
				-        for i in range(len):
			
 
				-            sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
			
 
				-            sv_std.append(np.std(sv_array[:, i]))
			
 
				-        
			
 
				-        indices = []
			
 
				-
			
 
				-        if 'lowest' in data_type:
			
 
				-            indices = utils.get_indices_of_lowest_values(sv_std, 200)
			
 
				-
			
 
				-        if 'highest' in data_type:
			
 
				-            indices = utils.get_indices_of_highest_values(sv_std, 200)
			
 
				-
			
 
				-        # data are arranged following std trend computed
			
 
				-        data = s_arr[indices]
			
 
				-
			
 
				     if 'filters_statistics' in data_type:
			
 
				 
			
 
				         img_width, img_height = 200, 200
			
--- a/modules
+++ b/modules
@@ -0,0 +1 @@
 
				+Subproject commit d5de038bdccaa58ff2123d5227482dc6c0ea2500
			
--- a/others/save_model_result_in_md.py
+++ b/others/save_model_result_in_md.py
@@ -0,0 +1,93 @@
 
				+# main imports
			
 
				+import numpy as np
			
 
				+import sys, os, argparse
			
 
				+import subprocess
			
 
				+import time
			
 
				+
			
 
				+# models imports
			
 
				+from sklearn.externals import joblib
			
 
				+
			
 
				+# image processing imports
			
 
				+from PIL import Image
			
 
				+
			
 
				+# modules imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+
			
 
				# variables and parameters
				threshold_map_folder      = cfg.threshold_map_folder
				threshold_map_file_prefix = cfg.threshold_map_folder + "_"

				markdowns_folder          = cfg.models_information_folder
				zones                     = cfg.zones_indices

				current_dirpath = os.getcwd()


				def _parse_interval(raw_interval):
				    """Convert an interval string such as ``0,200`` into ``[begin, end]``.

				    Surrounding whitespace and quote characters are removed first: the
				    default value of ``--interval`` is the literal text ``"0, 200"``
				    (double quotes included), which ``int`` cannot parse as-is.

				    Args:
				        raw_interval: text holding two comma-separated integers.

				    Returns:
				        A list of two integers ``[begin, end]``.

				    Raises:
				        ValueError: if the text does not hold exactly two integers.
				    """
				    cleaned = raw_interval.strip().strip('"\'')
				    bounds = [int(bound) for bound in cleaned.split(',')]

				    if len(bounds) != 2:
				        raise ValueError("expected 'begin,end', got %r" % raw_interval)

				    return bounds


				def main():
				    """Run the per-scene test script and save its output as a markdown file.

				    Command line arguments:
				        --interval: ``begin,end`` indices forwarded to the test script.
				        --model: path of the model file; its base name (without extension)
				            names the generated ``.md`` file.
				        --feature: feature label, one of ``cfg.features_choices_labels``.
				        --mode: normalization label, one of ``cfg.normalization_choices``.

				    The standard output of ``others/testModelByScene.sh`` is written to
				    ``<markdowns_folder>/<model name>.md``. When a directory matching the
				    model exists under ``threshold_map_folder``, the content of every file
				    it holds is appended under its own ``##`` heading.
				    """
				    # local import: only this function needs it
				    import shlex

				    parser = argparse.ArgumentParser(description="Display SVD data of scene zone")

				    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
				    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)', required=True)
				    parser.add_argument('--feature', type=str, help='Feature data choice', choices=cfg.features_choices_labels, required=True)
				    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices, required=True)

				    args = parser.parse_args()

				    try:
				        begin, end = _parse_interval(args.interval)
				    except ValueError as parse_error:
				        parser.error("invalid --interval value: %s" % parse_error)

				    p_model_file = args.model
				    # the parser declares `--feature` (there is no `--metric` option)
				    p_feature    = args.feature
				    p_mode       = args.mode

				    # call model and get global result in scenes
				    # An argument list (no shell) keeps paths containing quotes or spaces
				    # from being re-interpreted by a shell.
				    command = ['bash', 'others/testModelByScene.sh',
				               str(begin), str(end), p_model_file, p_mode, p_feature]
				    print(' '.join(shlex.quote(part) for part in command))

				    process = subprocess.Popen(command, stdout=subprocess.PIPE)

				    # communicate() already waits for the process to finish
				    output, _ = process.communicate()

				    os.makedirs(markdowns_folder, exist_ok=True)

				    # get model name to construct the markdown file name
				    model_name, _ = os.path.splitext(os.path.basename(p_model_file))
				    md_model_path = os.path.join(markdowns_folder, model_name + '.md')

				    with open(md_model_path, 'w') as f:
				        f.write(output.decode("utf-8"))

				        # read each threshold_map information if exists
				        model_map_info_path = os.path.join(threshold_map_folder, p_model_file.replace('saved_models/', ''))

				        if not os.path.isdir(model_map_info_path):
				            f.write('\n\n No threshold map information')
				        else:
				            # sorted so that the report layout does not depend on listing order
				            for t_map_file in sorted(os.listdir(model_map_info_path)):

				                file_path = os.path.join(model_map_info_path, t_map_file)

				                with open(file_path, 'r') as map_file:
				                    title_scene = t_map_file.replace(threshold_map_file_prefix, '')
				                    f.write('\n\n## ' + title_scene + '\n')
				                    f.write(map_file.read())


				if __name__ == "__main__":
				    main()
			
--- a/others/save_model_result_in_md_maxwell.py
+++ b/others/save_model_result_in_md_maxwell.py
@@ -0,0 +1,320 @@
 
				+# main imports
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+
			
 
				+import sys, os, argparse
			
 
				+import subprocess
			
 
				+import time
			
 
				+import json
			
 
				+
			
 
				+# models imports
			
 
				+from sklearn.utils import shuffle
			
 
				+from sklearn.externals import joblib
			
 
				+from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
			
 
				+from sklearn.model_selection import cross_val_score
			
 
				+from sklearn.model_selection import StratifiedKFold
			
 
				+from sklearn.model_selection import train_test_split
			
 
				+
			
 
				+from keras.models import Sequential
			
 
				+from keras.layers import Conv1D, MaxPooling1D
			
 
				+from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
			
 
				+from keras.wrappers.scikit_learn import KerasClassifier
			
 
				+from keras import backend as K
			
 
				+from keras.models import model_from_json
			
 
				+
			
 
				+# image processing imports
			
 
				+from ipfml import processing
			
 
				+from PIL import Image
			
 
				+
			
 
				+# modules imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+
			
 
				# variables and parameters
				threshold_map_folder        = cfg.threshold_map_folder
				threshold_map_file_prefix   = cfg.threshold_map_folder + "_"

				markdowns_folder            = cfg.models_information_folder
				final_csv_model_comparisons = cfg.csv_model_comparisons_filename
				models_name                 = cfg.models_names_list

				zones                       = cfg.zones_indices

				current_dirpath = os.getcwd()


				def _parse_interval(raw_interval):
				    """Convert an interval string such as ``0,200`` into ``[begin, end]``.

				    Surrounding whitespace and quote characters are removed first: the
				    default value of ``--interval`` is the literal text ``"0, 200"``
				    (double quotes included), which ``int`` cannot parse as-is.

				    Raises:
				        ValueError: if the text does not hold exactly two integers.
				    """
				    cleaned = raw_interval.strip().strip('"\'')
				    bounds = [int(bound) for bound in cleaned.split(',')]

				    if len(bounds) != 2:
				        raise ValueError("expected 'begin,end', got %r" % raw_interval)

				    return bounds


				def _write_markdown_report(md_model_path, report_text, p_model_file):
				    """Write the script output, then every threshold map file, to markdown.

				    Args:
				        md_model_path: path of the markdown file to (over)write.
				        report_text: text placed at the top of the file.
				        p_model_file: model path; with its ``saved_models/`` part removed it
				            is looked up as a directory under ``threshold_map_folder``.
				    """
				    with open(md_model_path, 'w') as f:
				        f.write(report_text)

				        # read each threshold_map information if exists
				        model_map_info_path = os.path.join(threshold_map_folder, p_model_file.replace('saved_models/', ''))

				        if not os.path.isdir(model_map_info_path):
				            f.write('\n\n No threshold map information')
				            return

				        # sorted so that the report layout does not depend on listing order
				        for t_map_file in sorted(os.listdir(model_map_info_path)):

				            file_path = os.path.join(model_map_info_path, t_map_file)

				            with open(file_path, 'r') as map_file:
				                title_scene = t_map_file.replace(threshold_map_file_prefix, '')
				                f.write('\n\n## ' + title_scene + '\n')
				                f.write(map_file.read())


				def _keep_balanced_classes(dataset):
				    """Keep every row labelled 1 and at most as many rows labelled 0.

				    The label is read from the first column of ``dataset``; rows labelled 0
				    come first in the returned frame.
				    """
				    labels = dataset.iloc[:, 0]
				    noisy_rows = dataset[labels == 1]
				    not_noisy_rows = dataset[labels == 0]

				    return pd.concat([not_noisy_rows[0:len(noisy_rows.index)], noisy_rows])


				def _f1_recall_roc_auc(y_true, y_pred):
				    """Return ``[f1, recall, roc_auc]`` for the given labels and predictions."""
				    return [f1_score(y_true, y_pred),
				            recall_score(y_true, y_pred),
				            roc_auc_score(y_true, y_pred)]


				def main():
				    """Evaluate a saved model and record the results.

				    Steps:
				        1. run ``others/testModelByScene_maxwell.sh`` and store its output
				           (plus threshold map files, when present) in a markdown file;
				        2. load ``data/<model name>.train`` and ``.test``, balance classes;
				        3. load the model (``.joblib`` for sklearn, ``.json`` + ``.h5`` for
				           keras) and fit it on the training data;
				        4. compute accuracy, F1, recall and ROC AUC on the train, validation,
				           test and combined sets;
				        5. append one ``; ``-separated line to the comparison CSV file.

				    Command line arguments:
				        --interval: ``begin,end`` indices forwarded to the test script.
				        --model: path of the ``.joblib`` or ``.json`` model file.
				        --metric: metric label, one of ``cfg.metric_choices_labels``.
				        --mode: normalization label, one of ``cfg.normalization_choices``.
				    """
				    # local import: only this function needs it
				    import shlex

				    parser = argparse.ArgumentParser(description="Display SVD data of scene zone")

				    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
				    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)', required=True)
				    parser.add_argument('--metric', type=str, help='Metric data choice', choices=cfg.metric_choices_labels, required=True)
				    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices, required=True)

				    args = parser.parse_args()

				    try:
				        begin, end = _parse_interval(args.interval)
				    except ValueError as parse_error:
				        parser.error("invalid --interval value: %s" % parse_error)

				    p_model_file = args.model
				    p_metric     = args.metric
				    p_mode       = args.mode

				    # The extension decides how the model is loaded. An unknown extension
				    # is rejected: an empty extension passed to str.replace would insert
				    # '.md' between every character of the file name.
				    if p_model_file.endswith('.joblib'):
				        kind_model, model_ext = 'sklearn', '.joblib'
				    elif p_model_file.endswith('.json'):
				        kind_model, model_ext = 'keras', '.json'
				    else:
				        parser.error("--model must be a .joblib or .json file")

				    # call model and get global result in scenes
				    # An argument list (no shell) keeps paths containing quotes or spaces
				    # from being re-interpreted by a shell.
				    command = ['bash', 'others/testModelByScene_maxwell.sh',
				               str(begin), str(end), p_model_file, p_mode, p_metric]
				    print(' '.join(shlex.quote(part) for part in command))

				    process = subprocess.Popen(command, stdout=subprocess.PIPE)

				    # communicate() already waits for the process to finish
				    output, _ = process.communicate()

				    os.makedirs(markdowns_folder, exist_ok=True)

				    # Keep model information to compare
				    current_model_name = os.path.basename(p_model_file)[:-len(model_ext)]

				    md_model_path = os.path.join(markdowns_folder, current_model_name + '.md')
				    _write_markdown_report(md_model_path, output.decode("utf-8"), p_model_file)

				    print(current_model_name)

				    # reconstruct data filename: only models whose name contains one of
				    # the known model names have a matching data file
				    if not any(name in current_model_name for name in models_name):
				        raise ValueError("model name %r matches none of the known models %r"
				                         % (current_model_name, models_name))

				    current_data_file_path = os.path.join('data', current_model_name)

				    print("Current data file ")
				    print(current_data_file_path)

				    ########################
				    # 1. Get and prepare data
				    ########################
				    dataset_train = pd.read_csv(current_data_file_path + '.train', header=None, sep=";")
				    dataset_test = pd.read_csv(current_data_file_path + '.test', header=None, sep=";")

				    # default first shuffle of data
				    dataset_train = shuffle(dataset_train)
				    dataset_test = shuffle(dataset_test)

				    # get dataset with equal number of classes occurences
				    final_df_train = _keep_balanced_classes(dataset_train)
				    final_df_test = _keep_balanced_classes(dataset_test)

				    # shuffle data another time
				    final_df_train = shuffle(final_df_train)
				    final_df_test = shuffle(final_df_test)

				    final_df_train_size = len(final_df_train.index)
				    final_df_test_size = len(final_df_test.index)

				    # use of the whole data set for training
				    # (positional indexing: `.ix` is no longer available in pandas)
				    x_dataset_train = final_df_train.iloc[:, 1:]
				    x_dataset_test = final_df_test.iloc[:, 1:]

				    y_dataset_train = final_df_train.iloc[:, 0]
				    y_dataset_test = final_df_test.iloc[:, 0]

				    #######################
				    # 2. Getting model
				    #######################

				    if kind_model == 'keras':
				        # NOTE(review): the value returned by json.load is handed to
				        # model_from_json; presumably the file stores the architecture as a
				        # JSON-encoded string -- confirm against the code saving the model.
				        with open(p_model_file, 'r') as f:
				            json_model = json.load(f)

				        model = model_from_json(json_model)
				        model.load_weights(p_model_file[:-len('.json')] + '.h5')

				        model.compile(loss='binary_crossentropy',
				                      optimizer='adam',
				                      metrics=['accuracy'])

				        # reshape all input data to (samples, features, 1); the feature
				        # count comes from the data itself rather than from `end`
				        x_dataset_train = np.array(x_dataset_train)[:, :, np.newaxis]
				        x_dataset_test = np.array(x_dataset_test)[:, :, np.newaxis]
				        predict = model.predict_classes
				    else:
				        model = joblib.load(p_model_file)
				        predict = model.predict

				    #######################
				    # 3. Fit model : use of cross validation to fit model
				    #######################

				    if kind_model == 'keras':
				        model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
				    else:
				        model.fit(x_dataset_train, y_dataset_train)

				        # NOTE(review): this is the array of the 5 fold scores and it is
				        # written as-is in the CSV line, whereas the keras branch stores a
				        # single accuracy value -- confirm which one is wanted.
				        train_accuracy = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)

				    ######################
				    # 4. Test : Validation and test dataset from .test dataset
				    ######################

				    # validation and test sets are each capped to a third of the training
				    # set size (i.e. 20% each of train + validation + test)
				    val_set_size = int(final_df_train_size/3)
				    test_set_size = val_set_size

				    total_validation_size = val_set_size + test_set_size

				    if final_df_test_size > total_validation_size:
				        x_dataset_test = x_dataset_test[0:total_validation_size]
				        y_dataset_test = y_dataset_test[0:total_validation_size]

				    X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)

				    y_test_model = predict(X_test)
				    y_val_model = predict(X_val)
				    y_train_model = predict(x_dataset_train)

				    if kind_model == 'keras':
				        train_accuracy = accuracy_score(y_dataset_train, y_train_model)

				    val_accuracy = accuracy_score(y_val, y_val_model)
				    test_accuracy = accuracy_score(y_test, y_test_model)

				    # stats of all dataset (numpy arrays for keras, pandas objects otherwise)
				    if kind_model == 'keras':
				        all_x_data = np.concatenate([x_dataset_train, X_test, X_val])
				        all_y_data = np.concatenate([y_dataset_train, y_test, y_val])
				    else:
				        all_x_data = pd.concat([x_dataset_train, X_test, X_val])
				        all_y_data = pd.concat([y_dataset_train, y_test, y_val])

				    all_y_model = predict(all_x_data)
				    all_accuracy = accuracy_score(all_y_data, all_y_model)

				    # stats of dataset sizes
				    total_samples = final_df_train_size + val_set_size + test_set_size

				    # the order below is the column order of the CSV line
				    model_scores = [
				        final_df_train_size,
				        val_set_size,
				        test_set_size,

				        final_df_train_size / total_samples,
				        val_set_size / total_samples,
				        test_set_size / total_samples,

				        train_accuracy,
				        val_accuracy,
				        test_accuracy,
				        all_accuracy,
				    ]
				    model_scores += _f1_recall_roc_auc(y_dataset_train, y_train_model)
				    model_scores += _f1_recall_roc_auc(y_val, y_val_model)
				    model_scores += _f1_recall_roc_auc(y_test, y_test_model)
				    model_scores += _f1_recall_roc_auc(all_y_data, all_y_model)

				    # TODO : improve...
				    # check if it's always the case...
				    # (relies on the 8th '_'-separated part of the data file path)
				    nb_zones = current_data_file_path.split('_')[7]

				    line_fields = [current_model_name, str(end - begin), str(begin), str(end),
				                   str(nb_zones), p_metric, p_mode]
				    line_fields += [str(score) for score in model_scores]
				    final_file_line = '; '.join(line_fields)

				    # Append to the .csv file of the results folder; opened only once the
				    # line is ready so the handle is always closed.
				    output_final_file_path = os.path.join(cfg.results_information_folder, final_csv_model_comparisons)

				    with open(output_final_file_path, "a") as output_final_file:
				        output_final_file.write(final_file_line + '\n')


				if __name__ == "__main__":
				    main()
			
--- a/others/testModelByScene.sh
+++ b/others/testModelByScene.sh
@@ -0,0 +1,62 @@
 
				+#! bin/bash
			
 
				+
			
 
				+if [ -z "$1" ]
			
 
				+  then
			
 
				+    echo "No first argument supplied"
			
 
				+    echo "Need of begin vector index"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$2" ]
			
 
				+  then
			
 
				+    echo "No second argument supplied"
			
 
				+    echo "Need of end vector index"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$3" ]
			
 
				+  then
			
 
				+    echo "No third argument supplied"
			
 
				+    echo "Need of model input"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$4" ]
			
 
				+  then
			
 
				+    echo "No fourth argument supplied"
			
 
				+    echo "Need of mode file : 'svd', 'svdn', svdne"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$5" ]
			
 
				+  then
			
 
				+    echo "No fifth argument supplied"
			
 
				+    echo "Need of metric : 'lab', 'mscn'"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+INPUT_BEGIN=$1
			
 
				+INPUT_END=$2
			
 
				+INPUT_MODEL=$3
			
 
				+INPUT_MODE=$4
			
 
				+INPUT_METRIC=$5
			
 
				+
			
 
				+zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
			
 
				+
			
 
				+echo "**Model :** ${INPUT_MODEL}"
			
 
				+echo "**Metric :** ${INPUT_METRIC}"
			
 
				+echo "**Mode :** ${INPUT_MODE}"
			
 
				+echo "**Vector range :** [${INPUT_BEGIN}, ${INPUT_END}]"
			
 
				+echo ""
			
 
				+echo " # | GLOBAL | NOISY | NOT NOISY"
			
 
				+echo "---|--------|-------|----------"
			
 
				+
			
 
				+for scene in {"A","B","C","D","E","F","G","H","I"}; do
			
 
				+
			
 
				+  FILENAME="data/data_${INPUT_MODE}_${INPUT_METRIC}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
			
 
				+
			
 
				+  python generate/generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
			
 
				+
			
 
				+  python prediction/prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}
			
 
				+
			
 
				+done
			
--- a/others/testModelByScene_maxwell.sh
+++ b/others/testModelByScene_maxwell.sh
@@ -0,0 +1,70 @@
 
				+#! bin/bash
			
 
				+
			
 
				+if [ -z "$1" ]
			
 
				+  then
			
 
				+    echo "No first argument supplied"
			
 
				+    echo "Need of begin vector index"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$2" ]
			
 
				+  then
			
 
				+    echo "No second argument supplied"
			
 
				+    echo "Need of end vector index"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$3" ]
			
 
				+  then
			
 
				+    echo "No third argument supplied"
			
 
				+    echo "Need of model input"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$4" ]
			
 
				+  then
			
 
				+    echo "No fourth argument supplied"
			
 
				+    echo "Need of mode file : 'svd', 'svdn', svdne"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$5" ]
			
 
				+  then
			
 
				+    echo "No fifth argument supplied"
			
 
				+    echo "Need of metric : 'lab', 'mscn'"
			
 
				+    exit 1
			
 
				+fi
			
 
				+
			
 
				+if [ -z "$6" ]
			
 
				+  then
			
 
				+    echo "No sixth argument supplied"
			
 
				+fi
			
 
				+
			
 
				+
			
 
				+
			
 
				+INPUT_BEGIN=$1
			
 
				+INPUT_END=$2
			
 
				+INPUT_MODEL=$3
			
 
				+INPUT_MODE=$4
			
 
				+INPUT_METRIC=$5
			
 
				+
			
 
				+zones="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15"
			
 
				+
			
 
				+echo "**Model :** ${INPUT_MODEL}"
			
 
				+echo "**Metric :** ${INPUT_METRIC}"
			
 
				+echo "**Mode :** ${INPUT_MODE}"
			
 
				+echo "**Vector range :** [${INPUT_BEGIN}, ${INPUT_END}]"
			
 
				+echo ""
			
 
				+echo " # | GLOBAL | NOISY | NOT NOISY"
			
 
				+echo "---|--------|-------|----------"
			
 
				+
			
 
				+# only take maxwell scenes
			
 
				+for scene in {"A","D","G","H"}; do
			
 
				+
			
 
				+  FILENAME="data/data_${INPUT_MODE}_${INPUT_METRIC}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
			
 
				+
			
 
				+  python generate/generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1
			
 
				+
			
 
				+  python prediction/prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}
			
 
				+
			
 
				+done
		`@@ -0,0 +1 @@`
		`+Subproject commit d5de038bdccaa58ff2123d5227482dc6c0ea2500`