5 years ago · c99fb8162f
--- a/analysis/svd_entropy_analysis.ipynb
+++ b/analysis/svd_entropy_analysis.ipynb
--- a/analysis/svd_entropy_diff_analysis.ipynb
+++ b/analysis/svd_entropy_diff_analysis.ipynb
--- a/analysis/svd_entropy_gradient_analysis.ipynb
+++ b/analysis/svd_entropy_gradient_analysis.ipynb
--- a/analysis/svd_entropy_gradientabs_analysis.ipynb
+++ b/analysis/svd_entropy_gradientabs_analysis.ipynb
--- a/analysis/svd_entropy_minus_analysis.ipynb
+++ b/analysis/svd_entropy_minus_analysis.ipynb
--- a/analysis/svd_reconstruction_analysis.ipynb
+++ b/analysis/svd_reconstruction_analysis.ipynb
--- a/analysis/svd_scenes_analysis.ipynb
+++ b/analysis/svd_scenes_analysis.ipynb
--- a/custom_config.py
+++ b/custom_config.py
--- a/data_attributes.py
+++ b/data_attributes.py
@@ -298,12 +298,12 @@ def get_image_features(data_type, block):
 
				             
			
 
				         sv_array = np.array(sv_vector)
			
 
				         
			
 
				-        _, len = sv_array.shape
			
 
				+        _, length = sv_array.shape
			
 
				         
			
 
				         sv_std = []
			
 
				         
			
 
				         # normalize each SV vectors and compute standard deviation for each sub vectors
			
 
				-        for i in range(len):
			
 
				+        for i in range(length):
			
 
				             sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
			
 
				             sv_std.append(np.std(sv_array[:, i]))
			
 
				         
			
@@ -340,12 +340,12 @@ def get_image_features(data_type, block):
 
				             
			
 
				         sv_array = np.array(sv_vector)
			
 
				         
			
 
				-        _, len = sv_array.shape
			
 
				+        _, length = sv_array.shape
			
 
				         
			
 
				         sv_std = []
			
 
				         
			
 
				         # normalize each SV vectors and compute standard deviation for each sub vectors
			
 
				-        for i in range(len):
			
 
				+        for i in range(length):
			
 
				             sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
			
 
				             sv_std.append(np.std(sv_array[:, i]))
			
 
				         
			
@@ -672,6 +672,39 @@ def get_image_features(data_type, block):
 
				 
			
 
				         data = s
			
 
				 
			
 
				+    if data_type == 'svd_entropy':
			
 
				+        l_img = transform.get_LAB_L(block)
			
 
				+
			
 
				+        blocks = segmentation.divide_in_blocks(l_img, (20, 20))
			
 
				+
			
 
				+        values = []
			
 
				+        for b in blocks:
			
 
				+            sv = compression.get_SVD_s(b)
			
 
				+            values.append(utils.get_entropy(sv))
			
 
				+        data = np.array(values)
			
 
				+
			
 
				+    if data_type == 'svd_entropy_20':
			
 
				+        l_img = transform.get_LAB_L(block)
			
 
				+
			
 
				+        blocks = segmentation.divide_in_blocks(l_img, (20, 20))
			
 
				+
			
 
				+        values = []
			
 
				+        for b in blocks:
			
 
				+            sv = compression.get_SVD_s(b)
			
 
				+            values.append(utils.get_entropy(sv))
			
 
				+        data = np.array(values)
			
 
				+
			
 
				+    if data_type == 'svd_entropy_noise_20':
			
 
				+        l_img = transform.get_LAB_L(block)
			
 
				+
			
 
				+        blocks = segmentation.divide_in_blocks(l_img, (20, 20))
			
 
				+
			
 
				+        values = []
			
 
				+        for b in blocks:
			
 
				+            sv = compression.get_SVD_s(b)
			
 
				+            sv_size = len(sv)
			
 
				+            values.append(utils.get_entropy(sv[int(sv_size / 4):]))
			
 
				+        data = np.array(values)
			
 
				         
			
 
				     return data
			
 
				 
			
--- a/display/display_svd_data_scene.py
+++ b/display/display_svd_data_scene.py
@@ -1,225 +0,0 @@
 
				-# main imports
			
 
				-import sys, os, argparse
			
 
				-import numpy as np
			
 
				-
			
 
				-# image processing imports
			
 
				-from PIL import Image
			
 
				-import matplotlib.pyplot as plt
			
 
				-
			
 
				-import ipfml.iqa.fr as fr_iqa
			
 
				-from ipfml import utils
			
 
				-
			
 
				-# modules and config imports
			
 
				-sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				-
			
 
				-import custom_config as cfg
			
 
				-from modules.utils import data as dt
			
 
				-from data_attributes import get_image_features
			
 
				-
			
 
				-# getting configuration information
			
 
				-zone_folder         = cfg.zone_folder
			
 
				-min_max_filename    = cfg.min_max_filename_extension
			
 
				-
			
 
				-# define all scenes values
			
 
				-scenes_list         = cfg.scenes_names
			
 
				-scenes_indices      = cfg.scenes_indices
			
 
				-choices             = cfg.normalization_choices
			
 
				-path                = cfg.dataset_path
			
 
				-zones               = cfg.zones_indices
			
 
				-seuil_expe_filename = cfg.seuil_expe_filename
			
 
				-
			
 
				-features_choices    = cfg.features_choices_labels
			
 
				-
			
 
				-max_nb_bits         = 8
			
 
				-display_error       = False
			
 
				-
			
 
				-
			
 
				-def display_svd_values(p_scene, p_interval, p_indices, p_feature, p_mode, p_step, p_norm, p_ylim):
			
 
				-    """
			
 
				-    @brief Method which gives information about svd curves from zone of picture
			
 
				-    @param p_scene, scene expected to show svd values
			
 
				-    @param p_interval, interval [begin, end] of svd data to display
			
 
				-    @param p_interval, interval [begin, end] of samples or minutes from render generation engine
			
 
				-    @param p_feature, feature computed to show
			
 
				-    @param p_mode, normalization's mode
			
 
				-    @param p_norm, normalization or not of selected svd data
			
 
				-    @param p_ylim, ylim choice to better display of data
			
 
				-    @return nothing
			
 
				-    """
			
 
				-
			
 
				-    max_value_svd = 0
			
 
				-    min_value_svd = sys.maxsize
			
 
				-
			
 
				-    scenes = os.listdir(path)
			
 
				-    # remove min max file from scenes folder
			
 
				-    scenes = [s for s in scenes if min_max_filename not in s]
			
 
				-
			
 
				-    begin_data, end_data = p_interval
			
 
				-    begin_index, end_index = p_indices
			
 
				-
			
 
				-    # go ahead each scenes
			
 
				-    for folder_scene in scenes:
			
 
				-
			
 
				-        if p_scene == folder_scene:
			
 
				-            scene_path = os.path.join(path, folder_scene)
			
 
				-
			
 
				-            # construct each zones folder name
			
 
				-            zones_folder = []
			
 
				-
			
 
				-            # get zones list info
			
 
				-            for index in zones:
			
 
				-                index_str = str(index)
			
 
				-                if len(index_str) < 2:
			
 
				-                    index_str = "0" + index_str
			
 
				-
			
 
				-                current_zone = "zone"+index_str
			
 
				-                zones_folder.append(current_zone)
			
 
				-
			
 
				-            images_data = []
			
 
				-            images_indices = []
			
 
				-
			
 
				-            threshold_learned_zones = []
			
 
				-    
			
 
				-            # get all images of folder
			
 
				-            scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
			
 
				-            number_scene_image = len(scene_images)
			
 
				-            
			
 
				-            for id, zone_folder in enumerate(zones_folder):
			
 
				-
			
 
				-                # get threshold information
			
 
				-                zone_path = os.path.join(scene_path, zone_folder)
			
 
				-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
			
 
				-
			
 
				-                # open treshold path and get this information
			
 
				-                with open(path_seuil, "r") as seuil_file:
			
 
				-                    threshold_learned = int(seuil_file.readline().strip())
			
 
				-                    threshold_learned_zones.append(threshold_learned)
			
 
				-
			
 
				-            threshold_mean = np.mean(np.asarray(threshold_learned_zones))
			
 
				-            threshold_image_found = False
			
 
				-
			
 
				-            svd_data = []
			
 
				-
			
 
				-
			
 
				-            # for each images
			
 
				-            for id_img, img_path in enumerate(scene_images):
			
 
				-                
			
 
				-                current_quality_image = dt.get_scene_image_quality(img_path)
			
 
				-
			
 
				-                img = Image.open(img_path)
			
 
				-
			
 
				-                svd_values = get_image_features(p_feature, img)
			
 
				-
			
 
				-                if p_norm:
			
 
				-                    svd_values = svd_values[begin_data:end_data]
			
 
				-
			
 
				-                #svd_values = np.asarray([math.log(x) for x in svd_values])
			
 
				-
			
 
				-                # update min max values
			
 
				-                min_value = svd_values.min()
			
 
				-                max_value = svd_values.max()
			
 
				-
			
 
				-                if min_value < min_value_svd:
			
 
				-                    min_value_svd = min_value
			
 
				-
			
 
				-                if max_value > min_value_svd:
			
 
				-                    max_value_svd = max_value
			
 
				-
			
 
				-                # keep in memory used data
			
 
				-                if current_quality_image % p_step == 0:
			
 
				-                    if current_quality_image >= begin_index and current_quality_image <= end_index:
			
 
				-
			
 
				-                        images_indices.append(dt.get_scene_image_postfix(img_path))
			
 
				-                        svd_data.append(svd_values)
			
 
				-
			
 
				-                if threshold_mean < current_quality_image and not threshold_image_found:
			
 
				-
			
 
				-                    threshold_image_found = True
			
 
				-                    threshold_image_zone = current_quality_image
			
 
				-
			
 
				-                    print("Quality mean : ", current_quality_image, "\n")
			
 
				-                    
			
 
				-                    if dt.get_scene_image_postfix(img_path) not in images_indices:
			
 
				-                        images_indices.append(dt.get_scene_image_postfix(img_path))
			
 
				-
			
 
				-                print('%.2f%%' % ((id_img + 1) / number_scene_image * 100))
			
 
				-                sys.stdout.write("\033[F")
			
 
				-
			
 
				-
			
 
				-            # all indices of picture to plot
			
 
				-            print(images_indices)
			
 
				-
			
 
				-            for id, data in enumerate(svd_data):
			
 
				-
			
 
				-                current_data = data
			
 
				-
			
 
				-                if not p_norm:
			
 
				-                    current_data = current_data[begin_data:end_data]
			
 
				-
			
 
				-                if p_mode == 'svdn':
			
 
				-                    current_data = utils.normalize_arr(current_data)
			
 
				-
			
 
				-                if p_mode == 'svdne':
			
 
				-                    current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
			
 
				-
			
 
				-                images_data.append(current_data)
			
 
				-
			
 
				-
			
 
				-            # display all data using matplotlib (configure plt)
			
 
				-            fig, ax = plt.subplots(figsize=(30, 22))
			
 
				-            ax.set_facecolor('#FFFFFF')
			
 
				-            #fig.patch.set_facecolor('#F9F9F9')
			
 
				-
			
 
				-            ax.tick_params(labelsize=22)
			
 
				-            #plt.rc('xtick', labelsize=22)
			
 
				-            #plt.rc('ytick', labelsize=22)
			
 
				-
			
 
				-            #plt.title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_feature + ' feature, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=24)
			
 
				-            ax.set_ylabel('Component values', fontsize=28)
			
 
				-            ax.set_xlabel('Vector features', fontsize=28)
			
 
				-
			
 
				-            for id, data in enumerate(images_data):
			
 
				-
			
 
				-                p_label = p_scene + "_" + images_indices[id]
			
 
				-
			
 
				-                if int(images_indices[id]) == int(threshold_image_zone):
			
 
				-                    ax.plot(data, label=p_label + " (threshold mean)", lw=4, color='red')
			
 
				-                else:
			
 
				-                    ax.plot(data, label=p_label)
			
 
				-
			
 
				-            plt.legend(bbox_to_anchor=(0.60, 0.98), loc=2, borderaxespad=0.2, fontsize=26)
			
 
				-
			
 
				-            start_ylim, end_ylim = p_ylim
			
 
				-            ax.set_ylim(start_ylim, end_ylim)
			
 
				-
			
 
				-            plot_name = p_scene + '_' + p_feature + '_' + str(p_step) + '_' + p_mode + '_' + str(p_norm) + '.png'
			
 
				-            plt.savefig(plot_name, facecolor=ax.get_facecolor())
			
 
				-
			
 
				-def main():
			
 
				-
			
 
				-    parser = argparse.ArgumentParser(description="Display SVD data of scene")
			
 
				-
			
 
				-    parser.add_argument('--scene', type=str, help='scene index to use', choices=cfg.scenes_indices)
			
 
				-    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
			
 
				-    parser.add_argument('--indices', type=str, help='Samples interval to display', default='"0, 900"')
			
 
				-    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
			
 
				-    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
			
 
				-    parser.add_argument('--step', type=int, help='Each step samples to display', default=10)
			
 
				-    parser.add_argument('--norm', type=int, help='If values will be normalized or not', choices=[0, 1])
			
 
				-    parser.add_argument('--ylim', type=str, help='ylim interval to use', default='"0, 1"')
			
 
				-
			
 
				-    args = parser.parse_args()
			
 
				-
			
 
				-    p_scene    = scenes_list[scenes_indices.index(args.scene)]
			
 
				-    p_indices  = list(map(int, args.indices.split(',')))
			
 
				-    p_interval = list(map(int, args.interval.split(',')))
			
 
				-    p_feature  = args.feature
			
 
				-    p_mode     = args.mode
			
 
				-    p_step     = args.step
			
 
				-    p_norm     = args.norm
			
 
				-    p_ylim     = list(map(float, args.ylim.split(',')))
			
 
				-
			
 
				-    display_svd_values(p_scene, p_interval, p_indices, p_feature, p_mode, p_step, p_norm, p_ylim)
			
 
				-
			
 
				-if __name__== "__main__":
			
 
				-    main()
			
--- a/display/display_svd_data_scene_file.py
+++ b/display/display_svd_data_scene_file.py
@@ -0,0 +1,236 @@
 
				+# main imports
			
 
				+import sys, os, argparse
			
 
				+import numpy as np
			
 
				+import math
			
 
				+
			
 
				+# image processing imports
			
 
				+from PIL import Image
			
 
				+import matplotlib.pyplot as plt
			
 
				+
			
 
				+import ipfml.iqa.fr as fr_iqa
			
 
				+from ipfml import utils
			
 
				+
			
 
				+# modules and config imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+from modules.utils import data as dt
			
 
				+from data_attributes import get_image_features
			
 
				+
			
 
				+# getting configuration information
			
 
				+zone_folder         = cfg.zone_folder
			
 
				+min_max_filename    = cfg.min_max_filename_extension
			
 
				+
			
 
				+# define all scenes values
			
 
				+scenes_list         = cfg.scenes_names
			
 
				+scenes_indices      = cfg.scenes_indices
			
 
				+choices             = cfg.normalization_choices
			
 
				+zones               = cfg.zones_indices
			
 
				+seuil_expe_filename = cfg.seuil_expe_filename
			
 
				+
			
 
				+features_choices    = cfg.features_choices_labels
			
 
				+
			
 
				+max_nb_bits         = 8
			
 
				+display_error       = False
			
 
				+
			
 
				+
			
 
				+def display_svd_values(p_scene, p_thresholds, p_interval, p_indices, p_feature, p_mode, p_step, p_norm, p_ylim, p_label):
			
 
				+    """
			
 
				+    @brief Method which gives information about svd curves from zone of picture
			
 
				+    @param p_scene, scene expected to show svd values
			
 
				+    @param p_interval, interval [begin, end] of svd data to display
			
 
				+    @param p_interval, interval [begin, end] of samples or minutes from render generation engine
			
 
				+    @param p_feature, feature computed to show
			
 
				+    @param p_mode, normalization's mode
			
 
				+    @param p_norm, normalization or not of selected svd data
			
 
				+    @param p_ylim, ylim choice to better display of data
			
 
				+    @return nothing
			
 
				+    """
			
 
				+
			
 
				+    max_value_svd = 0
			
 
				+    min_value_svd = sys.maxsize
			
 
				+
			
 
				+    begin_data, end_data = p_interval
			
 
				+    begin_index, end_index = p_indices
			
 
				+
			
 
				+    # go ahead selected scene
			
 
				+    scene_path = p_scene
			
 
				+
			
 
				+    # construct each zones folder name
			
 
				+    zones_folder = []
			
 
				+
			
 
				+    # get zones list info
			
 
				+    for index in zones:
			
 
				+        index_str = str(index)
			
 
				+        if len(index_str) < 2:
			
 
				+            index_str = "0" + index_str
			
 
				+
			
 
				+        current_zone = "zone"+index_str
			
 
				+        zones_folder.append(current_zone)
			
 
				+
			
 
				+    images_data = []
			
 
				+    images_indices = []
			
 
				+
			
 
				+    threshold_learned_zones = []
			
 
				+
			
 
				+    # get all images of folder
			
 
				+    scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
			
 
				+    number_scene_image = len(scene_images)
			
 
				+    
			
 
				+    _, scene_name = os.path.split(p_scene)
			
 
				+    threshold_learned_zones = p_thresholds[scene_name]
			
 
				+
			
 
				+    threshold_mean = np.mean(np.asarray(threshold_learned_zones))
			
 
				+    threshold_image_found = False
			
 
				+
			
 
				+    svd_data = []
			
 
				+
			
 
				+
			
 
				+    # for each images
			
 
				+    for id_img, img_path in enumerate(scene_images):
			
 
				+        
			
 
				+        current_quality_image = dt.get_scene_image_quality(img_path)
			
 
				+
			
 
				+        img = Image.open(img_path)
			
 
				+
			
 
				+        svd_values = get_image_features(p_feature, img)
			
 
				+
			
 
				+        if p_norm:
			
 
				+            svd_values = svd_values[begin_data:end_data]
			
 
				+
			
 
				+        #svd_values = np.asarray([math.log(x) for x in svd_values])
			
 
				+
			
 
				+        # update min max values
			
 
				+        min_value = svd_values.min()
			
 
				+        max_value = svd_values.max()
			
 
				+
			
 
				+        if min_value < min_value_svd:
			
 
				+            min_value_svd = min_value
			
 
				+
			
 
				+        if max_value > min_value_svd:
			
 
				+            max_value_svd = max_value
			
 
				+
			
 
				+        # keep in memory used data
			
 
				+        if current_quality_image % p_step == 0:
			
 
				+            if current_quality_image >= begin_index and current_quality_image <= end_index:
			
 
				+
			
 
				+                images_indices.append(dt.get_scene_image_postfix(img_path))
			
 
				+                svd_data.append(svd_values)
			
 
				+
			
 
				+        if threshold_mean < current_quality_image and not threshold_image_found:
			
 
				+
			
 
				+            threshold_image_found = True
			
 
				+            threshold_image_zone = current_quality_image
			
 
				+
			
 
				+            print("Quality mean : ", current_quality_image, "\n")
			
 
				+            
			
 
				+            if dt.get_scene_image_postfix(img_path) not in images_indices:
			
 
				+                images_indices.append(dt.get_scene_image_postfix(img_path))
			
 
				+
			
 
				+        print('%.2f%%' % ((id_img + 1) / number_scene_image * 100))
			
 
				+        sys.stdout.write("\033[F")
			
 
				+
			
 
				+
			
 
				+    # all indices of picture to plot
			
 
				+    print(images_indices)
			
 
				+
			
 
				+    for id, data in enumerate(svd_data):
			
 
				+
			
 
				+        # current_data = [ math.log10(d + 1.) for d in data ]
			
 
				+        # print(current_data)
			
 
				+
			
 
				+        current_data = data
			
 
				+
			
 
				+        if not p_norm:
			
 
				+            current_data = current_data[begin_data:end_data]
			
 
				+
			
 
				+        if p_mode == 'svdn':
			
 
				+            current_data = utils.normalize_arr(current_data)
			
 
				+
			
 
				+        if p_mode == 'svdne':
			
 
				+            current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
			
 
				+
			
 
				+        images_data.append(current_data)
			
 
				+
			
 
				+
			
 
				+    # display all data using matplotlib (configure plt)
			
 
				+    fig, ax = plt.subplots(figsize=(30, 22))
			
 
				+    ax.set_facecolor('#FFFFFF')
			
 
				+    #fig.patch.set_facecolor('#F9F9F9')
			
 
				+
			
 
				+    ax.tick_params(labelsize=26)
			
 
				+    #plt.rc('xtick', labelsize=22)
			
 
				+    #plt.rc('ytick', labelsize=22)
			
 
				+
			
 
				+    #plt.title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_feature + ' feature, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=24)
			
 
				+    ax.set_ylabel('Component values', fontsize=36)
			
 
				+    ax.set_xlabel('Singular value component indices', fontsize=36)
			
 
				+
			
 
				+    for id, data in enumerate(images_data):
			
 
				+
			
 
				+        #p_label = p_scene + "_" + images_indices[id]
			
 
				+        p_label = images_indices[id] + " samples"
			
 
				+
			
 
				+        if int(images_indices[id]) == int(threshold_image_zone):
			
 
				+            ax.plot(data, label=p_label + " (threshold mean)", lw=8, color='red')
			
 
				+        else:
			
 
				+            ax.plot(data, label=p_label, lw=4)
			
 
				+
			
 
				+    plt.legend(bbox_to_anchor=(0.60, 0.98), loc=2, borderaxespad=0.2, fontsize=32)
			
 
				+
			
 
				+    start_ylim, end_ylim = p_ylim
			
 
				+    ax.set_ylim(start_ylim, end_ylim)
			
 
				+
			
 
				+    plot_name = scene_name + '_' + p_feature + '_' + str(p_step) + '_' + p_mode + '_' + str(p_norm) + '.png'
			
 
				+    plt.title('Tend of Singular values at different samples of ' + p_label + ' scene', fontsize=40)
			
 
				+    plt.savefig(plot_name, transparent=True)
			
 
				+
			
 
				+def main():
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(description="Display SVD data of scene")
			
 
				+
			
 
				+    parser.add_argument('--scene', type=str, help='scene folder to use', required=True)
			
 
				+    parser.add_argument('--thresholds', type=str, help='expected thresholds file', required=True)
			
 
				+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"')
			
 
				+    parser.add_argument('--indices', type=str, help='Samples interval to display', default='"0, 900"')
			
 
				+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
			
 
				+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
			
 
				+    parser.add_argument('--step', type=int, help='Each step samples to display', default=10)
			
 
				+    parser.add_argument('--norm', type=int, help='If values will be normalized or not', choices=[0, 1])
			
 
				+    parser.add_argument('--ylim', type=str, help='ylim interval to use', default='0,1')
			
 
				+    parser.add_argument('--label', type=str, help='output label name', default="")
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    p_scene    = args.scene
			
 
				+    p_thresholds = args.thresholds
			
 
				+    p_indices  = list(map(int, args.indices.split(',')))
			
 
				+    p_interval = list(map(int, args.interval.split(',')))
			
 
				+    p_feature  = args.feature
			
 
				+    p_mode     = args.mode
			
 
				+    p_step     = args.step
			
 
				+    p_norm     = args.norm
			
 
				+    p_ylim     = list(map(float, args.ylim.split(',')))
			
 
				+    p_label    = args.label
			
 
				+
			
 
				+    # 1. retrieve human_thresholds
			
 
				+    human_thresholds = {}
			
 
				+
			
 
				+    # extract thresholds
			
 
				+    with open(p_thresholds) as f:
			
 
				+        thresholds_line = f.readlines()
			
 
				+
			
 
				+        for line in thresholds_line:
			
 
				+            data = line.split(';')
			
 
				+            del data[-1] # remove unused last element `\n`
			
 
				+            current_scene = data[0]
			
 
				+            thresholds_scene = data[1:]
			
 
				+
			
 
				+            # TODO : check if really necessary
			
 
				+            if current_scene != '50_shades_of_grey':
			
 
				+                human_thresholds[current_scene] = [ int(threshold) for threshold in  thresholds_scene ]
			
 
				+
			
 
				+    display_svd_values(p_scene, human_thresholds, p_interval, p_indices, p_feature, p_mode, p_step, p_norm, p_ylim, p_label)
			
 
				+
			
 
				+if __name__== "__main__":
			
 
				+    main()
			
--- a/generate/generate_all_data.py
+++ b/generate/generate_all_data.py
@@ -163,6 +163,8 @@ def main():
 
				     parser = argparse.ArgumentParser(description="Compute and prepare data of feature of all scenes (keep in memory min and max value found)")
			
 
				 
			
 
				     parser.add_argument('--feature', type=str, 
			
 
				+                                    help="feature choice in order to compute data (use 'all' if all features are needed)", required=True)
			
 
				+    parser.add_argument('--dataset', type=str, 
			
 
				                                     help="feature choice in order to compute data (use 'all' if all features are needed)")
			
 
				 
			
 
				     args = parser.parse_args()
			
--- a/generate/generate_all_data_file.py
+++ b/generate/generate_all_data_file.py
@@ -0,0 +1,225 @@
 
				+# main imports
			
 
				+import sys, os, argparse
			
 
				+import numpy as np
			
 
				+import random
			
 
				+import time
			
 
				+import json
			
 
				+
			
 
				+# image processing imports
			
 
				+from PIL import Image
			
 
				+
			
 
				+from ipfml.processing import transform, segmentation
			
 
				+from ipfml import utils
			
 
				+
			
 
				+# modules imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+from modules.utils import data as dt
			
 
				+from data_attributes import get_image_features
			
 
				+
			
 
				+
			
 
				+# getting configuration information
			
 
				+zone_folder             = cfg.zone_folder
			
 
				+min_max_filename        = cfg.min_max_filename_extension
			
 
				+
			
 
				+# define all scenes values
			
 
				+choices                 = cfg.normalization_choices
			
 
				+zones                   = cfg.zones_indices
			
 
				+seuil_expe_filename     = cfg.seuil_expe_filename
			
 
				+
			
 
				+features_choices        = cfg.features_choices_labels
			
 
				+output_data_folder      = cfg.output_data_folder
			
 
				+
			
 
				+generic_output_file_svd = '_random.csv'
			
 
				+
			
 
				+def generate_data_feature(path, output, human_thresholds, data_type, mode):
			
 
				+    """
			
 
				+    @brief Method which generates all .csv files from scenes
			
 
				+    @param data_type,  feature choice
			
 
				+    @param mode, normalization choice
			
 
				+    @return nothing
			
 
				+    """
			
 
				+
			
 
				+    scenes = os.listdir(path)
			
 
				+    # remove min max file from scenes folder
			
 
				+    scenes = [s for s in scenes if min_max_filename not in s]
			
 
				+
			
 
				+    # keep in memory min and max data found from data_type
			
 
				+    min_val_found = sys.maxsize
			
 
				+    max_val_found = 0
			
 
				+
			
 
				+    output_path = os.path.join(cfg.output_data_generated, output)
			
 
				+
			
 
				+    if not os.path.exists(output_path):
			
 
				+        os.makedirs(output_path)
			
 
				+
			
 
				+    data_min_max_filename = os.path.join(output_path, data_type + min_max_filename)
			
 
				+
			
 
				+    # go ahead each scenes
			
 
				+    for folder_scene in human_thresholds:
			
 
				+
			
 
				+        print(folder_scene)
			
 
				+        scene_path = os.path.join(path, folder_scene)
			
 
				+        output_scene_path = os.path.join(output_path, folder_scene)
			
 
				+
			
 
				+        if not os.path.exists(output_scene_path):
			
 
				+            os.makedirs(output_scene_path)
			
 
				+
			
 
				+        # getting output filename
			
 
				+        output_svd_filename = data_type + "_" + mode + generic_output_file_svd
			
 
				+
			
 
				+        # construct each zones folder name
			
 
				+        zones_folder = []
			
 
				+        svd_output_files = []
			
 
				+
			
 
				+        # get zones list info
			
 
				+        for index in zones:
			
 
				+            index_str = str(index)
			
 
				+            if len(index_str) < 2:
			
 
				+                index_str = "0" + index_str
			
 
				+
			
 
				+            current_zone = "zone"+index_str
			
 
				+            zones_folder.append(current_zone)
			
 
				+
			
 
				+            zone_path = os.path.join(scene_path, current_zone)
			
 
				+            output_zone_path = os.path.join(output_scene_path, current_zone)
			
 
				+
			
 
				+            if not os.path.exists(output_zone_path):
			
 
				+                os.makedirs(output_zone_path)
			
 
				+
			
 
				+            svd_file_path = os.path.join(output_zone_path, output_svd_filename)
			
 
				+
			
 
				+            # add writer into list
			
 
				+            svd_output_files.append(open(svd_file_path, 'w'))
			
 
				+
			
 
				+        # get all images of folder
			
 
				+        scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
			
 
				+        number_scene_image = len(scene_images)
			
 
				+            
			
 
				+        for id_img, img_path in enumerate(scene_images):
			
 
				+            
			
 
				+            current_image_postfix = dt.get_scene_image_postfix(img_path)
			
 
				+
			
 
				+            current_img = Image.open(img_path)
			
 
				+            img_blocks = segmentation.divide_in_blocks(current_img, (200, 200))
			
 
				+
			
 
				+            for id_block, block in enumerate(img_blocks):
			
 
				+
			
 
				+                ###########################
			
 
				+                # feature computation part #
			
 
				+                ###########################
			
 
				+
			
 
				+                data = get_image_features(data_type, block)
			
 
				+
			
 
				+                ##################
			
 
				+                # Data mode part #
			
 
				+                ##################
			
 
				+
			
 
				+                # modify data depending mode
			
 
				+                if mode == 'svdne':
			
 
				+
			
 
				+                    # getting max and min information from min_max_filename
			
 
				+                    with open(data_min_max_filename, 'r') as f:
			
 
				+                        min_val = float(f.readline())
			
 
				+                        max_val = float(f.readline())
			
 
				+
			
 
				+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
			
 
				+
			
 
				+                if mode == 'svdn':
			
 
				+                    data = utils.normalize_arr(data)
			
 
				+
			
 
				+                # save min and max found from dataset in order to normalize data using whole data known
			
 
				+                if mode == 'svd':
			
 
				+
			
 
				+                    current_min = data.min()
			
 
				+                    current_max = data.max()
			
 
				+
			
 
				+                    if current_min < min_val_found:
			
 
				+                        min_val_found = current_min
			
 
				+
			
 
				+                    if current_max > max_val_found:
			
 
				+                        max_val_found = current_max
			
 
				+
			
 
				+                # now write data into current writer
			
 
				+                current_file = svd_output_files[id_block]
			
 
				+
			
 
				+                # add of index
			
 
				+                current_file.write(current_image_postfix + ';')
			
 
				+
			
 
				+                for val in data:
			
 
				+                    current_file.write(str(val) + ";")
			
 
				+
			
 
				+                current_file.write('\n')
			
 
				+
			
 
				+            print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((id_img + 1) / number_scene_image * 100.) + "%")
			
 
				+            sys.stdout.write("\033[F")
			
 
				+
			
 
				+        for f in svd_output_files:
			
 
				+            f.close()
			
 
				+
			
 
				+        print('\n')
			
 
				+
			
 
				+    # save current information about min file found
			
 
				+    if mode == 'svd':
			
 
				+        with open(data_min_max_filename, 'w') as f:
			
 
				+            f.write(str(min_val_found) + '\n')
			
 
				+            f.write(str(max_val_found) + '\n')
			
 
				+
			
 
				+    print("%s_%s : end of data generation\n" % (data_type, mode))
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(description="Compute and prepare data of feature of all scenes (keep in memory min and max value found)")
			
 
				+
			
 
				+    parser.add_argument('--feature', type=str, 
			
 
				+                                    help="feature choice in order to compute data (use 'all' if all features are needed)", required=True)
			
 
				+    parser.add_argument('--dataset', type=str, 
			
 
				+                                    help="dataset with all scenes", required=True)
			
 
				+    parser.add_argument('--output', type=str, 
			
 
				+                                    help="output where data files are saved", required=True)
			
 
				+
			
 
				+    parser.add_argument('--thresholds', type=str, help='file with scene list information and thresholds', required=True)
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    p_feature = args.feature
			
 
				+    p_dataset = args.dataset
			
 
				+    p_output  = args.output
			
 
				+    p_thresholds = args.thresholds
			
 
				+
			
 
				+    # 1. retrieve human_thresholds
			
 
				+    human_thresholds = {}
			
 
				+
			
 
				+    # extract thresholds
			
 
				+    with open(p_thresholds) as f:
			
 
				+        thresholds_line = f.readlines()
			
 
				+
			
 
				+        for line in thresholds_line:
			
 
				+            data = line.split(';')
			
 
				+            del data[-1] # remove unused last element `\n`
			
 
				+            current_scene = data[0]
			
 
				+            thresholds_scene = data[1:]
			
 
				+
			
 
				+            # TODO : check if really necessary
			
 
				+            if current_scene != '50_shades_of_grey':
			
 
				+                human_thresholds[current_scene] = [ int(threshold) for threshold in  thresholds_scene ]
			
 
				+
			
 
				+    # generate all or specific feature data
			
 
				+    if p_feature == 'all':
			
 
				+        for m in features_choices:
			
 
				+            generate_data_feature(p_dataset, p_output, human_thresholds, m, 'svd')
			
 
				+            generate_data_feature(p_dataset, p_output, human_thresholds, m, 'svdn')
			
 
				+            generate_data_feature(p_dataset, p_output, human_thresholds, m, 'svdne')
			
 
				+    else:
			
 
				+
			
 
				+        if p_feature not in features_choices:
			
 
				+            raise ValueError('Unknown feature choice : ', features_choices)
			
 
				+            
			
 
				+        generate_data_feature(p_dataset, p_output, human_thresholds, p_feature, 'svd')
			
 
				+        generate_data_feature(p_dataset, p_output, human_thresholds, p_feature, 'svdn')
			
 
				+        generate_data_feature(p_dataset, p_output, human_thresholds, p_feature, 'svdne')
			
 
				+
			
 
				+if __name__== "__main__":
			
 
				+    main()
			
--- a/generate/generate_data_model_file.py
+++ b/generate/generate_data_model_file.py
@@ -0,0 +1,267 @@
 
				+# main imports
			
 
				+import sys, os, argparse
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+import random
			
 
				+
			
 
				+# image processing imports
			
 
				+from PIL import Image
			
 
				+
			
 
				+from ipfml import utils
			
 
				+
			
 
				+# modules imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+from modules.utils import data as dt
			
 
				+from data_attributes import get_image_features
			
 
				+
			
 
				+
			
 
				+# getting configuration information
			
 
				+learned_folder          = cfg.learned_zones_folder
			
 
				+min_max_filename        = cfg.min_max_filename_extension
			
 
				+
			
 
				+# define all scenes variables
			
 
				+zones                   = cfg.zones_indices
			
 
				+seuil_expe_filename     = cfg.seuil_expe_filename
			
 
				+
			
 
				+normalization_choices   = cfg.normalization_choices
			
 
				+features_choices        = cfg.features_choices_labels
			
 
				+output_data_folder      = cfg.output_datasets
			
 
				+custom_min_max_folder   = cfg.min_max_custom_folder
			
 
				+min_max_ext             = cfg.min_max_filename_extension
			
 
				+zones_indices           = cfg.zones_indices
			
 
				+
			
 
				+generic_output_file_svd = '_random.csv'
			
 
				+
			
 
				+min_value_interval = sys.maxsize
			
 
				+max_value_interval = 0
			
 
				+
			
 
				+def construct_new_line(threshold, interval, line, choice, each, norm):
			
 
				+    begin, end = interval
			
 
				+
			
 
				+    line_data = line.split(';')
			
 
				+    seuil = line_data[0]
			
 
				+    features = line_data[begin+1:end+1]
			
 
				+
			
 
				+    features = [float(m) for id, m in enumerate(features) if id % each == 0 ]
			
 
				+
			
 
				+    if norm:
			
 
				+        if choice == 'svdne':
			
 
				+            features = utils.normalize_arr_with_range(features, min_value_interval, max_value_interval)
			
 
				+        if choice == 'svdn':
			
 
				+            features = utils.normalize_arr(features)
			
 
				+
			
 
				+    if threshold > int(seuil):
			
 
				+        line = '1'
			
 
				+    else:
			
 
				+        line = '0'
			
 
				+
			
 
				+    for val in features:
			
 
				+        line += ';'
			
 
				+        line += str(val)
			
 
				+    line += '\n'
			
 
				+
			
 
				+    return line
			
 
				+
			
 
				+def get_min_max_value_interval(path, _scenes_list, _interval, _feature):
			
 
				+
			
 
				+    global min_value_interval, max_value_interval
			
 
				+
			
 
				+    scenes = os.listdir(path)
			
 
				+
			
 
				+    # remove min max file from scenes folder
			
 
				+    scenes = [s for s in scenes if min_max_filename not in s]
			
 
				+
			
 
				+    for folder_scene in scenes:
			
 
				+
			
 
				+        # only take care of maxwell scenes
			
 
				+        if folder_scene in _scenes_list:
			
 
				+
			
 
				+            scene_path = os.path.join(path, folder_scene)
			
 
				+
			
 
				+            zones_folder = []
			
 
				+            # create zones list
			
 
				+            for index in zones:
			
 
				+                index_str = str(index)
			
 
				+                if len(index_str) < 2:
			
 
				+                    index_str = "0" + index_str
			
 
				+                zones_folder.append("zone"+index_str)
			
 
				+
			
 
				+            for zone_folder in zones_folder:
			
 
				+                zone_path = os.path.join(scene_path, zone_folder)
			
 
				+                data_filename = _feature + "_svd" + generic_output_file_svd
			
 
				+                data_file_path = os.path.join(zone_path, data_filename)
			
 
				+
			
 
				+                # getting number of line and read randomly lines
			
 
				+                f = open(data_file_path)
			
 
				+                lines = f.readlines()
			
 
				+
			
 
				+                # check if user select current scene and zone to be part of training data set
			
 
				+                for line in lines:
			
 
				+
			
 
				+                    begin, end = _interval
			
 
				+
			
 
				+                    line_data = line.split(';')
			
 
				+                    features = line_data[begin+1:end+1]
			
 
				+                    features = [float(m) for m in features]
			
 
				+
			
 
				+                    min_value = min(features)
			
 
				+                    max_value = max(features)
			
 
				+
			
 
				+                    if min_value < min_value_interval:
			
 
				+                        min_value_interval = min_value
			
 
				+
			
 
				+                    if max_value > max_value_interval:
			
 
				+                        max_value_interval = max_value
			
 
				+
			
 
				+
			
 
				+def generate_data_model(_filename, _data_path, _interval, _choice, _feature, _thresholds, _learned_zones, _step=1, _each=1, _norm=False, _custom=False):
			
 
				+
			
 
				+    output_train_filename = os.path.join(output_data_folder, _filename + ".train")
			
 
				+    output_test_filename = os.path.join(output_data_folder,_filename + ".test")
			
 
				+
			
 
				+    # create path if not exists
			
 
				+    if not os.path.exists(output_data_folder):
			
 
				+        os.makedirs(output_data_folder)
			
 
				+
			
 
				+    train_file = open(output_train_filename, 'w')
			
 
				+    test_file = open(output_test_filename, 'w')
			
 
				+
			
 
				+    # get zone indices
			
 
				+    zones_indices = np.arange(16)
			
 
				+
			
 
				+    for folder_scene in _learned_zones:
			
 
				+
			
 
				+        # get train zones
			
 
				+        train_zones = _learned_zones[folder_scene]
			
 
				+        scene_thresholds = _thresholds[folder_scene]
			
 
				+        scene_path = os.path.join(_data_path, folder_scene)
			
 
				+
			
 
				+        for id_zone, index_folder in enumerate(zones_indices):
			
 
				+
			
 
				+            index_str = str(index_folder)
			
 
				+            if len(index_str) < 2:
			
 
				+                index_str = "0" + index_str
			
 
				+            current_zone_folder = "zone" + index_str
			
 
				+
			
 
				+            zone_path = os.path.join(scene_path, current_zone_folder)
			
 
				+
			
 
				+            # if custom normalization choices then we use svd values not already normalized
			
 
				+            if _custom:
			
 
				+                data_filename = _feature + "_svd" + generic_output_file_svd
			
 
				+            else:
			
 
				+                data_filename = _feature + "_" + _choice + generic_output_file_svd
			
 
				+
			
 
				+            data_file_path = os.path.join(zone_path, data_filename)
			
 
				+
			
 
				+            # getting number of line and read randomly lines
			
 
				+            f = open(data_file_path)
			
 
				+            lines = f.readlines()
			
 
				+
			
 
				+            num_lines = len(lines)
			
 
				+
			
 
				+            lines_indexes = np.arange(num_lines)
			
 
				+            random.shuffle(lines_indexes)
			
 
				+
			
 
				+            counter = 0
			
 
				+            # check if user select current scene and zone to be part of training data set
			
 
				+            for index in lines_indexes:
			
 
				+
			
 
				+                image_index = int(lines[index].split(';')[0])
			
 
				+
			
 
				+                if image_index % _step == 0:
			
 
				+                    line = construct_new_line(scene_thresholds[id_zone], _interval, lines[index], _choice, _each, _norm)
			
 
				+
			
 
				+                    if id_zone in train_zones:
			
 
				+                        train_file.write(line)
			
 
				+                    else:
			
 
				+                        test_file.write(line)
			
 
				+
			
 
				+                counter += 1
			
 
				+
			
 
				+            f.close()
			
 
				+
			
 
				+    train_file.close()
			
 
				+    test_file.close()
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+
			
 
				+    # getting all params
			
 
				+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
			
 
				+
			
 
				+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)', required=True)
			
 
				+    parser.add_argument('--data', type=str, help='folder which contains data of dataset', required=True)
			
 
				+    parser.add_argument('--thresholds', type=str, help='file with scene list information and thresholds', required=True)
			
 
				+    parser.add_argument('--selected_zones', type=str, help='file which contains all selected zones of scene', required=True)  
			
 
				+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"', required=True)
			
 
				+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
			
 
				+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices, required=True)
			
 
				+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
			
 
				+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
			
 
				+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    p_filename = args.output
			
 
				+    p_data     = args.data
			
 
				+    p_thresholds = args.thresholds
			
 
				+    p_selected_zones = args.selected_zones
			
 
				+    p_interval = list(map(int, args.interval.split(',')))
			
 
				+    p_kind     = args.kind
			
 
				+    p_feature  = args.feature
			
 
				+    p_step     = args.step
			
 
				+    p_each     = args.each
			
 
				+    p_custom   = args.custom
			
 
				+
			
 
				+    # 1. retrieve human_thresholds
			
 
				+    human_thresholds = {}
			
 
				+
			
 
				+    # extract thresholds
			
 
				+    with open(p_thresholds) as f:
			
 
				+        thresholds_line = f.readlines()
			
 
				+
			
 
				+        for line in thresholds_line:
			
 
				+            data = line.split(';')
			
 
				+            del data[-1] # remove unused last element `\n`
			
 
				+            current_scene = data[0]
			
 
				+            thresholds_scene = data[1:]
			
 
				+
			
 
				+            # TODO : check if really necessary
			
 
				+            if current_scene != '50_shades_of_grey':
			
 
				+                human_thresholds[current_scene] = [ int(threshold) for threshold in  thresholds_scene ]
			
 
				+
			
 
				+    # 2. get selected zones
			
 
				+    selected_zones = {}
			
 
				+    with(open(p_selected_zones, 'r')) as f:
			
 
				+
			
 
				+        for line in f.readlines():
			
 
				+
			
 
				+            data = line.split(';')
			
 
				+            del data[-1]
			
 
				+            scene_name = data[0]
			
 
				+            thresholds = data[1:]
			
 
				+
			
 
				+            selected_zones[scene_name] = [ int(t) for t in thresholds ]
			
 
				+
			
 
				+    # find min max value if necessary to renormalize data
			
 
				+    if p_custom:
			
 
				+        get_min_max_value_interval(p_data, selected_zones, p_interval, p_feature)
			
 
				+
			
 
				+        # write new file to save
			
 
				+        if not os.path.exists(custom_min_max_folder):
			
 
				+            os.makedirs(custom_min_max_folder)
			
 
				+
			
 
				+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
			
 
				+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
			
 
				+
			
 
				+        with open(min_max_filename_path, 'w') as f:
			
 
				+            f.write(str(min_value_interval) + '\n')
			
 
				+            f.write(str(max_value_interval) + '\n')
			
 
				+
			
 
				+    # create database using img folder (generate first time only)
			
 
				+    generate_data_model(p_filename, p_data, p_interval, p_kind, p_feature, human_thresholds, selected_zones, p_step, p_each, p_custom)
			
 
				+
			
 
				+if __name__== "__main__":
			
 
				+    main()
			
--- a/generate/generate_selected_zones_file.py
+++ b/generate/generate_selected_zones_file.py
@@ -0,0 +1,104 @@
 
				+# main imports
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+import sys, os, argparse
			
 
				+import random
			
 
				+
			
 
				+
			
 
				+# modules and config imports
			
 
				+sys.path.insert(0, '') # trick to enable import of main folder module
			
 
				+
			
 
				+import custom_config as cfg
			
 
				+
			
 
				+
			
 
				+def save_learned_zones(output_name, scene, zones):
			
 
				+
			
 
				+    if not os.path.exists(cfg.output_zones_learned):
			
 
				+        os.makedirs(cfg.output_zones_learned)
			
 
				+
			
 
				+    with open(os.path.join(cfg.output_zones_learned, output_name), 'a') as f:
			
 
				+        f.write(scene + ';')
			
 
				+
			
 
				+        for zone in zones:
			
 
				+            f.write(str(zone) + ';')
			
 
				+
			
 
				+        f.write('\n')
			
 
				+
			
 
				+
			
 
				+def get_random_zones(scene, zones, n_zones):
			
 
				+
			
 
				+    random.shuffle(zones)
			
 
				+
			
 
				+    # specific case for 'Cuisine01' (zone 12 is also noisy even in reference image)
			
 
				+    # if scene == 'Cuisine01':
			
 
				+    #     while 12 in zones[0:n_zones]:
			
 
				+    #         random.shuffle(zones)
			
 
				+    
			
 
				+    return zones[0:n_zones]
			
 
				+
			
 
				+def main():
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(description="Read and compute entropy data file (using diff)")
			
 
				+
			
 
				+    parser.add_argument('--folder', type=str, help='dataset scene folder', required=True)
			
 
				+    parser.add_argument('--n_zones', type=int, help='number of zones used in train', default=10)
			
 
				+    parser.add_argument('--output', type=str, help='file with specific training zone', required=True)
			
 
				+    parser.add_argument('--thresholds', type=str, help='file with specific thresholds (using only scene from this file', default='')
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    p_folder       = args.folder
			
 
				+    p_n_zones      = args.n_zones
			
 
				+    p_output       = args.output
			
 
				+    p_thresholds   = args.thresholds
			
 
				+
			
 
				+    # extract scenes to use if specified
			
 
				+    available_scenes = None
			
 
				+
			
 
				+    if len(p_thresholds) > 0:
			
 
				+        
			
 
				+        available_scenes = []
			
 
				+
			
 
				+        with open(p_thresholds) as f:
			
 
				+            thresholds_line = f.readlines()
			
 
				+
			
 
				+            for line in thresholds_line:
			
 
				+                data = line.split(';')
			
 
				+                del data[-1] # remove unused last element `\n`
			
 
				+                current_scene = data[0]
			
 
				+
			
 
				+                # need to rename `current_name` because we only used part6
			
 
				+                # scene_split = current_scene.split('_')
			
 
				+                # del scene_split[-1]
			
 
				+                # scene_name = '_'.join(scene_split)
			
 
				+
			
 
				+                available_scenes.append(current_scene)
			
 
				+
			
 
				+
			
 
				+    # specific number of zones (zones indices)
			
 
				+    zones = np.arange(16)
			
 
				+
			
 
				+    # get all scene names
			
 
				+    scenes = os.listdir(p_folder)
			
 
				+
			
 
				+    # create output thresholds directory if necessary
			
 
				+    folder, _ = os.path.split(p_output)
			
 
				+
			
 
				+    if len(folder) > 0:
			
 
				+        os.makedirs(folder)
			
 
				+
			
 
				+    # for each scene we generate random zones choice
			
 
				+    for folder_scene in scenes:
			
 
				+
			
 
				+        if available_scenes is not None:
			
 
				+
			
 
				+            if folder_scene in available_scenes:
			
 
				+                selected_zones = get_random_zones(folder_scene, zones, p_n_zones)
			
 
				+                save_learned_zones(p_output, folder_scene, selected_zones)
			
 
				+        else:
			
 
				+            selected_zones = get_random_zones(folder_scene, zones, p_n_zones)
			
 
				+            save_learned_zones(p_output, folder_scene, selected_zones)
			
 
				+            
			
 
				+
			
 
				+if __name__== "__main__":
			
 
				+    main()
			
--- a/train_keras_svd.py
+++ b/train_keras_svd.py
@@ -4,6 +4,7 @@ import argparse
 
				 import json
			
 
				 import numpy as np
			
 
				 import pandas as pd
			
 
				+import logging
			
 
				 
			
 
				 # models imports
			
 
				 from keras.preprocessing.image import ImageDataGenerator
			
@@ -12,9 +13,10 @@ from keras.layers import Conv1D, MaxPooling1D
 
				 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
			
 
				 from keras.wrappers.scikit_learn import KerasClassifier
			
 
				 from keras import backend as K
			
 
				+from keras.callbacks import Callback
			
 
				 
			
 
				 from sklearn.utils import shuffle
			
 
				-from sklearn.metrics import roc_auc_score
			
 
				+from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
			
 
				 
			
 
				 # modules and config imports
			
 
				 import custom_config as cfg
			
@@ -50,6 +52,29 @@ def f1(y_true, y_pred):
 
				     recall = recall(y_true, y_pred)
			
 
				     return 2*((precision*recall)/(precision+recall+K.epsilon()))
			
 
				 
			
 
				+
			
 
				+class IntervalEvaluation(Callback):
			
 
				+    def __init__(self, validation_data=(), interval=1):
			
 
				+        super(Callback, self).__init__()
			
 
				+
			
 
				+        self.interval = interval
			
 
				+        self.X_val, self.y_val = validation_data
			
 
				+
			
 
				+    def on_epoch_end(self, epoch, logs={}):
			
 
				+        if epoch % self.interval == 0:
			
 
				+            y_pred = self.model.predict_proba(self.X_val, verbose=0)
			
 
				+            y_pred = [ 0 if y < 0.5 else 1 for y in y_pred ]
			
 
				+            auc_score = roc_auc_score(self.y_val, y_pred)
			
 
				+            acc_score = accuracy_score(self.y_val, y_pred)
			
 
				+            f1_test_score = f1_score(self.y_val, y_pred)
			
 
				+            
			
 
				+            print("------------------------------")
			
 
				+            print("[test dataset] for epoch {:d}".format(epoch + 1))
			
 
				+            print("ROC AUC : {:.6f}".format(auc_score))
			
 
				+            print("ACCURACY: {:.6f}".format(acc_score))
			
 
				+            print("F1 score: {:.6f}".format(f1_test_score))
			
 
				+            print("------------------------------")
			
 
				+
			
 
				 def generate_model(input_shape):
			
 
				 
			
 
				     model = Sequential()
			
@@ -86,41 +111,41 @@ def generate_model(input_shape):
 
				 
			
 
				     model.add(Flatten(input_shape=input_shape))
			
 
				 
			
 
				-    model.add(Dense(2048))
			
 
				-    model.add(Activation('relu'))
			
 
				-    model.add(BatchNormalization())
			
 
				-    model.add(Dropout(0.2))
			
 
				+    # model.add(Dense(2048))
			
 
				+    # model.add(Activation('relu'))
			
 
				+    # model.add(BatchNormalization())
			
 
				+    # model.add(Dropout(0.2))
			
 
				 
			
 
				     model.add(Dense(1024))
			
 
				     model.add(Activation('relu'))
			
 
				     model.add(BatchNormalization())
			
 
				-    model.add(Dropout(0.2))
			
 
				+    model.add(Dropout(0.4))
			
 
				 
			
 
				     model.add(Dense(512))
			
 
				     model.add(Activation('relu'))
			
 
				     model.add(BatchNormalization())
			
 
				-    model.add(Dropout(0.3))
			
 
				+    model.add(Dropout(0.4))
			
 
				 
			
 
				     model.add(Dense(256))
			
 
				     model.add(Activation('relu'))
			
 
				     model.add(BatchNormalization())
			
 
				-    model.add(Dropout(0.3))
			
 
				+    model.add(Dropout(0.4))
			
 
				 
			
 
				     model.add(Dense(128))
			
 
				     model.add(Activation('relu'))
			
 
				     model.add(BatchNormalization())
			
 
				-    model.add(Dropout(0.3))
			
 
				+    model.add(Dropout(0.4))
			
 
				 
			
 
				     model.add(Dense(20))
			
 
				     model.add(Activation('relu'))
			
 
				     model.add(BatchNormalization())
			
 
				-    model.add(Dropout(0.3))
			
 
				+    model.add(Dropout(0.4))
			
 
				 
			
 
				     model.add(Dense(1))
			
 
				     model.add(Activation('sigmoid'))
			
 
				 
			
 
				     model.compile(loss='binary_crossentropy',
			
 
				-                  optimizer='adam',
			
 
				+                  optimizer='rmsprop',
			
 
				                   metrics=['accuracy', f1])
			
 
				 
			
 
				     return model
			
@@ -155,30 +180,46 @@ def main():
 
				     dataset_test = shuffle(dataset_test)
			
 
				 
			
 
				     # get dataset with equal number of classes occurences
			
 
				-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
			
 
				-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
			
 
				+    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
			
 
				+    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
			
 
				     nb_noisy_train = len(noisy_df_train.index)
			
 
				+    nb_not_noisy_train = len(not_noisy_df_train.index)
			
 
				 
			
 
				-    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
			
 
				-    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
			
 
				+    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
			
 
				+    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
			
 
				     nb_noisy_test = len(noisy_df_test.index)
			
 
				+    nb_not_noisy_test = len(not_noisy_df_test.index)
			
 
				 
			
 
				-    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
			
 
				-    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
			
 
				+    final_df_train = pd.concat([not_noisy_df_train, noisy_df_train])
			
 
				+    final_df_test = pd.concat([not_noisy_df_test, noisy_df_test])
			
 
				 
			
 
				     # shuffle data another time
			
 
				     final_df_train = shuffle(final_df_train)
			
 
				     final_df_test = shuffle(final_df_test)
			
 
				 
			
 
				-    final_df_train_size = len(final_df_train.index)
			
 
				-    final_df_test_size = len(final_df_test.index)
			
 
				-
			
 
				     # use of the whole data set for training
			
 
				-    x_dataset_train = final_df_train.ix[:,1:]
			
 
				-    x_dataset_test = final_df_test.ix[:,1:]
			
 
				+    x_dataset_train = final_df_train.iloc[:,1:]
			
 
				+    x_dataset_test = final_df_test.iloc[:,1:]
			
 
				+
			
 
				+    y_dataset_train = final_df_train.iloc[:,0]
			
 
				+    y_dataset_test = final_df_test.iloc[:,0]
			
 
				+
			
 
				+    noisy_samples = nb_noisy_test + nb_noisy_train
			
 
				+    not_noisy_samples = nb_not_noisy_test + nb_not_noisy_train
			
 
				+
			
 
				+    total_samples = noisy_samples + not_noisy_samples
			
 
				+
			
 
				+    print('noisy', noisy_samples)
			
 
				+    print('not_noisy', not_noisy_samples)
			
 
				+    print('total', total_samples)
			
 
				+
			
 
				+    class_weight = {
			
 
				+        0: noisy_samples / float(total_samples),
			
 
				+        1: not_noisy_samples / float(total_samples)
			
 
				+    }
			
 
				+
			
 
				+    print(class_weight)
			
 
				 
			
 
				-    y_dataset_train = final_df_train.ix[:,0]
			
 
				-    y_dataset_test = final_df_test.ix[:,0]
			
 
				 
			
 
				     #######################
			
 
				     # 2. Getting model
			
@@ -196,7 +237,9 @@ def main():
 
				     x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), p_vector_size, 1)
			
 
				     x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), p_vector_size, 1)
			
 
				 
			
 
				-    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
			
 
				+    ival = IntervalEvaluation(validation_data=(x_dataset_test, y_dataset_test), interval=1)
			
 
				+
			
 
				+    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, callbacks=[ival], class_weight=class_weight)
			
 
				 
			
 
				     score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)
			
 
				 
			
@@ -215,8 +258,11 @@ def main():
 
				 
			
 
				     # Save results obtained from model
			
 
				     y_test_prediction = model.predict(x_dataset_test)
			
 
				+    y_test_prediction = [ 0 if y < 0.5 else 1 for y in y_test_prediction ]
			
 
				+
			
 
				     print("Metrics : ", model.metrics_names)
			
 
				-    print("Prediction : ", score)
			
 
				+    print("ACC score : ", accuracy_score(y_dataset_test, y_test_prediction))
			
 
				+    print("F1 score : ", f1_score(y_dataset_test, y_test_prediction))
			
 
				     print("ROC AUC : ", roc_auc_score(y_dataset_test, y_test_prediction))
			
 
				 
			
 
				 
			
--- a/train_model.py
+++ b/train_model.py
@@ -9,9 +9,9 @@ from sklearn.model_selection import GridSearchCV
 
				 from sklearn.linear_model import LogisticRegression
			
 
				 from sklearn.ensemble import RandomForestClassifier, VotingClassifier
			
 
				 
			
 
				+import joblib
			
 
				 import sklearn.svm as svm
			
 
				 from sklearn.utils import shuffle
			
 
				-from sklearn.externals import joblib
			
 
				 from sklearn.metrics import accuracy_score, f1_score
			
 
				 from sklearn.model_selection import cross_val_score
			
 
				 
			
@@ -57,12 +57,12 @@ def main():
 
				     dataset_test = shuffle(dataset_test)
			
 
				 
			
 
				     # get dataset with equal number of classes occurences
			
 
				-    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
			
 
				-    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
			
 
				+    noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 1]
			
 
				+    not_noisy_df_train = dataset_train[dataset_train.iloc[:, 0] == 0]
			
 
				     nb_noisy_train = len(noisy_df_train.index)
			
 
				 
			
 
				-    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
			
 
				-    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
			
 
				+    noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 1]
			
 
				+    not_noisy_df_test = dataset_test[dataset_test.iloc[:, 0] == 0]
			
 
				     nb_noisy_test = len(noisy_df_test.index)
			
 
				 
			
 
				     final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
			
@@ -76,11 +76,11 @@ def main():
 
				     final_df_test_size = len(final_df_test.index)
			
 
				 
			
 
				     # use of the whole data set for training
			
 
				-    x_dataset_train = final_df_train.ix[:,1:]
			
 
				-    x_dataset_test = final_df_test.ix[:,1:]
			
 
				+    x_dataset_train = final_df_train.iloc[:,1:]
			
 
				+    x_dataset_test = final_df_test.iloc[:,1:]
			
 
				 
			
 
				-    y_dataset_train = final_df_train.ix[:,0]
			
 
				-    y_dataset_test = final_df_test.ix[:,0]
			
 
				+    y_dataset_train = final_df_train.iloc[:,0]
			
 
				+    y_dataset_test = final_df_test.iloc[:,0]
			
 
				 
			
 
				     #######################
			
 
				     # 2. Construction of the model : Ensemble model structure