il y a 5 ans · 9027fd20f9
--- a/analysis/svd_entropy_analysis.ipynb
+++ b/analysis/svd_entropy_analysis.ipynb
--- a/analysis/svd_entropy_diff_analysis.ipynb
+++ b/analysis/svd_entropy_diff_analysis.ipynb
--- a/analysis/svd_entropy_gradient_analysis.ipynb
+++ b/analysis/svd_entropy_gradient_analysis.ipynb
--- a/analysis/svd_entropy_gradientabs_analysis.ipynb
+++ b/analysis/svd_entropy_gradientabs_analysis.ipynb
--- a/analysis/svd_entropy_minus_analysis.ipynb
+++ b/analysis/svd_entropy_minus_analysis.ipynb
--- a/analysis/svd_scenes_analysis.ipynb
+++ b/analysis/svd_scenes_analysis.ipynb
--- a/custom_config.py
+++ b/custom_config.py
@@ -1,9 +1,18 @@
 
																 from modules.config.attributes_config import *
															
 
																+import os
															
 
																+
															
 
# store all variables from global config
# NOTE: called at module level without argument, `vars()` returns the namespace
# dictionary of this module (not a copy), so the names assigned below are
# reachable through `context_vars` as well
context_vars = vars()

# folders
# every generated artefact is stored in a sub folder of `output_data_folder`
output_data_folder              = 'data'
output_data_generated           = os.path.join(output_data_folder, 'generated')
output_datasets                 = os.path.join(output_data_folder, 'datasets')
output_zones_learned            = os.path.join(output_data_folder, 'learned_zones')
output_models                   = os.path.join(output_data_folder, 'saved_models')
output_results_folder           = os.path.join(output_data_folder, 'results')

# NOTE(review): `min_max_custom_folder` is disabled here while
# generate/generate_data_model_file.py still reads `cfg.min_max_custom_folder`;
# it then has to come from `modules.config.attributes_config` - TODO confirm
## min_max_custom_folder           = 'custom_norm'
## correlation_indices_folder      = 'corr_indices'
data_augmented_filename         = 'augmented_dataset.csv'
															
--- a/display/display_svd_data_scene.py
+++ b/display/display_svd_data_scene.py
@@ -1,6 +1,7 @@
 
																 # main imports
															
 
																 import sys, os, argparse
															
 
																 import numpy as np
															
 
																+import math
															
 
																 # image processing imports
															
 
																 from PIL import Image
															
@@ -151,6 +152,9 @@ def display_svd_values(p_scene, p_interval, p_indices, p_feature, p_mode, p_step
 
																             for id, data in enumerate(svd_data):
															
 
																+                # current_data = [ math.log10(d + 1.) for d in data ]
															
 
																+                # print(current_data)
															
 
																+
															
 
																                 current_data = data
															
 
																                 if not p_norm:
															
@@ -170,30 +174,32 @@ def display_svd_values(p_scene, p_interval, p_indices, p_feature, p_mode, p_step
 
																             ax.set_facecolor('#FFFFFF')
															
 
																             #fig.patch.set_facecolor('#F9F9F9')
															
 
																-            ax.tick_params(labelsize=22)
															
 
																+            ax.tick_params(labelsize=26)
															
 
																             #plt.rc('xtick', labelsize=22)
															
 
																             #plt.rc('ytick', labelsize=22)
															
 
																             #plt.title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_feature + ' feature, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=24)
															
 
																-            ax.set_ylabel('Component values', fontsize=28)
															
 
																-            ax.set_xlabel('Vector features', fontsize=28)
															
 
																+            ax.set_ylabel('Component values', fontsize=36)
															
 
																+            ax.set_xlabel('Singular value component indices', fontsize=36)
															
 
																             for id, data in enumerate(images_data):
															
 
																-                p_label = p_scene + "_" + images_indices[id]
															
 
																+                #p_label = p_scene + "_" + images_indices[id]
															
 
																+                p_label = images_indices[id] + " samples"
															
 
																                 if int(images_indices[id]) == int(threshold_image_zone):
															
 
																-                    ax.plot(data, label=p_label + " (threshold mean)", lw=4, color='red')
															
 
																+                    ax.plot(data, label=p_label + " (threshold mean)", lw=6, color='red')
															
 
																                 else:
															
 
																                     ax.plot(data, label=p_label)
															
 
																-            plt.legend(bbox_to_anchor=(0.60, 0.98), loc=2, borderaxespad=0.2, fontsize=26)
															
 
																+            plt.legend(bbox_to_anchor=(0.60, 0.98), loc=2, borderaxespad=0.2, fontsize=32)
															
 
																             start_ylim, end_ylim = p_ylim
															
 
																             ax.set_ylim(start_ylim, end_ylim)
															
 
																             plot_name = p_scene + '_' + p_feature + '_' + str(p_step) + '_' + p_mode + '_' + str(p_norm) + '.png'
															
 
																-            plt.savefig(plot_name, facecolor=ax.get_facecolor())
															
 
																+            plt.title('Tend of Singular values at different samples of Flat scene', fontsize=40)
															
 
																+            plt.savefig(plot_name, transparent=True)
															
 
																 def main():
															
@@ -206,7 +212,7 @@ def main():
 
																     parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=cfg.normalization_choices)
															
 
																     parser.add_argument('--step', type=int, help='Each step samples to display', default=10)
															
 
																     parser.add_argument('--norm', type=int, help='If values will be normalized or not', choices=[0, 1])
															
 
																-    parser.add_argument('--ylim', type=str, help='ylim interval to use', default='"0, 1"')
															
 
																+    parser.add_argument('--ylim', type=str, help='ylim interval to use', default='0,1')
															
 
																     args = parser.parse_args()
															
--- a/generate/generate_all_data.py
+++ b/generate/generate_all_data.py
@@ -163,6 +163,8 @@ def main():
 
																     parser = argparse.ArgumentParser(description="Compute and prepare data of feature of all scenes (keep in memory min and max value found)")
															
 
																     parser.add_argument('--feature', type=str, 
															
 
																+                                    help="feature choice in order to compute data (use 'all' if all features are needed)", required=True)
															
 
																+    parser.add_argument('--dataset', type=str, 
															
 
																                                     help="feature choice in order to compute data (use 'all' if all features are needed)")
															
 
																     args = parser.parse_args()
															
--- a/generate/generate_all_data_file.py
+++ b/generate/generate_all_data_file.py
@@ -0,0 +1,225 @@
 
																+# main imports
															
 
																+import sys, os, argparse
															
 
																+import numpy as np
															
 
																+import random
															
 
																+import time
															
 
																+import json
															
 
																+
															
 
																+# image processing imports
															
 
																+from PIL import Image
															
 
																+
															
 
																+from ipfml.processing import transform, segmentation
															
 
																+from ipfml import utils
															
 
																+
															
 
																+# modules imports
															
 
																+sys.path.insert(0, '') # trick to enable import of main folder module
															
 
																+
															
 
																+import custom_config as cfg
															
 
																+from modules.utils import data as dt
															
 
																+from data_attributes import get_image_features
															
 
																+
															
 
																+
															
 
# getting configuration information
zone_folder             = cfg.zone_folder
# suffix of the file where min and max values found for a feature are stored
min_max_filename        = cfg.min_max_filename_extension

# define all scenes values
choices                 = cfg.normalization_choices
# zones indices, used to build the `zoneXX` folder names of each scene
zones                   = cfg.zones_indices
seuil_expe_filename     = cfg.seuil_expe_filename

# feature names accepted by the `--feature` parameter
features_choices        = cfg.features_choices_labels
output_data_folder      = cfg.output_data_folder

# suffix of every generated data file : <feature>_<mode>_random.csv
generic_output_file_svd = '_random.csv'
															
 
																+
															
 
																+def generate_data_feature(path, output, human_thresholds, data_type, mode):
															
 
																+    """
															
 
																+    @brief Method which generates all .csv files from scenes
															
 
																+    @param data_type,  feature choice
															
 
																+    @param mode, normalization choice
															
 
																+    @return nothing
															
 
																+    """
															
 
																+
															
 
																+    scenes = os.listdir(path)
															
 
																+    # remove min max file from scenes folder
															
 
																+    scenes = [s for s in scenes if min_max_filename not in s]
															
 
																+
															
 
																+    # keep in memory min and max data found from data_type
															
 
																+    min_val_found = sys.maxsize
															
 
																+    max_val_found = 0
															
 
																+
															
 
																+    output_path = os.path.join(cfg.output_data_generated, output)
															
 
																+
															
 
																+    if not os.path.exists(output_path):
															
 
																+        os.makedirs(output_path)
															
 
																+
															
 
																+    data_min_max_filename = os.path.join(output_path, data_type + min_max_filename)
															
 
																+
															
 
																+    # go ahead each scenes
															
 
																+    for folder_scene in human_thresholds:
															
 
																+
															
 
																+        print(folder_scene)
															
 
																+        scene_path = os.path.join(path, folder_scene)
															
 
																+        output_scene_path = os.path.join(output_path, folder_scene)
															
 
																+
															
 
																+        if not os.path.exists(output_scene_path):
															
 
																+            os.makedirs(output_scene_path)
															
 
																+
															
 
																+        # getting output filename
															
 
																+        output_svd_filename = data_type + "_" + mode + generic_output_file_svd
															
 
																+
															
 
																+        # construct each zones folder name
															
 
																+        zones_folder = []
															
 
																+        svd_output_files = []
															
 
																+
															
 
																+        # get zones list info
															
 
																+        for index in zones:
															
 
																+            index_str = str(index)
															
 
																+            if len(index_str) < 2:
															
 
																+                index_str = "0" + index_str
															
 
																+
															
 
																+            current_zone = "zone"+index_str
															
 
																+            zones_folder.append(current_zone)
															
 
																+
															
 
																+            zone_path = os.path.join(scene_path, current_zone)
															
 
																+            output_zone_path = os.path.join(output_scene_path, current_zone)
															
 
																+
															
 
																+            if not os.path.exists(output_zone_path):
															
 
																+                os.makedirs(output_zone_path)
															
 
																+
															
 
																+            svd_file_path = os.path.join(output_zone_path, output_svd_filename)
															
 
																+
															
 
																+            # add writer into list
															
 
																+            svd_output_files.append(open(svd_file_path, 'w'))
															
 
																+
															
 
																+        # get all images of folder
															
 
																+        scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
															
 
																+        number_scene_image = len(scene_images)
															
 
																+            
															
 
																+        for id_img, img_path in enumerate(scene_images):
															
 
																+            
															
 
																+            current_image_postfix = dt.get_scene_image_postfix(img_path)
															
 
																+
															
 
																+            current_img = Image.open(img_path)
															
 
																+            img_blocks = segmentation.divide_in_blocks(current_img, (200, 200))
															
 
																+
															
 
																+            for id_block, block in enumerate(img_blocks):
															
 
																+
															
 
																+                ###########################
															
 
																+                # feature computation part #
															
 
																+                ###########################
															
 
																+
															
 
																+                data = get_image_features(data_type, block)
															
 
																+
															
 
																+                ##################
															
 
																+                # Data mode part #
															
 
																+                ##################
															
 
																+
															
 
																+                # modify data depending mode
															
 
																+                if mode == 'svdne':
															
 
																+
															
 
																+                    # getting max and min information from min_max_filename
															
 
																+                    with open(data_min_max_filename, 'r') as f:
															
 
																+                        min_val = float(f.readline())
															
 
																+                        max_val = float(f.readline())
															
 
																+
															
 
																+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
															
 
																+
															
 
																+                if mode == 'svdn':
															
 
																+                    data = utils.normalize_arr(data)
															
 
																+
															
 
																+                # save min and max found from dataset in order to normalize data using whole data known
															
 
																+                if mode == 'svd':
															
 
																+
															
 
																+                    current_min = data.min()
															
 
																+                    current_max = data.max()
															
 
																+
															
 
																+                    if current_min < min_val_found:
															
 
																+                        min_val_found = current_min
															
 
																+
															
 
																+                    if current_max > max_val_found:
															
 
																+                        max_val_found = current_max
															
 
																+
															
 
																+                # now write data into current writer
															
 
																+                current_file = svd_output_files[id_block]
															
 
																+
															
 
																+                # add of index
															
 
																+                current_file.write(current_image_postfix + ';')
															
 
																+
															
 
																+                for val in data:
															
 
																+                    current_file.write(str(val) + ";")
															
 
																+
															
 
																+                current_file.write('\n')
															
 
																+
															
 
																+            print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((id_img + 1) / number_scene_image * 100.) + "%")
															
 
																+            sys.stdout.write("\033[F")
															
 
																+
															
 
																+        for f in svd_output_files:
															
 
																+            f.close()
															
 
																+
															
 
																+        print('\n')
															
 
																+
															
 
																+    # save current information about min file found
															
 
																+    if mode == 'svd':
															
 
																+        with open(data_min_max_filename, 'w') as f:
															
 
																+            f.write(str(min_val_found) + '\n')
															
 
																+            f.write(str(max_val_found) + '\n')
															
 
																+
															
 
																+    print("%s_%s : end of data generation\n" % (data_type, mode))
															
 
																+
															
 
																+
															
 
																+def main():
															
 
																+
															
 
																+    parser = argparse.ArgumentParser(description="Compute and prepare data of feature of all scenes (keep in memory min and max value found)")
															
 
																+
															
 
																+    parser.add_argument('--feature', type=str, 
															
 
																+                                    help="feature choice in order to compute data (use 'all' if all features are needed)", required=True)
															
 
																+    parser.add_argument('--dataset', type=str, 
															
 
																+                                    help="dataset with all scenes", required=True)
															
 
																+    parser.add_argument('--output', type=str, 
															
 
																+                                    help="output where data files are saved", required=True)
															
 
																+
															
 
																+    parser.add_argument('--thresholds', type=str, help='file with scene list information and thresholds', required=True)
															
 
																+
															
 
																+    args = parser.parse_args()
															
 
																+
															
 
																+    p_feature = args.feature
															
 
																+    p_dataset = args.dataset
															
 
																+    p_output  = args.output
															
 
																+    p_thresholds = args.thresholds
															
 
																+
															
 
																+    # 1. retrieve human_thresholds
															
 
																+    human_thresholds = {}
															
 
																+
															
 
																+    # extract thresholds
															
 
																+    with open(p_thresholds) as f:
															
 
																+        thresholds_line = f.readlines()
															
 
																+
															
 
																+        for line in thresholds_line:
															
 
																+            data = line.split(';')
															
 
																+            del data[-1] # remove unused last element `\n`
															
 
																+            current_scene = data[0]
															
 
																+            thresholds_scene = data[1:]
															
 
																+
															
 
																+            # TODO : check if really necessary
															
 
																+            if current_scene != '50_shades_of_grey':
															
 
																+                human_thresholds[current_scene] = [ int(threshold) for threshold in  thresholds_scene ]
															
 
																+
															
 
																+    # generate all or specific feature data
															
 
																+    if p_feature == 'all':
															
 
																+        for m in features_choices:
															
 
																+            generate_data_feature(p_dataset, p_output, human_thresholds, m, 'svd')
															
 
																+            generate_data_feature(p_dataset, p_output, human_thresholds, m, 'svdn')
															
 
																+            generate_data_feature(p_dataset, p_output, human_thresholds, m, 'svdne')
															
 
																+    else:
															
 
																+
															
 
																+        if p_feature not in features_choices:
															
 
																+            raise ValueError('Unknown feature choice : ', features_choices)
															
 
																+            
															
 
																+        generate_data_feature(p_dataset, p_output, human_thresholds, p_feature, 'svd')
															
 
																+        generate_data_feature(p_dataset, p_output, human_thresholds, p_feature, 'svdn')
															
 
																+        generate_data_feature(p_dataset, p_output, human_thresholds, p_feature, 'svdne')
															
 
																+
															
 
																+if __name__== "__main__":
															
 
																+    main()
															
--- a/generate/generate_data_model_file.py
+++ b/generate/generate_data_model_file.py
@@ -0,0 +1,267 @@
 
																+# main imports
															
 
																+import sys, os, argparse
															
 
																+import numpy as np
															
 
																+import pandas as pd
															
 
																+import random
															
 
																+
															
 
																+# image processing imports
															
 
																+from PIL import Image
															
 
																+
															
 
																+from ipfml import utils
															
 
																+
															
 
																+# modules imports
															
 
																+sys.path.insert(0, '') # trick to enable import of main folder module
															
 
																+
															
 
																+import custom_config as cfg
															
 
																+from modules.utils import data as dt
															
 
																+from data_attributes import get_image_features
															
 
																+
															
 
																+
															
 
# getting configuration information
learned_folder          = cfg.learned_zones_folder
# part of file name used to recognize (and ignore) min max files inside dataset folder
min_max_filename        = cfg.min_max_filename_extension

# define all scenes variables
# zones indices, used to build the `zoneXX` folder names of each scene
zones                   = cfg.zones_indices
seuil_expe_filename     = cfg.seuil_expe_filename

normalization_choices   = cfg.normalization_choices
features_choices        = cfg.features_choices_labels
# folder where the `.train` and `.test` dataset files are written
output_data_folder      = cfg.output_datasets
custom_min_max_folder   = cfg.min_max_custom_folder
# NOTE(review): same configuration value as `min_max_filename` above
min_max_ext             = cfg.min_max_filename_extension
# NOTE(review): same configuration value as `zones` above
zones_indices           = cfg.zones_indices

# suffix of every generated data file : <feature>_<mode>_random.csv
generic_output_file_svd = '_random.csv'

# min and max values found over the wished interval : updated by
# `get_min_max_value_interval` and used by `construct_new_line` for `svdne` normalization
min_value_interval = sys.maxsize
max_value_interval = 0
															
 
																+
															
 
																+def construct_new_line(threshold, interval, line, choice, each, norm):
															
 
																+    begin, end = interval
															
 
																+
															
 
																+    line_data = line.split(';')
															
 
																+    seuil = line_data[0]
															
 
																+    features = line_data[begin+1:end+1]
															
 
																+
															
 
																+    features = [float(m) for id, m in enumerate(features) if id % each == 0 ]
															
 
																+
															
 
																+    if norm:
															
 
																+        if choice == 'svdne':
															
 
																+            features = utils.normalize_arr_with_range(features, min_value_interval, max_value_interval)
															
 
																+        if choice == 'svdn':
															
 
																+            features = utils.normalize_arr(features)
															
 
																+
															
 
																+    if threshold > int(seuil):
															
 
																+        line = '1'
															
 
																+    else:
															
 
																+        line = '0'
															
 
																+
															
 
																+    for val in features:
															
 
																+        line += ';'
															
 
																+        line += str(val)
															
 
																+    line += '\n'
															
 
																+
															
 
																+    return line
															
 
																+
															
 
																+def get_min_max_value_interval(path, _scenes_list, _interval, _feature):
															
 
																+
															
 
																+    global min_value_interval, max_value_interval
															
 
																+
															
 
																+    scenes = os.listdir(path)
															
 
																+
															
 
																+    # remove min max file from scenes folder
															
 
																+    scenes = [s for s in scenes if min_max_filename not in s]
															
 
																+
															
 
																+    for folder_scene in scenes:
															
 
																+
															
 
																+        # only take care of maxwell scenes
															
 
																+        if folder_scene in _scenes_list:
															
 
																+
															
 
																+            scene_path = os.path.join(path, folder_scene)
															
 
																+
															
 
																+            zones_folder = []
															
 
																+            # create zones list
															
 
																+            for index in zones:
															
 
																+                index_str = str(index)
															
 
																+                if len(index_str) < 2:
															
 
																+                    index_str = "0" + index_str
															
 
																+                zones_folder.append("zone"+index_str)
															
 
																+
															
 
																+            for zone_folder in zones_folder:
															
 
																+                zone_path = os.path.join(scene_path, zone_folder)
															
 
																+                data_filename = _feature + "_svd" + generic_output_file_svd
															
 
																+                data_file_path = os.path.join(zone_path, data_filename)
															
 
																+
															
 
																+                # getting number of line and read randomly lines
															
 
																+                f = open(data_file_path)
															
 
																+                lines = f.readlines()
															
 
																+
															
 
																+                # check if user select current scene and zone to be part of training data set
															
 
																+                for line in lines:
															
 
																+
															
 
																+                    begin, end = _interval
															
 
																+
															
 
																+                    line_data = line.split(';')
															
 
																+                    features = line_data[begin+1:end+1]
															
 
																+                    features = [float(m) for m in features]
															
 
																+
															
 
																+                    min_value = min(features)
															
 
																+                    max_value = max(features)
															
 
																+
															
 
																+                    if min_value < min_value_interval:
															
 
																+                        min_value_interval = min_value
															
 
																+
															
 
																+                    if max_value > max_value_interval:
															
 
																+                        max_value_interval = max_value
															
 
																+
															
 
																+
															
 
																+def generate_data_model(_filename, _data_path, _interval, _choice, _feature, _thresholds, _learned_zones, _step=1, _each=1, _norm=False, _custom=False):
															
 
																+
															
 
																+    output_train_filename = os.path.join(output_data_folder, _filename + ".train")
															
 
																+    output_test_filename = os.path.join(output_data_folder,_filename + ".test")
															
 
																+
															
 
																+    # create path if not exists
															
 
																+    if not os.path.exists(output_data_folder):
															
 
																+        os.makedirs(output_data_folder)
															
 
																+
															
 
																+    train_file = open(output_train_filename, 'w')
															
 
																+    test_file = open(output_test_filename, 'w')
															
 
																+
															
 
																+    # get zone indices
															
 
																+    zones_indices = np.arange(16)
															
 
																+
															
 
																+    for folder_scene in _learned_zones:
															
 
																+
															
 
																+        # get train zones
															
 
																+        train_zones = _learned_zones[folder_scene]
															
 
																+        scene_thresholds = _thresholds[folder_scene]
															
 
																+        scene_path = os.path.join(_data_path, folder_scene)
															
 
																+
															
 
																+        for id_zone, index_folder in enumerate(zones_indices):
															
 
																+
															
 
																+            index_str = str(index_folder)
															
 
																+            if len(index_str) < 2:
															
 
																+                index_str = "0" + index_str
															
 
																+            current_zone_folder = "zone" + index_str
															
 
																+
															
 
																+            zone_path = os.path.join(scene_path, current_zone_folder)
															
 
																+
															
 
																+            # if custom normalization choices then we use svd values not already normalized
															
 
																+            if _custom:
															
 
																+                data_filename = _feature + "_svd" + generic_output_file_svd
															
 
																+            else:
															
 
																+                data_filename = _feature + "_" + _choice + generic_output_file_svd
															
 
																+
															
 
																+            data_file_path = os.path.join(zone_path, data_filename)
															
 
																+
															
 
																+            # getting number of line and read randomly lines
															
 
																+            f = open(data_file_path)
															
 
																+            lines = f.readlines()
															
 
																+
															
 
																+            num_lines = len(lines)
															
 
																+
															
 
																+            lines_indexes = np.arange(num_lines)
															
 
																+            random.shuffle(lines_indexes)
															
 
																+
															
 
																+            counter = 0
															
 
																+            # check if user select current scene and zone to be part of training data set
															
 
																+            for index in lines_indexes:
															
 
																+
															
 
																+                image_index = int(lines[index].split(';')[0])
															
 
																+
															
 
																+                if image_index % _step == 0:
															
 
																+                    line = construct_new_line(scene_thresholds[id_zone], _interval, lines[index], _choice, _each, _norm)
															
 
																+
															
 
																+                    if id_zone in train_zones:
															
 
																+                        train_file.write(line)
															
 
																+                    else:
															
 
																+                        test_file.write(line)
															
 
																+
															
 
																+                counter += 1
															
 
																+
															
 
																+            f.close()
															
 
																+
															
 
																+    train_file.close()
															
 
																+    test_file.close()
															
 
																+
															
 
																+
															
 
																+def main():
															
 
																+
															
 
																+    # getting all params
															
 
																+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
															
 
																+
															
 
																+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)', required=True)
															
 
																+    parser.add_argument('--data', type=str, help='folder which contains data of dataset', required=True)
															
 
																+    parser.add_argument('--thresholds', type=str, help='file with scene list information and thresholds', required=True)
															
 
																+    parser.add_argument('--selected_zones', type=str, help='file which contains all selected zones of scene', required=True)  
															
 
																+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"', required=True)
															
 
																+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
															
 
																+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices, required=True)
															
 
																+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
															
 
																+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
															
 
																+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
															
 
																+
															
 
																+    args = parser.parse_args()
															
 
																+
															
 
																+    p_filename = args.output
															
 
																+    p_data     = args.data
															
 
																+    p_thresholds = args.thresholds
															
 
																+    p_selected_zones = args.selected_zones
															
 
																+    p_interval = list(map(int, args.interval.split(',')))
															
 
																+    p_kind     = args.kind
															
 
																+    p_feature  = args.feature
															
 
																+    p_step     = args.step
															
 
																+    p_each     = args.each
															
 
																+    p_custom   = args.custom
															
 
																+
															
 
																+    # 1. retrieve human_thresholds
															
 
																+    human_thresholds = {}
															
 
																+
															
 
																+    # extract thresholds
															
 
																+    with open(p_thresholds) as f:
															
 
																+        thresholds_line = f.readlines()
															
 
																+
															
 
																+        for line in thresholds_line:
															
 
																+            data = line.split(';')
															
 
																+            del data[-1] # remove unused last element `\n`
															
 
																+            current_scene = data[0]
															
 
																+            thresholds_scene = data[1:]
															
 
																+
															
 
																+            # TODO : check if really necessary
															
 
																+            if current_scene != '50_shades_of_grey':
															
 
																+                human_thresholds[current_scene] = [ int(threshold) for threshold in  thresholds_scene ]
															
 
																+
															
 
																+    # 2. get selected zones
															
 
																+    selected_zones = {}
															
 
																+    with(open(p_selected_zones, 'r')) as f:
															
 
																+
															
 
																+        for line in f.readlines():
															
 
																+
															
 
																+            data = line.split(';')
															
 
																+            del data[-1]
															
 
																+            scene_name = data[0]
															
 
																+            thresholds = data[1:]
															
 
																+
															
 
																+            selected_zones[scene_name] = [ int(t) for t in thresholds ]
															
 
																+
															
 
																+    # find min max value if necessary to renormalize data
															
 
																+    if p_custom:
															
 
																+        get_min_max_value_interval(p_data, selected_zones, p_interval, p_feature)
															
 
																+
															
 
																+        # write new file to save
															
 
																+        if not os.path.exists(custom_min_max_folder):
															
 
																+            os.makedirs(custom_min_max_folder)
															
 
																+
															
 
																+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
															
 
																+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
															
 
																+
															
 
																+        with open(min_max_filename_path, 'w') as f:
															
 
																+            f.write(str(min_value_interval) + '\n')
															
 
																+            f.write(str(max_value_interval) + '\n')
															
 
																+
															
 
																+    # create database using img folder (generate first time only)
															
 
																+    generate_data_model(p_filename, p_data, p_interval, p_kind, p_feature, human_thresholds, selected_zones, p_step, p_each, p_custom)
															
 
																+
															
 
																+if __name__== "__main__":
															
 
																+    main()
															
--- a/generate/generate_selected_zones_file.py
+++ b/generate/generate_selected_zones_file.py
@@ -0,0 +1,104 @@
 
																+# main imports
															
 
																+import numpy as np
															
 
																+import pandas as pd
															
 
																+import sys, os, argparse
															
 
																+import random
															
 
																+
															
 
																+
															
 
																+# modules and config imports
															
 
																+sys.path.insert(0, '') # trick to enable import of main folder module
															
 
																+
															
 
																+import custom_config as cfg
															
 
																+
															
 
																+
															
 
																+def save_learned_zones(output_name, scene, zones):
															
 
																+
															
 
																+    if not os.path.exists(cfg.output_zones_learned):
															
 
																+        os.makedirs(cfg.output_zones_learned)
															
 
																+
															
 
																+    with open(os.path.join(cfg.output_zones_learned, output_name), 'a') as f:
															
 
																+        f.write(scene + ';')
															
 
																+
															
 
																+        for zone in zones:
															
 
																+            f.write(str(zone) + ';')
															
 
																+
															
 
																+        f.write('\n')
															
 
																+
															
 
																+
															
 
																+def get_random_zones(scene, zones, n_zones):
															
 
																+
															
 
																+    random.shuffle(zones)
															
 
																+
															
 
																+    # specific case for 'Cuisine01' (zone 12 is also noisy even in reference image)
															
 
																+    # if scene == 'Cuisine01':
															
 
																+    #     while 12 in zones[0:n_zones]:
															
 
																+    #         random.shuffle(zones)
															
 
																+    
															
 
																+    return zones[0:n_zones]
															
 
																+
															
 
																+def main():
															
 
																+
															
 
																+    parser = argparse.ArgumentParser(description="Read and compute entropy data file (using diff)")
															
 
																+
															
 
																+    parser.add_argument('--folder', type=str, help='dataset scene folder', required=True)
															
 
																+    parser.add_argument('--n_zones', type=int, help='number of zones used in train', default=10)
															
 
																+    parser.add_argument('--output', type=str, help='file with specific training zone', required=True)
															
 
																+    parser.add_argument('--thresholds', type=str, help='file with specific thresholds (using only scene from this file', default='')
															
 
																+
															
 
																+    args = parser.parse_args()
															
 
																+
															
 
																+    p_folder       = args.folder
															
 
																+    p_n_zones      = args.n_zones
															
 
																+    p_output       = args.output
															
 
																+    p_thresholds   = args.thresholds
															
 
																+
															
 
																+    # extract scenes to use if specified
															
 
																+    available_scenes = None
															
 
																+
															
 
																+    if len(p_thresholds) > 0:
															
 
																+        
															
 
																+        available_scenes = []
															
 
																+
															
 
																+        with open(p_thresholds) as f:
															
 
																+            thresholds_line = f.readlines()
															
 
																+
															
 
																+            for line in thresholds_line:
															
 
																+                data = line.split(';')
															
 
																+                del data[-1] # remove unused last element `\n`
															
 
																+                current_scene = data[0]
															
 
																+
															
 
																+                # need to rename `current_name` because we only used part6
															
 
																+                # scene_split = current_scene.split('_')
															
 
																+                # del scene_split[-1]
															
 
																+                # scene_name = '_'.join(scene_split)
															
 
																+
															
 
																+                available_scenes.append(current_scene)
															
 
																+
															
 
																+
															
 
																+    # specific number of zones (zones indices)
															
 
																+    zones = np.arange(16)
															
 
																+
															
 
																+    # get all scene names
															
 
																+    scenes = os.listdir(p_folder)
															
 
																+
															
 
																+    # create output thresholds directory if necessary
															
 
																+    folder, _ = os.path.split(p_output)
															
 
																+
															
 
																+    if len(folder) > 0:
															
 
																+        os.makedirs(folder)
															
 
																+
															
 
																+    # for each scene we generate random zones choice
															
 
																+    for folder_scene in scenes:
															
 
																+
															
 
																+        if available_scenes is not None:
															
 
																+
															
 
																+            if folder_scene in available_scenes:
															
 
																+                selected_zones = get_random_zones(folder_scene, zones, p_n_zones)
															
 
																+                save_learned_zones(p_output, folder_scene, selected_zones)
															
 
																+        else:
															
 
																+            selected_zones = get_random_zones(folder_scene, zones, p_n_zones)
															
 
																+            save_learned_zones(p_output, folder_scene, selected_zones)
															
 
																+            
															
 
																+
															
 
																+if __name__== "__main__":
															
 
																+    main()