Browse source

Merge branch 'release/v0.1.9'

Jérôme BUISINE 5 years ago
Parent commit 96b87646a0

+ 62 - 0
SVDAnalysis/svd_mean_rotations_view.py

@@ -0,0 +1,62 @@
+from ipfml import processing, utils
+from skimage import transform
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+from PIL import Image
+
+data_folder = "../fichiersSVD_light"
+
+def get_svd_mean_image_rotations(img_path):
+
+    print("Extract features from... " + img_path)
+    img = np.asarray(Image.open(img_path))
+    width, height, dim = img.shape
+
+    img_mean = np.empty([width, height, 3])
+    rotations = []
+    svd_data_rotation = []
+
+    for i in range(4):
+        rotations.append(processing.rotate_image(img, (i+1)*90, pil=False))
+        svd_data_rotation.append(processing.get_LAB_L_SVD_s(rotations[i]))
+
+    nb_rotations = len(rotations)
+
+    img_mean = processing.fusion_images(rotations, pil=False)
+
+    data = processing.get_LAB_L_SVD_s(img_mean)
+
+    # getting max and min information from min_max_filename
+    with open(data_folder + "/lab_min_max_values", 'r') as f:
+        min_val = float(f.readline())
+        max_val = float(f.readline())
+
+    data = utils.normalize_arr_with_range(data, min_val, max_val)
+
+    return data
+
+scene   = 'Appart1opt02'
+mean_svd_values = []
+indices = ["00020", "00080", "00150", "00300", "00500", "00700", "00900"]
+
+for index in indices:
+    path = os.path.join(data_folder, scene + '/appartAopt_' + index + '.png')
+    mean_svd_values.append(get_svd_mean_image_rotations(path))
+
+plt.title("Information from merged rotations images at different noise level from " + scene + " scene", fontsize=22)
+
+plt.ylabel('Singular values', fontsize=18)
+plt.xlabel('Vector features', fontsize=18)
+
+for id, data in enumerate(mean_svd_values):
+
+    p_label = "appartAopt_" + indices[id]
+    plt.plot(data, label=p_label)
+
+plt.legend(bbox_to_anchor=(0.8, 1), loc=2, borderaxespad=0.2, fontsize=16)
+
+plt.ylim(0, 0.01)
+plt.show()
+
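Note: the script above delegates the rotation, fusion and SVD steps to ipfml. As a point of reference, here is a minimal numpy-only sketch of the same idea; the function name and the square-image assumption are mine, not part of the commit.

    import numpy as np

    def svd_of_mean_rotations(img):
        # img: 2D array (e.g. the L channel of a LAB image), assumed square so
        # that all four rotations share the same shape and can be averaged
        rotations = [np.rot90(img, k) for k in range(1, 5)]  # 90, 180, 270, 360 degrees
        mean_img = np.mean(np.stack(rotations), axis=0)      # pixel-wise fusion
        # compute_uv=False returns only the singular values, like get_LAB_L_SVD_s
        return np.linalg.svd(mean_img, compute_uv=False)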

+ 62 - 0
SVDAnalysis/svd_roration_view.py

@@ -0,0 +1,62 @@
+from ipfml import processing, utils
+from skimage import transform
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+from PIL import Image
+
+data_folder = "../fichiersSVD_light"
+
+def get_svd_mean_and_image_rotations(img_path):
+
+    print("Extract features from... " + img_path)
+    img = np.asarray(Image.open(img_path))
+    width, height, dim = img.shape
+
+    img_mean = np.empty([width, height, 3])
+    rotations = []
+    svd_data_rotation = []
+
+    for i in range(4):
+        rotations.append(processing.rotate_image(img, (i+1)*90, pil=False))
+        svd_data_rotation.append(processing.get_LAB_L_SVD_s(rotations[i]))
+        Image.fromarray(rotations[i]).show()
+
+    mean_image = processing.fusion_images(rotations, pil=False)
+    mean_data = processing.get_LAB_L_SVD_s(mean_image)
+
+    # getting max and min information from min_max_filename
+    with open(data_folder + "/lab_min_max_values", 'r') as f:
+        min_val = float(f.readline())
+        max_val = float(f.readline())
+
+    mean_data = utils.normalize_arr_with_range(mean_data, min_val, max_val)
+
+    return [utils.normalize_arr_with_range(data, min_val, max_val) for data in svd_data_rotation], mean_data
+
+scene   = 'Appart1opt02'
+indices = ["00020", "00080", "00150", "00300", "00500", "00700", "00900"]
+
+for index in indices:
+    path = os.path.join(data_folder, scene + '/appartAopt_' + index + '.png')
+    svd_data, mean_svd_data = get_svd_mean_and_image_rotations(path)
+
+    plt.title("SVD information of rotations and merged image from " + scene + " scene", fontsize=22)
+
+    plt.ylabel('Singular values', fontsize=18)
+    plt.xlabel('Vector features', fontsize=18)
+
+    for id, data in enumerate(svd_data):
+
+        p_label = "appartAopt_" + index + "_" + str((id +1) * 90)
+        plt.plot(data, label=p_label)
+
+    mean_label = "appartAopt_" + index + "_mean"
+    plt.plot(mean_svd_data, label=mean_label)
+
+    plt.legend(bbox_to_anchor=(0.8, 1), loc=2, borderaxespad=0.2, fontsize=16)
+
+    plt.ylim(0, 0.01)
+    plt.show()
+
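Note: both rotation scripts rescale their singular values against the global range stored in fichiersSVD_light/lab_min_max_values (two lines: min, then max). Assuming utils.normalize_arr_with_range is a plain min-max rescale, it reduces to this sketch:

    import numpy as np

    def normalize_arr_with_range_sketch(arr, min_val, max_val):
        # map values into [0, 1] relative to a precomputed global range
        arr = np.asarray(arr, dtype=float)
        return (arr - min_val) / (max_val - min_val)

    # the min/max file holds one float per line, e.g.:
    # with open("../fichiersSVD_light/lab_min_max_values") as f:
    #     min_val, max_val = float(f.readline()), float(f.readline())
    print(normalize_arr_with_range_sketch([0.2, 0.5, 0.8], 0.0, 1.0))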

+ 4 - 5
display_scenes_zones.py

@@ -14,8 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 import matplotlib.pyplot as plt
 
@@ -150,7 +149,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
 
                         img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
                         img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
-                        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
+                        img_mscn_norm = utils.normalize_2D_arr(img_mscn)
 
                         img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
 
@@ -200,7 +199,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                     # modify data depending mode
 
                     if p_kind == 'svdn':
-                        data = processing.normalize_arr(data)
+                        data = utils.normalize_arr(data)
 
                     if p_kind == 'svdne':
                         path_min_max = os.path.join(path, data_type + min_max_filename)
@@ -209,7 +208,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                             min_val = float(f.readline())
                             max_val = float(f.readline())
 
-                        data = processing.normalize_arr_with_range(data, min_val, max_val)
+                        data = utils.normalize_arr_with_range(data, min_val, max_val)
 
                     # append of data
                     images_data.append(data)

+ 2 - 3
display_scenes_zones_shifted.py

@@ -14,8 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 import matplotlib.pyplot as plt
 
@@ -136,7 +135,7 @@ def display_data_scenes(p_scene, p_bits, p_shifted):
                     ##################
 
                     # modify data depending mode
-                    data = processing.normalize_arr(data)
+                    data = utils.normalize_arr(data)
                     images_data.append(data)
 
                 zones_images_data.append(images_data)

+ 4 - 4
display_simulation_curves.py

@@ -4,7 +4,7 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import os, sys, getopt
 
-from modules.utils.data_type import get_svd_data
+from modules.utils.data import get_svd_data
 
 label_freq = 6
 
@@ -26,8 +26,7 @@ def display_curves(folder_path):
 
         df = pd.read_csv(path_file, header=None, sep=";")
 
-
-        fig=plt.figure(figsize=(8, 8))
+        fig=plt.figure(figsize=(35, 22))
         fig.suptitle("Detection simulation for " + scene_names[id] + " scene", fontsize=20)
 
         for index, row in df.iterrows():
@@ -61,7 +60,8 @@ def display_curves(folder_path):
             plt.xticks(x, x_labels, rotation=45)
             plt.ylim(-1, 2)
 
-        plt.show()
+        plt.savefig(os.path.join(folder_path, scene_names[id] + '_simulation_curve.png'))
+        #plt.show()
 
 def main():
 
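Note: replacing plt.show() with plt.savefig() turns this viewer into a batch tool. If it runs on a headless machine, selecting a non-interactive backend before pyplot is imported avoids display errors; a small stand-alone sketch (the output file name is illustrative):

    import matplotlib
    matplotlib.use('Agg')               # render straight to files, no window needed
    import matplotlib.pyplot as plt
    import os

    fig = plt.figure(figsize=(35, 22))
    plt.plot([0, 1, 2], [0, 1, 0])
    fig.savefig(os.path.join('.', 'example_simulation_curve.png'))
    plt.close(fig)                      # free figure memory inside long batch loops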

+ 312 - 0
display_svd_area_data_scene.py

@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+import ipfml.iqa.fr as fr_iqa
+
+from skimage import color
+
+import matplotlib.pyplot as plt
+from modules.utils.data import get_svd_data
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename     = cfg.config_filename
+zone_folder         = cfg.zone_folder
+min_max_filename    = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list         = cfg.scenes_names
+scenes_indices      = cfg.scenes_indices
+choices             = cfg.normalization_choices
+path                = cfg.dataset_path
+zones               = cfg.zones_indices
+seuil_expe_filename = cfg.seuil_expe_filename
+
+metric_choices      = cfg.metric_choices_labels
+
+max_nb_bits = 8
+
+integral_area_choices = ['trapz', 'simps']
+
+def get_area_under_curve(p_area, p_data):
+
+    # resolve the ipfml.utils helper implementing the requested area method
+    function_name = 'integral_area_' + p_area
+
+    try:
+        area_method = getattr(utils, function_name)
+    except AttributeError:
+        raise NotImplementedError("Error: module `{}` does not implement `{}`".format(utils.__name__, function_name))
+
+    return area_method(p_data, dx=800)
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_area, p_ylim):
+    """
+    @brief Method which gives information about svd curves from zone of picture
+    @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
+    @param p_indices, interval [begin, end] of samples or minutes from render generation engine
+    @param p_metric, metric computed to show
+    @param p_mode, normalization's mode
+    @param p_norm, normalization or not of selected svd data
+    @param p_area, area method name to compute area under curve
+    @param p_ylim, ylim choice to better display of data
+    @return nothing
+    """
+
+    max_value_svd = 0
+    min_value_svd = sys.maxsize
+
+    image_indices = []
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
+    data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
+
+    # iterate over each scene folder
+    for id_scene, folder_scene in enumerate(scenes):
+
+        if p_scene == folder_scene:
+            scene_path = os.path.join(path, folder_scene)
+
+            config_file_path = os.path.join(scene_path, config_filename)
+
+            with open(config_file_path, "r") as config_file:
+                last_image_name = config_file.readline().strip()
+                prefix_image_name = config_file.readline().strip()
+                start_index_image = config_file.readline().strip()
+                end_index_image = config_file.readline().strip()
+                step_counter = int(config_file.readline().strip())
+
+            # construct each zones folder name
+            zones_folder = []
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+
+            images_data = []
+            images_indices = []
+
+            threshold_learned_zones = []
+
+            for id, zone_folder in enumerate(zones_folder):
+
+                # get threshold information
+                zone_path = os.path.join(scene_path, zone_folder)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                # open threshold path and get this information
+                with open(path_seuil, "r") as seuil_file:
+                    threshold_learned = int(seuil_file.readline().strip())
+                    threshold_learned_zones.append(threshold_learned)
+
+            current_counter_index = int(start_index_image)
+            end_counter_index = int(end_index_image)
+
+            threshold_mean = np.mean(np.asarray(threshold_learned_zones))
+            threshold_image_found = False
+
+            file_path = os.path.join(scene_path, prefix_image_name + "{}.png")
+
+            svd_data = []
+
+            while(current_counter_index <= end_counter_index):
+
+                current_counter_index_str = str(current_counter_index)
+
+                while len(start_index_image) > len(current_counter_index_str):
+                    current_counter_index_str = "0" + current_counter_index_str
+
+                image_path = file_path.format(str(current_counter_index_str))
+                img = Image.open(image_path)
+
+                svd_values = get_svd_data(p_metric, img)
+
+                if p_norm:
+                    svd_values = svd_values[begin_data:end_data]
+
+                # update min max values
+                min_value = svd_values.min()
+                max_value = svd_values.max()
+
+                if min_value < min_value_svd:
+                    min_value_svd = min_value
+
+                if max_value > max_value_svd:
+                    max_value_svd = max_value
+
+                # keep in memory used data
+                if current_counter_index % p_step == 0:
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
+                        svd_data.append(svd_values)
+
+                    if threshold_mean < int(current_counter_index) and not threshold_image_found:
+
+                        threshold_image_found = True
+                        threshold_image_zone = current_counter_index_str
+
+                current_counter_index += step_counter
+                print('%.2f%%' % (current_counter_index / end_counter_index * 100))
+                sys.stdout.write("\033[F")
+
+
+            # all indices of picture to plot
+            print(images_indices)
+
+            threshold_id = 0
+            area_data = []
+
+            for id, data in enumerate(svd_data):
+
+                current_data = data
+
+                if not p_norm:
+                    current_data = current_data[begin_data:end_data]
+
+                if p_mode == 'svdn':
+                    current_data = utils.normalize_arr(current_data)
+
+                if p_mode == 'svdne':
+                    current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
+
+                images_data.append(current_data)
+
+                # not use this script for 'sub_blocks_stats'
+                current_area = get_area_under_curve(p_area, current_data)
+                area_data.append(current_area)
+
+            # display all data using matplotlib (configure plt)
+            gridsize = (3, 2)
+
+            # fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(30, 22))
+            fig = plt.figure(figsize=(30, 22))
+            ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2)
+            ax2 = plt.subplot2grid(gridsize, (2, 0), colspan=2)
+
+
+            ax1.set_title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
+            ax1.set_ylabel('Image samples or time (minutes) generation', fontsize=14)
+            ax1.set_xlabel('Vector features', fontsize=16)
+
+            for id, data in enumerate(images_data):
+
+                p_label = p_scene + '_' + str(images_indices[id]) + " | " + p_area + ": " + str(area_data[id])
+
+                if images_indices[id] == threshold_image_zone:
+                    ax1.plot(data, label=p_label, lw=4, color='red')
+                    threshold_id = id
+                else:
+                    ax1.plot(data, label=p_label)
+
+            ax1.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+
+            start_ylim, end_ylim = p_ylim
+            ax1.set_ylim(start_ylim, end_ylim)
+
+            ax2.set_title(p_area + " information for whole step images")
+            ax2.set_ylabel(p_area + ' area values')
+            ax2.set_xlabel('Number of samples per pixels or times')
+            ax2.set_xticks(range(len(images_indices)))
+            ax2.set_xticklabels(list(map(int, images_indices)))
+            ax2.plot([threshold_id, threshold_id], [np.min(area_data), np.max(area_data)], 'k-', lw=2, color='red')
+            ax2.plot(area_data)
+
+            plt.show()
+
+def main():
+
+
+    # by default p_step value is 10 to enable all photos
+    p_step = 10
+    p_ylim = (0, 1)
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python display_svd_area_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:a:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "area=", "ylim="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python display_svd_area_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python display_svd_area_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+            sys.exit()
+        elif o in ("-s", "--scene"):
+            p_scene = a
+
+            if p_scene not in scenes_indices:
+                assert False, "Invalid scene choice"
+            else:
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Invalid metric choice"
+
+        elif o in ("-m", "--mode"):
+            p_mode = a
+
+            if p_mode not in choices:
+                assert False, "Invalid normalization choice, expected ['svd', 'svdn', 'svdne']"
+
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-a", "--area"):
+            p_area = a
+
+            if p_area not in integral_area_choices:
+                assert False, "Invalid area computation choices : %s " % integral_area_choices
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
+        else:
+            assert False, "unhandled option"
+
+    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_area, p_ylim)
+
+if __name__== "__main__":
+    main()
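Note: get_area_under_curve resolves 'trapz' or 'simps' to an ipfml.utils helper named integral_area_<choice> via getattr. Assuming those helpers wrap the usual numpy/scipy integrators, the dispatch amounts to the following sketch:

    import numpy as np

    def area_under_curve(area_name, data, dx=800):
        if area_name == 'trapz':
            return np.trapz(data, dx=dx)         # trapezoidal rule
        if area_name == 'simps':
            from scipy.integrate import simpson  # Simpson's rule
            return simpson(data, dx=dx)
        raise NotImplementedError("unknown area method '%s'" % area_name)

    print(area_under_curve('trapz', np.array([0.2, 0.5, 0.3])))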

+ 318 - 0
display_svd_data_scene.py

@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+import ipfml.iqa.fr as fr_iqa
+
+from skimage import color
+
+import matplotlib.pyplot as plt
+from modules.utils.data import get_svd_data
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename     = cfg.config_filename
+zone_folder         = cfg.zone_folder
+min_max_filename    = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list         = cfg.scenes_names
+scenes_indices      = cfg.scenes_indices
+choices             = cfg.normalization_choices
+path                = cfg.dataset_path
+zones               = cfg.zones_indices
+seuil_expe_filename = cfg.seuil_expe_filename
+
+metric_choices      = cfg.metric_choices_labels
+
+max_nb_bits = 8
+
+error_data_choices  = ['mae', 'mse', 'ssim', 'psnr']
+
+
+def get_error_distance(p_error, y_true, y_test):
+
+    # resolve the ipfml.iqa.fr function implementing the requested error metric
+    function_name = p_error
+
+    try:
+        error_method = getattr(fr_iqa, function_name)
+    except AttributeError:
+        raise NotImplementedError("Error: module `{}` does not implement `{}`".format(fr_iqa.__name__, function_name))
+
+    return error_method(y_true, y_test)
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim):
+    """
+    @brief Method which gives information about svd curves from zone of picture
+    @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
+    @param p_indices, interval [begin, end] of samples or minutes from render generation engine
+    @param p_metric, metric computed to show
+    @param p_mode, normalization's mode
+    @param p_norm, normalization or not of selected svd data
+    @param p_error, error metric used to display
+    @param p_ylim, ylim choice to better display of data
+    @return nothing
+    """
+
+    max_value_svd = 0
+    min_value_svd = sys.maxsize
+
+    image_indices = []
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
+    data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
+
+    # go ahead each scenes
+    for id_scene, folder_scene in enumerate(scenes):
+
+        if p_scene == folder_scene:
+            scene_path = os.path.join(path, folder_scene)
+
+            config_file_path = os.path.join(scene_path, config_filename)
+
+            with open(config_file_path, "r") as config_file:
+                last_image_name = config_file.readline().strip()
+                prefix_image_name = config_file.readline().strip()
+                start_index_image = config_file.readline().strip()
+                end_index_image = config_file.readline().strip()
+                step_counter = int(config_file.readline().strip())
+
+            # construct each zones folder name
+            zones_folder = []
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+
+            images_data = []
+            images_indices = []
+
+            threshold_learned_zones = []
+
+            for id, zone_folder in enumerate(zones_folder):
+
+                # get threshold information
+
+                zone_path = os.path.join(scene_path, zone_folder)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                # open threshold path and get this information
+                with open(path_seuil, "r") as seuil_file:
+                    threshold_learned = int(seuil_file.readline().strip())
+                    threshold_learned_zones.append(threshold_learned)
+
+            current_counter_index = int(start_index_image)
+            end_counter_index = int(end_index_image)
+
+            threshold_mean = np.mean(np.asarray(threshold_learned_zones))
+            threshold_image_found = False
+
+            file_path = os.path.join(scene_path, prefix_image_name + "{}.png")
+
+            svd_data = []
+
+            while(current_counter_index <= end_counter_index):
+
+                current_counter_index_str = str(current_counter_index)
+
+                while len(start_index_image) > len(current_counter_index_str):
+                    current_counter_index_str = "0" + current_counter_index_str
+
+                image_path = file_path.format(str(current_counter_index_str))
+                img = Image.open(image_path)
+
+                svd_values = get_svd_data(p_metric, img)
+
+                if p_norm:
+                    svd_values = svd_values[begin_data:end_data]
+
+                # update min max values
+                min_value = svd_values.min()
+                max_value = svd_values.max()
+
+                if min_value < min_value_svd:
+                    min_value_svd = min_value
+
+                if max_value > max_value_svd:
+                    max_value_svd = max_value
+
+                # keep in memory used data
+                if current_counter_index % p_step == 0:
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
+                        svd_data.append(svd_values)
+
+                    if threshold_mean < int(current_counter_index) and not threshold_image_found:
+
+                        threshold_image_found = True
+                        threshold_image_zone = current_counter_index_str
+
+                current_counter_index += step_counter
+                print('%.2f%%' % (current_counter_index / end_counter_index * 100))
+                sys.stdout.write("\033[F")
+
+
+            # all indices of picture to plot
+            print(images_indices)
+
+            previous_data = []
+            error_data = [0.]
+
+            for id, data in enumerate(svd_data):
+
+                current_data = data
+
+                if not p_norm:
+                    current_data = current_data[begin_data:end_data]
+
+                if p_mode == 'svdn':
+                    current_data = utils.normalize_arr(current_data)
+
+                if p_mode == 'svdne':
+                    current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
+
+                images_data.append(current_data)
+
+                # use of whole image data for computation of ssim or psnr
+                if p_error == 'ssim' or p_error == 'psnr':
+                    image_path = file_path.format(str(images_indices[id]))
+                    current_data = np.asarray(Image.open(image_path))
+
+                if len(previous_data) > 0:
+
+                    current_error = get_error_distance(p_error, previous_data, current_data)
+                    error_data.append(current_error)
+
+                if len(previous_data) == 0:
+                    previous_data = current_data
+
+            # display all data using matplotlib (configure plt)
+            gridsize = (3, 2)
+
+            # fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(30, 22))
+            fig = plt.figure(figsize=(30, 22))
+            ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2)
+            ax2 = plt.subplot2grid(gridsize, (2, 0), colspan=2)
+
+
+            ax1.set_title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
+            ax1.set_ylabel('Image samples or time (minutes) generation', fontsize=14)
+            ax1.set_xlabel('Vector features', fontsize=16)
+
+            for id, data in enumerate(images_data):
+
+                p_label = p_scene + '_' + str(images_indices[id]) + " | " + p_error + ": " + str(error_data[id])
+
+                if images_indices[id] == threshold_image_zone:
+                    ax1.plot(data, label=p_label, lw=4, color='red')
+                else:
+                    ax1.plot(data, label=p_label)
+
+            ax1.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+
+            start_ylim, end_ylim = p_ylim
+            ax1.set_ylim(start_ylim, end_ylim)
+
+            ax2.set_title(p_error + " information for whole step images")
+            ax2.set_ylabel(p_error + ' error')
+            ax2.set_xlabel('Number of samples per pixels or times')
+            ax2.set_xticks(range(len(images_indices)))
+            ax2.set_xticklabels(list(map(int, images_indices)))
+            ax2.plot(error_data)
+
+            plt.show()
+
+def main():
+
+
+    # by default p_step value is 10 to enable all photos
+    p_step = 10
+    p_ylim = (0, 1)
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:e:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "error=", "ylim="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+            sys.exit()
+        elif o in ("-s", "--scene"):
+            p_scene = a
+
+            if p_scene not in scenes_indices:
+                assert False, "Invalid scene choice"
+            else:
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Invalid metric choice"
+
+        elif o in ("-m", "--mode"):
+            p_mode = a
+
+            if p_mode not in choices:
+                assert False, "Invalid normalization choice, expected ['svd', 'svdn', 'svdne']"
+
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-e", "--error"):
+            p_error = a
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
+        else:
+            assert False, "unhandled option"
+
+    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim)
+
+if __name__== "__main__":
+    main()
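Note: get_error_distance looks the chosen metric up by name on ipfml.iqa.fr. For the two simplest choices the computation reduces to the sketch below (my own reimplementation for illustration; ssim and psnr come from the IQA module and are fed whole images, as the branch above shows):

    import numpy as np

    def mae(y_true, y_test):
        # mean absolute error between two vectors or images
        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_test)))

    def mse(y_true, y_test):
        # mean squared error
        return np.mean((np.asarray(y_true) - np.asarray(y_test)) ** 2)

    print(mae([0.1, 0.2], [0.15, 0.1]), mse([0.1, 0.2], [0.15, 0.1]))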

+ 117 - 31
display_svd_zone_scene.py

@@ -15,12 +15,11 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 
 import matplotlib.pyplot as plt
-from modules.utils.data_type import get_svd_data
+from modules.utils.data import get_svd_data
 
 from modules.utils import config as cfg
 
@@ -31,7 +30,7 @@ min_max_filename    = cfg.min_max_filename_extension
 
 # define all scenes values
 scenes_list         = cfg.scenes_names
-scenes_indexes      = cfg.scenes_indices
+scenes_indices      = cfg.scenes_indices
 choices             = cfg.normalization_choices
 path                = cfg.dataset_path
 zones               = cfg.zones_indices
@@ -39,16 +38,76 @@ seuil_expe_filename = cfg.seuil_expe_filename
 
 metric_choices      = cfg.metric_choices_labels
 
+generic_output_file_svd = '_random.csv'
+
 max_nb_bits = 8
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
+def get_min_max_value_interval(_scene, _interval, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of current scene
+        if folder_scene == _scene:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
 
-def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_svd" + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # read every line of the zone's data file
+                with open(data_file_path, 'r') as f:
+                    lines = f.readlines()
+
+                # scan each line to update the global min / max over the selected interval
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    values = line_data[begin+1:end+1]
+                    values = [float(v) for v in values]
+
+                    min_value = min(values)
+                    max_value = max(values)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_zone, p_metric, p_mode, p_step, p_norm, p_ylim):
     """
     @brief Method which gives information about svd curves from zone of picture
     @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
     @param p_interval, interval [begin, end] of samples or minutes from render generation engine
     @param p_zone, zone's identifier of picture
     @param p_metric, metric computed to show
     @param p_mode, normalization's mode
+    @param p_step, step of images indices
+    @param p_norm, normalization or not of selected svd data
+    @param p_ylim, ylim choice to better display of data
     @return nothing
     """
 
@@ -56,14 +115,15 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
 
-    begin, end = p_interval
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
     data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
 
         if p_scene == folder_scene:
-            print(folder_scene)
             scene_path = os.path.join(path, folder_scene)
 
             config_file_path = os.path.join(scene_path, config_filename)
@@ -88,7 +148,7 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 zones_folder.append(current_zone)
 
             zones_images_data = []
-            images_indexes = []
+            images_indices = []
 
             zone_folder = zones_folder[p_zone]
 
@@ -113,10 +173,9 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 while len(start_index_image) > len(current_counter_index_str):
                     current_counter_index_str = "0" + current_counter_index_str
 
-
                 if current_counter_index % p_step == 0:
-                    if current_counter_index >= begin and current_counter_index <= end:
-                        images_indexes.append(current_counter_index_str)
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
 
                     if seuil_learned < int(current_counter_index) and not threshold_image_found:
 
@@ -125,10 +184,10 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
 
                 current_counter_index += step_counter
 
-            # all indexes of picture to plot
-            print(images_indexes)
+            # all indices of picture to plot
+            print(images_indices)
 
-            for index in images_indexes:
+            for index in images_indices:
 
                 img_path = os.path.join(scene_path, prefix_image_name + str(index) + ".png")
 
@@ -142,6 +201,10 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 # Here you can add the way you compute data
                 data = get_svd_data(p_metric, block)
 
+                # TODO : improve part of this code to get correct min / max values
+                if p_norm:
+                    data = data[begin_data:end_data]
+
                 ##################
                 # Data mode part #
                 ##################
@@ -149,32 +212,41 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 if p_mode == 'svdne':
 
                     # getting max and min information from min_max_filename
-                    with open(data_min_max_filename, 'r') as f:
-                        min_val = float(f.readline())
-                        max_val = float(f.readline())
+                    if not p_norm:
+                        with open(data_min_max_filename, 'r') as f:
+                            min_val = float(f.readline())
+                            max_val = float(f.readline())
+                    else:
+                        min_val = min_value_interval
+                        max_val = max_value_interval
 
-                    data = processing.normalize_arr_with_range(data, min_val, max_val)
+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
 
                 if p_mode == 'svdn':
-                    data = processing.normalize_arr(data)
+                    data = utils.normalize_arr(data)
 
-                zones_images_data.append(data)
+                if not p_norm:
+                    zones_images_data.append(data[begin_data:end_data])
+                else:
+                    zones_images_data.append(data)
 
-            plt.title(p_scene + ' scene interval information ['+ str(begin) +', '+ str(end) +'], ' + p_metric + ' metric, ' + p_mode, fontsize=20)
+            plt.title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + '], ' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
             plt.ylabel('Image samples or time (minutes) generation', fontsize=14)
             plt.xlabel('Vector features', fontsize=16)
 
             for id, data in enumerate(zones_images_data):
 
-                p_label = p_scene + "_" + images_indexes[id]
+                p_label = p_scene + "_" + images_indices[id]
 
-                if images_indexes[id] == threshold_image_zone:
+                if images_indices[id] == threshold_image_zone:
                     plt.plot(data, label=p_label, lw=4, color='red')
                 else:
                     plt.plot(data, label=p_label)
 
             plt.legend(bbox_to_anchor=(0.8, 1), loc=2, borderaxespad=0.2, fontsize=14)
-            plt.ylim(0, 0.1)
+
+            start_ylim, end_ylim = p_ylim
+            plt.ylim(start_ylim, end_ylim)
 
             plt.show()
 
@@ -183,31 +255,36 @@ def main():
 
     # by default p_step value is 10 to enable all photos
     p_step = 10
+    p_norm = 0
+    p_ylim = (0, 1)
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python display_svd_zone_scene.py --scene A --interval "0,200" --zone 3 --metric lab --mode svdne --step 50')
+        print('python display_svd_zone_scene.py --scene A --interval "0,200" --indices "0, 900" --zone 3 --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "hs:i:z:l:m:s", ["help=", "scene=", "interval=", "zone=", "metric=", "mode=", "step="])
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:y", ["help=", "scene=", "interval=", "indices=", "zone=", "metric=", "mode=", "step=", "norm=", "ylim="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python display_svd_zone_scene.py --scene A --interval "0,200" --zone 3 --metric lab --mode svdne --step 50')
+        print('python display_svd_zone_scene.py --scene A --interval "0,200" --indices "0, 900" --zone 3 --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python display_svd_zone_scene.py --scene A --interval "0,200" --zone 3 --metric lab --mode svdne --step 50')
+            print('python display_svd_zone_scene.py --scene A --interval "0,200" --indices "0, 900" --zone 3 --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
             sys.exit()
         elif o in ("-s", "--scene"):
             p_scene = a
 
-            if p_scene not in scenes_indexes:
+            if p_scene not in scenes_indices:
                 assert False, "Invalid scene choice"
             else:
-                p_scene = scenes_list[scenes_indexes.index(p_scene)]
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
         elif o in ("-i", "--interval"):
             p_interval = list(map(int, a.split(',')))
 
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
         elif o in ("-z", "--zone"):
             p_zone = int(a)
 
@@ -226,10 +303,19 @@ def main():
         elif o in ("-s", "--step"):
             p_step = int(a)
 
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
         else:
             assert False, "unhandled option"
 
-    display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step)
+    if p_norm:
+        get_min_max_value_interval(p_scene, p_interval, p_metric)
+
+    display_svd_values(p_scene, p_interval, p_indices, p_zone, p_metric, p_mode, p_step, p_norm, p_ylim)
 
 if __name__== "__main__":
     main()
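Note: get_min_max_value_interval above scans the per-zone CSV files produced by generate_all_data.py, where each line reads 'image_index;v1;v2;...;vN;'. A condensed sketch of the scan over one file's lines (the helper name is mine):

    def interval_min_max(lines, begin, end):
        # columns begin+1 .. end of each line hold the selected SVD interval
        min_v, max_v = float('inf'), float('-inf')
        for line in lines:
            values = [float(v) for v in line.split(';')[begin+1:end+1] if v.strip()]
            min_v = min(min_v, min(values))
            max_v = max(max_v, max(values))
        return min_v, max_v

    print(interval_min_max(["00020;0.9;0.5;0.3;", "00040;0.8;0.4;0.2;"], 0, 2))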

+ 2 - 2
generateAndTrain_maxwell.sh

@@ -37,7 +37,7 @@ for counter in {0..4}; do
         end=$(($size))
     fi
 
-    for nb_zones in {4,6,8,10,12,14}; do
+    for nb_zones in {4,6,8,10,12}; do
 
         echo $start $end
 
@@ -54,7 +54,7 @@ for counter in {0..4}; do
 
                     echo "${MODEL_NAME} results already generated..."
                 else
-                    python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
+                    python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --renderer "maxwell" --step 40 --random 1 --percent 1
                     python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                     #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2'

+ 2 - 2
generateAndTrain_maxwell_custom.sh

@@ -37,7 +37,7 @@ for counter in {0..4}; do
         end=$(($size))
     fi
 
-    for nb_zones in {4,6,8,10,12,14}; do
+    for nb_zones in {4,6,8,10,12}; do
 
         echo $start $end
 
@@ -55,7 +55,7 @@ for counter in {0..4}; do
 
                     echo "${MODEL_NAME} results already generated..."
                 else
-                    python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
                     python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                     #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}

+ 46 - 57
generate_all_data.py

@@ -13,10 +13,9 @@ import random
 import time
 import json
 
-from modules.utils.data_type import get_svd_data
+from modules.utils.data import get_svd_data
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 
 from modules.utils import config as cfg
@@ -38,7 +37,6 @@ metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 
 generic_output_file_svd = '_random.csv'
-picture_step            = 10
 
 def generate_data_svd(data_type, mode):
     """
@@ -102,64 +100,63 @@ def generate_data_svd(data_type, mode):
 
         while(current_counter_index <= end_counter_index):
 
-            if current_counter_index % picture_step == 0:
-                current_counter_index_str = str(current_counter_index)
+            current_counter_index_str = str(current_counter_index)
 
-                while len(start_index_image) > len(current_counter_index_str):
-                    current_counter_index_str = "0" + current_counter_index_str
+            while len(start_index_image) > len(current_counter_index_str):
+                current_counter_index_str = "0" + current_counter_index_str
 
-                img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
+            img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
 
-                current_img = Image.open(img_path)
-                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
+            current_img = Image.open(img_path)
+            img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
-                for id_block, block in enumerate(img_blocks):
+            for id_block, block in enumerate(img_blocks):
 
-                    ###########################
-                    # Metric computation part #
-                    ###########################
+                ###########################
+                # Metric computation part #
+                ###########################
 
-                    data = get_svd_data(data_type, block)
+                data = get_svd_data(data_type, block)
 
-                    ##################
-                    # Data mode part #
-                    ##################
+                ##################
+                # Data mode part #
+                ##################
 
-                    # modify data depending mode
-                    if mode == 'svdne':
+                # modify data depending mode
+                if mode == 'svdne':
 
-                        # getting max and min information from min_max_filename
-                        with open(data_min_max_filename, 'r') as f:
-                            min_val = float(f.readline())
-                            max_val = float(f.readline())
+                    # getting max and min information from min_max_filename
+                    with open(data_min_max_filename, 'r') as f:
+                        min_val = float(f.readline())
+                        max_val = float(f.readline())
 
-                        data = processing.normalize_arr_with_range(data, min_val, max_val)
+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
 
-                    if mode == 'svdn':
-                        data = processing.normalize_arr(data)
+                if mode == 'svdn':
+                    data = utils.normalize_arr(data)
 
-                    # save min and max found from dataset in order to normalize data using whole data known
-                    if mode == 'svd':
+                # save min and max found from dataset in order to normalize data using whole data known
+                if mode == 'svd':
 
-                        current_min = data.min()
-                        current_max = data.max()
+                    current_min = data.min()
+                    current_max = data.max()
 
-                        if current_min < min_val_found:
-                            min_val_found = current_min
+                    if current_min < min_val_found:
+                        min_val_found = current_min
 
-                        if current_max > max_val_found:
-                            max_val_found = current_max
+                    if current_max > max_val_found:
+                        max_val_found = current_max
 
-                    # now write data into current writer
-                    current_file = svd_output_files[id_block]
+                # now write data into current writer
+                current_file = svd_output_files[id_block]
 
-                    # add of index
-                    current_file.write(current_counter_index_str + ';')
+                # add of index
+                current_file.write(current_counter_index_str + ';')
 
-                    for val in data:
-                        current_file.write(str(val) + ";")
+                for val in data:
+                    current_file.write(str(val) + ";")
 
-                    current_file.write('\n')
+                current_file.write('\n')
 
             start_index_image_int = int(start_index_image)
             print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((current_counter_index - start_index_image_int) / (end_counter_index - start_index_image_int)* 100.) + "%")
@@ -178,32 +175,30 @@ def generate_data_svd(data_type, mode):
             f.write(str(min_val_found) + '\n')
             f.write(str(max_val_found) + '\n')
 
-    print("%s : end of data generation\n" % mode)
+    print("%s_%s : end of data generation\n" % (data_type, mode))
 
 
 def main():
 
     # default value of p_step
-    p_step = 10
+    p_step = 1
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
         print('python generate_all_data.py --metric all')
         print('python generate_all_data.py --metric lab')
-        print('python generate_all_data.py --metric lab --step 10')
+        print('python generate_all_data.py --metric lab')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "hms", ["help=", "metric=", "step="])
+        opts, args = getopt.getopt(sys.argv[1:], "hms", ["help=", "metric="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_all_data.py --metric all --step 10')
+        print('python generate_all_data.py --metric all')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_all_data.py --metric all --step 10')
+            print('python generate_all_data.py --metric all')
             sys.exit()
-        elif o in ("-s", "--step"):
-            p_step = int(a)
         elif o in ("-m", "--metric"):
             p_metric = a
 
@@ -212,12 +207,6 @@ def main():
         else:
             assert False, "unhandled option"
 
-    global picture_step
-    picture_step = p_step
-
-    if picture_step % 10 != 0:
-        assert False, "Picture step variable needs to be divided by ten"
-
     # generate all or specific metric data
     if p_metric == 'all':
         for m in metric_choices:
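
The three normalization modes handled above differ only in how the SVD feature vector is rescaled. A minimal sketch, assuming data is a 1D numpy array of singular values and that min_val/max_val were read from the previously written min/max file (the helper name normalize_svd is illustrative, not part of this commit):

    import numpy as np
    from ipfml import utils

    def normalize_svd(data, mode, min_val=None, max_val=None):
        # 'svd': keep raw singular values; extrema are only tracked for later reuse
        if mode == 'svd':
            return np.asarray(data)
        # 'svdn': normalize each vector against its own extrema
        if mode == 'svdn':
            return utils.normalize_arr(data)
        # 'svdne': normalize against the global extrema of the whole dataset
        if mode == 'svdne':
            return utils.normalize_arr_with_range(data, min_val, max_val)
        raise ValueError("unknown mode: %s" % mode)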

+ 177 - 62
generate_data_model.py

@@ -13,27 +13,51 @@ import random
 import time
 import json
 
-config_filename   = "config"
-zone_folder       = "zone"
-min_max_filename  = "_min_max_values"
-generic_output_file_svd = '_random.csv'
-output_data_folder = 'data'
+from PIL import Image
+from ipfml import processing, metrics, utils
+
+from modules.utils import config as cfg
+from modules.utils import data as dt
+
+# getting configuration information
+config_filename         = cfg.config_filename
+zone_folder             = cfg.zone_folder
+min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
-scenes = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
-scenes_indexes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
-choices = ['svd', 'svdn', 'svdne']
-path = './fichiersSVD_light'
-zones = np.arange(16)
-seuil_expe_filename = 'seuilExpe'
-
-def construct_new_line(path_seuil, interval, line, sep, index):
+scenes_list             = cfg.scenes_names
+scenes_indexes          = cfg.scenes_indices
+choices                 = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+zones_indices           = cfg.zones_indices
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
+def construct_new_line(path_seuil, interval, line, choice, each, norm):
     begin, end = interval
 
     line_data = line.split(';')
     seuil = line_data[0]
     metrics = line_data[begin+1:end+1]
 
+    metrics = [float(m) for id, m in enumerate(metrics) if id % each == 0 ]
+
+    if norm:
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
+
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
 
@@ -43,15 +67,65 @@ def construct_new_line(path_seuil, interval, line, sep, index):
         line = '0'
 
     for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
-        line += val
+        line += ';'
+        line += str(val)
     line += '\n'
 
     return line
 
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes, _zones = zones, _percent = 1, _sep=':', _index=True):
+def get_min_max_value_interval(_scenes_list, _interval, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_svd" + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _zones = zones_indices, _percent = 1, _step=1, _each=1, _norm=False, _custom=False):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -72,67 +146,85 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     scenes = [s for s in scenes if min_max_filename not in s]
 
     for id_scene, folder_scene in enumerate(scenes):
-        scene_path = os.path.join(path, folder_scene)
 
-        zones_folder = []
-        # create zones list
-        for index in zones:
-            index_str = str(index)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
-            zones_folder.append("zone"+index_str)
+        # only take care of maxwell scenes
+        if folder_scene in scenes_list:
 
-        for id_zone, zone_folder in enumerate(zones_folder):
-            zone_path = os.path.join(scene_path, zone_folder)
-            data_filename = _metric + "_" + _choice + generic_output_file_svd
-            data_file_path = os.path.join(zone_path, data_filename)
+            scene_path = os.path.join(path, folder_scene)
 
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
 
-             # getting number of line and read randomly lines
-            f = open(data_file_path)
-            lines = f.readlines()
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
 
-            num_lines = len(lines)
+                # if custom normalization choices then we use svd values not already normalized
+                if _custom:
+                    data_filename = _metric + "_svd" + generic_output_file_svd
+                else:
+                    data_filename = _metric + "_" + _choice + generic_output_file_svd
 
-            lines_indexes = np.arange(num_lines)
-            random.shuffle(lines_indexes)
+                data_file_path = os.path.join(zone_path, data_filename)
 
-            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
 
-            counter = 0
-            # check if user select current scene and zone to be part of training data set
-            for index in lines_indexes:
-                line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
+                num_lines = len(lines)
 
-                percent = counter / num_lines
+                lines_indexes = np.arange(num_lines)
+                random.shuffle(lines_indexes)
 
-                if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
-                    train_file.write(line)
-                else:
-                    test_file.write(line)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                counter = 0
+                # check if user select current scene and zone to be part of training data set
+                for index in lines_indexes:
+
+                    image_index = int(lines[index].split(';')[0])
+                    percent = counter / num_lines
 
-                counter += 1
+                    if image_index % _step == 0:
+                        line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)
 
-            f.close()
+                        if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
+                            train_file.write(line)
+                        else:
+                            test_file.write(line)
+
+                    counter += 1
+
+                f.close()
 
     train_file.close()
     test_file.close()
 
+
 def main():
 
+    p_custom = False
+    p_step      = 1
+    p_renderer  = 'all'
+    p_each      = 1
+
     if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r", ["help=", "output=", "interval=", "kind=", "metric=", "scenes=", "zones=", "percent=", "sep=", "rowindex="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "renderer=", "step=", "each=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
+
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -151,25 +243,48 @@ def main():
                 p_zones = [a.strip()]
         elif o in ("-p", "--percent"):
             p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+        elif o in ("-e", "--each"):
+            p_each = int(a)
+        elif o in ("-r", "--renderer"):
+            p_renderer = a
+
+            if p_renderer not in cfg.renderer_choices:
+                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
     # getting scenes from indexes user selection
     scenes_selected = []
 
     for scene_id in p_scenes:
         index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes[index])
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_sep, p_rowindex)
+    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_step, p_each, p_custom)
 
 if __name__== "__main__":
     main()
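
To make the output format concrete: each line of a *_random.csv zone file starts with the image index followed by ';'-separated SVD values, and construct_new_line replaces that index by a 0/1 label derived from the zone's seuilExpe threshold. A small worked example, with illustrative values only:

    # illustrative values; real thresholds and vectors come from the dataset
    line = "00150;0.92;0.87;0.83;0.80"     # image index 150 plus four SVD values
    seuil_learned = 300                    # assumed content of a seuilExpe file
    begin, end = 0, 3                      # as given by --interval 0,3

    line_data = line.split(';')
    label = '1' if seuil_learned > int(line_data[0]) else '0'
    metrics = line_data[begin + 1:end + 1]            # ['0.92', '0.87', '0.83']

    out = label + ''.join(';' + v for v in metrics) + '\n'
    # -> "1;0.92;0.87;0.83\n", ready for the .train/.test files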

+ 0 - 277
generate_data_model_r.py

@@ -1,277 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Sep 14 21:02:42 2018
-
-@author: jbuisine
-"""
-
-from __future__ import print_function
-import sys, os, getopt
-import numpy as np
-import random
-import time
-import json
-
-from PIL import Image
-from ipfml import processing, metrics
-
-from modules.utils import config as cfg
-
-# getting configuration information
-config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indexes          = cfg.scenes_indices
-choices                 = cfg.normalization_choices
-path                    = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-metric_choices          = cfg.metric_choices_labels
-output_data_folder      = cfg.output_data_folder
-custom_min_max_folder   = cfg.min_max_custom_folder
-min_max_ext             = cfg.min_max_filename_extension
-zones_indices           = cfg.zones_indices
-
-generic_output_file_svd = '_random.csv'
-
-min_value_interval = sys.maxsize
-max_value_interval = 0
-
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
-    begin, end = interval
-
-    line_data = line.split(';')
-    seuil = line_data[0]
-    metrics = line_data[begin+1:end+1]
-
-    metrics = [float(m) for m in metrics]
-
-    # TODO : check if it's always necessary to do that (loss of information for svd)
-    if norm:
-        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
-
-    with open(path_seuil, "r") as seuil_file:
-        seuil_learned = int(seuil_file.readline().strip())
-
-    if seuil_learned > int(seuil):
-        line = '1'
-    else:
-        line = '0'
-
-    for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
-        line += str(val)
-    line += '\n'
-
-    return line
-
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
-
-    global min_value_interval, max_value_interval
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for line in lines:
-
-                    begin, end = _interval
-
-                    line_data = line.split(';')
-                    metrics = line_data[begin+1:end+1]
-                    metrics = [float(m) for m in metrics]
-
-                    min_value = min(metrics)
-                    max_value = max(metrics)
-
-                    if min_value < min_value_interval:
-                        min_value_interval = min_value
-
-                    if max_value > max_value_interval:
-                        max_value_interval = max_value
-
-                    counter += 1
-
-
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _zones = zones_indices, _percent = 1, _norm = False, _sep=':', _index=True):
-
-    output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".test"
-
-    if not '/' in output_train_filename:
-        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
-
-    # create path if not exists
-    if not os.path.exists(output_data_folder):
-        os.makedirs(output_data_folder)
-
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                num_lines = len(lines)
-
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
-
-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
-
-                    percent = counter / num_lines
-
-                    if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
-                        train_file.write(line)
-                    else:
-                        test_file.write(line)
-
-                    counter += 1
-
-                f.close()
-
-    train_file.close()
-    test_file.close()
-
-
-def main():
-
-    p_custom = False
-
-    if len(sys.argv) <= 1:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "sep=", "rowindex=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-z", "--zones"):
-            if ',' in a:
-                p_zones = list(map(int, a.split(',')))
-            else:
-                p_zones = [a.strip()]
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
-
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # find min max value if necessary to renormalize data
-    if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
-
-        # write new file to save
-        if not os.path.exists(custom_min_max_folder):
-            os.makedirs(custom_min_max_folder)
-
-        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
-        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
-
-        with open(min_max_filename_path, 'w') as f:
-            f.write(str(min_value_interval) + '\n')
-            f.write(str(max_value_interval) + '\n')
-
-    # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_custom, p_sep, p_rowindex)
-
-if __name__== "__main__":
-    main()
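
The script deleted above used the older '--sep'/'--rowindex' output style; for comparison with the ';'-separated format the remaining generators emit, the two line layouts look like this (values illustrative):

    label, values = '1', ['0.92', '0.87']

    # removed format (sep=':', rowindex=1): libsvm-style indexed pairs
    old = label + ''.join(' %d:%s' % (i + 1, v) for i, v in enumerate(values))
    # -> "1 1:0.92 2:0.87"

    # retained format: plain ';'-separated csv line
    new = label + ''.join(';' + v for v in values)
    # -> "1;0.92;0.87"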

+ 92 - 50
generate_data_model_random.py

@@ -14,9 +14,10 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing, metrics
+from ipfml import processing, metrics, utils
 
 from modules.utils import config as cfg
+from modules.utils import data as dt
 
 # getting configuration information
 config_filename         = cfg.config_filename
@@ -24,9 +25,10 @@ zone_folder             = cfg.zone_folder
 min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indexes          = cfg.scenes_indices
-choices                 = cfg.normalization_choices
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
 path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
@@ -38,21 +40,27 @@ min_max_ext             = cfg.min_max_filename_extension
 
 generic_output_file_svd = '_random.csv'
 
-min_value_interval = sys.maxsize
-max_value_interval = 0
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+
 
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
+def construct_new_line(path_seuil, interval, line, choice, each, norm):
     begin, end = interval
 
     line_data = line.split(';')
     seuil = line_data[0]
     metrics = line_data[begin+1:end+1]
 
-    metrics = [float(m) for m in metrics]
+    # keep only if modulo result is 0 (keep only each wanted values)
+    metrics = [float(m) for id, m in enumerate(metrics) if id % each == 0]
 
     # TODO : check if it's always necessary to do that (loss of information for svd)
     if norm:
-        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
 
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
@@ -63,15 +71,13 @@ def construct_new_line(path_seuil, interval, line, norm, sep, index):
         line = '0'
 
     for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
+        line += ';'
         line += str(val)
     line += '\n'
 
     return line
 
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
+def get_min_max_value_interval(_scenes_list, _interval, _metric):
 
     global min_value_interval, max_value_interval
 
@@ -83,7 +89,7 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
     for id_scene, folder_scene in enumerate(scenes):
 
         # only take care of maxwell scenes
-        if folder_scene in scenes_list:
+        if folder_scene in _scenes_list:
 
             scene_path = os.path.join(path, folder_scene)
 
@@ -95,26 +101,26 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
                     index_str = "0" + index_str
                 zones_folder.append("zone"+index_str)
 
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
             for id_zone, zone_folder in enumerate(zones_folder):
+
                 zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+
                 data_file_path = os.path.join(zone_path, data_filename)
 
                 # getting number of line and read randomly lines
                 f = open(data_file_path)
                 lines = f.readlines()
 
-                counter = 0
                 # check if user select current scene and zone to be part of training data set
                 for line in lines:
 
-
                     begin, end = _interval
 
                     line_data = line.split(';')
+
                     metrics = line_data[begin+1:end+1]
                     metrics = [float(m) for m in metrics]
 
@@ -127,10 +133,8 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
                     if max_value > max_value_interval:
                         max_value_interval = max_value
 
-                    counter += 1
-
 
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -142,18 +146,18 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     if not os.path.exists(output_data_folder):
         os.makedirs(output_data_folder)
 
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
     scenes = os.listdir(path)
 
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
 
+    train_file_data = []
+    test_file_data  = []
+
     for id_scene, folder_scene in enumerate(scenes):
 
         # only take care of maxwell scenes
-        if folder_scene in scenes_list:
+        if folder_scene in _scenes_list:
 
             scene_path = os.path.join(path, folder_scene)
 
@@ -166,11 +170,19 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
                 zones_folder.append("zone"+index_str)
 
             # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
+            # only in random mode
+            if _random:
+                random.shuffle(zones_folder)
 
             for id_zone, zone_folder in enumerate(zones_folder):
                 zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+                # if custom normalization choices then we use svd values not already normalized
+                if _custom:
+                    data_filename = _metric + "_svd"+ generic_output_file_svd
+                else:
+                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+
                 data_file_path = os.path.join(zone_path, data_filename)
 
                 # getting number of line and read randomly lines
@@ -179,48 +191,64 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
 
                 num_lines = len(lines)
 
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
+                # randomly shuffle image
+                if _random:
+                    random.shuffle(lines)
 
                 path_seuil = os.path.join(zone_path, seuil_expe_filename)
 
                 counter = 0
                 # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
+                for data in lines:
 
                     percent = counter / num_lines
+                    image_index = int(data.split(';')[0])
+
+                    if image_index % _step == 0:
+                        line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
 
-                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                        train_file.write(line)
-                    else:
-                        test_file.write(line)
+                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                            train_file_data.append(line)
+                        else:
+                            test_file_data.append(line)
 
                     counter += 1
 
                 f.close()
 
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
     train_file.close()
     test_file.close()
 
 
 def main():
 
-    p_custom = False
+    p_custom    = False
+    p_step      = 1
+    p_renderer  = 'all'
+    p_each      = 1
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 renderer all  --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:r:p:s:e:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "random=", "percent=", "step=", "each=", "renderer=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -228,36 +256,50 @@ def main():
             p_interval = list(map(int, a.split(',')))
         elif o in ("-k", "--kind"):
             p_kind = a
+
+            if p_kind not in normalization_choices:
+                assert False, "Invalid normalization choice, %s" % normalization_choices
+
         elif o in ("-m", "--metric"):
             p_metric = a
         elif o in ("-s", "--scenes"):
             p_scenes = a.split(',')
         elif o in ("-n", "--nb_zones"):
             p_nb_zones = int(a)
+        elif o in ("-r", "--random"):
+            p_random = int(a)
         elif o in ("-p", "--percent"):
             p_percent = float(a)
         elif o in ("-s", "--sep"):
             p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+        elif o in ("-e", "--each"):
+            p_each = int(a)
+        elif o in ("-r", "--renderer"):
+            p_renderer = a
+
+            if p_renderer not in cfg.renderer_choices:
+                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
         elif o in ("-c", "--custom"):
             p_custom = a
         else:
             assert False, "unhandled option"
 
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
     # getting scenes from indexes user selection
     scenes_selected = []
 
     for scene_id in p_scenes:
-        index = scenes_indexes.index(scene_id.strip())
+        index = scenes_indices.index(scene_id.strip())
         scenes_selected.append(scenes_list[index])
 
     # find min max value if necessary to renormalize data
     if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
+        get_min_max_value_interval(scenes_list, p_interval, p_metric)
 
         # write new file to save
         if not os.path.exists(custom_min_max_folder):
@@ -271,7 +313,7 @@ def main():
             f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
 
 if __name__== "__main__":
     main()
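
The two sampling options added here act at different granularities; a short sketch of their semantics, matching the modulo tests in the loops above (numbers illustrative):

    step, each = 10, 2

    # --step keeps an image only when its index is a multiple of `step`
    image_index = 40
    keep_image = (image_index % step == 0)     # True for 40, False for 45

    # --each keeps every `each`-th SVD component inside a kept line
    metrics = [0.9, 0.8, 0.7, 0.6, 0.5]
    kept = [m for i, m in enumerate(metrics) if i % each == 0]
    # -> [0.9, 0.7, 0.5]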

+ 0 - 277
generate_data_model_random_maxwell.py

@@ -1,277 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Sep 14 21:02:42 2018
-
-@author: jbuisine
-"""
-
-from __future__ import print_function
-import sys, os, getopt
-import numpy as np
-import random
-import time
-import json
-
-from PIL import Image
-from ipfml import processing, metrics
-
-from modules.utils import config as cfg
-
-# getting configuration information
-config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.maxwell_scenes_names
-scenes_indexes          = cfg.maxwell_scenes_indices
-choices                 = cfg.normalization_choices
-path                    = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-metric_choices          = cfg.metric_choices_labels
-output_data_folder      = cfg.output_data_folder
-custom_min_max_folder   = cfg.min_max_custom_folder
-min_max_ext             = cfg.min_max_filename_extension
-
-generic_output_file_svd = '_random.csv'
-
-min_value_interval = sys.maxsize
-max_value_interval = 0
-
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
-    begin, end = interval
-
-    line_data = line.split(';')
-    seuil = line_data[0]
-    metrics = line_data[begin+1:end+1]
-
-    metrics = [float(m) for m in metrics]
-
-    # TODO : check if it's always necessary to do that (loss of information for svd)
-    if norm:
-        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
-
-    with open(path_seuil, "r") as seuil_file:
-        seuil_learned = int(seuil_file.readline().strip())
-
-    if seuil_learned > int(seuil):
-        line = '1'
-    else:
-        line = '0'
-
-    for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
-        line += str(val)
-    line += '\n'
-
-    return line
-
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
-
-    global min_value_interval, max_value_interval
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for line in lines:
-
-
-                    begin, end = _interval
-
-                    line_data = line.split(';')
-                    metrics = line_data[begin+1:end+1]
-                    metrics = [float(m) for m in metrics]
-
-                    min_value = min(metrics)
-                    max_value = max(metrics)
-
-                    if min_value < min_value_interval:
-                        min_value_interval = min_value
-
-                    if max_value > max_value_interval:
-                        max_value_interval = max_value
-
-                    counter += 1
-
-
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
-
-    output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".test"
-
-    if not '/' in output_train_filename:
-        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
-
-    # create path if not exists
-    if not os.path.exists(output_data_folder):
-        os.makedirs(output_data_folder)
-
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                num_lines = len(lines)
-
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
-
-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
-
-                    percent = counter / num_lines
-
-                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                        train_file.write(line)
-                    else:
-                        test_file.write(line)
-
-                    counter += 1
-
-                f.close()
-
-    train_file.close()
-    test_file.close()
-
-
-def main():
-
-    p_custom = False
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-n", "--nb_zones"):
-            p_nb_zones = int(a)
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
-
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # find min max value if necessary to renormalize data
-    if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
-
-        # write new file to save
-        if not os.path.exists(custom_min_max_folder):
-            os.makedirs(custom_min_max_folder)
-
-        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
-        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
-
-        with open(min_max_filename_path, 'w') as f:
-            f.write(str(min_value_interval) + '\n')
-            f.write(str(max_value_interval) + '\n')
-
-    # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
-
-if __name__== "__main__":
-    main()
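
The maxwell-specific generator removed above is covered by the --renderer flag of generate_data_model_random.py combined with the maxwell scene indices from the configuration; an equivalent invocation (output name and zone count illustrative) would be:

    python generate_data_model_random.py --output data/maxwell_dataset --interval 0,20 --kind svdne --metric lab --scenes "A, D, G, H" --nb_zones 4 --random 1 --percent 0.7 --step 10 --each 1 --renderer maxwell --custom min_max_filename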

+ 9 - 1
modules/utils/config.py

@@ -16,13 +16,21 @@ config_filename                 = "config"
 models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2"]
 
 # define all scenes values
+renderer_choices                = ['all', 'maxwell', 'igloo', 'cycle']
+
 scenes_names                    = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
 scenes_indices                  = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
 
 maxwell_scenes_names            = ['Appart1opt02', 'Cuisine01', 'SdbCentre', 'SdbDroite']
 maxwell_scenes_indices          = ['A', 'D', 'G', 'H']
 
+igloo_scenes_names              = ['Bureau1', 'PNDVuePlongeante']
+igloo_scenes_indices            = ['B', 'F']
+
+cycle_scenes_names              = ['EchecBas', 'Selles']
+cycle_scenes_indices            = ['E', 'I']
+
 normalization_choices           = ['svd', 'svdn', 'svdne']
 zones_indices                   = np.arange(16)
 
-metric_choices_labels           = ['lab', 'mscn', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2']
+metric_choices_labels           = ['lab', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2', 'sub_blocks_stats', 'sub_blocks_area', 'sub_blocks_stats_reduced']
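
The new renderer_choices list pairs with two helpers in modules/utils/data.py (diffed next): dt.get_renderer_scenes_names and dt.get_renderer_scenes_indices. Their bodies fall outside the excerpt below, so the following is only a plausible sketch, assuming they resolve the *_scenes_names/*_scenes_indices variables above through the context_vars dictionary that data.py declares:

    # hedged sketch; the real helpers live in modules/utils/data.py
    from modules.utils.config import *

    _scenes_names_prefix   = '_scenes_names'
    _scenes_indices_prefix = '_scenes_indices'

    # vars() captures this module's namespace, including the imported config names
    context_vars = vars()

    def get_renderer_scenes_names(renderer_name):
        # 'all' falls back to the full scene list from config.py
        if renderer_name == 'all':
            return scenes_names
        # e.g. 'maxwell' -> context_vars['maxwell_scenes_names']
        return context_vars[renderer_name + _scenes_names_prefix]

    def get_renderer_scenes_indices(renderer_name):
        if renderer_name == 'all':
            return scenes_indices
        return context_vars[renderer_name + _scenes_indices_prefix]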

+ 176 - 0
modules/utils/data.py

@@ -0,0 +1,176 @@
+from ipfml import processing, metrics, utils
+from modules.utils.config import *
+
+from PIL import Image
+from skimage import color
+
+import numpy as np
+
+
+_scenes_names_prefix   = '_scenes_names'
+_scenes_indices_prefix = '_scenes_indices'
+
+# store all variables from current module context
+context_vars = vars()
+
+
+def get_svd_data(data_type, block):
+    """
+    Return the SVD-based feature vector extracted from the given image block for the requested data type
+    """
+
+    if data_type == 'lab':
+
+        block_file_path = '/tmp/lab_img.png'
+        block.save(block_file_path)
+        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
+
+    if data_type == 'mscn_revisited':
+
+        img_mscn_revisited = processing.rgb_to_mscn(block)
+
+        # save tmp as img
+        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
+        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
+        img_output.save(mscn_revisited_file_path)
+        img_block = Image.open(mscn_revisited_file_path)
+
+        # extract from temp image
+        data = metrics.get_SVD_s(img_block)
+
+    if data_type == 'mscn':
+
+        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
+        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
+        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
+
+        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
+
+        data = metrics.get_SVD_s(img_mscn_gray)
+
+    if data_type == 'low_bits_6':
+
+        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 6)
+        data = metrics.get_SVD_s(low_bits_6)
+
+    if data_type == 'low_bits_5':
+
+        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 5)
+        data = metrics.get_SVD_s(low_bits_5)
+
+    if data_type == 'low_bits_4':
+
+        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block, 4)
+        data = metrics.get_SVD_s(low_bits_4)
+
+    if data_type == 'low_bits_3':
+
+        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 3)
+        data = metrics.get_SVD_s(low_bits_3)
+
+    if data_type == 'low_bits_2':
+
+        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 2)
+        data = metrics.get_SVD_s(low_bits_2)
+
+    if data_type == 'low_bits_4_shifted_2':
+
+        data = metrics.get_SVD_s(processing.rgb_to_LAB_L_bits(block, (3, 6)))
+
+    if data_type == 'sub_blocks_stats':
+
+        block = np.asarray(block)
+        width, height, _ = block.shape
+        sub_width, sub_height = int(width / 4), int(height / 4)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole Lab L channel
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+
+            # get information we want from svd
+            data.append(np.mean(l_svd_data))
+            data.append(np.median(l_svd_data))
+            data.append(np.percentile(l_svd_data, 25))
+            data.append(np.percentile(l_svd_data, 75))
+            data.append(np.var(l_svd_data))
+
+            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=100)
+            data.append(area_under_curve)
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+    if data_type == 'sub_blocks_stats_reduced':
+
+        block = np.asarray(block)
+        width, height, _ = block.shape
+        sub_width, sub_height = int(width / 4), int(height / 4)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole Lab L channel
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+
+            # get information we want from svd
+            data.append(np.mean(l_svd_data))
+            data.append(np.median(l_svd_data))
+            data.append(np.percentile(l_svd_data, 25))
+            data.append(np.percentile(l_svd_data, 75))
+            data.append(np.var(l_svd_data))
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+    if data_type == 'sub_blocks_area':
+
+        block = np.asarray(block)
+        width, height, _ = block.shape
+        sub_width, sub_height = int(width / 8), int(height / 8)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole Lab L channel
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+
+            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
+            data.append(area_under_curve)
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+
+    return data
+
+def get_renderer_scenes_indices(renderer_name):
+
+    if renderer_name not in renderer_choices:
+        raise ValueError("Unknown renderer name")
+
+    if renderer_name == 'all':
+        return scenes_indices
+    else:
+        return context_vars[renderer_name + _scenes_indices_prefix]
+
+def get_renderer_scenes_names(renderer_name):
+
+    if renderer_name not in renderer_choices:
+        raise ValueError("Unknown renderer name")
+
+    if renderer_name == 'all':
+        return scenes_names
+    else:
+        return context_vars[renderer_name + _scenes_names_prefix]
+
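get_renderer_scenes_names and get_renderer_scenes_indices resolve those per-renderer lists by name: context_vars = vars() captures the module globals (including everything star-imported from config), so 'maxwell' + '_scenes_names' looks up maxwell_scenes_names. A usage sketch, assuming the repository modules are importable:

    # list the scene indices and names each renderer covers
    from modules.utils.data import get_renderer_scenes_indices, get_renderer_scenes_names

    for renderer in ['all', 'maxwell', 'igloo', 'cycle']:
        pairs = zip(get_renderer_scenes_indices(renderer), get_renderer_scenes_names(renderer))
        print(renderer, list(pairs))

    # any other name raises ValueError("Unknown renderer name")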

+ 0 - 72
modules/utils/data_type.py

@@ -1,72 +0,0 @@
-from ipfml import processing, metrics
-from PIL import Image
-from skimage import color
-
-import numpy as np
-
-def get_svd_data(data_type, block):
-    """
-    Method which returns the data type expected
-    """
-
-    if data_type == 'lab':
-
-        block_file_path = '/tmp/lab_img.png'
-        block.save(block_file_path)
-        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
-
-    if data_type == 'mscn_revisited':
-
-        img_mscn_revisited = processing.rgb_to_mscn(block)
-
-        # save tmp as img
-        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
-        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
-        img_output.save(mscn_revisited_file_path)
-        img_block = Image.open(mscn_revisited_file_path)
-
-        # extract from temp image
-        data = metrics.get_SVD_s(img_block)
-
-    if data_type == 'mscn':
-
-        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
-        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
-        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
-
-        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
-
-        data = metrics.get_SVD_s(img_mscn_gray)
-
-    if data_type == 'low_bits_6':
-
-        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 6)
-        data = metrics.get_SVD_s(low_bits_6)
-
-    if data_type == 'low_bits_5':
-
-        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 5)
-        data = metrics.get_SVD_s(low_bits_5)
-
-    if data_type == 'low_bits_4':
-
-        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block, 4)
-        data = metrics.get_SVD_s(low_bits_4)
-
-    if data_type == 'low_bits_3':
-
-        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 3)
-        data = metrics.get_SVD_s(low_bits_3)
-
-    if data_type == 'low_bits_2':
-
-        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 2)
-        data = metrics.get_SVD_s(low_bits_2)
-
-    if data_type == 'low_bits_4_shifted_2':
-
-        data = metrics.get_SVD_s(processing.rgb_to_LAB_L_bits(block, (3, 6)))
-
-    return data
-
-

+ 44 - 23
predict_noisy_image_svd.py

@@ -2,17 +2,17 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import processing
+from ipfml import processing, utils
 from PIL import Image
 
 import sys, os, getopt
 
 from modules.utils import config as cfg
-from modules.utils import data_type as dt
+from modules.utils import data as dt
 
 path                  = cfg.dataset_path
 min_max_ext           = cfg.min_max_filename_extension
-metric_choices       = cfg.metric_choices_labels
+metric_choices        = cfg.metric_choices_labels
 normalization_choices = cfg.normalization_choices
 
 custom_min_max_folder = cfg.min_max_custom_folder
@@ -51,7 +51,7 @@ def main():
 
             if not p_mode in normalization_choices:
                 assert False, "Mode of normalization not recognized"
-        elif o in ("-m", "--custom"):
+        elif o in ("-c", "--custom"):
             p_custom = a
 
         else:
@@ -65,32 +65,53 @@ def main():
 
     data = dt.get_svd_data(p_metric, img)
 
-    # check mode to normalize data
-    if p_mode == 'svdne':
+    # get interval values
+    begin, end = p_interval
 
-        # set min_max_filename if custom use
-        if p_custom:
-            min_max_filename = custom_min_max_folder + '/' +  p_custom
-        else:
-            min_max_file_path = path + '/' + p_metric + min_max_ext
+    # check if custom min max file is used
+    if p_custom:
+
+        test_data = data[begin:end]
 
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
+        if p_mode == 'svdne':
 
-        l_values = processing.normalize_arr_with_range(data, min, max)
+            # set min_max_filename if custom use
+            min_max_file_path = custom_min_max_folder + '/' +  p_custom
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            test_data = utils.normalize_arr_with_range(test_data, min_val, max_val)
+
+        if p_mode == 'svdn':
+            test_data = utils.normalize_arr(test_data)
 
-    elif p_mode == 'svdn':
-        l_values = processing.normalize_arr(data)
     else:
-        l_values = data
 
+        # check mode to normalize data
+        if p_mode == 'svdne':
+
+            # set min_max_filename if custom use
+            min_max_file_path = path + '/' + p_metric + min_max_ext
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            l_values = utils.normalize_arr_with_range(data, min_val, max_val)
+
+        elif p_mode == 'svdn':
+            l_values = utils.normalize_arr(data)
+        else:
+            l_values = data
+
+        test_data = l_values[begin:end]
 
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
 
     # get prediction of model
     prediction = model.predict([test_data])[0]
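Note the reordering above: with a custom min/max file, the feature vector is sliced to [begin, end) first and then normalized against the stored bounds, whereas the default path normalizes the full vector before slicing. The three modes themselves reduce to a small dispatch; a condensed sketch using the same ipfml helpers, with normalize_for_mode as a hypothetical name:

    # dispatch for the three normalization modes used across these scripts
    from ipfml import utils

    def normalize_for_mode(data, mode, min_val=None, max_val=None):
        if mode == 'svdne':
            # rescale against stored (global or custom) min/max bounds
            return utils.normalize_arr_with_range(data, min_val, max_val)
        if mode == 'svdn':
            # rescale against the vector's own extrema
            return utils.normalize_arr(data)
        return data  # 'svd': raw singular values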

+ 1 - 1
predict_seuil_expe.py

@@ -2,7 +2,7 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import processing
+from ipfml import processing, utils
 from PIL import Image
 
 import sys, os, getopt

+ 1 - 1
runAll_maxwell.sh

@@ -12,7 +12,7 @@ if [ "${erased}" == "Y" ]; then
     touch ${file_path}
 
     # add of header
-    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; F1_val; F1_test; F1_all' >> ${file_path}
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
 
 fi
 

+ 52 - 0
runAll_maxwell_area.sh

@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_area"
+start_index=0
+end_index=16
+number=16
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (in case the server crashes mid-run)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 24 - 0
runAll_maxwell_custom.sh

@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+for size in {"4","8","16","26","32","40"}; do
+
+    for metric in {"lab","mscn","mscn_revisited","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+        bash generateAndTrain_maxwell_custom.sh ${size} ${metric}
+    done
+done

+ 52 - 0
runAll_maxwell_sub_blocks_stats.sh

@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_stats"
+start_index=0
+end_index=24
+number=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (in case the server crashes mid-run)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 52 - 0
runAll_maxwell_sub_blocks_stats_reduced.sh

@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add CSV header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_stats_reduced"
+start_index=0
+end_index=24
+number=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (in case the server crashes mid-run)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 2 - 2
run_maxwell_simulation.sh

@@ -34,11 +34,11 @@ for size in {"4","8","16","26","32","40"}; do
                         FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
-                        if grep -q "${MODEL_NAME}" "${simulate_models}"; then
+                        if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
                             echo "Run simulation for model ${MODEL_NAME}"
 
                             # by default regenerate model
-                            python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
+                            python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1
 
                             python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
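The switch from grep -q to grep -xq makes the guard match whole lines only: a model name that is a prefix of another entry (an _svd name contained in the corresponding _svdn or _svdne line, for instance) no longer triggers a false positive. The same check in Python terms, as a sketch assuming one model name per line in the simulate_models file:

    # exact-line membership test, equivalent in spirit to grep -xq
    def is_simulated(model_name, models_file):
        with open(models_file) as f:
            return any(line.strip() == model_name for line in f)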
 

+ 2 - 2
run_maxwell_simulation_custom.sh

@@ -35,11 +35,11 @@ for size in {"4","8","16","26","32","40"}; do
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
 
-                        if grep -q "${MODEL_NAME}" "${simulate_models}"; then
+                        if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
                             echo "Run simulation for model ${MODEL_NAME}"
 
                             # by default regenerate model
-                            python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --norm 0 --sep ';' --rowindex '0' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                            python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
 
                             python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 

+ 23 - 1
save_model_result_in_md_maxwell.py

@@ -1,6 +1,6 @@
 from sklearn.utils import shuffle
 from sklearn.externals import joblib
-from sklearn.metrics import accuracy_score, f1_score
+from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
 from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import train_test_split
 
@@ -194,9 +194,18 @@ def main():
     test_accuracy = accuracy_score(y_test, y_test_model)
 
     y_train_model = model.predict(x_dataset_train)
+
     train_f1 = f1_score(y_dataset_train, y_train_model)
+    train_recall = recall_score(y_dataset_train, y_train_model)
+    train_roc_auc = roc_auc_score(y_dataset_train, y_train_model)
+
     val_f1 = f1_score(y_val, y_val_model)
+    val_recall = recall_score(y_val, y_val_model)
+    val_roc_auc = roc_auc_score(y_val, y_val_model)
+
     test_f1 = f1_score(y_test, y_test_model)
+    test_recall = recall_score(y_test, y_test_model)
+    test_roc_auc = roc_auc_score(y_test, y_test_model)
 
     # stats of all dataset
     all_x_data = pd.concat([x_dataset_train, X_test, X_val])
@@ -205,6 +214,8 @@ def main():
     all_y_model = model.predict(all_x_data)
     all_accuracy = accuracy_score(all_y_data, all_y_model)
     all_f1_score = f1_score(all_y_data, all_y_model)
+    all_recall_score = recall_score(all_y_data, all_y_model)
+    all_roc_auc_score = roc_auc_score(all_y_data, all_y_model)
 
     # stats of dataset sizes
     total_samples = final_df_train_size + val_set_size + test_set_size
@@ -224,9 +235,20 @@ def main():
     model_scores.append(all_accuracy)
 
     model_scores.append(train_f1)
+    model_scores.append(train_recall)
+    model_scores.append(train_roc_auc)
+
     model_scores.append(val_f1)
+    model_scores.append(val_recall)
+    model_scores.append(val_roc_auc)
+
     model_scores.append(test_f1)
+    model_scores.append(test_recall)
+    model_scores.append(test_roc_auc)
+
     model_scores.append(all_f1_score)
+    model_scores.append(all_recall_score)
+    model_scores.append(all_roc_auc_score)
 
     # TODO : improve...
     # check if it's always the case...
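The added recall and ROC AUC scores mirror the existing F1 computation on each split. Worth noting: roc_auc_score is given hard 0/1 predictions here, in which case the AUC reduces to balanced accuracy; feeding class probabilities (model.predict_proba) would yield the usual curve-based AUC. A sketch of the per-split computation, with split_scores as a hypothetical helper:

    # per-split metrics gathered above, bundled into one hypothetical helper
    from sklearn.metrics import f1_score, recall_score, roc_auc_score

    def split_scores(y_true, y_pred):
        return {
            'f1': f1_score(y_true, y_pred),
            'recall': recall_score(y_true, y_pred),
            # with hard predictions this equals balanced accuracy;
            # use probability scores for a curve-based AUC
            'roc_auc': roc_auc_score(y_true, y_pred),
        }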

+ 1 - 1
testModelByScene_maxwell.sh

@@ -63,7 +63,7 @@ for scene in {"A","D","G","H"}; do
 
   FILENAME="data/data_${INPUT_MODE}_${INPUT_METRIC}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
 
-  python generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
+  python generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1
 
   python prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}