Parcourir la source

Merge branch 'release/v0.1.9'

Jérôme BUISINE il y a 6 ans
Parent
commit
96b87646a0

+ 62 - 0
SVDAnalysis/svd_mean_rotations_view.py

@@ -0,0 +1,62 @@
+from ipfml import processing, utils
+from skimage import transform
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+from PIL import Image
+
+data_folder = "../fichiersSVD_light"
+
+def get_svd_mean_image_rotations(img_path):
+
+    print("Extract features from... " + img_path)
+    img = np.asarray(Image.open(img_path))
+    width, height, dim = img.shape
+
+    img_mean = np.empty([width, height, 3])
+    rotations = []
+    svd_data_rotation = []
+
+    for i in range(4):
+        rotations.append(processing.rotate_image(img, (i+1)*90, pil=False))
+        svd_data_rotation.append(processing.get_LAB_L_SVD_s(rotations[i]))
+
+    nb_rotations = len(rotations)
+
+    img_mean = processing.fusion_images(rotations, pil=False)
+
+    data = processing.get_LAB_L_SVD_s(img_mean)
+
+    # getting max and min information from min_max_filename
+    with open(data_folder + "/lab_min_max_values", 'r') as f:
+        min_val = float(f.readline())
+        max_val = float(f.readline())
+
+    data = utils.normalize_arr_with_range(data, min_val, max_val)
+
+    return data
+
+scene   = 'Appart1opt02'
+mean_svd_values = []
+indices = ["00020", "00080", "00150", "00300", "00500", "00700", "00900"]
+
+for index in indices:
+    path = os.path.join(data_folder, scene + '/appartAopt_' + index + '.png')
+    mean_svd_values.append(get_svd_mean_image_rotations(path))
+
+plt.title("Information from merged rotations images at different noise level from " + scene + " scene", fontsize=22)
+
+plt.ylabel('Singular values', fontsize=18)
+plt.xlabel('Vector features', fontsize=18)
+
+for id, data in enumerate(mean_svd_values):
+
+    p_label = "appartAopt_" + indices[id]
+    plt.plot(data, label=p_label)
+
+plt.legend(bbox_to_anchor=(0.8, 1), loc=2, borderaxespad=0.2, fontsize=16)
+
+plt.ylim(0, 0.01)
+plt.show()
+

+ 62 - 0
SVDAnalysis/svd_roration_view.py

@@ -0,0 +1,62 @@
+from ipfml import processing, utils
+from skimage import transform
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+from PIL import Image
+
+data_folder = "../fichiersSVD_light"
+
+def get_svd_mean_and_image_rotations(img_path):
+
+    print("Extract features from... " + img_path)
+    img = np.asarray(Image.open(img_path))
+    width, height, dim = img.shape
+
+    img_mean = np.empty([width, height, 3])
+    rotations = []
+    svd_data_rotation = []
+
+    for i in range(4):
+        rotations.append(processing.rotate_image(img, (i+1)*90, pil=False))
+        svd_data_rotation.append(processing.get_LAB_L_SVD_s(rotations[i]))
+        Image.fromarray(rotations[i]).show()
+
+    mean_image = processing.fusion_images(rotations, pil=False)
+    mean_data = processing.get_LAB_L_SVD_s(mean_image)
+
+    # getting max and min information from min_max_filename
+    with open(data_folder + "/lab_min_max_values", 'r') as f:
+        min_val = float(f.readline())
+        max_val = float(f.readline())
+
+    mean_data = utils.normalize_arr_with_range(mean_data, min_val, max_val)
+
+    return [utils.normalize_arr_with_range(data, min_val, max_val) for data in svd_data_rotation], mean_data
+
+scene   = 'Appart1opt02'
+indices = ["00020", "00080", "00150", "00300", "00500", "00700", "00900"]
+
+for index in indices:
+    path = os.path.join(data_folder, scene + '/appartAopt_' + index + '.png')
+    svd_data, mean_svd_data = get_svd_mean_and_image_rotations(path)
+
+    plt.title("SVD information of rotations and merged image from " + scene + " scene", fontsize=22)
+
+    plt.ylabel('Singular values', fontsize=18)
+    plt.xlabel('Vector features', fontsize=18)
+
+    for id, data in enumerate(svd_data):
+
+        p_label = "appartAopt_" + index + "_" + str((id +1) * 90)
+        plt.plot(data, label=p_label)
+
+    mean_label = "appartAopt_" + index + "_mean"
+    plt.plot(mean_svd_data, label=mean_label)
+
+    plt.legend(bbox_to_anchor=(0.8, 1), loc=2, borderaxespad=0.2, fontsize=16)
+
+    plt.ylim(0, 0.01)
+    plt.show()
+

+ 4 - 5
display_scenes_zones.py

@@ -14,8 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 import matplotlib.pyplot as plt
 
@@ -150,7 +149,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
 
                         img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
                         img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
-                        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
+                        img_mscn_norm = utils.normalize_2D_arr(img_mscn)
 
                         img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
 
@@ -200,7 +199,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                     # modify data depending mode
 
                     if p_kind == 'svdn':
-                        data = processing.normalize_arr(data)
+                        data = utils.normalize_arr(data)
 
                     if p_kind == 'svdne':
                         path_min_max = os.path.join(path, data_type + min_max_filename)
@@ -209,7 +208,7 @@ def display_data_scenes(data_type, p_scene, p_kind):
                             min_val = float(f.readline())
                             max_val = float(f.readline())
 
-                        data = processing.normalize_arr_with_range(data, min_val, max_val)
+                        data = utils.normalize_arr_with_range(data, min_val, max_val)
 
                     # append of data
                     images_data.append(data)

+ 2 - 3
display_scenes_zones_shifted.py

@@ -14,8 +14,7 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 import matplotlib.pyplot as plt
 
@@ -136,7 +135,7 @@ def display_data_scenes(p_scene, p_bits, p_shifted):
                     ##################
 
                     # modify data depending mode
-                    data = processing.normalize_arr(data)
+                    data = utils.normalize_arr(data)
                     images_data.append(data)
 
                 zones_images_data.append(images_data)

+ 4 - 4
display_simulation_curves.py

@@ -4,7 +4,7 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import os, sys, getopt
 
-from modules.utils.data_type import get_svd_data
+from modules.utils.data import get_svd_data
 
 label_freq = 6
 
@@ -26,8 +26,7 @@ def display_curves(folder_path):
 
         df = pd.read_csv(path_file, header=None, sep=";")
 
-
-        fig=plt.figure(figsize=(8, 8))
+        fig=plt.figure(figsize=(35, 22))
         fig.suptitle("Detection simulation for " + scene_names[id] + " scene", fontsize=20)
 
         for index, row in df.iterrows():
@@ -61,7 +60,8 @@ def display_curves(folder_path):
             plt.xticks(x, x_labels, rotation=45)
             plt.ylim(-1, 2)
 
-        plt.show()
+        plt.savefig(os.path.join(folder_path, scene_names[id] + '_simulation_curve.png'))
+        #plt.show()
 
 def main():
 

+ 312 - 0
display_svd_area_data_scene.py

@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+import ipfml.iqa.fr as fr_iqa
+
+from skimage import color
+
+import matplotlib.pyplot as plt
+from modules.utils.data import get_svd_data
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename     = cfg.config_filename
+zone_folder         = cfg.zone_folder
+min_max_filename    = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list         = cfg.scenes_names
+scenes_indices      = cfg.scenes_indices
+choices             = cfg.normalization_choices
+path                = cfg.dataset_path
+zones               = cfg.zones_indices
+seuil_expe_filename = cfg.seuil_expe_filename
+
+metric_choices      = cfg.metric_choices_labels
+
+max_nb_bits = 8
+
+integral_area_choices = ['trapz', 'simps']
+
+def get_area_under_curve(p_area, p_data):
+
+    noise_method = None
+    function_name = 'integral_area_' + p_area
+
+    try:
+        area_method = getattr(utils, function_name)
+    except AttributeError:
+        raise NotImplementedError("Error `{}` not implement `{}`".format(utils.__name__, function_name))
+
+    return area_method(p_data, dx=800)
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_area, p_ylim):
+    """
+    @brief Method which gives information about svd curves from zone of picture
+    @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
+    @param p_interval, interval [begin, end] of samples or minutes from render generation engine
+    @param p_metric, metric computed to show
+    @param p_mode, normalization's mode
+    @param p_norm, normalization or not of selected svd data
+    @param p_area, area method name to compute area under curve
+    @param p_ylim, ylim choice to better display of data
+    @return nothing
+    """
+
+    max_value_svd = 0
+    min_value_svd = sys.maxsize
+
+    image_indices = []
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
+    data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
+
+    # go ahead each scenes
+    for id_scene, folder_scene in enumerate(scenes):
+
+        if p_scene == folder_scene:
+            scene_path = os.path.join(path, folder_scene)
+
+            config_file_path = os.path.join(scene_path, config_filename)
+
+            with open(config_file_path, "r") as config_file:
+                last_image_name = config_file.readline().strip()
+                prefix_image_name = config_file.readline().strip()
+                start_index_image = config_file.readline().strip()
+                end_index_image = config_file.readline().strip()
+                step_counter = int(config_file.readline().strip())
+
+            # construct each zones folder name
+            zones_folder = []
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+
+            images_data = []
+            images_indices = []
+
+            threshold_learned_zones = []
+
+            for id, zone_folder in enumerate(zones_folder):
+
+                # get threshold information
+                zone_path = os.path.join(scene_path, zone_folder)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                # open treshold path and get this information
+                with open(path_seuil, "r") as seuil_file:
+                    threshold_learned = int(seuil_file.readline().strip())
+                    threshold_learned_zones.append(threshold_learned)
+
+            current_counter_index = int(start_index_image)
+            end_counter_index = int(end_index_image)
+
+            threshold_mean = np.mean(np.asarray(threshold_learned_zones))
+            threshold_image_found = False
+
+            file_path = os.path.join(scene_path, prefix_image_name + "{}.png")
+
+            svd_data = []
+
+            while(current_counter_index <= end_counter_index):
+
+                current_counter_index_str = str(current_counter_index)
+
+                while len(start_index_image) > len(current_counter_index_str):
+                    current_counter_index_str = "0" + current_counter_index_str
+
+                image_path = file_path.format(str(current_counter_index_str))
+                img = Image.open(image_path)
+
+                svd_values = get_svd_data(p_metric, img)
+
+                if p_norm:
+                    svd_values = svd_values[begin_data:end_data]
+
+                # update min max values
+                min_value = svd_values.min()
+                max_value = svd_values.max()
+
+                if min_value < min_value_svd:
+                    min_value_svd = min_value
+
+                if max_value > min_value_svd:
+                    max_value_svd = max_value
+
+                # keep in memory used data
+                if current_counter_index % p_step == 0:
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
+                        svd_data.append(svd_values)
+
+                    if threshold_mean < int(current_counter_index) and not threshold_image_found:
+
+                        threshold_image_found = True
+                        threshold_image_zone = current_counter_index_str
+
+                current_counter_index += step_counter
+                print('%.2f%%' % (current_counter_index / end_counter_index * 100))
+                sys.stdout.write("\033[F")
+
+
+            # all indices of picture to plot
+            print(images_indices)
+
+            previous_data = []
+            area_data = []
+
+            for id, data in enumerate(svd_data):
+
+                current_data = data
+
+                if not p_norm:
+                    current_data = current_data[begin_data:end_data]
+
+                if p_mode == 'svdn':
+                    current_data = utils.normalize_arr(current_data)
+
+                if p_mode == 'svdne':
+                    current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
+
+                images_data.append(current_data)
+
+                # not use this script for 'sub_blocks_stats'
+                current_area = get_area_under_curve(p_area, current_data)
+                area_data.append(current_area)
+
+            # display all data using matplotlib (configure plt)
+            gridsize = (3, 2)
+
+            # fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(30, 22))
+            fig = plt.figure(figsize=(30, 22))
+            ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2)
+            ax2 = plt.subplot2grid(gridsize, (2, 0), colspan=2)
+
+
+            ax1.set_title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + ']' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
+            ax1.set_ylabel('Image samples or time (minutes) generation', fontsize=14)
+            ax1.set_xlabel('Vector features', fontsize=16)
+
+            for id, data in enumerate(images_data):
+
+                p_label = p_scene + '_' + str(images_indices[id]) + " | " + p_area + ": " + str(area_data[id])
+
+                if images_indices[id] == threshold_image_zone:
+                    ax1.plot(data, label=p_label, lw=4, color='red')
+                    threshold_id = id
+                else:
+                    ax1.plot(data, label=p_label)
+
+            ax1.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+
+            start_ylim, end_ylim = p_ylim
+            ax1.set_ylim(start_ylim, end_ylim)
+
+            ax2.set_title(p_area + " information for whole step images")
+            ax2.set_ylabel(p_area + ' area values')
+            ax2.set_xlabel('Number of samples per pixels or times')
+            ax2.set_xticks(range(len(images_indices)))
+            ax2.set_xticklabels(list(map(int, images_indices)))
+            ax2.plot([threshold_id, threshold_id], [np.min(area_data), np.max(area_data)], 'k-', lw=2, color='red')
+            ax2.plot(area_data)
+
+            plt.show()
+
+def main():
+
+
+    # by default p_step value is 10 to enable all photos
+    p_step = 10
+    p_ylim = (0, 1)
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python display_svd_area_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:a:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "area=", "ylim="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python display_svd_area_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python display_svd_area_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --area simps --ylim "0, 0.1"')
+            sys.exit()
+        elif o in ("-s", "--scene"):
+            p_scene = a
+
+            if p_scene not in scenes_indices:
+                assert False, "Invalid scene choice"
+            else:
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Invalid metric choice"
+
+        elif o in ("-m", "--mode"):
+            p_mode = a
+
+            if p_mode not in choices:
+                assert False, "Invalid normalization choice, expected ['svd', 'svdn', 'svdne']"
+
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-a", "--area"):
+            p_area = a
+
+            if p_area not in integral_area_choices:
+                assert False, "Invalid area computation choices : %s " % integral_area_choices
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
+        else:
+            assert False, "unhandled option"
+
+    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_area, p_ylim)
+
+if __name__== "__main__":
+    main()

+ 318 - 0
display_svd_data_scene.py

@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+import ipfml.iqa.fr as fr_iqa
+
+from skimage import color
+
+import matplotlib.pyplot as plt
+from modules.utils.data import get_svd_data
+
+from modules.utils import config as cfg
+
+# getting configuration information
+config_filename     = cfg.config_filename
+zone_folder         = cfg.zone_folder
+min_max_filename    = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list         = cfg.scenes_names
+scenes_indices      = cfg.scenes_indices
+choices             = cfg.normalization_choices
+path                = cfg.dataset_path
+zones               = cfg.zones_indices
+seuil_expe_filename = cfg.seuil_expe_filename
+
+metric_choices      = cfg.metric_choices_labels
+
+max_nb_bits = 8
+
+error_data_choices  = ['mae', 'mse', 'ssim', 'psnr']
+
+
+def get_error_distance(p_error, y_true, y_test):
+
+    noise_method = None
+    function_name = p_error
+
+    try:
+        error_method = getattr(fr_iqa, function_name)
+    except AttributeError:
+        raise NotImplementedError("Error `{}` not implement `{}`".format(fr_iqa.__name__, function_name))
+
+    return error_method(y_true, y_test)
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim):
+    """
+    @brief Method which gives information about svd curves from zone of picture
+    @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
+    @param p_interval, interval [begin, end] of samples or minutes from render generation engine
+    @param p_metric, metric computed to show
+    @param p_mode, normalization's mode
+    @param p_norm, normalization or not of selected svd data
+    @param p_error, error metric used to display
+    @param p_ylim, ylim choice to better display of data
+    @return nothing
+    """
+
+    max_value_svd = 0
+    min_value_svd = sys.maxsize
+
+    image_indices = []
+
+    scenes = os.listdir(path)
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
+    data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
+
+    # go ahead each scenes
+    for id_scene, folder_scene in enumerate(scenes):
+
+        if p_scene == folder_scene:
+            scene_path = os.path.join(path, folder_scene)
+
+            config_file_path = os.path.join(scene_path, config_filename)
+
+            with open(config_file_path, "r") as config_file:
+                last_image_name = config_file.readline().strip()
+                prefix_image_name = config_file.readline().strip()
+                start_index_image = config_file.readline().strip()
+                end_index_image = config_file.readline().strip()
+                step_counter = int(config_file.readline().strip())
+
+            # construct each zones folder name
+            zones_folder = []
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+
+            images_data = []
+            images_indices = []
+
+            threshold_learned_zones = []
+
+            for id, zone_folder in enumerate(zones_folder):
+
+                # get threshold information
+
+                zone_path = os.path.join(scene_path, zone_folder)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                # open treshold path and get this information
+                with open(path_seuil, "r") as seuil_file:
+                    threshold_learned = int(seuil_file.readline().strip())
+                    threshold_learned_zones.append(threshold_learned)
+
+            current_counter_index = int(start_index_image)
+            end_counter_index = int(end_index_image)
+
+            threshold_mean = np.mean(np.asarray(threshold_learned_zones))
+            threshold_image_found = False
+
+            file_path = os.path.join(scene_path, prefix_image_name + "{}.png")
+
+            svd_data = []
+
+            while(current_counter_index <= end_counter_index):
+
+                current_counter_index_str = str(current_counter_index)
+
+                while len(start_index_image) > len(current_counter_index_str):
+                    current_counter_index_str = "0" + current_counter_index_str
+
+                image_path = file_path.format(str(current_counter_index_str))
+                img = Image.open(image_path)
+
+                svd_values = get_svd_data(p_metric, img)
+
+                if p_norm:
+                    svd_values = svd_values[begin_data:end_data]
+
+                # update min max values
+                min_value = svd_values.min()
+                max_value = svd_values.max()
+
+                if min_value < min_value_svd:
+                    min_value_svd = min_value
+
+                if max_value > min_value_svd:
+                    max_value_svd = max_value
+
+                # keep in memory used data
+                if current_counter_index % p_step == 0:
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
+                        svd_data.append(svd_values)
+
+                    if threshold_mean < int(current_counter_index) and not threshold_image_found:
+
+                        threshold_image_found = True
+                        threshold_image_zone = current_counter_index_str
+
+                current_counter_index += step_counter
+                print('%.2f%%' % (current_counter_index / end_counter_index * 100))
+                sys.stdout.write("\033[F")
+
+
+            # all indices of picture to plot
+            print(images_indices)
+
+            previous_data = []
+            error_data = [0.]
+
+            for id, data in enumerate(svd_data):
+
+                current_data = data
+
+                if not p_norm:
+                    current_data = current_data[begin_data:end_data]
+
+                if p_mode == 'svdn':
+                    current_data = utils.normalize_arr(current_data)
+
+                if p_mode == 'svdne':
+                    current_data = utils.normalize_arr_with_range(current_data, min_value_svd, max_value_svd)
+
+                images_data.append(current_data)
+
+                # use of whole image data for computation of ssim or psnr
+                if p_error == 'ssim' or p_error == 'psnr':
+                    image_path = file_path.format(str(current_id))
+                    current_data = np.asarray(Image.open(image_path))
+
+                if len(previous_data) > 0:
+
+                    current_error = get_error_distance(p_error, previous_data, current_data)
+                    error_data.append(current_error)
+
+                if len(previous_data) == 0:
+                    previous_data = current_data
+
+            # display all data using matplotlib (configure plt)
+            gridsize = (3, 2)
+
+            # fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(30, 22))
+            fig = plt.figure(figsize=(30, 22))
+            ax1 = plt.subplot2grid(gridsize, (0, 0), colspan=2, rowspan=2)
+            ax2 = plt.subplot2grid(gridsize, (2, 0), colspan=2)
+
+
+            ax1.set_title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + ']' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
+            ax1.set_ylabel('Image samples or time (minutes) generation', fontsize=14)
+            ax1.set_xlabel('Vector features', fontsize=16)
+
+            for id, data in enumerate(images_data):
+
+                p_label = p_scene + '_' + str(images_indices[id]) + " | " + p_error + ": " + str(error_data[id])
+
+                if images_indices[id] == threshold_image_zone:
+                    ax1.plot(data, label=p_label, lw=4, color='red')
+                else:
+                    ax1.plot(data, label=p_label)
+
+            ax1.legend(bbox_to_anchor=(0.7, 1), loc=2, borderaxespad=0.2, fontsize=14)
+
+            start_ylim, end_ylim = p_ylim
+            ax1.set_ylim(start_ylim, end_ylim)
+
+            ax2.set_title(p_error + " information for whole step images")
+            ax2.set_ylabel(p_error + ' error')
+            ax2.set_xlabel('Number of samples per pixels or times')
+            ax2.set_xticks(range(len(images_indices)))
+            ax2.set_xticklabels(list(map(int, images_indices)))
+            ax2.plot(error_data)
+
+            plt.show()
+
+def main():
+
+
+    # by default p_step value is 10 to enable all photos
+    p_step = 10
+    p_ylim = (0, 1)
+
+    if len(sys.argv) <= 1:
+        print('Run with default parameters...')
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:e:y", ["help=", "scene=", "interval=", "indices=", "metric=", "mode=", "step=", "norm=", "error=", "ylim="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('python display_svd_data_scene.py --scene A --interval "0,800" --indices "0, 900" --metric lab --mode svdne --step 50 --norm 0 --error mae --ylim "0, 0.1"')
+            sys.exit()
+        elif o in ("-s", "--scene"):
+            p_scene = a
+
+            if p_scene not in scenes_indices:
+                assert False, "Invalid scene choice"
+            else:
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
+        elif o in ("-i", "--interval"):
+            p_interval = list(map(int, a.split(',')))
+
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
+        elif o in ("-m", "--metric"):
+            p_metric = a
+
+            if p_metric not in metric_choices:
+                assert False, "Invalid metric choice"
+
+        elif o in ("-m", "--mode"):
+            p_mode = a
+
+            if p_mode not in choices:
+                assert False, "Invalid normalization choice, expected ['svd', 'svdn', 'svdne']"
+
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-e", "--error"):
+            p_error = a
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
+        else:
+            assert False, "unhandled option"
+
+    display_svd_values(p_scene, p_interval, p_indices, p_metric, p_mode, p_step, p_norm, p_error, p_ylim)
+
+if __name__== "__main__":
+    main()

+ 117 - 31
display_svd_zone_scene.py

@@ -15,12 +15,11 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 
 import matplotlib.pyplot as plt
-from modules.utils.data_type import get_svd_data
+from modules.utils.data import get_svd_data
 
 from modules.utils import config as cfg
 
@@ -31,7 +30,7 @@ min_max_filename    = cfg.min_max_filename_extension
 
 # define all scenes values
 scenes_list         = cfg.scenes_names
-scenes_indexes      = cfg.scenes_indices
+scenes_indices      = cfg.scenes_indices
 choices             = cfg.normalization_choices
 path                = cfg.dataset_path
 zones               = cfg.zones_indices
@@ -39,16 +38,76 @@ seuil_expe_filename = cfg.seuil_expe_filename
 
 metric_choices      = cfg.metric_choices_labels
 
+generic_output_file_svd = '_random.csv'
+
 max_nb_bits = 8
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
+def get_min_max_value_interval(_scene, _interval, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of current scene
+        if folder_scene == _scene:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
 
-def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_svd" + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def display_svd_values(p_scene, p_interval, p_indices, p_zone, p_metric, p_mode, p_step, p_norm, p_ylim):
     """
     @brief Method which gives information about svd curves from zone of picture
     @param p_scene, scene expected to show svd values
+    @param p_interval, interval [begin, end] of svd data to display
     @param p_interval, interval [begin, end] of samples or minutes from render generation engine
     @param p_zone, zone's identifier of picture
     @param p_metric, metric computed to show
     @param p_mode, normalization's mode
+    @param p_step, step of images indices
+    @param p_norm, normalization or not of selected svd data
+    @param p_ylim, ylim choice to better display of data
     @return nothing
     """
 
@@ -56,14 +115,15 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
 
-    begin, end = p_interval
+    begin_data, end_data = p_interval
+    begin_index, end_index = p_indices
+
     data_min_max_filename = os.path.join(path, p_metric + min_max_filename)
 
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
 
         if p_scene == folder_scene:
-            print(folder_scene)
             scene_path = os.path.join(path, folder_scene)
 
             config_file_path = os.path.join(scene_path, config_filename)
@@ -88,7 +148,7 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 zones_folder.append(current_zone)
 
             zones_images_data = []
-            images_indexes = []
+            images_indices = []
 
             zone_folder = zones_folder[p_zone]
 
@@ -113,10 +173,9 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 while len(start_index_image) > len(current_counter_index_str):
                     current_counter_index_str = "0" + current_counter_index_str
 
-
                 if current_counter_index % p_step == 0:
-                    if current_counter_index >= begin and current_counter_index <= end:
-                        images_indexes.append(current_counter_index_str)
+                    if current_counter_index >= begin_index and current_counter_index <= end_index:
+                        images_indices.append(current_counter_index_str)
 
                     if seuil_learned < int(current_counter_index) and not threshold_image_found:
 
@@ -125,10 +184,10 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
 
                 current_counter_index += step_counter
 
-            # all indexes of picture to plot
-            print(images_indexes)
+            # all indices of picture to plot
+            print(images_indices)
 
-            for index in images_indexes:
+            for index in images_indices:
 
                 img_path = os.path.join(scene_path, prefix_image_name + str(index) + ".png")
 
@@ -142,6 +201,10 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 # Here you can add the way you compute data
                 data = get_svd_data(p_metric, block)
 
+                # TODO : improve part of this code to get correct min / max values
+                if p_norm:
+                    data = data[begin_data:end_data]
+
                 ##################
                 # Data mode part #
                 ##################
@@ -149,32 +212,41 @@ def display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step):
                 if p_mode == 'svdne':
 
                     # getting max and min information from min_max_filename
-                    with open(data_min_max_filename, 'r') as f:
-                        min_val = float(f.readline())
-                        max_val = float(f.readline())
+                    if not p_norm:
+                        with open(data_min_max_filename, 'r') as f:
+                            min_val = float(f.readline())
+                            max_val = float(f.readline())
+                    else:
+                        min_val = min_value_interval
+                        max_val = max_value_interval
 
-                    data = processing.normalize_arr_with_range(data, min_val, max_val)
+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
 
                 if p_mode == 'svdn':
-                    data = processing.normalize_arr(data)
+                    data = utils.normalize_arr(data)
 
-                zones_images_data.append(data)
+                if not p_norm:
+                    zones_images_data.append(data[begin_data:end_data])
+                else:
+                    zones_images_data.append(data)
 
-            plt.title(p_scene + ' scene interval information ['+ str(begin) +', '+ str(end) +'], ' + p_metric + ' metric, ' + p_mode, fontsize=20)
+            plt.title(p_scene + ' scene interval information SVD['+ str(begin_data) +', '+ str(end_data) +'], from scenes indices [' + str(begin_index) + ', '+ str(end_index) + ']' + p_metric + ' metric, ' + p_mode + ', with step of ' + str(p_step) + ', svd norm ' + str(p_norm), fontsize=20)
             plt.ylabel('Image samples or time (minutes) generation', fontsize=14)
             plt.xlabel('Vector features', fontsize=16)
 
             for id, data in enumerate(zones_images_data):
 
-                p_label = p_scene + "_" + images_indexes[id]
+                p_label = p_scene + "_" + images_indices[id]
 
-                if images_indexes[id] == threshold_image_zone:
+                if images_indices[id] == threshold_image_zone:
                     plt.plot(data, label=p_label, lw=4, color='red')
                 else:
                     plt.plot(data, label=p_label)
 
             plt.legend(bbox_to_anchor=(0.8, 1), loc=2, borderaxespad=0.2, fontsize=14)
-            plt.ylim(0, 0.1)
+
+            start_ylim, end_ylim = p_ylim
+            plt.ylim(start_ylim, end_ylim)
 
             plt.show()
 
@@ -183,31 +255,36 @@ def main():
 
     # by default p_step value is 10 to enable all photos
     p_step = 10
+    p_norm = 0
+    p_ylim = (0, 1)
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python display_svd_zone_scene.py --scene A --interval "0,200" --zone 3 --metric lab --mode svdne --step 50')
+        print('python display_svd_zone_scene.py --scene A --interval "0,200" --indices "0, 900" --zone 3 --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "hs:i:z:l:m:s", ["help=", "scene=", "interval=", "zone=", "metric=", "mode=", "step="])
+        opts, args = getopt.getopt(sys.argv[1:], "hs:i:i:z:l:m:s:n:y", ["help=", "scene=", "interval=", "indices=", "zone=", "metric=", "mode=", "step=", "norm=", "ylim="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python display_svd_zone_scene.py --scene A --interval "0,200" --zone 3 --metric lab --mode svdne --step 50')
+        print('python display_svd_zone_scene.py --scene A --interval "0,200" --indices "0, 900" --zone 3 --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python display_svd_zone_scene.py --scene A --interval "0,200" --zone 3 --metric lab --mode svdne --step 50')
+            print('python display_svd_zone_scene.py --scene A --interval "0,200" --indices "0, 900" --zone 3 --metric lab --mode svdne --step 50 --norm 0 --ylim "0, 0.1"')
             sys.exit()
         elif o in ("-s", "--scene"):
             p_scene = a
 
-            if p_scene not in scenes_indexes:
+            if p_scene not in scenes_indices:
                 assert False, "Invalid scene choice"
             else:
-                p_scene = scenes_list[scenes_indexes.index(p_scene)]
+                p_scene = scenes_list[scenes_indices.index(p_scene)]
         elif o in ("-i", "--interval"):
             p_interval = list(map(int, a.split(',')))
 
+        elif o in ("-i", "--indices"):
+            p_indices = list(map(int, a.split(',')))
+
         elif o in ("-z", "--zone"):
             p_zone = int(a)
 
@@ -226,10 +303,19 @@ def main():
         elif o in ("-s", "--step"):
             p_step = int(a)
 
+        elif o in ("-n", "--norm"):
+            p_norm = int(a)
+
+        elif o in ("-y", "--ylim"):
+            p_ylim = list(map(float, a.split(',')))
+
         else:
             assert False, "unhandled option"
 
-    display_svd_values(p_scene, p_interval, p_zone, p_metric, p_mode, p_step)
+    if p_norm:
+        get_min_max_value_interval(p_scene, p_interval, p_metric)
+
+    display_svd_values(p_scene, p_interval, p_indices, p_zone, p_metric, p_mode, p_step, p_norm, p_ylim)
 
 if __name__== "__main__":
     main()

+ 2 - 2
generateAndTrain_maxwell.sh

@@ -37,7 +37,7 @@ for counter in {0..4}; do
         end=$(($size))
     fi
 
-    for nb_zones in {4,6,8,10,12,14}; do
+    for nb_zones in {4,6,8,10,12}; do
 
         echo $start $end
 
@@ -54,7 +54,7 @@ for counter in {0..4}; do
 
                     echo "${MODEL_NAME} results already generated..."
                 else
-                    python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
+                    python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --renderer "maxwell" --step 40 --random 1 --percent 1
                     python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                     #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2'

+ 2 - 2
generateAndTrain_maxwell_custom.sh

@@ -37,7 +37,7 @@ for counter in {0..4}; do
         end=$(($size))
     fi
 
-    for nb_zones in {4,6,8,10,12,14}; do
+    for nb_zones in {4,6,8,10,12}; do
 
         echo $start $end
 
@@ -55,7 +55,7 @@ for counter in {0..4}; do
 
                     echo "${MODEL_NAME} results already generated..."
                 else
-                    python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
                     python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 
                     #python predict_seuil_expe_maxwell.py --interval "${start},${end}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}

+ 46 - 57
generate_all_data.py

@@ -13,10 +13,9 @@ import random
 import time
 import json
 
-from modules.utils.data_type import get_svd_data
+from modules.utils.data import get_svd_data
 from PIL import Image
-from ipfml import processing
-from ipfml import metrics
+from ipfml import processing, metrics, utils
 from skimage import color
 
 from modules.utils import config as cfg
@@ -38,7 +37,6 @@ metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 
 generic_output_file_svd = '_random.csv'
-picture_step            = 10
 
 def generate_data_svd(data_type, mode):
     """
@@ -102,64 +100,63 @@ def generate_data_svd(data_type, mode):
 
         while(current_counter_index <= end_counter_index):
 
-            if current_counter_index % picture_step == 0:
-                current_counter_index_str = str(current_counter_index)
+            current_counter_index_str = str(current_counter_index)
 
-                while len(start_index_image) > len(current_counter_index_str):
-                    current_counter_index_str = "0" + current_counter_index_str
+            while len(start_index_image) > len(current_counter_index_str):
+                current_counter_index_str = "0" + current_counter_index_str
 
-                img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
+            img_path = os.path.join(scene_path, prefix_image_name + current_counter_index_str + ".png")
 
-                current_img = Image.open(img_path)
-                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
+            current_img = Image.open(img_path)
+            img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
-                for id_block, block in enumerate(img_blocks):
+            for id_block, block in enumerate(img_blocks):
 
-                    ###########################
-                    # Metric computation part #
-                    ###########################
+                ###########################
+                # Metric computation part #
+                ###########################
 
-                    data = get_svd_data(data_type, block)
+                data = get_svd_data(data_type, block)
 
-                    ##################
-                    # Data mode part #
-                    ##################
+                ##################
+                # Data mode part #
+                ##################
 
-                    # modify data depending mode
-                    if mode == 'svdne':
+                # modify data depending mode
+                if mode == 'svdne':
 
-                        # getting max and min information from min_max_filename
-                        with open(data_min_max_filename, 'r') as f:
-                            min_val = float(f.readline())
-                            max_val = float(f.readline())
+                    # getting max and min information from min_max_filename
+                    with open(data_min_max_filename, 'r') as f:
+                        min_val = float(f.readline())
+                        max_val = float(f.readline())
 
-                        data = processing.normalize_arr_with_range(data, min_val, max_val)
+                    data = utils.normalize_arr_with_range(data, min_val, max_val)
 
-                    if mode == 'svdn':
-                        data = processing.normalize_arr(data)
+                if mode == 'svdn':
+                    data = utils.normalize_arr(data)
 
-                    # save min and max found from dataset in order to normalize data using whole data known
-                    if mode == 'svd':
+                # save min and max found from dataset in order to normalize data using whole data known
+                if mode == 'svd':
 
-                        current_min = data.min()
-                        current_max = data.max()
+                    current_min = data.min()
+                    current_max = data.max()
 
-                        if current_min < min_val_found:
-                            min_val_found = current_min
+                    if current_min < min_val_found:
+                        min_val_found = current_min
 
-                        if current_max > max_val_found:
-                            max_val_found = current_max
+                    if current_max > max_val_found:
+                        max_val_found = current_max
 
-                    # now write data into current writer
-                    current_file = svd_output_files[id_block]
+                # now write data into current writer
+                current_file = svd_output_files[id_block]
 
-                    # add of index
-                    current_file.write(current_counter_index_str + ';')
+                # add of index
+                current_file.write(current_counter_index_str + ';')
 
-                    for val in data:
-                        current_file.write(str(val) + ";")
+                for val in data:
+                    current_file.write(str(val) + ";")
 
-                    current_file.write('\n')
+                current_file.write('\n')
 
             start_index_image_int = int(start_index_image)
             print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((current_counter_index - start_index_image_int) / (end_counter_index - start_index_image_int)* 100.) + "%")
@@ -178,32 +175,30 @@ def generate_data_svd(data_type, mode):
             f.write(str(min_val_found) + '\n')
             f.write(str(max_val_found) + '\n')
 
-    print("%s : end of data generation\n" % mode)
+    print("%s_%s : end of data generation\n" % (data_type, mode))
 
 
 def main():
 
     # default value of p_step
-    p_step = 10
+    p_step = 1
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
         print('python generate_all_data.py --metric all')
         print('python generate_all_data.py --metric lab')
-        print('python generate_all_data.py --metric lab --step 10')
+        print('python generate_all_data.py --metric lab')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "hms", ["help=", "metric=", "step="])
+        opts, args = getopt.getopt(sys.argv[1:], "hms", ["help=", "metric="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_all_data.py --metric all --step 10')
+        print('python generate_all_data.py --metric all')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_all_data.py --metric all --step 10')
+            print('python generate_all_data.py --metric all')
             sys.exit()
-        elif o in ("-s", "--step"):
-            p_step = int(a)
         elif o in ("-m", "--metric"):
             p_metric = a
 
@@ -212,12 +207,6 @@ def main():
         else:
             assert False, "unhandled option"
 
-    global picture_step
-    picture_step = p_step
-
-    if picture_step % 10 != 0:
-        assert False, "Picture step variable needs to be divided by ten"
-
     # generate all or specific metric data
     if p_metric == 'all':
         for m in metric_choices:

+ 177 - 62
generate_data_model.py

@@ -13,27 +13,51 @@ import random
 import time
 import json
 
-config_filename   = "config"
-zone_folder       = "zone"
-min_max_filename  = "_min_max_values"
-generic_output_file_svd = '_random.csv'
-output_data_folder = 'data'
+from PIL import Image
+from ipfml import processing, metrics, utils
+
+from modules.utils import config as cfg
+from modules.utils import data as dt
+
+# getting configuration information
+config_filename         = cfg.config_filename
+zone_folder             = cfg.zone_folder
+min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
-scenes = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
-scenes_indexes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
-choices = ['svd', 'svdn', 'svdne']
-path = './fichiersSVD_light'
-zones = np.arange(16)
-seuil_expe_filename = 'seuilExpe'
-
-def construct_new_line(path_seuil, interval, line, sep, index):
+scenes_list             = cfg.scenes_names
+scenes_indexes          = cfg.scenes_indices
+choices                 = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+zones_indices           = cfg.zones_indices
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
+def construct_new_line(path_seuil, interval, line, choice, each, norm):
     begin, end = interval
 
     line_data = line.split(';')
     seuil = line_data[0]
     metrics = line_data[begin+1:end+1]
 
+    metrics = [float(m) for id, m in enumerate(metrics) if id % each == 0 ]
+
+    if norm:
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
+
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
 
@@ -43,15 +67,65 @@ def construct_new_line(path_seuil, interval, line, sep, index):
         line = '0'
 
     for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
-        line += val
+        line += ';'
+        line += str(val)
     line += '\n'
 
     return line
 
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes, _zones = zones, _percent = 1, _sep=':', _index=True):
+def get_min_max_value_interval(_scenes_list, _interval, _metric):
+
+    global min_value_interval, max_value_interval
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
+                data_filename = _metric + "_svd" + generic_output_file_svd
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    begin, end = _interval
+
+                    line_data = line.split(';')
+                    metrics = line_data[begin+1:end+1]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _zones = zones_indices, _percent = 1, _step=1, _each=1, _norm=False, _custom=False):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -72,67 +146,85 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     scenes = [s for s in scenes if min_max_filename not in s]
 
     for id_scene, folder_scene in enumerate(scenes):
-        scene_path = os.path.join(path, folder_scene)
 
-        zones_folder = []
-        # create zones list
-        for index in zones:
-            index_str = str(index)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
-            zones_folder.append("zone"+index_str)
+        # only take care of maxwell scenes
+        if folder_scene in scenes_list:
 
-        for id_zone, zone_folder in enumerate(zones_folder):
-            zone_path = os.path.join(scene_path, zone_folder)
-            data_filename = _metric + "_" + _choice + generic_output_file_svd
-            data_file_path = os.path.join(zone_path, data_filename)
+            scene_path = os.path.join(path, folder_scene)
 
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
 
-             # getting number of line and read randomly lines
-            f = open(data_file_path)
-            lines = f.readlines()
+            for id_zone, zone_folder in enumerate(zones_folder):
+                zone_path = os.path.join(scene_path, zone_folder)
 
-            num_lines = len(lines)
+                # if custom normalization choices then we use svd values not already normalized
+                if _custom:
+                    data_filename = _metric + "_svd" + generic_output_file_svd
+                else:
+                    data_filename = _metric + "_" + _choice + generic_output_file_svd
 
-            lines_indexes = np.arange(num_lines)
-            random.shuffle(lines_indexes)
+                data_file_path = os.path.join(zone_path, data_filename)
 
-            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
 
-            counter = 0
-            # check if user select current scene and zone to be part of training data set
-            for index in lines_indexes:
-                line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
+                num_lines = len(lines)
 
-                percent = counter / num_lines
+                lines_indexes = np.arange(num_lines)
+                random.shuffle(lines_indexes)
 
-                if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
-                    train_file.write(line)
-                else:
-                    test_file.write(line)
+                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+                counter = 0
+                # check if user select current scene and zone to be part of training data set
+                for index in lines_indexes:
+
+                    image_index = int(lines[index].split(';')[0])
+                    percent = counter / num_lines
 
-                counter += 1
+                    if image_index % _step == 0:
+                        line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)
 
-            f.close()
+                        if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
+                            train_file.write(line)
+                        else:
+                            test_file.write(line)
+
+                    counter += 1
+
+                f.close()
 
     train_file.close()
     test_file.close()
 
+
 def main():
 
+    p_custom = False
+    p_step      = 1
+    p_renderer  = 'all'
+    p_each      = 1
+
     if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r", ["help=", "output=", "interval=", "kind=", "metric=", "scenes=", "zones=", "percent=", "sep=", "rowindex="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "renderer=", "step=", "each=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3" --percent 0.7 --sep ":" --rowindex "1"')
+            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
+
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -151,25 +243,48 @@ def main():
                 p_zones = [a.strip()]
         elif o in ("-p", "--percent"):
             p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+        elif o in ("-e", "--each"):
+            p_each = int(a)
+        elif o in ("-r", "--renderer"):
+            p_renderer = a
+
+            if p_renderer not in cfg.renderer_choices:
+                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
+        elif o in ("-c", "--custom"):
+            p_custom = a
         else:
             assert False, "unhandled option"
 
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
     # getting scenes from indexes user selection
     scenes_selected = []
 
     for scene_id in p_scenes:
         index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes[index])
+        scenes_selected.append(scenes_list[index])
+
+    # find min max value if necessary to renormalize data
+    if p_custom:
+        get_min_max_value_interval(scenes_list, p_interval, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_sep, p_rowindex)
+    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_step, p_each, p_custom)
 
 if __name__== "__main__":
     main()

+ 0 - 277
generate_data_model_r.py

@@ -1,277 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Sep 14 21:02:42 2018
-
-@author: jbuisine
-"""
-
-from __future__ import print_function
-import sys, os, getopt
-import numpy as np
-import random
-import time
-import json
-
-from PIL import Image
-from ipfml import processing, metrics
-
-from modules.utils import config as cfg
-
-# getting configuration information
-config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indexes          = cfg.scenes_indices
-choices                 = cfg.normalization_choices
-path                    = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-metric_choices          = cfg.metric_choices_labels
-output_data_folder      = cfg.output_data_folder
-custom_min_max_folder   = cfg.min_max_custom_folder
-min_max_ext             = cfg.min_max_filename_extension
-zones_indices           = cfg.zones_indices
-
-generic_output_file_svd = '_random.csv'
-
-min_value_interval = sys.maxsize
-max_value_interval = 0
-
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
-    begin, end = interval
-
-    line_data = line.split(';')
-    seuil = line_data[0]
-    metrics = line_data[begin+1:end+1]
-
-    metrics = [float(m) for m in metrics]
-
-    # TODO : check if it's always necessary to do that (loss of information for svd)
-    if norm:
-        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
-
-    with open(path_seuil, "r") as seuil_file:
-        seuil_learned = int(seuil_file.readline().strip())
-
-    if seuil_learned > int(seuil):
-        line = '1'
-    else:
-        line = '0'
-
-    for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
-        line += str(val)
-    line += '\n'
-
-    return line
-
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
-
-    global min_value_interval, max_value_interval
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for line in lines:
-
-                    begin, end = _interval
-
-                    line_data = line.split(';')
-                    metrics = line_data[begin+1:end+1]
-                    metrics = [float(m) for m in metrics]
-
-                    min_value = min(metrics)
-                    max_value = max(metrics)
-
-                    if min_value < min_value_interval:
-                        min_value_interval = min_value
-
-                    if max_value > max_value_interval:
-                        max_value_interval = max_value
-
-                    counter += 1
-
-
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _zones = zones_indices, _percent = 1, _norm = False, _sep=':', _index=True):
-
-    output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".test"
-
-    if not '/' in output_train_filename:
-        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
-
-    # create path if not exists
-    if not os.path.exists(output_data_folder):
-        os.makedirs(output_data_folder)
-
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                num_lines = len(lines)
-
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
-
-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
-
-                    percent = counter / num_lines
-
-                    if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
-                        train_file.write(line)
-                    else:
-                        test_file.write(line)
-
-                    counter += 1
-
-                f.close()
-
-    train_file.close()
-    test_file.close()
-
-
-def main():
-
-    p_custom = False
-
-    if len(sys.argv) <= 1:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "sep=", "rowindex=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-z", "--zones"):
-            if ',' in a:
-                p_zones = list(map(int, a.split(',')))
-            else:
-                p_zones = [a.strip()]
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
-
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # find min max value if necessary to renormalize data
-    if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
-
-        # write new file to save
-        if not os.path.exists(custom_min_max_folder):
-            os.makedirs(custom_min_max_folder)
-
-        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
-        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
-
-        with open(min_max_filename_path, 'w') as f:
-            f.write(str(min_value_interval) + '\n')
-            f.write(str(max_value_interval) + '\n')
-
-    # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_zones, p_percent, p_custom, p_sep, p_rowindex)
-
-if __name__== "__main__":
-    main()

+ 92 - 50
generate_data_model_random.py

@@ -14,9 +14,10 @@ import time
 import json
 
 from PIL import Image
-from ipfml import processing, metrics
+from ipfml import processing, metrics, utils
 
 from modules.utils import config as cfg
+from modules.utils import data as dt
 
 # getting configuration information
 config_filename         = cfg.config_filename
@@ -24,9 +25,10 @@ zone_folder             = cfg.zone_folder
 min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
-scenes_list             = cfg.scenes_names
-scenes_indexes          = cfg.scenes_indices
-choices                 = cfg.normalization_choices
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+normalization_choices   = cfg.normalization_choices
 path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
@@ -38,21 +40,27 @@ min_max_ext             = cfg.min_max_filename_extension
 
 generic_output_file_svd = '_random.csv'
 
-min_value_interval = sys.maxsize
-max_value_interval = 0
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+
 
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
+def construct_new_line(path_seuil, interval, line, choice, each, norm):
     begin, end = interval
 
     line_data = line.split(';')
     seuil = line_data[0]
     metrics = line_data[begin+1:end+1]
 
-    metrics = [float(m) for m in metrics]
+    # keep only if modulo result is 0 (keep only each wanted values)
+    metrics = [float(m) for id, m in enumerate(metrics) if id % each == 0]
 
     # TODO : check if it's always necessary to do that (loss of information for svd)
     if norm:
-        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
 
     with open(path_seuil, "r") as seuil_file:
         seuil_learned = int(seuil_file.readline().strip())
@@ -63,15 +71,13 @@ def construct_new_line(path_seuil, interval, line, norm, sep, index):
         line = '0'
 
     for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
+        line += ';'
         line += str(val)
     line += '\n'
 
     return line
 
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
+def get_min_max_value_interval(_scenes_list, _interval, _metric):
 
     global min_value_interval, max_value_interval
 
@@ -83,7 +89,7 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
     for id_scene, folder_scene in enumerate(scenes):
 
         # only take care of maxwell scenes
-        if folder_scene in scenes_list:
+        if folder_scene in _scenes_list:
 
             scene_path = os.path.join(path, folder_scene)
 
@@ -95,26 +101,26 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
                     index_str = "0" + index_str
                 zones_folder.append("zone"+index_str)
 
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
             for id_zone, zone_folder in enumerate(zones_folder):
+
                 zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+
                 data_file_path = os.path.join(zone_path, data_filename)
 
                 # getting number of line and read randomly lines
                 f = open(data_file_path)
                 lines = f.readlines()
 
-                counter = 0
                 # check if user select current scene and zone to be part of training data set
                 for line in lines:
 
-
                     begin, end = _interval
 
                     line_data = line.split(';')
+
                     metrics = line_data[begin+1:end+1]
                     metrics = [float(m) for m in metrics]
 
@@ -127,10 +133,8 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
                     if max_value > max_value_interval:
                         max_value_interval = max_value
 
-                    counter += 1
-
 
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _each=1, _custom = False):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -142,18 +146,18 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     if not os.path.exists(output_data_folder):
         os.makedirs(output_data_folder)
 
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
     scenes = os.listdir(path)
 
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
 
+    train_file_data = []
+    test_file_data  = []
+
     for id_scene, folder_scene in enumerate(scenes):
 
         # only take care of maxwell scenes
-        if folder_scene in scenes_list:
+        if folder_scene in _scenes_list:
 
             scene_path = os.path.join(path, folder_scene)
 
@@ -166,11 +170,19 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
                 zones_folder.append("zone"+index_str)
 
             # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
+            # only in random mode
+            if _random:
+                random.shuffle(zones_folder)
 
             for id_zone, zone_folder in enumerate(zones_folder):
                 zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+                # if custom normalization choices then we use svd values not already normalized
+                if _custom:
+                    data_filename = _metric + "_svd"+ generic_output_file_svd
+                else:
+                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+
                 data_file_path = os.path.join(zone_path, data_filename)
 
                 # getting number of line and read randomly lines
@@ -179,48 +191,64 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
 
                 num_lines = len(lines)
 
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
+                # randomly shuffle image
+                if _random:
+                    random.shuffle(lines)
 
                 path_seuil = os.path.join(zone_path, seuil_expe_filename)
 
                 counter = 0
                 # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
+                for data in lines:
 
                     percent = counter / num_lines
+                    image_index = int(data.split(';')[0])
+
+                    if image_index % _step == 0:
+                        line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
 
-                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                        train_file.write(line)
-                    else:
-                        test_file.write(line)
+                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                            train_file_data.append(line)
+                        else:
+                            test_file_data.append(line)
 
                     counter += 1
 
                 f.close()
 
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
     train_file.close()
     test_file.close()
 
 
 def main():
 
-    p_custom = False
+    p_custom    = False
+    p_step      = 1
+    p_renderer  = 'all'
+    p_each      = 1
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 renderer all  --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:r:p:s:e:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "random=", "percent=", "step=", "each=", "renderer=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -228,36 +256,50 @@ def main():
             p_interval = list(map(int, a.split(',')))
         elif o in ("-k", "--kind"):
             p_kind = a
+
+            if p_kind not in normalization_choices:
+                assert False, "Invalid normalization choice, %s" % normalization_choices
+
         elif o in ("-m", "--metric"):
             p_metric = a
         elif o in ("-s", "--scenes"):
             p_scenes = a.split(',')
         elif o in ("-n", "--nb_zones"):
             p_nb_zones = int(a)
+        elif o in ("-r", "--random"):
+            p_random = int(a)
         elif o in ("-p", "--percent"):
             p_percent = float(a)
         elif o in ("-s", "--sep"):
             p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
+        elif o in ("-s", "--step"):
+            p_step = int(a)
+        elif o in ("-e", "--each"):
+            p_each = int(a)
+        elif o in ("-r", "--renderer"):
+            p_renderer = a
+
+            if p_renderer not in cfg.renderer_choices:
+                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
         elif o in ("-c", "--custom"):
             p_custom = a
         else:
             assert False, "unhandled option"
 
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
     # getting scenes from indexes user selection
     scenes_selected = []
 
     for scene_id in p_scenes:
-        index = scenes_indexes.index(scene_id.strip())
+        index = scenes_indices.index(scene_id.strip())
         scenes_selected.append(scenes_list[index])
 
     # find min max value if necessary to renormalize data
     if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
+        get_min_max_value_interval(scenes_list, p_interval, p_metric)
 
         # write new file to save
         if not os.path.exists(custom_min_max_folder):
@@ -271,7 +313,7 @@ def main():
             f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
+    generate_data_model(scenes_list, p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_each, p_custom)
 
 if __name__== "__main__":
     main()

+ 0 - 277
generate_data_model_random_maxwell.py

@@ -1,277 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Sep 14 21:02:42 2018
-
-@author: jbuisine
-"""
-
-from __future__ import print_function
-import sys, os, getopt
-import numpy as np
-import random
-import time
-import json
-
-from PIL import Image
-from ipfml import processing, metrics
-
-from modules.utils import config as cfg
-
-# getting configuration information
-config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
-min_max_filename        = cfg.min_max_filename_extension
-
-# define all scenes values
-scenes_list             = cfg.maxwell_scenes_names
-scenes_indexes          = cfg.maxwell_scenes_indices
-choices                 = cfg.normalization_choices
-path                    = cfg.dataset_path
-zones                   = cfg.zones_indices
-seuil_expe_filename     = cfg.seuil_expe_filename
-
-metric_choices          = cfg.metric_choices_labels
-output_data_folder      = cfg.output_data_folder
-custom_min_max_folder   = cfg.min_max_custom_folder
-min_max_ext             = cfg.min_max_filename_extension
-
-generic_output_file_svd = '_random.csv'
-
-min_value_interval = sys.maxsize
-max_value_interval = 0
-
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
-    begin, end = interval
-
-    line_data = line.split(';')
-    seuil = line_data[0]
-    metrics = line_data[begin+1:end+1]
-
-    metrics = [float(m) for m in metrics]
-
-    # TODO : check if it's always necessary to do that (loss of information for svd)
-    if norm:
-        metrics = processing.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
-
-    with open(path_seuil, "r") as seuil_file:
-        seuil_learned = int(seuil_file.readline().strip())
-
-    if seuil_learned > int(seuil):
-        line = '1'
-    else:
-        line = '0'
-
-    for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
-        line += str(val)
-    line += '\n'
-
-    return line
-
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
-
-    global min_value_interval, max_value_interval
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for line in lines:
-
-
-                    begin, end = _interval
-
-                    line_data = line.split(';')
-                    metrics = line_data[begin+1:end+1]
-                    metrics = [float(m) for m in metrics]
-
-                    min_value = min(metrics)
-                    max_value = max(metrics)
-
-                    if min_value < min_value_interval:
-                        min_value_interval = min_value
-
-                    if max_value > max_value_interval:
-                        max_value_interval = max_value
-
-                    counter += 1
-
-
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
-
-    output_train_filename = _filename + ".train"
-    output_test_filename = _filename + ".test"
-
-    if not '/' in output_train_filename:
-        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
-
-    # create path if not exists
-    if not os.path.exists(output_data_folder):
-        os.makedirs(output_data_folder)
-
-    train_file = open(output_train_filename, 'w')
-    test_file = open(output_test_filename, 'w')
-
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
-    for id_scene, folder_scene in enumerate(scenes):
-
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
-
-            scene_path = os.path.join(path, folder_scene)
-
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
-
-            # shuffle list of zones (=> randomly choose zones)
-            random.shuffle(zones_folder)
-
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
-                data_file_path = os.path.join(zone_path, data_filename)
-
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
-
-                num_lines = len(lines)
-
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
-
-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
-
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
-
-                    percent = counter / num_lines
-
-                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                        train_file.write(line)
-                    else:
-                        test_file.write(line)
-
-                    counter += 1
-
-                f.close()
-
-    train_file.close()
-    test_file.close()
-
-
-def main():
-
-    p_custom = False
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-n", "--nb_zones"):
-            p_nb_zones = int(a)
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
-
-    # getting scenes from indexes user selection
-    scenes_selected = []
-
-    for scene_id in p_scenes:
-        index = scenes_indexes.index(scene_id.strip())
-        scenes_selected.append(scenes_list[index])
-
-    # find min max value if necessary to renormalize data
-    if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
-
-        # write new file to save
-        if not os.path.exists(custom_min_max_folder):
-            os.makedirs(custom_min_max_folder)
-
-        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
-        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
-
-        with open(min_max_filename_path, 'w') as f:
-            f.write(str(min_value_interval) + '\n')
-            f.write(str(max_value_interval) + '\n')
-
-    # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
-
-if __name__== "__main__":
-    main()

+ 9 - 1
modules/utils/config.py

@@ -16,13 +16,21 @@ config_filename                 = "config"
 models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2"]
 
 # define all scenes values
+renderer_choices                = ['all', 'maxwell', 'igloo', 'cycle']
+
 scenes_names                    = ['Appart1opt02', 'Bureau1', 'Cendrier', 'Cuisine01', 'EchecsBas', 'PNDVuePlongeante', 'SdbCentre', 'SdbDroite', 'Selles']
 scenes_indices                  = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
 
 maxwell_scenes_names            = ['Appart1opt02', 'Cuisine01', 'SdbCentre', 'SdbDroite']
 maxwell_scenes_indices          = ['A', 'D', 'G', 'H']
 
+igloo_scenes_names              = ['Bureau1', 'PNDVuePlongeante']
+igloo_scenes_indices            = ['B', 'F']
+
+cycle_scenes_names              = ['EchecBas', 'Selles']
+cycle_scenes_indices            = ['E', 'I']
+
 normalization_choices           = ['svd', 'svdn', 'svdne']
 zones_indices                   = np.arange(16)
 
-metric_choices_labels           = ['lab', 'mscn', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2']
+metric_choices_labels           = ['lab', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2', 'sub_blocks_stats', 'sub_blocks_area', 'sub_blocks_stats_reduced']

+ 176 - 0
modules/utils/data.py

@@ -0,0 +1,176 @@
+from ipfml import processing, metrics, utils
+from modules.utils.config import *
+
+from PIL import Image
+from skimage import color
+
+import numpy as np
+
+
+_scenes_names_prefix   = '_scenes_names'
+_scenes_indices_prefix = '_scenes_indices'
+
+# store all variables from current module context
+context_vars = vars()
+
+
+def get_svd_data(data_type, block):
+    """
+    Method which returns the data type expected
+    """
+
+    if data_type == 'lab':
+
+        block_file_path = '/tmp/lab_img.png'
+        block.save(block_file_path)
+        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
+
+    if data_type == 'mscn_revisited':
+
+        img_mscn_revisited = processing.rgb_to_mscn(block)
+
+        # save tmp as img
+        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
+        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
+        img_output.save(mscn_revisited_file_path)
+        img_block = Image.open(mscn_revisited_file_path)
+
+        # extract from temp image
+        data = metrics.get_SVD_s(img_block)
+
+    if data_type == 'mscn':
+
+        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
+        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
+        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
+
+        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
+
+        data = metrics.get_SVD_s(img_mscn_gray)
+
+    if data_type == 'low_bits_6':
+
+        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 6)
+        data = metrics.get_SVD_s(low_bits_6)
+
+    if data_type == 'low_bits_5':
+
+        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 5)
+        data = metrics.get_SVD_s(low_bits_5)
+
+    if data_type == 'low_bits_4':
+
+        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block, 4)
+        data = metrics.get_SVD_s(low_bits_4)
+
+    if data_type == 'low_bits_3':
+
+        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 3)
+        data = metrics.get_SVD_s(low_bits_3)
+
+    if data_type == 'low_bits_2':
+
+        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 2)
+        data = metrics.get_SVD_s(low_bits_2)
+
+    if data_type == 'low_bits_4_shifted_2':
+
+        data = metrics.get_SVD_s(processing.rgb_to_LAB_L_bits(block, (3, 6)))
+
+    if data_type == 'sub_blocks_stats':
+
+        block = np.asarray(block)
+        width, height, _= block.shape
+        sub_width, sub_height = int(width / 4), int(height / 4)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole lab L canal
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+
+            # get information we want from svd
+            data.append(np.mean(l_svd_data))
+            data.append(np.median(l_svd_data))
+            data.append(np.percentile(l_svd_data, 25))
+            data.append(np.percentile(l_svd_data, 75))
+            data.append(np.var(l_svd_data))
+
+            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=100)
+            data.append(area_under_curve)
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+    if data_type == 'sub_blocks_stats_reduced':
+
+        block = np.asarray(block)
+        width, height, _= block.shape
+        sub_width, sub_height = int(width / 4), int(height / 4)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole lab L canal
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+
+            # get information we want from svd
+            data.append(np.mean(l_svd_data))
+            data.append(np.median(l_svd_data))
+            data.append(np.percentile(l_svd_data, 25))
+            data.append(np.percentile(l_svd_data, 75))
+            data.append(np.var(l_svd_data))
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+    if data_type == 'sub_blocks_area':
+
+        block = np.asarray(block)
+        width, height, _= block.shape
+        sub_width, sub_height = int(width / 8), int(height / 8)
+
+        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+
+        data = []
+
+        for sub_b in sub_blocks:
+
+            # by default use the whole lab L canal
+            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+
+            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
+            data.append(area_under_curve)
+
+        # convert into numpy array after computing all stats
+        data = np.asarray(data)
+
+
+    return data
+
+def get_renderer_scenes_indices(renderer_name):
+
+    if renderer_name not in renderer_choices:
+        raise ValueError("Unknown renderer name")
+
+    if renderer_name == 'all':
+        return scenes_indices
+    else:
+        return context_vars[renderer_name + _scenes_indices_prefix]
+
+def get_renderer_scenes_names(renderer_name):
+
+    if renderer_name not in renderer_choices:
+        raise ValueError("Unknown renderer name")
+
+    if renderer_name == 'all':
+        return scenes_names
+    else:
+        return context_vars[renderer_name + _scenes_names_prefix]
+

+ 0 - 72
modules/utils/data_type.py

@@ -1,72 +0,0 @@
-from ipfml import processing, metrics
-from PIL import Image
-from skimage import color
-
-import numpy as np
-
-def get_svd_data(data_type, block):
-    """
-    Method which returns the data type expected
-    """
-
-    if data_type == 'lab':
-
-        block_file_path = '/tmp/lab_img.png'
-        block.save(block_file_path)
-        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
-
-    if data_type == 'mscn_revisited':
-
-        img_mscn_revisited = processing.rgb_to_mscn(block)
-
-        # save tmp as img
-        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
-        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
-        img_output.save(mscn_revisited_file_path)
-        img_block = Image.open(mscn_revisited_file_path)
-
-        # extract from temp image
-        data = metrics.get_SVD_s(img_block)
-
-    if data_type == 'mscn':
-
-        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
-        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
-        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
-
-        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
-
-        data = metrics.get_SVD_s(img_mscn_gray)
-
-    if data_type == 'low_bits_6':
-
-        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 6)
-        data = metrics.get_SVD_s(low_bits_6)
-
-    if data_type == 'low_bits_5':
-
-        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 5)
-        data = metrics.get_SVD_s(low_bits_5)
-
-    if data_type == 'low_bits_4':
-
-        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block, 4)
-        data = metrics.get_SVD_s(low_bits_4)
-
-    if data_type == 'low_bits_3':
-
-        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 3)
-        data = metrics.get_SVD_s(low_bits_3)
-
-    if data_type == 'low_bits_2':
-
-        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 2)
-        data = metrics.get_SVD_s(low_bits_2)
-
-    if data_type == 'low_bits_4_shifted_2':
-
-        data = metrics.get_SVD_s(processing.rgb_to_LAB_L_bits(block, (3, 6)))
-
-    return data
-
-

+ 44 - 23
predict_noisy_image_svd.py

@@ -2,17 +2,17 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import processing
+from ipfml import processing, utils
 from PIL import Image
 
 import sys, os, getopt
 
 from modules.utils import config as cfg
-from modules.utils import data_type as dt
+from modules.utils import data as dt
 
 path                  = cfg.dataset_path
 min_max_ext           = cfg.min_max_filename_extension
-metric_choices       = cfg.metric_choices_labels
+metric_choices        = cfg.metric_choices_labels
 normalization_choices = cfg.normalization_choices
 
 custom_min_max_folder = cfg.min_max_custom_folder
@@ -51,7 +51,7 @@ def main():
 
             if not p_mode in normalization_choices:
                 assert False, "Mode of normalization not recognized"
-        elif o in ("-m", "--custom"):
+        elif o in ("-c", "--custom"):
             p_custom = a
 
         else:
@@ -65,32 +65,53 @@ def main():
 
     data = dt.get_svd_data(p_metric, img)
 
-    # check mode to normalize data
-    if p_mode == 'svdne':
+    # get interval values
+    begin, end = p_interval
 
-        # set min_max_filename if custom use
-        if p_custom:
-            min_max_filename = custom_min_max_folder + '/' +  p_custom
-        else:
-            min_max_file_path = path + '/' + p_metric + min_max_ext
+    # check if custom min max file is used
+    if p_custom:
+
+        test_data = data[begin:end]
 
-        # need to read min_max_file
-        file_path = os.path.join(os.path.join(os.path.dirname(__file__),'../'), min_max_file_path)
-        with open(file_path, 'r') as f:
-            min = float(f.readline().replace('\n', ''))
-            max = float(f.readline().replace('\n', ''))
+        if p_mode == 'svdne':
 
-        l_values = processing.normalize_arr_with_range(data, min, max)
+            # set min_max_filename if custom use
+            min_max_file_path = custom_min_max_folder + '/' +  p_custom
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            test_data = utils.normalize_arr_with_range(test_data, min_val, max_val)
+
+        if p_mode == 'svdn':
+            test_data = utils.normalize_arr(test_data)
 
-    elif p_mode == 'svdn':
-        l_values = processing.normalize_arr(data)
     else:
-        l_values = data
 
+        # check mode to normalize data
+        if p_mode == 'svdne':
+
+            # set min_max_filename if custom use
+            min_max_file_path = path + '/' + p_metric + min_max_ext
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            l_values = utils.normalize_arr_with_range(data, min_val, max_val)
+
+        elif p_mode == 'svdn':
+            l_values = utils.normalize_arr(data)
+        else:
+            l_values = data
+
+        test_data = l_values[begin:end]
 
-    # get interval values
-    begin, end = p_interval
-    test_data = l_values[begin:end]
 
     # get prediction of model
     prediction = model.predict([test_data])[0]

+ 1 - 1
predict_seuil_expe.py

@@ -2,7 +2,7 @@ from sklearn.externals import joblib
 
 import numpy as np
 
-from ipfml import processing
+from ipfml import processing, utils
 from PIL import Image
 
 import sys, os, getopt

+ 1 - 1
runAll_maxwell.sh

@@ -12,7 +12,7 @@ if [ "${erased}" == "Y" ]; then
     touch ${file_path}
 
     # add of header
-    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; F1_val; F1_test; F1_all' >> ${file_path}
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
 
 fi
 

+ 52 - 0
runAll_maxwell_area.sh

@@ -0,0 +1,52 @@
+#! bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add of header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_area"
+start_index=0
+end_index=16
+number=16
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (perhaps server will fall.. Just in case)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 24 - 0
runAll_maxwell_custom.sh

@@ -0,0 +1,24 @@
+#! bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add of header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+for size in {"4","8","16","26","32","40"}; do
+
+    for metric in {"lab","mscn","mscn_revisited","low_bits_2","low_bits_3","low_bits_4","low_bits_5","low_bits_6","low_bits_4_shifted_2"}; do
+        bash generateAndTrain_maxwell_custom.sh ${size} ${metric}
+    done
+done

+ 52 - 0
runAll_maxwell_sub_blocks_stats.sh

@@ -0,0 +1,52 @@
+#! bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add of header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_stats"
+start_index=0
+end_index=24
+number=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (perhaps server will fall.. Just in case)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 52 - 0
runAll_maxwell_sub_blocks_stats_reduced.sh

@@ -0,0 +1,52 @@
+#! bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add of header
+    echo 'model_name; vector_size; start_index; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+fi
+
+metric="sub_blocks_stats_reduced"
+start_index=0
+end_index=24
+number=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+
+for nb_zones in {4,6,8,10,12}; do
+
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+
+            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            echo $FILENAME
+
+            # only compute if necessary (perhaps server will fall.. Just in case)
+            if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                echo "${MODEL_NAME} results already generated..."
+            else
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --metric ${metric}
+            fi
+        done
+    done
+done
+
+

+ 2 - 2
run_maxwell_simulation.sh

@@ -34,11 +34,11 @@ for size in {"4","8","16","26","32","40"}; do
                         FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
-                        if grep -q "${MODEL_NAME}" "${simulate_models}"; then
+                        if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
                             echo "Run simulation for model ${MODEL_NAME}"
 
                             # by default regenerate model
-                            python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --sep ';' --rowindex '0'
+                            python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1
 
                             python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 

+ 2 - 2
run_maxwell_simulation_custom.sh

@@ -35,11 +35,11 @@ for size in {"4","8","16","26","32","40"}; do
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
 
-                        if grep -q "${MODEL_NAME}" "${simulate_models}"; then
+                        if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
                             echo "Run simulation for model ${MODEL_NAME}"
 
                             # by default regenerate model
-                            python generate_data_model_random_maxwell.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --norm 0 --sep ';' --rowindex '0' --custom ${CUSTOM_MIN_MAX_FILENAME}
+                            python generate_data_model_random.py --output ${FILENAME} --interval "${start},${end}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
 
                             python train_model.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model}
 

+ 23 - 1
save_model_result_in_md_maxwell.py

@@ -1,6 +1,6 @@
 from sklearn.utils import shuffle
 from sklearn.externals import joblib
-from sklearn.metrics import accuracy_score, f1_score
+from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
 from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import train_test_split
 
@@ -194,9 +194,18 @@ def main():
     test_accuracy = accuracy_score(y_test, y_test_model)
 
     y_train_model = model.predict(x_dataset_train)
+
     train_f1 = f1_score(y_dataset_train, y_train_model)
+    train_recall = recall_score(y_dataset_train, y_train_model)
+    train_roc_auc = roc_auc_score(y_dataset_train, y_train_model)
+
     val_f1 = f1_score(y_val, y_val_model)
+    val_recall = recall_score(y_val, y_val_model)
+    val_roc_auc = roc_auc_score(y_val, y_val_model)
+
     test_f1 = f1_score(y_test, y_test_model)
+    test_recall = recall_score(y_test, y_test_model)
+    test_roc_auc = roc_auc_score(y_test, y_test_model)
 
     # stats of all dataset
     all_x_data = pd.concat([x_dataset_train, X_test, X_val])
@@ -205,6 +214,8 @@ def main():
     all_y_model = model.predict(all_x_data)
     all_accuracy = accuracy_score(all_y_data, all_y_model)
     all_f1_score = f1_score(all_y_data, all_y_model)
+    all_recall_score = recall_score(all_y_data, all_y_model)
+    all_roc_auc_score = roc_auc_score(all_y_data, all_y_model)
 
     # stats of dataset sizes
     total_samples = final_df_train_size + val_set_size + test_set_size
@@ -224,9 +235,20 @@ def main():
     model_scores.append(all_accuracy)
 
     model_scores.append(train_f1)
+    model_scores.append(train_recall)
+    model_scores.append(train_roc_auc)
+
     model_scores.append(val_f1)
+    model_scores.append(val_recall)
+    model_scores.append(val_roc_auc)
+
     model_scores.append(test_f1)
+    model_scores.append(test_recall)
+    model_scores.append(test_roc_auc)
+
     model_scores.append(all_f1_score)
+    model_scores.append(all_recall_score)
+    model_scores.append(all_roc_auc_score)
 
     # TODO : improve...
     # check if it's always the case...

+ 1 - 1
testModelByScene_maxwell.sh

@@ -63,7 +63,7 @@ for scene in {"A","D","G","H"}; do
 
   FILENAME="data/data_${INPUT_MODE}_${INPUT_METRIC}_B${INPUT_BEGIN}_E${INPUT_END}_scene${scene}"
 
-  python generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1 --sep ";" --rowindex "0"
+  python generate_data_model.py --output ${FILENAME} --interval "${INPUT_BEGIN},${INPUT_END}" --kind ${INPUT_MODE} --metric ${INPUT_METRIC} --scenes "${scene}" --zones "${zones}" --percent 1
 
   python prediction_scene.py --data "$FILENAME.train" --model ${INPUT_MODEL} --output "${INPUT_MODEL}_Scene${scene}_mode_${INPUT_MODE}_metric_${INPUT_METRIC}.prediction" --scene ${scene}