
Update of simulation scripts; New data generation file;

Jérôme BUISINE, 5 years ago
Parent commit dea1dbfb30

+ 3 - 0
.gitignore

@@ -4,6 +4,9 @@ saved_models/*
 threshold_map/*
 models_info/*
 custom_norm/*
+learned_zones/*
+corr_indices/*
+.ipynb_checkpoints
 
 # simulate_models.csv
 

File diff suppressed because it is too large
+ 721 - 0
corr_analysys.ipynb


+ 39 - 26
display_simulation_curves.py

@@ -2,28 +2,48 @@ import numpy as np
 import pandas as pd
 
 import matplotlib.pyplot as plt
-import os, sys, getopt
+import os, sys, argparse
 
 from modules.utils.data import get_svd_data
 
-label_freq = 6
+from modules.utils import config as cfg
 
-def display_curves(folder_path):
+learned_zones_folder = cfg.learned_zones_folder
+models_name          = cfg.models_names_list
+label_freq           = 6
+
+def display_curves(folder_path, model_name):
     """
     @brief Method used to display simulation given .csv files
     @param folder_path, folder which contains all .csv files obtained during simulation
+    @param model_name, current name of model
     @return nothing
     """
 
-    data_files = os.listdir(folder_path)
+    for name in models_name:
+        if name in model_name:
+            data_filename = model_name
+            learned_zones_folder_path = os.path.join(learned_zones_folder, data_filename)
+
+    data_files = [x for x in os.listdir(folder_path) if '.png' not in x]
 
     scene_names = [f.split('_')[3] for f in data_files]
 
+    print(scene_names)
     for id, f in enumerate(data_files):
 
         print(scene_names[id])
         path_file = os.path.join(folder_path, f)
 
+        scenes_zones_used_file_path = os.path.join(learned_zones_folder_path, scene_names[id] + '.csv')
+
+        zones_used = []
+
+        with open(scenes_zones_used_file_path, 'r') as zones_file:
+            zones_used = [int(x) for x in zones_file.readline().split(';') if x != '']
+
+        print(zones_used)
+
         df = pd.read_csv(path_file, header=None, sep=";")
 
         fig=plt.figure(figsize=(35, 22))
@@ -48,6 +68,10 @@ def display_curves(folder_path):
             fig.add_subplot(4, 4, (index + 1))
             plt.plot(row[5:])
 
+            if index in zones_used:
+                ax = plt.gca()
+                ax.set_facecolor((0.9, 0.95, 0.95))
+
             # draw vertical line from (70,100) to (70, 250)
             plt.plot([counter_index, counter_index], [-2, 2], 'k-', lw=2, color='red')
             plt.ylabel('Not noisy / Noisy', fontsize=18)
@@ -65,28 +89,17 @@ def display_curves(folder_path):
 
 def main():
 
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python display_simulation_curves.py --folder "path"')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hm:s:k", ["help=", "folder="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python display_simulation_curves.py --folder "path"')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python display_simulation_curves.py --folder "path"')
-            sys.exit()
-        elif o in ("-f", "--folder"):
-            p_folder = a
-
-        else:
-            assert False, "unhandled option"
-
-
-    display_curves(p_folder)
+    parser = argparse.ArgumentParser(description="Display simulations curves from simulation data")
+
+    parser.add_argument('--folder', type=str, help='Folder which contains simulations data for scenes')
+    parser.add_argument('--model', type=str, help='Name of the model used for simulations')
+
+    args = parser.parse_args()
+
+    p_folder = args.folder
+    p_model = args.model
+
+    display_curves(p_folder, p_model)
 
 if __name__== "__main__":
     main()
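
The zones file parsed above is a single line of semicolon-terminated integer indices, written by the generate_data_model*.py scripts further down in this commit. A minimal round-trip sketch, assuming a hypothetical learned_zones/example_model/SceneA.csv path:

    import os

    path = os.path.join('learned_zones', 'example_model', 'SceneA.csv')
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # write: each learned zone index followed by ';' (trailing separator included)
    with open(path, 'w') as out_f:
        for i in [0, 3, 7, 12]:
            out_f.write(str(i) + ';')

    # read: the trailing ';' yields an empty token, hence the `x != ''` filter
    with open(path, 'r') as in_f:
        zones_used = [int(x) for x in in_f.readline().split(';') if x != '']

    assert zones_used == [0, 3, 7, 12]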

+ 1 - 1
generateAndTrain_maxwell.sh

@@ -44,7 +44,7 @@ for counter in {0..4}; do
         for mode in {"svd","svdn","svdne"}; do
             for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-                FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
                 echo $FILENAME

+ 1 - 1
generateAndTrain_maxwell_custom.sh

@@ -44,7 +44,7 @@ for counter in {0..4}; do
         for mode in {"svd","svdn","svdne"}; do
             for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-                FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                 CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
 

+ 79 - 93
generate_data_model.py

@@ -7,7 +7,7 @@ Created on Fri Sep 14 21:02:42 2018
 """
 
 from __future__ import print_function
-import sys, os, getopt
+import sys, os, argparse
 import numpy as np
 import random
 import time
@@ -21,7 +21,7 @@ from modules.utils import data as dt
 
 # getting configuration information
 config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
+learned_folder          = cfg.learned_zones_folder
 min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
@@ -32,6 +32,8 @@ path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
 
+renderer_choices        = cfg.renderer_choices
+normalization_choices   = cfg.normalization_choices
 metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 custom_min_max_folder   = cfg.min_max_custom_folder
@@ -140,66 +142,71 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     train_file = open(output_train_filename, 'w')
     test_file = open(output_test_filename, 'w')
 
-    scenes = os.listdir(path)
+    for id_scene, folder_scene in enumerate(scenes_list):
 
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
+        # only take care of maxwell scenes
+        scene_path = os.path.join(path, folder_scene)
 
-    for id_scene, folder_scene in enumerate(scenes):
+        zones_indices = zones
 
-        # only take care of maxwell scenes
-        if folder_scene in scenes_list:
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
 
-            scene_path = os.path.join(path, folder_scene)
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
 
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
 
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
+        with open(file_learned_path, 'w') as f:
+            for i in _zones:
+                f.write(str(i) + ';')
 
-                # if custom normalization choices then we use svd values not already normalized
-                if _custom:
-                    data_filename = _metric + "_svd" + generic_output_file_svd
-                else:
-                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+        for id_zone, index_folder in enumerate(zones_indices):
 
-                data_file_path = os.path.join(zone_path, data_filename)
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
 
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
+            zone_path = os.path.join(scene_path, current_zone_folder)
 
-                num_lines = len(lines)
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd" + generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
 
-                lines_indexes = np.arange(num_lines)
-                random.shuffle(lines_indexes)
+            data_file_path = os.path.join(zone_path, data_filename)
 
-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
 
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for index in lines_indexes:
+            num_lines = len(lines)
+
+            lines_indexes = np.arange(num_lines)
+            random.shuffle(lines_indexes)
 
-                    image_index = int(lines[index].split(';')[0])
-                    percent = counter / num_lines
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
 
-                    if image_index % _step == 0:
-                        line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for index in lines_indexes:
 
-                        if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
-                            train_file.write(line)
-                        else:
-                            test_file.write(line)
+                image_index = int(lines[index].split(';')[0])
+                percent = counter / num_lines
 
-                    counter += 1
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, lines[index], _choice, _each, _norm)
 
-                f.close()
+                    if id_zone in _zones and folder_scene in _scenes and percent <= _percent:
+                        train_file.write(line)
+                    else:
+                        test_file.write(line)
+
+                counter += 1
+
+            f.close()
 
     train_file.close()
     test_file.close()
@@ -207,55 +214,34 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
 
 def main():
 
-    p_custom = False
-    p_step      = 1
-    p_renderer  = 'all'
-    p_each      = 1
-
-    if len(sys.argv) <= 1:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:z:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "zones=", "percent=", "renderer=", "step=", "each=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --zones "1, 2, 3, 4" --percent 0.7 --renderer all --step 10 --each 1 --custom min_max_filename')
-
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-z", "--zones"):
-            if ',' in a:
-                p_zones = list(map(int, a.split(',')))
-            else:
-                p_zones = [a.strip()]
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--step"):
-            p_step = int(a)
-        elif o in ("-e", "--each"):
-            p_each = int(a)
-        elif o in ("-r", "--renderer"):
-            p_renderer = a
-
-            if p_renderer not in cfg.renderer_choices:
-                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--zones', type=str, help='Zones indices to use for training data set')
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)', default=1.0)
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_zones    = list(map(int, args.zones.split(',')))
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
 
     # list all possibles choices of renderer
     scenes_list = dt.get_renderer_scenes_names(p_renderer)
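
The old getopt help string maps one-to-one onto the new argparse flags; a sketch of an equivalent call, using the same subprocess pattern this commit introduces in generate_data_model_corr_random.py (the output name is a placeholder):

    import subprocess

    cmd = ['python', 'generate_data_model.py',
           '--output', 'data/example_dataset',
           '--interval', '0,20',
           '--kind', 'svdne',
           '--metric', 'lab',
           '--scenes', 'A, B, D',
           '--zones', '1,2,3,4',
           '--percent', '0.7',
           '--renderer', 'all',
           '--step', '10',
           '--each', '1']
    subprocess.Popen(cmd).wait()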

+ 381 - 0
generate_data_model_corr_random.py

@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, argparse
+import numpy as np
+import pandas as pd
+import random
+import time
+import json
+import subprocess
+
+from PIL import Image
+from ipfml import processing, metrics, utils
+
+from modules.utils import config as cfg
+from modules.utils import data as dt
+
+# getting configuration information
+config_filename         = cfg.config_filename
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+all_scenes_list         = cfg.scenes_names
+all_scenes_indices      = cfg.scenes_indices
+
+renderer_choices        = cfg.renderer_choices
+normalization_choices   = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+generic_output_file_svd = '_random.csv'
+
+min_value_interval      = sys.maxsize
+max_value_interval      = 0
+
+
+def construct_new_line(path_seuil, indices, line, choice, norm):
+
+    # increase indices values by one to avoid label
+    f = lambda x : x + 1
+    indices = f(indices)
+
+    line_data = np.array(line.split(';'))
+    seuil = line_data[0]
+    metrics = line_data[indices]
+
+    # TODO : check if it's always necessary to do that (loss of information for svd)
+    if norm:
+        if choice == 'svdne':
+            metrics = utils.normalize_arr_with_range(metrics, min_value_interval, max_value_interval)
+        if choice == 'svdn':
+            metrics = utils.normalize_arr(metrics)
+
+    with open(path_seuil, "r") as seuil_file:
+        seuil_learned = int(seuil_file.readline().strip())
+
+    if seuil_learned > int(seuil):
+        line = '1'
+    else:
+        line = '0'
+
+    for idx, val in enumerate(metrics):
+        line += ';'
+        line += str(val)
+    line += '\n'
+
+    return line
+
+def get_min_max_value_interval(_scenes_list, _indices, _metric):
+
+    global min_value_interval, max_value_interval
+
+    # increase indices values by one to avoid label
+    f = lambda x : x + 1
+    indices = f(_indices)
+
+    scenes = os.listdir(path)
+
+    # remove min max file from scenes folder
+    scenes = [s for s in scenes if min_max_filename not in s]
+
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take care of maxwell scenes
+        if folder_scene in _scenes_list:
+
+            scene_path = os.path.join(path, folder_scene)
+
+            zones_folder = []
+            # create zones list
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zones_folder.append("zone"+index_str)
+
+            for id_zone, zone_folder in enumerate(zones_folder):
+
+                zone_path = os.path.join(scene_path, zone_folder)
+
+                # if custom normalization choices then we use svd values not already normalized
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+
+                data_file_path = os.path.join(zone_path, data_filename)
+
+                # getting number of line and read randomly lines
+                f = open(data_file_path)
+                lines = f.readlines()
+
+                # check if user select current scene and zone to be part of training data set
+                for line in lines:
+
+                    line_data = np.array(line.split(';'))
+
+                    metrics = line_data[indices]
+                    metrics = [float(m) for m in metrics]
+
+                    min_value = min(metrics)
+                    max_value = max(metrics)
+
+                    if min_value < min_value_interval:
+                        min_value_interval = min_value
+
+                    if max_value > max_value_interval:
+                        max_value_interval = max_value
+
+
+def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _scenes, _nb_zones = 4, _percent = 1, _random=0, _step=1, _custom = False):
+
+    output_train_filename = _filename + ".train"
+    output_test_filename = _filename + ".test"
+
+    if not '/' in output_train_filename:
+        raise Exception("Please select filename with directory path to save data. Example : data/dataset")
+
+    # create path if not exists
+    if not os.path.exists(output_data_folder):
+        os.makedirs(output_data_folder)
+
+    train_file_data = []
+    test_file_data  = []
+
+    for id_scene, folder_scene in enumerate(_scenes_list):
+
+        scene_path = os.path.join(path, folder_scene)
+
+        zones_indices = list(zones)  # copy, so the shuffle below cannot mutate cfg.zones_indices
+
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
+
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
+
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
+
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
+
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
+
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
+
+        for id_zone, index_folder in enumerate(zones_indices):
+
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
+
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+            data_file_path = os.path.join(zone_path, data_filename)
+
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
+
+            num_lines = len(lines)
+
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
+
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, data, _choice, _custom)
+
+                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                        train_file_data.append(line)
+                    else:
+                        test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
+
+    train_file = open(output_train_filename, 'w')
+    test_file = open(output_test_filename, 'w')
+
+    for line in train_file_data:
+        train_file.write(line)
+
+    for line in test_file_data:
+        test_file.write(line)
+
+    train_file.close()
+    test_file.close()
+
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--n', type=int, help='Number of features wanted')
+    parser.add_argument('--highest', type=int, help='Specify if highest (1) or lowest (0) correlation values are kept', choices=[0, 1])
+    parser.add_argument('--label', type=int, help='Specify if label correlation is used or not', choices=[0, 1])
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
+    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)')
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_n        = args.n
+    p_highest  = args.highest
+    p_label    = args.label
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
+    # list all possibles choices of renderer
+    scenes_list = dt.get_renderer_scenes_names(p_renderer)
+    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
+
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # Get indices to keep from correlation information
+    # compute temp data file to get correlation information
+    temp_filename = 'temp'
+    temp_filename_path = os.path.join(cfg.output_data_folder, temp_filename)
+
+    cmd = ['python', 'generate_data_model_random.py',
+            '--output', temp_filename_path,
+            '--interval', '0, 200',
+            '--kind', p_kind,
+            '--metric', p_metric,
+            '--scenes', args.scenes,
+            '--nb_zones', str(16),
+            '--random', str(int(p_random)),
+            '--percent', str(p_percent),
+            '--step', str(p_step),
+            '--each', str(1),
+            '--renderer', p_renderer,
+            '--custom', temp_filename + min_max_ext]
+
+    subprocess.Popen(cmd).wait()
+
+    temp_data_file_path = temp_filename_path + '.train'
+    df = pd.read_csv(temp_data_file_path, sep=';', header=None)
+
+    indices = []
+
+    # compute correlation matrix from whole data scenes of renderer (using or not label column)
+    if p_label:
+
+        # compute pearson correlation between features and label
+        corr = df.corr()
+
+        features_corr = []
+
+        for id_row, row in enumerate(corr):
+            for id_col, val in enumerate(corr[row]):
+                if id_col == 0 and id_row != 0:
+                    features_corr.append(abs(val))
+
+    else:
+        df = df.drop(df.columns[[0]], axis=1)
+
+        # compute pearson correlation between features using only features
+        corr = df[1:200].corr()
+
+        features_corr = []
+
+        for id_row, row in enumerate(corr):
+            correlation_score = 0
+            for id_col, val in enumerate(corr[row]):
+                if id_col != id_row:
+                    correlation_score += abs(val)
+
+            features_corr.append(correlation_score)
+
+    # find `n` min or max indices to keep
+    if p_highest:
+        indices = utils.get_indices_of_highest_values(features_corr, p_n)
+    else:
+        indices = utils.get_indices_of_lowest_values(features_corr, p_n)
+
+    indices = np.sort(indices)
+
+    # save indices found
+    if not os.path.exists(cfg.correlation_indices_folder):
+        os.makedirs(cfg.correlation_indices_folder)
+
+    indices_file_path = os.path.join(cfg.correlation_indices_folder, p_filename.replace(cfg.output_data_folder + '/', '') + '.csv')
+
+    with open(indices_file_path, 'w') as f:
+        for i in indices:
+            f.write(str(i) + ';')
+
+    # find min max value if necessary to renormalize data from `n` indices found
+    if p_custom:
+        get_min_max_value_interval(scenes_list, indices, p_metric)
+
+        # write new file to save
+        if not os.path.exists(custom_min_max_folder):
+            os.makedirs(custom_min_max_folder)
+
+        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
+        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)
+
+        with open(min_max_filename_path, 'w') as f:
+            f.write(str(min_value_interval) + '\n')
+            f.write(str(max_value_interval) + '\n')
+
+    # create database using img folder (generate first time only)
+    generate_data_model(scenes_list, p_filename, indices, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_custom)
+
+if __name__== "__main__":
+    main()
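
The two correlation modes above reduce to column operations on the correlation matrix; a condensed pandas/numpy sketch on toy data, where np.argsort stands in for the project's utils.get_indices_of_highest_values / get_indices_of_lowest_values:

    import numpy as np
    import pandas as pd

    # toy frame: column 0 plays the label, columns 1..5 the features
    rng = np.random.RandomState(0)
    df = pd.DataFrame(rng.rand(100, 6))

    # label mode (--label 1): |Pearson corr| of each feature with the label
    features_corr = df.corr()[0].abs().values[1:]

    # feature mode (--label 0): per-feature sum of |corr| with the other features
    corr_f = df.drop(columns=[0]).corr().abs().values
    scores = corr_f.sum(axis=1) - 1.0   # remove the self-correlation on the diagonal

    n = 3
    keep_high = np.sort(np.argsort(features_corr)[-n:])   # --highest 1
    keep_low  = np.sort(np.argsort(scores)[:n])           # --highest 0
    print(keep_high, keep_low)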

+ 90 - 105
generate_data_model_random.py

@@ -7,7 +7,7 @@ Created on Fri Sep 14 21:02:42 2018
 """
 
 from __future__ import print_function
-import sys, os, getopt
+import sys, os, argparse
 import numpy as np
 import random
 import time
@@ -21,7 +21,7 @@ from modules.utils import data as dt
 
 # getting configuration information
 config_filename         = cfg.config_filename
-zone_folder             = cfg.zone_folder
+learned_folder          = cfg.learned_zones_folder
 min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
@@ -33,6 +33,7 @@ path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
 
+renderer_choices        = cfg.renderer_choices
 metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 custom_min_max_folder   = cfg.min_max_custom_folder
@@ -146,75 +147,82 @@ def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _s
     if not os.path.exists(output_data_folder):
         os.makedirs(output_data_folder)
 
-    scenes = os.listdir(path)
-
-    # remove min max file from scenes folder
-    scenes = [s for s in scenes if min_max_filename not in s]
-
     train_file_data = []
     test_file_data  = []
 
-    for id_scene, folder_scene in enumerate(scenes):
+    for id_scene, folder_scene in enumerate(_scenes_list):
 
-        # only take care of maxwell scenes
-        if folder_scene in _scenes_list:
+        scene_path = os.path.join(path, folder_scene)
 
-            scene_path = os.path.join(path, folder_scene)
+        zones_indices = list(zones)  # copy, so the shuffle below cannot mutate cfg.zones_indices
 
-            zones_folder = []
-            # create zones list
-            for index in zones:
-                index_str = str(index)
-                if len(index_str) < 2:
-                    index_str = "0" + index_str
-                zones_folder.append("zone"+index_str)
+        # shuffle list of zones (=> randomly choose zones)
+        # only in random mode
+        if _random:
+            random.shuffle(zones_indices)
 
-            # shuffle list of zones (=> randomly choose zones)
-            # only in random mode
-            if _random:
-                random.shuffle(zones_folder)
+        # store zones learned
+        learned_zones_indices = zones_indices[:_nb_zones]
 
-            for id_zone, zone_folder in enumerate(zones_folder):
-                zone_path = os.path.join(scene_path, zone_folder)
+        # write into file
+        folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
 
-                # if custom normalization choices then we use svd values not already normalized
-                if _custom:
-                    data_filename = _metric + "_svd"+ generic_output_file_svd
-                else:
-                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+        if not os.path.exists(folder_learned_path):
+            os.makedirs(folder_learned_path)
 
-                data_file_path = os.path.join(zone_path, data_filename)
+        file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
 
-                # getting number of line and read randomly lines
-                f = open(data_file_path)
-                lines = f.readlines()
+        with open(file_learned_path, 'w') as f:
+            for i in learned_zones_indices:
+                f.write(str(i) + ';')
 
-                num_lines = len(lines)
+        for id_zone, index_folder in enumerate(zones_indices):
 
-                # randomly shuffle image
-                if _random:
-                    random.shuffle(lines)
+            index_str = str(index_folder)
+            if len(index_str) < 2:
+                index_str = "0" + index_str
+            current_zone_folder = "zone" + index_str
 
-                path_seuil = os.path.join(zone_path, seuil_expe_filename)
+            zone_path = os.path.join(scene_path, current_zone_folder)
 
-                counter = 0
-                # check if user select current scene and zone to be part of training data set
-                for data in lines:
+            # if custom normalization choices then we use svd values not already normalized
+            if _custom:
+                data_filename = _metric + "_svd"+ generic_output_file_svd
+            else:
+                data_filename = _metric + "_" + _choice + generic_output_file_svd
 
-                    percent = counter / num_lines
-                    image_index = int(data.split(';')[0])
+            data_file_path = os.path.join(zone_path, data_filename)
 
-                    if image_index % _step == 0:
-                        line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
+            # getting number of line and read randomly lines
+            f = open(data_file_path)
+            lines = f.readlines()
 
-                        if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
-                            train_file_data.append(line)
-                        else:
-                            test_file_data.append(line)
+            num_lines = len(lines)
 
-                    counter += 1
+            # randomly shuffle image
+            if _random:
+                random.shuffle(lines)
+
+            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+
+            counter = 0
+            # check if user select current scene and zone to be part of training data set
+            for data in lines:
+
+                percent = counter / num_lines
+                image_index = int(data.split(';')[0])
 
-                f.close()
+                if image_index % _step == 0:
+                    line = construct_new_line(path_seuil, _interval, data, _choice, _each, _custom)
+
+                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
+                        train_file_data.append(line)
+                    else:
+                        test_file_data.append(line)
+
+                counter += 1
+
+            f.close()
 
     train_file = open(output_train_filename, 'w')
     test_file = open(output_test_filename, 'w')
@@ -231,60 +239,37 @@ def generate_data_model(_scenes_list, _filename, _interval, _choice, _metric, _s
 
 def main():
 
-    p_custom    = False
-    p_step      = 1
-    p_renderer  = 'all'
-    p_each      = 1
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 renderer all  --custom min_max_filename')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:r:p:s:e:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "random=", "percent=", "step=", "each=", "renderer=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --random 1 --percent 0.7 --step 10 --each 1 --renderer all --custom min_max_filename')
-            sys.exit()
-        elif o in ("-o", "--output"):
-            p_filename = a
-        elif o in ("-i", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-k", "--kind"):
-            p_kind = a
-
-            if p_kind not in normalization_choices:
-                assert False, "Invalid normalization choice, %s" % normalization_choices
-
-        elif o in ("-m", "--metric"):
-            p_metric = a
-        elif o in ("-s", "--scenes"):
-            p_scenes = a.split(',')
-        elif o in ("-n", "--nb_zones"):
-            p_nb_zones = int(a)
-        elif o in ("-r", "--random"):
-            p_random = int(a)
-        elif o in ("-p", "--percent"):
-            p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-s", "--step"):
-            p_step = int(a)
-        elif o in ("-e", "--each"):
-            p_each = int(a)
-        elif o in ("-r", "--renderer"):
-            p_renderer = a
-
-            if p_renderer not in cfg.renderer_choices:
-                assert False, "Unknown renderer choice, %s" % cfg.renderer_choices
-        elif o in ("-c", "--custom"):
-            p_custom = a
-        else:
-            assert False, "unhandled option"
+    # getting all params
+    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")
+
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
+    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
+    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
+    parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)')
+    parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1)
+    parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1)
+    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_filename = args.output
+    p_interval = list(map(int, args.interval.split(',')))
+    p_kind     = args.kind
+    p_metric   = args.metric
+    p_scenes   = args.scenes.split(',')
+    p_nb_zones = args.nb_zones
+    p_random   = args.random
+    p_percent  = args.percent
+    p_step     = args.step
+    p_each     = args.each
+    p_renderer = args.renderer
+    p_custom   = args.custom
+
 
     # list all possibles choices of renderer
     scenes_list = dt.get_renderer_scenes_names(p_renderer)
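
A line reaches the train set only when its zone is among the first _nb_zones of the (possibly shuffled) zone list, its scene was selected, and the running percentage stays below _percent; everything else lands in the test set. A toy check of that predicate, with hypothetical scene names:

    def goes_to_train(id_zone, folder_scene, percent,
                      nb_zones=4, scenes=('SceneA',), max_percent=1.0):
        # mirrors: id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent
        return id_zone < nb_zones and folder_scene in scenes and percent <= max_percent

    assert goes_to_train(2, 'SceneA', 0.5)
    assert not goes_to_train(5, 'SceneA', 0.5)   # zone outside the learned subset -> test
    assert not goes_to_train(2, 'SceneB', 0.5)   # scene not selected -> test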

+ 2 - 0
modules/utils/config.py

@@ -7,6 +7,8 @@ threshold_map_folder            = 'threshold_map'
 models_information_folder       = 'models_info'
 saved_models_folder             = 'saved_models'
 min_max_custom_folder           = 'custom_norm'
+learned_zones_folder            = 'learned_zones'
+correlation_indices_folder      = 'corr_indices'
 
 csv_model_comparisons_filename  = "models_comparisons.csv"
 seuil_expe_filename             = 'seuilExpe'
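
These two folders hold the artifacts written by the scripts in this commit: learned_zones keeps, per model and scene, the semicolon-separated zone indices used for training, while corr_indices keeps the feature indices selected by generate_data_model_corr_random.py. A sketch of the paths as the scripts build them, with hypothetical model and scene names:

    import os
    from modules.utils import config as cfg

    model_name = 'svm_model_N40_B0_E40_nb_zones_12_lab_svdne'   # hypothetical

    # zones kept for training, one file per scene (see generate_data_model*.py)
    zones_file = os.path.join(cfg.learned_zones_folder, model_name, 'SceneA.csv')

    # indices kept by correlation selection (see generate_data_model_corr_random.py)
    indices_file = os.path.join(cfg.correlation_indices_folder, model_name + '.csv')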

+ 62 - 42
predict_noisy_image_svd.py

@@ -5,7 +5,8 @@ import numpy as np
 from ipfml import processing, utils
 from PIL import Image
 
-import sys, os, getopt
+import sys, os, argparse, json
+from keras.models import model_from_json
 
 from modules.utils import config as cfg
 from modules.utils import data as dt
@@ -19,46 +19,55 @@ custom_min_max_folder = cfg.min_max_custom_folder
 
 def main():
 
-    p_custom = False
-
-    if len(sys.argv) <= 1:
-        print('Run with default parameters...')
-        print('python predict_noisy_image_svd.py --image path/to/xxxx --interval "0,20" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
-        sys.exit(2)
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "hi:t:m:m:o:c", ["help=", "image=", "interval=", "model=", "metric=", "mode=", "custom="])
-    except getopt.GetoptError:
-        # print help information and exit
-        print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
-        sys.exit(2)
-    for o, a in opts:
-        if o == "-h":
-            print('python predict_noisy_image_svd_lab.py --image path/to/xxxx --interval "xx,xx" --model path/to/xxxx.joblib --metric lab --mode ["svdn", "svdne"] --custom min_max_file')
-            sys.exit()
-        elif o in ("-i", "--image"):
-            p_img_file = os.path.join(os.path.dirname(__file__), a)
-        elif o in ("-t", "--interval"):
-            p_interval = list(map(int, a.split(',')))
-        elif o in ("-m", "--model"):
-            p_model_file = os.path.join(os.path.dirname(__file__), a)
-        elif o in ("-m", "--metric"):
-            p_metric = a
-
-            if not p_metric in metric_choices:
-                assert False, "Unknow metric choice"
-        elif o in ("-o", "--mode"):
-            p_mode = a
-
-            if not p_mode in normalization_choices:
-                assert False, "Mode of normalization not recognized"
-        elif o in ("-c", "--custom"):
-            p_custom = a
+    # getting all params
+    parser = argparse.ArgumentParser(description="Script which detects if an image is noisy or not using specific model")
 
-        else:
-            assert False, "unhandled option"
+    parser.add_argument('--image', type=str, help='Image path')
+    parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='0, 200')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--metric', type=str, help='Metric data choice', choices=metric_choices)
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_img_file   = args.image
+    p_model_file = args.model
+    p_interval   = list(map(int, args.interval.split(',')))
+    p_mode       = args.mode
+    p_metric     = args.metric
+    p_custom     = args.custom
+
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
 
-    # load of model file
-    model = joblib.load(p_model_file)
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+
+    if 'corr' in p_model_file:
+        corr_model = True
+
+        indices_corr_path = os.path.join(cfg.correlation_indices_folder, p_model_file.split('.')[0] + '.csv')
+
+        with open(indices_corr_path, 'r') as f:
+            data_corr_indices = [int(x) for x in f.readline().split(';') if x != '']
+    else:
+        corr_model = False
+
+
+    if kind_model == 'sklearn':
+        # load of model file
+        model = joblib.load(p_model_file)
+
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        metrics=['accuracy'])
 
     # load image
     img = Image.open(p_img_file)
@@ -71,7 +80,10 @@ def main():
     # check if custom min max file is used
     if p_custom:
 
-        test_data = data[begin:end]
+        if corr_model:
+            test_data = data[data_corr_indices]
+        else:
+            test_data = data[begin:end]
 
         if p_mode == 'svdne':
 
@@ -110,11 +122,19 @@ def main():
         else:
             l_values = data
 
-        test_data = l_values[begin:end]
+        if corr_model:
+            test_data = l_values[data_corr_indices]
+        else:
+            test_data = l_values[begin:end]
 
 
     # get prediction of model
-    prediction = model.predict([test_data])[0]
+    if kind_model == 'sklearn':
+        prediction = model.predict([test_data])[0]
+
+    if kind_model == 'keras':
+        test_data = test_data.reshape(len(test_data), 1)
+        prediction = model.predict_classes([test_data])[0]
 
     # output expected from others scripts
     print(prediction)
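
The script now dispatches on the model file name: '.joblib' loads a sklearn model through joblib, '.json' loads a keras model through model_from_json (weights in the matching '.h5'), and a 'corr' substring switches feature selection to the saved correlation indices. A hypothetical invocation of the keras/correlation branch, in the same subprocess style used elsewhere in this commit (all paths are placeholders):

    import subprocess

    cmd = ['python', 'predict_noisy_image_svd.py',
           '--image', 'scene_example/zone00/image_00900.png',
           '--interval', '0,24',
           '--model', 'saved_models/deep_keras_example_corr.json',
           '--mode', 'svdne',
           '--metric', 'lab',
           '--custom', 'example_min_max_filename']
    subprocess.Popen(cmd).wait()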

+ 1 - 1
prediction_scene.py

@@ -96,7 +96,7 @@ def main():
         y_noisy_pred = model.predict_classes(x_noisy_dataset)
         y_not_noisy_pred = model.predict_classes(x_not_noisy_dataset)
 
-    if kind_model == 'sklean':
+    if kind_model == 'sklearn':
         y_pred = model.predict(x_dataset)
         y_noisy_pred = model.predict(x_noisy_dataset)
         y_not_noisy_pred = model.predict(x_not_noisy_dataset)

+ 1 - 1
runAll_maxwell_area.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 1 - 1
runAll_maxwell_area_normed.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 1 - 1
runAll_maxwell_keras.sh

@@ -31,7 +31,7 @@ for metric in {"sub_blocks_stats","sub_blocks_stats_reduced","sub_blocks_area","
         for mode in {"svd","svdn","svdne"}; do
 
             end_index=${metrics_size[${metric}]}
-            FILENAME="data/data_maxwell_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 54 - 0
runAll_maxwell_keras_corr.sh

@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# erase "models_info/models_comparisons.csv" file and write new header
+file_path='models_info/models_comparisons.csv'
+
+erased=$1
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p models_info
+    touch ${file_path}
+
+    # add of header
+    echo 'model_name; vector_size; start; end; nb_zones; metric; mode; tran_size; val_size; test_size; train_pct_size; val_pct_size; test_pct_size; train_acc; val_acc; test_acc; all_acc; F1_train; recall_train; roc_auc_train; F1_val; recall_val; roc_auc_val; F1_test; recall_test; roc_auc_test; F1_all; recall_all; roc_auc_all;' >> ${file_path}
+
+
+fi
+
+start_index=0
+end_index=24
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+
+                    FILENAME="data/deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                    MODEL_NAME="deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+
+                    echo $FILENAME
+
+                    # only compute if necessary (perhaps server will fall.. Just in case)
+                    if grep -q "${MODEL_NAME}" "${file_path}"; then
+
+                        echo "${MODEL_NAME} results already generated..."
+                    else
+                        python generate_data_model_corr_random.py --output ${FILENAME} --n ${size} --highest ${highest} --label ${label} --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1
+                        python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${size}
+
+                        # use of interval but it is not really an interval..
+                        python save_model_result_in_md_maxwell.py --interval "${start_index},${size}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+                    fi
+                done
+            done
+        done
+    done
+done
+

+ 1 - 1
runAll_maxwell_sub_blocks_stats.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 1 - 1
runAll_maxwell_sub_blocks_stats_reduced.sh

@@ -30,7 +30,7 @@ for nb_zones in {4,6,8,10,12}; do
     for mode in {"svd","svdn","svdne"}; do
         for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-            FILENAME="data/data_maxwell_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            FILENAME="data/${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
             MODEL_NAME="${model}_N${number}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
 
             echo $FILENAME

+ 1 - 1
run_maxwell_simulation.sh

@@ -31,7 +31,7 @@ for size in {"4","8","16","26","32","40"}; do
                  for mode in {"svd","svdn","svdne"}; do
                      for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-                        FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                        FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
 
                         if grep -xq "${MODEL_NAME}" "${simulate_models}"; then

+ 1 - 1
run_maxwell_simulation_custom.sh

@@ -31,7 +31,7 @@ for size in {"4","8","16","26","32","40"}; do
                  for mode in {"svd","svdn","svdne"}; do
                      for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
 
-                        FILENAME="data/data_maxwell_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
+                        FILENAME="data/${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         MODEL_NAME="${model}_N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}"
                         CUSTOM_MIN_MAX_FILENAME="N${size}_B${start}_E${end}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
 

+ 43 - 0
run_maxwell_simulation_keras_corr_custom.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models_keras_corr.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+metric="lab"
+
+# index bounds mirrored from runAll_maxwell_keras_corr.sh (the original leaves them unset)
+start_index=0
+end_index=24
+
+for label in {"0","1"}; do
+    for highest in {"0","1"}; do
+        for nb_zones in {4,6,8,10,12}; do
+            for size in {5,10,15,20,25,30,35,40}; do
+                for mode in {"svd","svdn","svdne"}; do
+
+                    FILENAME="data/deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+                    MODEL_NAME="deep_keras_N${size}_B${start_index}_E${size}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}"
+
+
+                    CUSTOM_MIN_MAX_FILENAME="N${size}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}_corr_L${label}_H${highest}_min_max"
+
+                    if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
+                        echo "Run simulation for model ${MODEL_NAME}"
+
+                        # by default regenerate model
+                        python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                        python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${size}
+
+                        python predict_seuil_expe_maxwell_curve.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                        python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+
+                    fi
+                done
+            done
+        done
+    done
+done

+ 38 - 0
run_maxwell_simulation_keras_custom.sh

@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models_keras.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+start_index=0
+declare -A metrics_size=( ["sub_blocks_stats"]=24 ["sub_blocks_stats_reduced"]=20 ["sub_blocks_area"]=16 ["sub_blocks_area_normed"]=20)
+
+for metric in {"sub_blocks_stats","sub_blocks_stats_reduced","sub_blocks_area","sub_blocks_area_normed"}; do
+    for nb_zones in {4,6,8,10,12}; do
+
+        for mode in {"svd","svdn","svdne"}; do
+
+            end_index=${metrics_size[${metric}]}
+            FILENAME="data/deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+            MODEL_NAME="deep_keras_N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}"
+
+            CUSTOM_MIN_MAX_FILENAME="N${end_index}_B${start_index}_E${end_index}_nb_zones_${nb_zones}_${metric}_${mode}_min_max"
+
+            if grep -xq "${MODEL_NAME}" "${simulate_models}"; then
+                echo "Run simulation for model ${MODEL_NAME}"
+
+                # by default regenerate model
+                python generate_data_model_random.py --output ${FILENAME} --interval "${start_index},${end_index}" --kind ${mode} --metric ${metric} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 40 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                python deep_network_keras_svd.py --data ${FILENAME} --output ${MODEL_NAME} --size ${end_index}
+
+                python predict_seuil_expe_maxwell_curve.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric} --limit_detection '2' --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                python save_model_result_in_md_maxwell.py --interval "${start_index},${end_index}" --model "saved_models/${MODEL_NAME}.json" --mode "${mode}" --metric ${metric}
+
+            fi
+        done
+    done
+done

+ 2 - 1
save_model_result_in_md_maxwell.py

@@ -138,7 +138,8 @@ def main():
     # reconstruct data filename
     for name in models_name:
         if name in current_model_name:
-            current_data_file_path = os.path.join('data', current_model_name.replace(name, 'data_maxwell'))
+            data_filename = current_model_name
+            current_data_file_path = os.path.join('data', data_filename)
 
     print("Current data file ")
     print(current_data_file_path)

+ 1 - 8
simulate_models.csv

@@ -1,8 +1 @@
-ensemble_model_v2_N40_B0_E40_nb_zones_12_low_bits_2_svd
-ensemble_model_v2_N32_B0_E32_nb_zones_12_low_bits_2_svdne
-ensemble_model_N40_B0_E40_nb_zones_12_low_bits_3_svd
-svm_model_N8_B46_E54_nb_zones_12_low_bits_2_svd
-svm_model_N8_B96_E104_nb_zones_4_lab_svdne
-ensemble_model_v2_N8_B0_E8_nb_zones_12_low_bits_2_svdne
-svm_model_N32_B0_E32_nb_zones_12_low_bits_2_svdn
-ensemble_model_v2_N32_B0_E32_nb_zones_12_low_bits_2_svdn
+ensemble_model_v2_N4_B0_E4_nb_zones_12_lab_svdne