Parcourir la source

Add of SVD generation data files

Jérôme BUISINE il y a 5 ans
Parent
commit
748f06528a
4 fichiers modifiés avec 543 ajouts et 7 suppressions
  1. 230 0
      generate_all_data.py
  2. 281 0
      generate_data_model_random.py
  3. BIN
      image_test.png
  4. 32 7
      modules/utils/config.py

+ 230 - 0
generate_all_data.py

@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+import numpy as np
+import random
+import time
+import json
+
+from modules.utils.data_type import get_svd_data
+from PIL import Image
+from ipfml import processing
+from ipfml import metrics
+from skimage import color
+
+from modules.utils import config as cfg
+
+# getting configuration information
+zone_folder             = cfg.zone_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list             = cfg.maxwell_scenes_folders
+scenes_indexes          = cfg.scenes_indices
+choices                 = cfg.normalization_choices
+path                    = cfg.generated_folder
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+
+end_counter_index       = cfg.default_number_of_images
+
+generic_output_file_svd = '_random.csv'
+picture_step            = 10
+
+# avoid calibration data ?
+calibration_folder      = 'calibration'
+
def generate_data_svd(data_type, mode):
    """
    Generate all .csv SVD data files for every scene found in the
    generated images folder.

    @param data_type: metric choice (one of cfg.metric_choices_labels)
    @param mode: normalization choice ('svd', 'svdn' or 'svdne')
    @return nothing (writes one csv per zone; in 'svd' mode also writes
            the global min/max file used later by 'svdne')
    """

    scenes = os.listdir(path)

    # skip calibration data folders
    scenes = [s for s in scenes if calibration_folder not in s]

    # remove min max file from scenes folder
    scenes = [s for s in scenes if min_max_filename not in s]

    # keep in memory min and max data found from data_type
    min_val_found = sys.maxsize
    max_val_found = 0

    data_min_max_filename = os.path.join(path, data_type + min_max_filename)

    # 'svdne' normalizes with the bounds computed by a previous 'svd' run;
    # read them once here instead of once per image block (hoisted out of
    # the inner loop — the file content is invariant for the whole call)
    if mode == 'svdne':
        with open(data_min_max_filename, 'r') as f:
            min_val = float(f.readline())
            max_val = float(f.readline())

    # go through each scene
    for id_scene, folder_scene in enumerate(scenes):

        print(folder_scene)
        scene_path = os.path.join(path, folder_scene)

        # getting output filename
        output_svd_filename = data_type + "_" + mode + generic_output_file_svd

        # construct each zones folder name
        zones_folder = []
        svd_output_files = []

        # get zones list info
        for index in zones:
            index_str = str(index)
            if len(index_str) < 2:
                index_str = "0" + index_str

            current_zone = "zone" + index_str
            zones_folder.append(current_zone)

            zone_path = os.path.join(scene_path, current_zone)

            if not os.path.exists(zone_path):
                os.makedirs(zone_path)

            svd_file_path = os.path.join(zone_path, output_svd_filename)

            # add writer into list
            svd_output_files.append(open(svd_file_path, 'w'))

        counter_index = 1

        while counter_index <= end_counter_index:

            if counter_index % picture_step == 0:
                counter_index_str = str(counter_index)

                # fixed NameError: previous code referenced the undefined
                # name 'forlder_scene' (typo for 'folder_scene')
                img_path = os.path.join(scene_path, folder_scene + "_" + counter_index_str + ".png")

                current_img = Image.open(img_path)
                img_blocks = processing.divide_in_blocks(current_img, (200, 200))

                for id_block, block in enumerate(img_blocks):

                    ###########################
                    # Metric computation part #
                    ###########################

                    data = get_svd_data(data_type, block)

                    ##################
                    # Data mode part #
                    ##################

                    # modify data depending on mode
                    if mode == 'svdne':
                        data = processing.normalize_arr_with_range(data, min_val, max_val)

                    if mode == 'svdn':
                        data = processing.normalize_arr(data)

                    # save min and max found from dataset in order to
                    # normalize data later using the whole data known
                    if mode == 'svd':

                        current_min = data.min()
                        current_max = data.max()

                        if current_min < min_val_found:
                            min_val_found = current_min

                        if current_max > max_val_found:
                            max_val_found = current_max

                    # now write data into current writer
                    current_file = svd_output_files[id_block]

                    # add of index
                    current_file.write(counter_index_str + ';')

                    for val in data:
                        current_file.write(str(val) + ";")

                    current_file.write('\n')

            # NOTE: removed dead line 'start_index_image_int = int(start_index_image)'
            # which referenced an undefined name and whose result was never used
            print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((counter_index) / (end_counter_index) * 100.) + "%")
            sys.stdout.write("\033[F")

            counter_index += 1

        for f in svd_output_files:
            f.close()

        print('\n')

    # save current information about min/max values found over the dataset
    if mode == 'svd':
        with open(data_min_max_filename, 'w') as f:
            f.write(str(min_val_found) + '\n')
            f.write(str(max_val_found) + '\n')

    print("%s : end of data generation\n" % mode)
+
+
def main():
    """
    Command-line entry point: parse --metric / --step options and run
    generate_data_svd for the requested metric(s) in all three modes.
    """

    # default values when options are omitted; previously p_metric was
    # unbound (NameError) when --metric was not supplied
    p_step = 10
    p_metric = 'all'

    if len(sys.argv) <= 1:
        print('Run with default parameters...')
        print('python generate_all_data.py --metric all')
        print('python generate_all_data.py --metric lab')
        print('python generate_all_data.py --metric lab --step 10')
        sys.exit(2)
    try:
        # fixed short-option spec: '-m' and '-s' take a value, so they
        # need a trailing ':' (was "hms")
        opts, args = getopt.getopt(sys.argv[1:], "hm:s:", ["help=", "metric=", "step="])
    except getopt.GetoptError:
        # print help information and exit:
        print('python generate_all_data.py --metric all --step 10')
        sys.exit(2)
    for o, a in opts:
        if o == "-h":
            print('python generate_all_data.py --metric all --step 10')
            sys.exit()
        elif o in ("-s", "--step"):
            p_step = int(a)
        elif o in ("-m", "--metric"):
            p_metric = a

            # validate with an explicit exit instead of assert
            # (asserts are stripped under python -O)
            if p_metric != 'all' and p_metric not in metric_choices:
                print("Invalid metric choice")
                sys.exit(2)
        else:
            print("unhandled option")
            sys.exit(2)

    global picture_step
    picture_step = p_step

    if picture_step % 10 != 0:
        print("Picture step variable needs to be divided by ten")
        sys.exit(2)

    # generate all or specific metric data
    if p_metric == 'all':
        for m in metric_choices:
            generate_data_svd(m, 'svd')
            generate_data_svd(m, 'svdn')
            generate_data_svd(m, 'svdne')
    else:
        generate_data_svd(p_metric, 'svd')
        generate_data_svd(p_metric, 'svdn')
        generate_data_svd(p_metric, 'svdne')

if __name__ == "__main__":
    main()

+ 281 - 0
generate_data_model_random.py

@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 14 21:02:42 2018
+
+@author: jbuisine
+"""
+
+from __future__ import print_function
+import sys, os, getopt
+import numpy as np
+import random
+import time
+import json
+
+from PIL import Image
+from ipfml import processing, metrics
+
+from modules.utils import config as cfg
+
+# getting configuration information
+zone_folder             = cfg.zone_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values
+scenes_list             = cfg.scenes_folders
+scenes_indexes          = cfg.scenes_indices
+choices                 = cfg.normalization_choices
+path                    = cfg.dataset_path
+zones                   = cfg.zones_indices
+seuil_expe_filename     = cfg.seuil_expe_filename
+
+metric_choices          = cfg.metric_choices_labels
+output_data_folder      = cfg.output_data_folder
+custom_min_max_folder   = cfg.min_max_custom_folder
+min_max_ext             = cfg.min_max_filename_extension
+
+calibration_folder      = 'calibration'
+generic_output_file_svd = '_random.csv'
+
+min_value_interval = sys.maxsize
+max_value_interval = 0
+
def construct_new_line(path_seuil, interval, line, norm, sep, index):
    """
    Build one libsvm-style output line from a raw ';'-separated data line.

    @param path_seuil: path of the file whose first line is the learned threshold
    @param interval: (begin, end) tuple selecting the metric columns
    @param line: raw input line; first field is the image index
    @param norm: if True, renormalize values with the module-level
           min_value_interval / max_value_interval bounds
    @param sep: separator written between feature index and value
    @param index: if True, prefix each value with its 1-based feature index
    @return the formatted line (label first), ending with a newline
    """
    begin, end = interval

    line_data = line.split(';')
    seuil = line_data[0]

    # renamed local from 'metrics' to 'values': it shadowed the
    # module-level 'ipfml.metrics' import
    values = [float(m) for m in line_data[begin+1:end+1]]

    # TODO : check if it's always necessary to do that (loss of information for svd)
    if norm:
        values = processing.normalize_arr_with_range(values, min_value_interval, max_value_interval)

    with open(path_seuil, "r") as seuil_file:
        seuil_learned = int(seuil_file.readline().strip())

    # label: '1' when the learned threshold is above this image's index
    if seuil_learned > int(seuil):
        line = '1'
    else:
        line = '0'

    for idx, val in enumerate(values):
        if index:
            line += " " + str(idx + 1)
        line += sep
        line += str(val)
    line += '\n'

    return line
+
def get_min_max_value_interval(_filename, _interval, _choice, _metric):
    """
    Scan every generated data file of the known synthesis scenes and store
    the global min/max values found over the selected metric interval into
    the module globals min_value_interval / max_value_interval.

    @param _filename: unused here (kept for call-site compatibility)
    @param _interval: (begin, end) tuple selecting the metric columns
    @param _choice: normalization kind used in the data file name
    @param _metric: metric label used in the data file name
    @return nothing (updates module globals)
    """

    global min_value_interval, max_value_interval

    scenes = os.listdir(path)

    # remove min max file from scenes folder
    scenes = [s for s in scenes if min_max_filename not in s]

    # remove calibration mire from images
    scenes = [s for s in scenes if calibration_folder not in s]

    # invariant for the whole scan — hoisted out of the per-line loop
    begin, end = _interval

    for id_scene, folder_scene in enumerate(scenes):

        # only take care of synthesis scenes
        if folder_scene not in scenes_list:
            continue

        scene_path = os.path.join(path, folder_scene)

        zones_folder = []
        # create zones list
        for index in zones:
            index_str = str(index)
            if len(index_str) < 2:
                index_str = "0" + index_str
            zones_folder.append("zone" + index_str)

        # shuffle list of zones (=> randomly choose zones)
        random.shuffle(zones_folder)

        for id_zone, zone_folder in enumerate(zones_folder):
            zone_path = os.path.join(scene_path, zone_folder)
            data_filename = _metric + "_" + _choice + generic_output_file_svd
            data_file_path = os.path.join(zone_path, data_filename)

            # fixed resource leak: the file handle was opened but never closed
            with open(data_file_path) as f:
                lines = f.readlines()

            for line in lines:
                # 'values' (not 'metrics') avoids shadowing the module import
                values = [float(m) for m in line.split(';')[begin+1:end+1]]

                min_value = min(values)
                max_value = max(values)

                if min_value < min_value_interval:
                    min_value_interval = min_value

                if max_value > max_value_interval:
                    max_value_interval = max_value
+
+
def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
    """
    Build .train/.test dataset files from the generated per-zone csv data.

    @param _filename: output prefix, must contain a directory part
    @param _interval: (begin, end) tuple selecting the metric columns
    @param _choice: normalization kind used in the data file name
    @param _metric: metric label used in the data file name
    @param _scenes: scene folders whose zones may feed the train set
    @param _nb_zones: number of shuffled zones per scene used for training
    @param _percent: max fraction of each zone's lines used for training
    @param _norm: forwarded to construct_new_line (renormalization)
    @param _sep: forwarded to construct_new_line (index/value separator)
    @param _index: forwarded to construct_new_line (write feature indexes)
    @return nothing (writes _filename.train and _filename.test)
    @raise Exception when _filename has no directory component
    """

    output_train_filename = _filename + ".train"
    output_test_filename = _filename + ".test"

    if not '/' in output_train_filename:
        raise Exception("Please select filename with directory path to save data. Example : data/dataset")

    # create path if not exists
    if not os.path.exists(output_data_folder):
        os.makedirs(output_data_folder)

    train_file = open(output_train_filename, 'w')
    test_file = open(output_test_filename, 'w')

    scenes = os.listdir(path)

    # remove min max file from scenes folder
    scenes = [s for s in scenes if min_max_filename not in s]

    for id_scene, folder_scene in enumerate(scenes):

        # only take care of known scenes
        if folder_scene in scenes_list:

            scene_path = os.path.join(path, folder_scene)

            zones_folder = []
            # create zones list
            for index in zones:
                index_str = str(index)
                if len(index_str) < 2:
                    index_str = "0" + index_str
                zones_folder.append("zone" + index_str)

            # shuffle list of zones (=> randomly choose zones)
            random.shuffle(zones_folder)

            for id_zone, zone_folder in enumerate(zones_folder):
                zone_path = os.path.join(scene_path, zone_folder)

                # fixed NameError: path_seuil was previously computed before
                # zone_path existed; the threshold file lives in each zone folder
                path_seuil = os.path.join(zone_path, seuil_expe_filename)

                data_filename = _metric + "_" + _choice + generic_output_file_svd
                data_file_path = os.path.join(zone_path, data_filename)

                # read all lines, closing the handle (previously leaked on
                # the last iteration paths)
                with open(data_file_path) as f:
                    lines = f.readlines()

                num_lines = len(lines)

                # read lines in random order
                lines_indexes = np.arange(num_lines)
                random.shuffle(lines_indexes)

                counter = 0
                for index in lines_indexes:
                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)

                    percent = counter / num_lines

                    # first _nb_zones shuffled zones of a selected scene feed
                    # the train set (up to _percent of lines); everything else
                    # goes to the test set
                    if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
                        train_file.write(line)
                    else:
                        test_file.write(line)

                    counter += 1

    train_file.close()
    test_file.close()
+
+
def main():
    """
    Command-line entry point: parse the dataset-generation options,
    optionally compute custom normalization bounds, then build the
    .train/.test files.
    """

    # defaults for optional parameters; p_sep and p_rowindex were
    # previously unbound (NameError) when their options were omitted
    p_custom = False
    p_sep = ':'
    p_rowindex = True

    if len(sys.argv) <= 1:
        print('Run with default parameters...')
        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
        sys.exit(2)
    try:
        # fixed getopt spec: added 'm:' (--metric was handled but had no
        # short option registered) and ':' after 'c' (--custom takes a value)
        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:m:s:n:p:r:c:", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
    except getopt.GetoptError:
        # print help information and exit:
        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
        sys.exit(2)
    for o, a in opts:
        if o == "-h":
            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
            sys.exit()
        elif o in ("-o", "--output"):
            p_filename = a
        elif o in ("-i", "--interval"):
            p_interval = list(map(int, a.split(',')))
        elif o in ("-k", "--kind"):
            p_kind = a
        elif o in ("-m", "--metric"):
            p_metric = a
        elif o in ("-s", "--scenes"):
            p_scenes = a.split(',')
        elif o in ("-n", "--nb_zones"):
            p_nb_zones = int(a)
        elif o in ("-p", "--percent"):
            p_percent = float(a)
        elif o == "--sep":
            # '-s' is already taken by --scenes, so --sep is long-form only
            # (the previous ("-s", "--sep") branch was unreachable for -s)
            p_sep = a
        elif o in ("-r", "--rowindex"):
            if int(a) == 1:
                p_rowindex = True
            else:
                p_rowindex = False
        elif o in ("-c", "--custom"):
            p_custom = a
        else:
            assert False, "unhandled option"

    # getting scenes from indexes user selection
    scenes_selected = []

    for scene_id in p_scenes:
        index = scenes_indexes.index(scene_id.strip())
        scenes_selected.append(scenes_list[index])

    # find min max value if necessary to renormalize data
    if p_custom:
        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)

        # write new file to save
        if not os.path.exists(custom_min_max_folder):
            os.makedirs(custom_min_max_folder)

        min_max_folder_path = os.path.join(os.path.dirname(__file__), custom_min_max_folder)
        min_max_filename_path = os.path.join(min_max_folder_path, p_custom)

        with open(min_max_filename_path, 'w') as f:
            f.write(str(min_value_interval) + '\n')
            f.write(str(max_value_interval) + '\n')

    # create database using img folder (generate first time only)
    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)

if __name__ == "__main__":
    main()

BIN
image_test.png


+ 32 - 7
modules/utils/config.py

@@ -1,7 +1,32 @@
-normalization_choices  = ['svd', 'svdn', 'svdne']
-metric_choices_labels  = ['lab', 'mscn', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2']
-image_kinds            = ['RGB', 'Grey']
-noise_labels           = ['cauchy', 'gaussian', 'laplace', 'log_normal', 'mut_white', 'salt_pepper', 'white']
-generated_folder       = 'generated'
-pictures_output_folder = 'curves_pictures'
-filename_ext           = 'png'
# Shared configuration constants for the data generation / model scripts.

# fixed: zones_indices below uses np.arange but numpy was never imported,
# making the module fail at import time
import numpy as np

zone_folder                     = "zone"
output_data_folder              = 'data'
threshold_map_folder            = 'threshold_map'
models_information_folder       = 'models_info'
saved_models_folder             = 'saved_models'
min_max_custom_folder           = 'custom_norm'
generated_folder                = 'generated'
pictures_output_folder          = 'curves_pictures'

csv_model_comparisons_filename  = "models_comparisons.csv"
seuil_expe_filename             = 'seuilExpe'
min_max_filename_extension      = "_min_max_values"
config_filename                 = "config"
filename_ext                    = 'png'
default_number_of_images        = 1000

models_names_list               = ["svm_model","ensemble_model","ensemble_model_v2"]

# define all scenes values
scenes_folders                  = ['appartAopt', 'bureau1', 'cendrierIUT2', 'cuisine01', 'echecs', 'pnd', 'Sdb2', 'Sdb2_D', 'selles_envir']
scenes_indices                  = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']

maxwell_scenes_folders          = ['appartAopt', 'cuisine01', 'Sdb2', 'Sdb2_D']
maxwell_scenes_indices          = ['A', 'D', 'G', 'H']

normalization_choices           = ['svd', 'svdn', 'svdne']
zones_indices                   = np.arange(16)

metric_choices_labels           = ['lab', 'mscn', 'mscn_revisited', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2']

# noise information
noise_labels                    = ['cauchy', 'gaussian', 'laplace', 'log_normal', 'mut_white', 'salt_pepper', 'white']