Browse Source

Dataset generation updates

Jérôme BUISINE 4 months ago
parent
commit
a8ebfc08e9
4 changed files with 351 additions and 96 deletions
  1. 8 5
      cnn_models.py
  2. 246 0
      generate/generate_dataset_file.py
  3. 91 89
      generate/generate_reconstructed_folder.py
  4. 6 2
      train_model.py

+ 8 - 5
cnn_models.py

@@ -14,7 +14,7 @@ import tensorflow as tf
 sys.path.insert(0, '') # trick to enable import of main folder module
 
 import custom_config as cfg
-from models import metrics
+#from models import metrics
 
 
 def generate_model_2D(_input_shape, _weights_file=None):
@@ -69,7 +69,8 @@ def generate_model_2D(_input_shape, _weights_file=None):
 
     model.compile(loss='categorical_crossentropy',
                   optimizer='adam',
-                  metrics=['accuracy', metrics.auc])
+                  #metrics=['accuracy', metrics.auc])
+                  metrics=['accuracy'])
 
     return model
 
@@ -128,7 +129,8 @@ def generate_model_3D(_input_shape, _weights_file=None):
 
     model.compile(loss='categorical_crossentropy',
                   optimizer='rmsprop',
-                  metrics=['accuracy', metrics.auc])
+                  #metrics=['accuracy', metrics.auc])
+                  metrics=['accuracy'])
 
     return model
 
@@ -203,7 +205,8 @@ def generate_model_3D_TL(_input_shape, _weights_file=None):
 
     model_final.compile(loss='binary_crossentropy',
                   optimizer='rmsprop',
-                  metrics=['accuracy', metrics.auc])
+                #   metrics=['accuracy', metrics.auc])
+                  metrics=['accuracy'])
 
     return model_final
 
@@ -219,5 +222,5 @@ def get_model(n_channels, _input_shape, _tl=False, _weights_file=None):
     if n_channels == 1:
         return generate_model_2D(_input_shape, _weights_file)
 
-    if n_channels == 3:
+    if n_channels >= 2:
         return generate_model_3D(_input_shape, _weights_file)

+ 246 - 0
generate/generate_dataset_file.py

@@ -0,0 +1,246 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jun 19 11:47:42 2019
+
+@author: jbuisine
+"""
+
+# main imports
+import sys, os, argparse
+import numpy as np
+import random
+
+# images processing imports
+from PIL import Image
+from ipfml.processing.segmentation import divide_in_blocks
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config  as cfg
+from modules.utils import data as dt
+from modules.classes.Transformation import Transformation
+
+# getting configuration information (these three values are not referenced anywhere else in this file)
+zone_folder             = cfg.zone_folder
+learned_folder          = cfg.learned_zones_folder
+min_max_filename        = cfg.min_max_filename_extension
+
+# define all scenes values (none of these is read by the functions of this file)
+scenes_list             = cfg.scenes_names
+scenes_indices          = cfg.scenes_indices
+dataset_path            = cfg.dataset_path
+zones                   = cfg.zones_indices  # shadowed by a local `zones` in generate_data_model
+seuil_expe_filename     = cfg.seuil_expe_filename  # "seuil" is French for "threshold"
+
+features_choices        = cfg.features_choices_labels  # allowed values of `--features`, checked in main()
+output_data_folder      = cfg.output_datasets  # root folder of the generated `.train` / `.val` files
+
+generic_output_file_svd = '_random.csv'  # not referenced anywhere else in this file
+
+def generate_data_model(_filename, _transformations, _dataset_folder, _selected_zones):
+    """Build the shuffled `.train` and `.val` dataset files of a model.
+
+    Every generated line has the form `<label>;<path_1>::<path_2>::...` with
+    one image path per transformation. `<label>` is `1` when the label folder
+    is `cfg.noisy_folder` and `0` otherwise.
+
+    Args:
+        _filename: dataset name; the files are written to
+            `<output_data_folder>/<_filename>/<_filename>.train` and `.val`.
+        _transformations: list of Transformation objects. The first one must
+            not be `static`: its image list drives the generation of lines.
+        _dataset_folder: folder which contains one sub folder per scene.
+        _selected_zones: dict mapping a scene folder name to the zone indices
+            kept for training; lines of every other zone go to the `.val` file.
+    """
+
+    output_folder = os.path.join(output_data_folder, _filename)
+    output_train_filename = os.path.join(output_folder, _filename + ".train")
+    output_test_filename = os.path.join(output_folder, _filename + ".val")
+
+    # create output folder if it does not exist yet
+    os.makedirs(output_folder, exist_ok=True)
+
+    train_file_data = []
+    test_file_data  = []
+
+    # specific number of zones (zones indices); this local value is the one
+    # used below, not the module level `zones`
+    zones = np.arange(16)
+
+    # go through each selected scene
+    for folder_scene, train_zones in _selected_zones.items():
+
+        scene_path = os.path.join(_dataset_folder, folder_scene)
+
+        for id_zone, index_folder in enumerate(zones):
+
+            # zone folders are named with a two digits index (`zone00`, `zone01`, ...)
+            current_zone_folder = "zone" + str(index_folder).zfill(2)
+            zone_path = os.path.join(scene_path, current_zone_folder)
+
+            # one folder per transformation, in the same order as `_transformations`
+            features_path = []
+
+            for transformation in _transformations:
+
+                # check if it's a static content and create augmented images if necessary
+                if transformation.getName() == 'static':
+
+                    # {sceneName}/zoneXX/static
+                    static_metric_path = os.path.join(zone_path, transformation.getName())
+
+                    # img.png
+                    image_name = transformation.getParam().split('/')[-1]
+
+                    # {sceneName}/zoneXX/static/img
+                    image_prefix_name = image_name.replace('.png', '')
+                    image_folder_path = os.path.join(static_metric_path, image_prefix_name)
+
+                    os.makedirs(image_folder_path, exist_ok=True)
+
+                    features_path.append(image_folder_path)
+
+                    # get image path to manage
+                    # {sceneName}/static/img.png
+                    transform_image_path = os.path.join(scene_path, transformation.getName(), image_name)
+                    static_transform_image = Image.open(transform_image_path)
+
+                    # keep only the block of the current zone
+                    static_transform_image_block = divide_in_blocks(static_transform_image, cfg.sub_image_size)[id_zone]
+
+                    # presumably writes the augmented versions of the block into
+                    # `image_folder_path` (helper not visible here, to confirm)
+                    dt.augmented_data_image(static_transform_image_block, image_folder_path, image_prefix_name)
+
+                else:
+                    metric_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
+                    features_path.append(metric_interval_path)
+
+            # as labels are same for each metric, labels of the first one are used
+            for label in os.listdir(features_path[0]):
+
+                label_features_path = [os.path.join(path, label) for path in features_path]
+
+                # getting images list for each metric
+                features_images_list = []
+
+                for index_metric, label_path in enumerate(label_features_path):
+
+                    if _transformations[index_metric].getName() == 'static':
+                        # by default append nothing: static paths are rebuilt
+                        # from the post fix of the first transformation image
+                        features_images_list.append([])
+                    else:
+                        features_images_list.append(sorted(os.listdir(label_path)))
+
+                # label does not depend on the image, compute it only once
+                label_prefix = '1;' if label == cfg.noisy_folder else '0;'
+
+                # construct each line using all images path of each metric
+                for index_image, first_image_name in enumerate(features_images_list[0]):
+
+                    images_path = []
+
+                    # get information about rotation and flip from first transformation (need to be a not static transformation)
+                    current_post_fix = first_image_name.split(cfg.post_image_name_separator)[-1]
+
+                    # getting images with same index and hence name for each metric (transformation)
+                    for index_metric, current_transformation in enumerate(_transformations):
+
+                        # custom behavior for static transformation (need to check specific image)
+                        if current_transformation.getName() == 'static':
+                            # add static path with selecting correct data augmented image
+                            image_name = current_transformation.getParam().split('/')[-1].replace('.png', '')
+                            img_path = os.path.join(features_path[index_metric], image_name + cfg.post_image_name_separator + current_post_fix)
+                            images_path.append(img_path)
+                        else:
+                            img_path = features_images_list[index_metric][index_image]
+                            images_path.append(os.path.join(label_features_path[index_metric], img_path))
+
+                    # line information with all images paths separated by `::`
+                    line = label_prefix + '::'.join(images_path) + '\n'
+
+                    if id_zone in train_zones:
+                        train_file_data.append(line)
+                    else:
+                        test_file_data.append(line)
+
+    random.shuffle(train_file_data)
+    random.shuffle(test_file_data)
+
+    # `with` ensures files are closed even if a write fails
+    with open(output_train_filename, 'w') as train_file:
+        train_file.writelines(train_file_data)
+
+    with open(output_test_filename, 'w') as test_file:
+        test_file.writelines(test_file_data)
+
+def main():
+    """Parse command line arguments and generate the dataset files.
+
+    Reads the selected zones file (one line per scene, expected format
+    `scene_name;zone;zone;...;`), builds one Transformation per requested
+    feature and delegates the generation to `generate_data_model`.
+
+    Raises:
+        ValueError: if a feature is unknown, if fewer `--params` groups than
+            features are given, or if the first transformation is `static`.
+    """
+
+    parser = argparse.ArgumentParser(description="Compute specific dataset for model using of metric")
+
+    # output is required: it is used to build the output folder and file names
+    parser.add_argument('--output', type=str, help='output file name desired (.train and .val)', required=True)
+    parser.add_argument('--features', type=str,
+                                     help="list of features choice in order to compute data",
+                                     default='svd_reconstruction, ipca_reconstruction',
+                                     required=True)
+    parser.add_argument('--folder', type=str,
+                        help='folder where generated data are available',
+                        required=True)
+    parser.add_argument('--params', type=str,
+                                    help="list of specific param for each metric choice (See README.md for further information in 3D mode)",
+                                    default='100, 200 :: 50, 25',
+                                    required=True)
+    parser.add_argument('--size', type=str,
+                                  help="Size of input images",
+                                  default="100, 100")
+    parser.add_argument('--selected_zones', type=str, help='file which contains all selected zones of scene', required=True)
+
+    args = parser.parse_args()
+
+    p_filename   = args.output
+    p_folder     = args.folder
+    p_features   = list(map(str.strip, args.features.split(',')))
+    p_params     = list(map(str.strip, args.params.split('::')))
+    p_size       = args.size # not necessary to split here
+    p_selected_zones = args.selected_zones
+
+    selected_zones = {}
+    with open(p_selected_zones, 'r') as f:
+
+        for line in f:
+
+            # drop empty fields: the one after the trailing `;` and blank lines
+            data = [ field.strip() for field in line.split(';') if field.strip() ]
+
+            if not data:
+                continue
+
+            scene_name = data[0]
+            zones_indices = data[1:]
+
+            selected_zones[scene_name] = [ int(z) for z in zones_indices ]
+
+    # each feature needs its own param group (groups are separated by `::`)
+    if len(p_params) < len(p_features):
+        raise ValueError("Expected one param group per feature: {0} feature(s) but {1} param group(s)".format(len(p_features), len(p_params)))
+
+    # create list of Transformation
+    transformations = []
+
+    for index, feature in enumerate(p_features):
+
+        if feature not in features_choices:
+            raise ValueError("Unknown metric {0}, please select a correct metric : {1}".format(feature, features_choices))
+
+        transformations.append(Transformation(feature, p_params[index], p_size))
+
+    # images of the first transformation drive the generation of dataset lines
+    if transformations[0].getName() == 'static':
+        raise ValueError("The first transformation in list cannot be static")
+
+
+    # create database using img folder (generate first time only)
+    generate_data_model(p_filename, transformations, p_folder, selected_zones)
+
+if __name__ == "__main__":
+    main()

+ 91 - 89
generate/generate_reconstructed_folder.py

@@ -64,128 +64,130 @@ def generate_data(transformation, _dataset_path, _output, _human_thresholds, _re
 
         print('Scene {0} of {1} ({2})'.format((id_scene + 1), n_scenes, folder_scene))
         scene_path = os.path.join(_dataset_path, folder_scene)
-        output_scene_path = os.path.join(_output, folder_scene)
+        output_scene_path = os.path.join(cfg.output_data_generated, _output, folder_scene)
 
         # construct each zones folder name
         zones_folder = []
         features_folder = []
-        zones_threshold = _human_thresholds[folder_scene]
 
-        # get zones list info
-        for index in zones:
-            index_str = str(index)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
+        if folder_scene in _human_thresholds:
 
-            current_zone = "zone"+index_str
-            zones_folder.append(current_zone)
-            zone_path = os.path.join(output_scene_path, current_zone)
+            zones_threshold = _human_thresholds[folder_scene]
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
 
-            # custom path for feature
-            feature_path = os.path.join(zone_path, transformation.getName())
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+                zone_path = os.path.join(output_scene_path, current_zone)
 
-            if not os.path.exists(feature_path):
-                os.makedirs(feature_path)
+                # custom path for feature
+                feature_path = os.path.join(zone_path, transformation.getName())
 
-            # custom path for interval of reconstruction and feature
-            feature_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
-            features_folder.append(feature_interval_path)
+                if not os.path.exists(feature_path):
+                    os.makedirs(feature_path)
 
-            if not os.path.exists(feature_interval_path):
-                os.makedirs(feature_interval_path)
+                # custom path for interval of reconstruction and feature
+                feature_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
+                features_folder.append(feature_interval_path)
 
-            # create for each zone the labels folder
-            labels = [cfg.not_noisy_folder, cfg.noisy_folder]
+                if not os.path.exists(feature_interval_path):
+                    os.makedirs(feature_interval_path)
 
-            for label in labels:
-                label_folder = os.path.join(feature_interval_path, label)
+                # create for each zone the labels folder
+                labels = [cfg.not_noisy_folder, cfg.noisy_folder]
 
-                if not os.path.exists(label_folder):
-                    os.makedirs(label_folder)
+                for label in labels:
+                    label_folder = os.path.join(feature_interval_path, label)
 
-        # get all images of folder
-        scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
-        number_scene_image = len(scene_images)
+                    if not os.path.exists(label_folder):
+                        os.makedirs(label_folder)
 
-        # for each images
-        for id_img, img_path in enumerate(scene_images):
+            # get all images of folder
+            scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+            number_scene_image = len(scene_images)
 
-            current_img = Image.open(img_path)
-            img_blocks = divide_in_blocks(current_img, cfg.sub_image_size)
+            # for each images
+            for id_img, img_path in enumerate(scene_images):
 
-            current_quality_index = int(get_scene_image_quality(img_path))
+                current_img = Image.open(img_path)
+                img_blocks = divide_in_blocks(current_img, cfg.sub_image_size)
 
-            for id_block, block in enumerate(img_blocks):
+                current_quality_index = int(get_scene_image_quality(img_path))
 
-                ##########################
-                # Image computation part #
-                ##########################
+                for id_block, block in enumerate(img_blocks):
 
-                label_path = features_folder[id_block]
+                    ##########################
+                    # Image computation part #
+                    ##########################
 
-                # get label folder for block
-                if current_quality_index > zones_threshold[id_block]:
-                    label_path = os.path.join(label_path, cfg.not_noisy_folder)
-                else:
-                    label_path = os.path.join(label_path, cfg.noisy_folder)
+                    label_path = features_folder[id_block]
 
-                # check if necessary to compute or not images
-                # Data augmentation!
-                rotations = [0, 90, 180, 270]
+                    # get label folder for block
+                    if current_quality_index > zones_threshold[id_block]:
+                        label_path = os.path.join(label_path, cfg.not_noisy_folder)
+                    else:
+                        label_path = os.path.join(label_path, cfg.noisy_folder)
 
-                #img_flip_labels = ['original', 'horizontal', 'vertical', 'both']
-                img_flip_labels = ['original', 'horizontal']
+                    # check if necessary to compute or not images
+                    # Data augmentation!
+                    rotations = [0, 90, 180, 270]
 
-                output_images_path = []
-                check_path_exists = []
-                # rotate and flip image to increase dataset size
-                for id, flip_label in enumerate(img_flip_labels):
-                    for rotation in rotations:
-                        output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block] + cfg.post_image_name_separator
-                        output_reconstructed_filename = output_reconstructed_filename + flip_label + '_' + str(rotation) + '.png'
-                        output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
+                    #img_flip_labels = ['original', 'horizontal', 'vertical', 'both']
+                    img_flip_labels = ['original', 'horizontal']
 
-                        if os.path.exists(output_reconstructed_path):
-                            check_path_exists.append(True)
-                        else:
-                            check_path_exists.append(False)
+                    output_images_path = []
+                    check_path_exists = []
+                    # rotate and flip image to increase dataset size
+                    for id, flip_label in enumerate(img_flip_labels):
+                        for rotation in rotations:
+                            output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block] + cfg.post_image_name_separator
+                            output_reconstructed_filename = output_reconstructed_filename + flip_label + '_' + str(rotation) + '.png'
+                            output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
 
-                        output_images_path.append(output_reconstructed_path)
+                            if os.path.exists(output_reconstructed_path):
+                                check_path_exists.append(True)
+                            else:
+                                check_path_exists.append(False)
 
-                # compute only if not exists or necessary to replace
-                if _replace or not np.array(check_path_exists).all():
-                    # compute image
-                    # pass block to grey level
-                    output_block = transformation.getTransformedImage(block)
-                    output_block = np.array(output_block, 'uint8')
-                    
-                    # current output image
-                    output_block_img = Image.fromarray(output_block)
+                            output_images_path.append(output_reconstructed_path)
 
-                    #horizontal_img = output_block_img.transpose(Image.FLIP_LEFT_RIGHT)
-                    #vertical_img = output_block_img.transpose(Image.FLIP_TOP_BOTTOM)
-                    #both_img = output_block_img.transpose(Image.TRANSPOSE)
+                    # compute only if not exists or necessary to replace
+                    if _replace or not np.array(check_path_exists).all():
+                        # compute image
+                        # pass block to grey level
+                        output_block = transformation.getTransformedImage(block)
+                        output_block = np.array(output_block, 'uint8')
+                        
+                        # current output image
+                        output_block_img = Image.fromarray(output_block)
 
-                    #flip_images = [output_block_img, horizontal_img, vertical_img, both_img]
-                    #flip_images = [output_block_img, horizontal_img]
+                        #horizontal_img = output_block_img.transpose(Image.FLIP_LEFT_RIGHT)
+                        #vertical_img = output_block_img.transpose(Image.FLIP_TOP_BOTTOM)
+                        #both_img = output_block_img.transpose(Image.TRANSPOSE)
 
-                    # Only current image img currenlty
-                    flip_images = [output_block_img]
+                        #flip_images = [output_block_img, horizontal_img, vertical_img, both_img]
+                        #flip_images = [output_block_img, horizontal_img]
 
-                    # rotate and flip image to increase dataset size
-                    counter_index = 0 # get current path index
-                    for id, flip in enumerate(flip_images):
-                        for rotation in rotations:
+                        # Only current image img currenlty
+                        flip_images = [output_block_img]
+
+                        # rotate and flip image to increase dataset size
+                        counter_index = 0 # get current path index
+                        for id, flip in enumerate(flip_images):
+                            for rotation in rotations:
 
-                            if _replace or not check_path_exists[counter_index]:
-                                rotated_output_img = flip.rotate(rotation)
-                                rotated_output_img.save(output_images_path[counter_index])
+                                if _replace or not check_path_exists[counter_index]:
+                                    rotated_output_img = flip.rotate(rotation)
+                                    rotated_output_img.save(output_images_path[counter_index])
 
-                            counter_index +=1
+                                counter_index +=1
 
-            write_progress((id_img + 1) / number_scene_image)
+                write_progress((id_img + 1) / number_scene_image)
 
-        print('\n')
+            print('\n')
 
     print("{0}_{1} : end of data generation\n".format(transformation.getName(), transformation.getParam()))
 
@@ -231,8 +233,8 @@ def main():
     for id, feature in enumerate(p_features):
 
         if feature not in features_choices or feature == 'static':
-            raise ValueError("Unknown feature, please select a correct feature (`static` excluded) : ", features_choices)
-
+            raise ValueError("Unknown feature {0}, please select a correct feature (`static` excluded) : {1}".format(feature, features_choices))
+        
         transformations.append(Transformation(feature, p_params[id], p_size))
 
     human_thresholds = {}

+ 6 - 2
train_model.py

@@ -34,6 +34,7 @@ def main():
     parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=cfg.keras_epochs)
     parser.add_argument('--balancing', type=int, help='specify if balacing of classes is done or not', default="1")
     parser.add_argument('--chanels', type=int, help="given number of chanels if necessary", default=0)
+    parser.add_argument('--size', type=str, help="Size of input images", default="100, 100")
     #parser.add_argument('--val_size', type=float, help='percent of validation data during training process', default=cfg.val_dataset_size)
 
 
@@ -46,10 +47,11 @@ def main():
     p_epochs      = args.epochs
     p_balancing   = bool(args.balancing)
     p_chanels     = args.chanels
+    p_size        = args.size.split(',')
 
     #p_val_size    = args.val_size
     initial_epoch = 0
-        
+
     ########################
     # 1. Get and prepare data
     ########################
@@ -73,7 +75,8 @@ def main():
         n_chanels = p_chanels
 
     print("Number of chanels : ", n_chanels)
-    img_width, img_height = cfg.keras_img_size
+    img_width, img_height = [ int(s) for s in p_size ]
+    print(img_width, img_height)
 
     # specify the number of dimensions
     if K.image_data_format() == 'chanels_first':
@@ -208,6 +211,7 @@ def main():
         # load weights
         weights_filepath = os.path.join(model_backup_folder, last_model_backup)
 
+    print(n_chanels)
     model = models.get_model(n_chanels, input_shape, p_tl, weights_filepath)
     model.summary()