Browse Source

Update of whole project for calculco

Jérôme BUISINE 9 months ago
parent
commit
add1a54332

+ 29 - 1
README.md

@@ -51,9 +51,9 @@ ln -s /path/to/your/data dataset
   - keep model performance
   - run simulation (if necessary)
 - **others/\***: folders which contains others scripts such as script for getting performance of model on specific scene and write it into Mardown file.
-- **data_attributes.py**: files which contains all extracted features implementation from an image.
 - **custom_config.py**: override the main configuration project of `modules/config/global_config.py`
 - **train_model.py**: script which is used to run specific model available.
+- **prediction_model.py**: script which is used to run specific model with data in order to predict.
 
 ### Generated data directories:
 
@@ -124,6 +124,34 @@ All scripts named **prediction/predict_seuil_expe\*.py** are used to simulate mo
 
 Once you have simulation done. Checkout your **threshold_map/%MODEL_NAME%/simulation\_curves\_zones\_\*/** folder and use it with help of **display_simulation_curves.py** script.
 
+
+## Use with Calculco (OAR service)
+
+The `oar.example.sh` is an example of script to run in OAR platform.
+
+```
+oarsub -S oar.sh
+```
+
+Check your JOB_ID
+```
+oarstat
+```
+
+**Note:** Do not forget to create the symbolic links where necessary to log results
+
+```
+ln -s /where/to/store/your/data data
+ln -s /where/to/store/your/results/ results
+ln -s /where/to/store/your/models_info models_info
+ln -s /where/to/store/your/saved_models saved_models
+```
+
+or simply use this script:
+```
+bash generate_symlinks.sh /where/to/store/your
+```
+
 ## License
 
 [MIT](https://github.com/prise-3d/Thesis-NoiseDetection-CNN/blob/master/LICENSE)

+ 5 - 1
custom_config.py

@@ -13,15 +13,19 @@ backup_model_folder             = 'models_backup'
 
 # file or extensions
 
+perf_prediction_model_path      = 'predications_models_results.csv'
 ## post_image_name_separator       = '___'
 
 # variables
+perf_train_header_file          = "model_name;global_train_size;global_test_size;filtered_train_size;filtered_test_size;f1_train;f1_test;recall_train;recall_test;presicion_train;precision_test;acc_train;acc_test;roc_auc_train;roc_auc_test;\n"
+perf_prediction_header_file    = "data;data_size;model_name;accucary;f1;recall;precision;roc;\n"
 
 features_choices_labels         = ['static', 'svd_reconstruction', 'fast_ica_reconstruction', 'ipca_reconstruction', 'min_diff_filter']
 
 # parameters
 
-keras_epochs                    = 50
+sub_image_size                  = (200, 200)
+keras_epochs                    = 100
 ## keras_batch                     = 32
 ## val_dataset_size                = 0.2
 

+ 1 - 1
generate/generate_dataset.py

@@ -123,7 +123,7 @@ def generate_data_model(_scenes_list, _filename, _transformations, _scenes, _nb_
                     transform_image_path = os.path.join(scene_path, transformation.getName(), image_name) 
                     static_transform_image = Image.open(transform_image_path)
 
-                    static_transform_image_block = divide_in_blocks(static_transform_image, cfg.keras_img_size)[id_zone]
+                    static_transform_image_block = divide_in_blocks(static_transform_image, cfg.sub_image_size)[id_zone]
 
                     dt.augmented_data_image(static_transform_image_block, image_folder_path, image_prefix_name)
 

+ 89 - 77
generate/generate_reconstructed_data.py

@@ -27,7 +27,7 @@ min_max_filename        = cfg.min_max_filename_extension
 
 # define all scenes values
 scenes_list             = cfg.scenes_names
-scenes_indexes          = cfg.scenes_indices
+scenes_indices          = cfg.scenes_indices
 path                    = cfg.dataset_path
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
@@ -37,7 +37,7 @@ output_data_folder      = cfg.output_data_folder
 
 generic_output_file_svd = '_random.csv'
 
-def generate_data(transformation):
+def generate_data(transformation, _scenes):
     """
     @brief Method which generates all .csv files from scenes
     @return nothing
@@ -50,107 +50,108 @@ def generate_data(transformation):
     # go ahead each scenes
     for id_scene, folder_scene in enumerate(scenes):
 
-        print(folder_scene)
-        scene_path = os.path.join(path, folder_scene)
+        if folder_scene in _scenes:
+            print(folder_scene)
+            scene_path = os.path.join(path, folder_scene)
 
-        # construct each zones folder name
-        zones_folder = []
-        features_folder = []
-        zones_threshold = []
+            # construct each zones folder name
+            zones_folder = []
+            features_folder = []
+            zones_threshold = []
 
-        # get zones list info
-        for index in zones:
-            index_str = str(index)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
 
-            current_zone = "zone"+index_str
-            zones_folder.append(current_zone)
-            zone_path = os.path.join(scene_path, current_zone)
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
+                zone_path = os.path.join(scene_path, current_zone)
 
-            with open(os.path.join(zone_path, cfg.seuil_expe_filename)) as f:
-                zones_threshold.append(int(f.readline()))
+                with open(os.path.join(zone_path, cfg.seuil_expe_filename)) as f:
+                    zones_threshold.append(int(f.readline()))
 
-            # custom path for feature
-            feature_path = os.path.join(zone_path, transformation.getName())
+                # custom path for feature
+                feature_path = os.path.join(zone_path, transformation.getName())
 
-            if not os.path.exists(feature_path):
-                os.makedirs(feature_path)
+                if not os.path.exists(feature_path):
+                    os.makedirs(feature_path)
 
-            # custom path for interval of reconstruction and feature
-            feature_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
-            features_folder.append(feature_interval_path)
+                # custom path for interval of reconstruction and feature
+                feature_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
+                features_folder.append(feature_interval_path)
 
-            if not os.path.exists(feature_interval_path):
-                os.makedirs(feature_interval_path)
+                if not os.path.exists(feature_interval_path):
+                    os.makedirs(feature_interval_path)
 
-            # create for each zone the labels folder
-            labels = [cfg.not_noisy_folder, cfg.noisy_folder]
+                # create for each zone the labels folder
+                labels = [cfg.not_noisy_folder, cfg.noisy_folder]
 
-            for label in labels:
-                label_folder = os.path.join(feature_interval_path, label)
+                for label in labels:
+                    label_folder = os.path.join(feature_interval_path, label)
 
-                if not os.path.exists(label_folder):
-                    os.makedirs(label_folder)
+                    if not os.path.exists(label_folder):
+                        os.makedirs(label_folder)
 
-        # get all images of folder
-        scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
-        number_scene_image = len(scene_images)
+            # get all images of folder
+            scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+            number_scene_image = len(scene_images)
 
-        # for each images
-        for id_img, img_path in enumerate(scene_images):
+            # for each images
+            for id_img, img_path in enumerate(scene_images):
 
-            current_img = Image.open(img_path)
-            img_blocks = divide_in_blocks(current_img, cfg.keras_img_size)
+                current_img = Image.open(img_path)
+                img_blocks = divide_in_blocks(current_img, cfg.sub_image_size)
 
-            current_quality_index = int(get_scene_image_quality(img_path))
+                current_quality_index = int(get_scene_image_quality(img_path))
 
-            for id_block, block in enumerate(img_blocks):
+                for id_block, block in enumerate(img_blocks):
 
-                ##########################
-                # Image computation part #
-                ##########################
-                
-                # pass block to grey level
-                output_block = transformation.getTransformedImage(block)
-                output_block = np.array(output_block, 'uint8')
-                
-                # current output image
-                output_block_img = Image.fromarray(output_block)
+                    ##########################
+                    # Image computation part #
+                    ##########################
+                    
+                    # pass block to grey level
+                    output_block = transformation.getTransformedImage(block)
+                    output_block = np.array(output_block, 'uint8')
+                    
+                    # current output image
+                    output_block_img = Image.fromarray(output_block)
 
-                label_path = features_folder[id_block]
+                    label_path = features_folder[id_block]
 
-                # get label folder for block
-                if current_quality_index > zones_threshold[id_block]:
-                    label_path = os.path.join(label_path, cfg.not_noisy_folder)
-                else:
-                    label_path = os.path.join(label_path, cfg.noisy_folder)
+                    # get label folder for block
+                    if current_quality_index > zones_threshold[id_block]:
+                        label_path = os.path.join(label_path, cfg.not_noisy_folder)
+                    else:
+                        label_path = os.path.join(label_path, cfg.noisy_folder)
 
-                # Data augmentation!
-                rotations = [0, 90, 180, 270]
-                img_flip_labels = ['original', 'horizontal', 'vertical', 'both']
+                    # Data augmentation!
+                    rotations = [0, 90, 180, 270]
+                    img_flip_labels = ['original', 'horizontal', 'vertical', 'both']
 
-                horizontal_img = output_block_img.transpose(Image.FLIP_LEFT_RIGHT)
-                vertical_img = output_block_img.transpose(Image.FLIP_TOP_BOTTOM)
-                both_img = output_block_img.transpose(Image.TRANSPOSE)
+                    horizontal_img = output_block_img.transpose(Image.FLIP_LEFT_RIGHT)
+                    vertical_img = output_block_img.transpose(Image.FLIP_TOP_BOTTOM)
+                    both_img = output_block_img.transpose(Image.TRANSPOSE)
 
-                flip_images = [output_block_img, horizontal_img, vertical_img, both_img]
+                    flip_images = [output_block_img, horizontal_img, vertical_img, both_img]
 
-                # rotate and flip image to increase dataset size
-                for id, flip in enumerate(flip_images):
-                    for rotation in rotations:
-                        rotated_output_img = flip.rotate(rotation)
+                    # rotate and flip image to increase dataset size
+                    for id, flip in enumerate(flip_images):
+                        for rotation in rotations:
+                            rotated_output_img = flip.rotate(rotation)
 
-                        output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block] + cfg.post_image_name_separator
-                        output_reconstructed_filename = output_reconstructed_filename + img_flip_labels[id] + '_' + str(rotation) + '.png'
-                        output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
+                            output_reconstructed_filename = img_path.split('/')[-1].replace('.png', '') + '_' + zones_folder[id_block] + cfg.post_image_name_separator
+                            output_reconstructed_filename = output_reconstructed_filename + img_flip_labels[id] + '_' + str(rotation) + '.png'
+                            output_reconstructed_path = os.path.join(label_path, output_reconstructed_filename)
 
-                        rotated_output_img.save(output_reconstructed_path)
+                            rotated_output_img.save(output_reconstructed_path)
 
-            print(transformation.getName() + "_" + folder_scene + " - " + "{0:.2f}".format(((id_img + 1) / number_scene_image)* 100.) + "%")
-            sys.stdout.write("\033[F")
+                print(transformation.getName() + "_" + folder_scene + " - " + "{0:.2f}".format(((id_img + 1) / number_scene_image)* 100.) + "%")
+                sys.stdout.write("\033[F")
 
-        print('\n')
+            print('\n')
 
     print("%s_%s : end of data generation\n" % (transformation.getName(), transformation.getParam()))
 
@@ -171,13 +172,23 @@ def main():
                                 help="specific size of image", 
                                 default='100, 100',
                                 required=True)
+    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
 
     args = parser.parse_args()
 
     p_features  = list(map(str.strip, args.features.split(',')))
     p_params    = list(map(str.strip, args.params.split('::')))
     p_size      = args.size
+    p_scenes    = args.scenes.split(',')
 
+    # getting scenes from indexes user selection
+    scenes_selected = []
+
+    for scene_id in p_scenes:
+        index = scenes_indices.index(scene_id.strip())
+        scenes_selected.append(scenes_list[index])
+
+    # list of transformations
     transformations = []
 
     for id, feature in enumerate(p_features):
@@ -187,9 +198,10 @@ def main():
 
         transformations.append(Transformation(feature, p_params[id], p_size))
 
+    print("Scenes used", scenes_selected)
     # generate all or specific feature data
     for transformation in transformations:
-        generate_data(transformation)
+        generate_data(transformation, scenes_selected)
 
 if __name__== "__main__":
     main()

+ 15 - 0
generate_symlinks.sh

@@ -0,0 +1,15 @@
+#! /bin/bash
+
+if [ -z "$1" ]
+  then
+    echo "No argument supplied"
+    echo "Need to specify where you want to store data"
+    exit 1
+fi
+
+path=$1
+
+for link in {"data","results","saved_models","models_infos","models_backup","threshold_map","learned_zones","custom_norm"}; do
+    rm ${link}
+    ln -s $1/${link} ${link}
+done

+ 15 - 0
oar.example.sh

@@ -0,0 +1,15 @@
+#!/bin/sh
+
+#OAR --array-param-file params.txt
+#OAR -l /nodes=1,walltime=6:00:00
+#OAR -p host="orval02"
+#OAR -t besteffort
+#OAR --notify mail:jerome.buisine@univ-littoral.fr
+#OAR -O /nfs/home/lisic/jbuisine/projects/launchers/logs/Thesis-NoiseDetection-CNN.%jobid%.out
+#OAR -E /nfs/home/lisic/jbuisine/projects/launchers/logs/Thesis-NoiseDetection-CNN.%jobid%.err
+
+# Activiate venv used by python
+. ~/opt/venvs/thesis-venv/bin/activate
+
+# run command
+python ~/projects/Thesis-NoiseDetection-CNN/generate/generate_reconstructed_data.py $@

+ 9 - 6
prediction/predict_noisy_image.py

@@ -9,6 +9,7 @@ from PIL import Image
 # model imports
 from sklearn.externals import joblib
 from keras.models import model_from_json
+from keras import backend as K
 
 # modules imports
 sys.path.insert(0, '') # trick to enable import of main folder module
@@ -38,13 +39,15 @@ def main():
                                     help="list of specific param for each feature choice (See README.md for further information in 3D mode)", 
                                     default='100, 200 :: 50, 25',
                                     required=True)
+    parser.add_argument('--size', type=str, help="Expected output size before processing transformation", default="100,100")
     parser.add_argument('--model', type=str, help='.json file of keras model')
 
     args = parser.parse_args()
 
     p_img_file   = args.image
-    p_features    = list(map(str.strip, args.features.split(',')))
+    p_features   = list(map(str.strip, args.features.split(',')))
     p_params     = list(map(str.strip, args.params.split('::')))
+    p_size       = args.size
     p_model_file = args.model
 
 
@@ -64,21 +67,21 @@ def main():
 
     for id, feature in enumerate(p_features):
 
-        if feature not in feature_choices:
-            raise ValueError("Unknown feature, please select a correct feature : ", feature_choices)
+        if feature not in features_choices:
+            raise ValueError("Unknown feature, please select a correct feature : ", features_choices)
 
-        transformations.append(Transformation(feature, p_params[id]))
+        transformations.append(Transformation(feature, p_params[id], p_size))
 
     # getting transformed image
     transformed_images = []
-
+    
     for transformation in transformations:
         transformed_images.append(transformation.getTransformedImage(img))
 
     data = np.array(transformed_images)
 
     # specify the number of dimensions
-    img_width, img_height = cfg.keras_img_size
+    img_width, img_height = cfg.sub_image_size
     n_channels = len(transformations)
 
     if K.image_data_format() == 'channels_first':

+ 6 - 3
prediction/predict_seuil_expe_curve.py

@@ -47,6 +47,7 @@ def main():
                                     default='100, 200 :: 50, 25',
                                     required=True)
     parser.add_argument('--model', type=str, help='.json file of keras model', required=True)
+    parser.add_argument('--size', type=str, help="Expected output size before processing transformation", default="100,100")
     parser.add_argument('--renderer', type=str, 
                                       help='Renderer choice in order to limit scenes used', 
                                       choices=cfg.renderer_choices, 
@@ -55,9 +56,10 @@ def main():
 
     args = parser.parse_args()
 
-    p_features    = list(map(str.strip, args.features.split(',')))
+    p_features   = list(map(str.strip, args.features.split(',')))
     p_params     = list(map(str.strip, args.params.split('::')))
     p_model_file = args.model
+    p_size       = args.size
     p_renderer   = args.renderer
 
     scenes_list = dt.get_renderer_scenes_names(p_renderer)
@@ -111,7 +113,7 @@ def main():
             for img_path in scene_images:
 
                 current_img = Image.open(img_path)
-                img_blocks = divide_in_blocks(current_img, cfg.keras_img_size)
+                img_blocks = divide_in_blocks(current_img, cfg.sub_image_size)
 
                 current_quality_image = dt.get_scene_image_quality(img_path)
 
@@ -126,7 +128,8 @@ def main():
                         python_cmd = "python predict_noisy_image.py --image " + tmp_file_path + \
                                         " --features " + p_features + \
                                         " --params " + p_params + \
-                                        " --model " + p_model_file 
+                                        " --model " + p_model_file + \
+                                        " --size " + p_size 
 
                         ## call command ##
                         p = subprocess.Popen(python_cmd, stdout=subprocess.PIPE, shell=True)

+ 134 - 0
prediction_model.py

@@ -0,0 +1,134 @@
+# main imports
+import numpy as np
+import pandas as pd
+import sys, os, argparse
+import json
+
+# model imports
+import cnn_models as models
+import tensorflow as tf
+import keras
+from keras import backend as K
+from keras.callbacks import ModelCheckpoint
+from keras.models import model_from_json
+from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
+
+# image processing imports
+import cv2
+from sklearn.utils import shuffle
+
+# config imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Train Keras model and save it into .json file")
+
+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)', required=True)
+    parser.add_argument('--model', type=str, help='.json file of keras model')
+
+    args = parser.parse_args()
+
+    p_data_file   = args.data
+    p_model_file  = args.model
+        
+    ########################
+    # 1. Get and prepare data
+    ########################
+    print("Preparing data...")
+    dataset = pd.read_csv(p_data_file, header=None, sep=";")
+
+    print("Dataset size : ", len(dataset))
+
+    # default first shuffle of data
+    dataset = shuffle(dataset)
+
+    print("Reading all images data...")
+
+    # getting number of chanel
+    n_channels = len(dataset[1][1].split('::'))
+    print("Number of channels : ", n_channels)
+
+    img_width, img_height = cfg.keras_img_size
+
+    # specify the number of dimensions
+    if K.image_data_format() == 'channels_first':
+        if n_channels > 1:
+            input_shape = (1, n_channels, img_width, img_height)
+        else:
+            input_shape = (n_channels, img_width, img_height)
+
+    else:
+        if n_channels > 1:
+            input_shape = (1, img_width, img_height, n_channels)
+        else:
+            input_shape = (img_width, img_height, n_channels)
+
+    # `:` is the separator used for getting each img path
+    if n_channels > 1:
+        dataset[1] = dataset[1].apply(lambda x: [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in x.split('::')])
+    else:
+        dataset[1] = dataset[1].apply(lambda x: cv2.imread(x, cv2.IMREAD_GRAYSCALE))
+
+    # reshape array data
+    dataset[1] = dataset[1].apply(lambda x: np.array(x).reshape(input_shape))
+
+    # use of the whole data set for training
+    x_dataset = dataset.ix[:,1:]
+    y_dataset = dataset.ix[:,0]
+
+    x_data = []
+    for item in x_dataset.values:
+        #print("Item is here", item)
+        x_data.append(item[0])
+
+    x_data = np.array(x_data)
+
+    print("End of loading data..")
+
+    #######################
+    # 2. Getting model
+    #######################
+
+    with open(p_model_file, 'r') as f:
+        json_model = json.load(f)
+        model = model_from_json(json_model)
+        model.load_weights(p_model_file.replace('.json', '.h5'))
+
+        model.compile(loss='binary_crossentropy',
+                    optimizer='rmsprop',
+                    features=['accuracy'])
+
+
+    # Get results obtained from model
+    y_data_prediction = model.predict(x_data)
+
+    y_prediction = [1 if x > 0.5 else 0 for x in y_data_prediction]
+
+    acc_score = accuracy_score(y_dataset, y_prediction)
+    f1_data_score = f1_score(y_dataset, y_prediction)
+    recall_data_score = recall_score(y_dataset, y_prediction)
+    pres_score = precision_score(y_dataset, y_prediction)
+    roc_score = roc_auc_score(y_dataset, y_prediction)
+
+    # save model performance
+    if not os.path.exists(cfg.results_information_folder):
+        os.makedirs(cfg.results_information_folder)
+
+    perf_file_path = os.path.join(cfg.results_information_folder, cfg.perf_prediction_model_path)
+
+    # write header if necessary
+    if not os.path.exists(perf_file_path):
+        with open(perf_file_path, 'w') as f:
+            f.write(cfg.perf_prediction_header_file)
+
+    # add information into file
+    with open(perf_file_path, 'a') as f:
+        line = p_data_file + ';' + p_model_file + ';' + str(acc_score) + ';' + str(f1_data_score) + ';' + str(recall_data_score) + ';' + str(pres_score) + ';' + str(roc_score)
+        f.write(line)
+
+if __name__== "__main__":
+    main()

+ 2 - 1
requirements.txt

@@ -1,3 +1,4 @@
+numpy
 Pillow
 keras
 tensorflow
@@ -5,5 +6,5 @@ sklearn
 matplotlib
 path.py
 ipfml
-cv2
+opencv-python
 json

+ 0 - 132
run.sh

@@ -1,132 +0,0 @@
-#!/bin/bash
-
-erased=$1
-
-# file which contains model names we want to use for simulation
-file_path="results/models_comparisons.csv"
-
-if [ "${erased}" == "Y" ]; then
-    echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p results
-    touch ${file_path}
-
-    # add of header
-    echo 'model_name; global_train_size; global_test_size; filtered_train_size; filtered_test_size; f1_train; f1_test; recall_train; recall_test; presicion_train; precision_test; acc_train; acc_test; roc_auc_train; roc_auc_test;' >> ${file_path}
-fi
-
-renderer="all"
-scenes="A, B, C, D, E, F, G, H, I"
-
-svd_metric="svd_reconstruction"
-ipca_metric="ipca_reconstruction"
-fast_ica_metric="fast_ica_reconstruction"
-
-all_features="${svd_metric},${ipca_metric},${fast_ica_metric}"
-
-# First compute svd_reconstruction
-
-for begin in {80,85,90,95,100,105,110}; do
-  for end in {150,160,170,180,190,200}; do
-  
-    #python generate/generate_reconstructed_data.py --features ${svd_metric} --params "${begin}, ${end}"
-
-    for zone in {6,8,10,12}; do
-      OUTPUT_DATA_FILE="${svd_metric}_nb_zones_${zone}_B${begin}_E${end}"
-
-      if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
-        
-        echo "SVD model ${OUTPUT_DATA_FILE} already generated"
-      
-      else
-      
-        echo "Run computation for SVD model ${OUTPUT_DATA_FILE}"
-
-        python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${svd_metric} --renderer ${renderer} --scenes "${scenes}" --params "${begin}, ${end}" --nb_zones ${zone} --random 1
-        
-        python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE}
-      fi
-    done
-  done
-done
-
-
-# computation of ipca_reconstruction
-ipca_batch_size=55
-
-for component in {10,15,20,25,30,35,45,50}; do
-  python generate/generate_reconstructed_data.py --features ${ipca_metric} --params "${component},${ipca_batch_size}"
-
-  for zone in {6,8,10,12}; do
-    OUTPUT_DATA_FILE="${ipca_metric}_nb_zones_${zone}_N${component}_BS${ipca_batch_size}"
-
-    if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
-      
-      echo "IPCA model ${OUTPUT_DATA_FILE} already generated"
-    
-    else
-    
-      echo "Run computation for IPCA model ${OUTPUT_DATA_FILE}"
-
-      python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${ipca_metric} --renderer ${renderer} --scenes ${scenes} --params "${component},${ipca_batch_size}" --nb_zones ${zone} --random 1
-      python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
-    fi
-  done
-done
-
-
-# computation of fast_ica_reconstruction
-
-for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
-  python generate/generate_reconstructed_data.py --features ${fast_ica_metric} --params "${component}"
-
-  for zone in {6,8,10,12}; do
-    OUTPUT_DATA_FILE="${fast_ica_metric}_nb_zones_${zone}_N${component}"
-
-    if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
-      
-      echo "Fast ICA model ${OUTPUT_DATA_FILE} already generated"
-    
-    else
-    
-      echo "Run computation for Fast ICA model ${OUTPUT_DATA_FILE}"
-
-      python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${fast_ica_metric} --renderer ${renderer} --scenes ${scenes} --params "${component}" --nb_zones ${zone} --random 1
-      
-      python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
-    fi
-  done
-done
-
-# RUN LATER
-# compute using all transformation methods
-ipca_batch_size=55
-
-: '
-for begin in {80,85,90,95,100,105,110}; do
-  for end in {150,160,170,180,190,200}; do
-    for ipca_component in {10,15,20,25,30,35,45,50}; do
-      for fast_ica_component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
-        for zone in {6,8,10,12}; do
-          OUTPUT_DATA_FILE="${svd_metric}_B${begin}_E${end}_${ipca_metric}__N${ipca_component}_BS${ipca_batch_size}_${fast_ica_metric}_N${fast_ica_component}_nb_zones_${zone}"
-
-          if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
-            
-            echo "Transformation combination model ${OUTPUT_DATA_FILE} already generated"
-          
-          else
-          
-            echo "Run computation for Transformation combination model ${OUTPUT_DATA_FILE}"
-
-            params="${begin}, ${end} :: ${ipca_component}, ${ipca_batch_size} :: ${fast_ica_component}"
-
-            python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${all_features} --renderer ${renderer} --scenes ${scenes} --params "${params}" --nb_zones ${zone} --random 1
-            
-            python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
-          fi
-        done
-      done
-    done
-  done
-done
-'

+ 0 - 61
run_tl_test.sh

@@ -1,61 +0,0 @@
-#!/bin/bash
-
-erased=$1
-
-# file which contains model names we want to use for simulation
-file_path="results/models_comparisons.csv"
-
-if [ "${erased}" == "Y" ]; then
-    echo "Previous data file erased..."
-    rm ${file_path}
-    mkdir -p results
-    touch ${file_path}
-
-    # add of header
-    echo 'model_name; global_train_size; global_test_size; filtered_train_size; filtered_test_size; f1_train; f1_test; recall_train; recall_test; presicion_train; precision_test; acc_train; acc_test; roc_auc_train; roc_auc_test;' >> ${file_path}
-fi
-
-renderer="all"
-scenes="A, B, C, D, E, F, G, H, I"
-
-svd_metric="svd_reconstruction"
-ipca_metric="ipca_reconstruction"
-fast_ica_metric="fast_ica_reconstruction"
-
-all_features="${svd_metric},${ipca_metric},${fast_ica_metric}"
-
-
-# RUN LATER
-# compute using all transformation methods
-
-begin=100
-end=200
-ipca_component=30
-ipca_batch_size=55
-fast_ica_component=60
-zone=12
-
-
-OUTPUT_DATA_FILE="${svd_metric}_B${begin}_E${end}_${ipca_metric}__N${ipca_component}_BS${ipca_batch_size}_${fast_ica_metric}_N${fast_ica_component}_nb_zones_${zone}"
-
-python generate/generate_reconstructed_data.py --features ${svd_metric} --params "${begin}, ${end}"
-
-python generate/generate_reconstructed_data.py --features ${ipca_metric} --params "${ipca_component},${ipca_batch_size}"
-
-python generate/generate_reconstructed_data.py --features ${fast_ica_metric} --params "${fast_ica_component}"
-
-
-if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
-  
-  echo "Transformation combination model ${OUTPUT_DATA_FILE} already generated"
-
-else
-
-  echo "Run computation for Transformation combination model ${OUTPUT_DATA_FILE}"
-
-  params="${begin}, ${end} :: ${ipca_component}, ${ipca_batch_size} :: ${fast_ica_component}"
-
-  python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${all_features} --renderer ${renderer} --scenes ${scenes} --params "${params}" --nb_zones ${zone} --random 1
-  
-  python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} --tl 1 &
-fi

+ 36 - 14
train_model.py

@@ -25,9 +25,9 @@ import custom_config as cfg
 def main():
 
     # default keras configuration
-    config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 8}) 
-    sess = tf.Session(config=config) 
-    keras.backend.set_session(sess)
+    #config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 8}) 
+    #sess = tf.Session(config=config) 
+    #keras.backend.set_session(sess)
 
     parser = argparse.ArgumentParser(description="Train Keras model and save it into .json file")
 
@@ -38,14 +38,16 @@ def main():
     parser.add_argument('--epochs', type=int, help='number of epochs used for training model', default=cfg.keras_epochs)
     parser.add_argument('--val_size', type=float, help='percent of validation data during training process', default=cfg.val_dataset_size)
 
+
     args = parser.parse_args()
 
-    p_data_file  = args.data
-    p_output     = args.output
-    p_tl         = args.tl
-    p_batch_size = args.batch_size
-    p_epochs     = args.epochs
-    p_val_size   = args.val_size
+    p_data_file   = args.data
+    p_output      = args.output
+    p_tl          = args.tl
+    p_batch_size  = args.batch_size
+    p_epochs      = args.epochs
+    p_val_size    = args.val_size
+    initial_epoch = 0
         
     ########################
     # 1. Get and prepare data
@@ -144,17 +146,31 @@ def main():
     # 2. Getting model
     #######################
 
-    if not os.path.exists(cfg.backup_model_folder):
-        os.makedirs(cfg.backup_model_folder)
+    # create backup folder for current model
+    model_backup_folder = os.path.join(cfg.backup_model_folder, p_output)
+    if not os.path.exists(model_backup_folder):
+        os.makedirs(model_backup_folder)
 
-    filepath = os.path.join(cfg.backup_model_folder, p_output + "-{epoch:02d}.hdf5")
+    # add of callback models
+    filepath = os.path.join(cfg.backup_model_folder, p_output, p_output + "__{epoch:02d}.hdf5")
     checkpoint = ModelCheckpoint(filepath, monitor='val_auc', verbose=1, save_best_only=True, mode='max')
     callbacks_list = [checkpoint]
 
     model = models.get_model(n_channels, input_shape, p_tl)
     model.summary()
- 
-    model.fit(x_data_train, y_dataset_train.values, validation_split=p_val_size, epochs=p_epochs, batch_size=p_batch_size, callbacks=callbacks_list)
+
+    # check if backup already exists
+    backups = sorted(os.listdir(model_backup_folder))
+
+    if len(backups) > 0:
+        # TODO : check of initial epoch
+        last_backup = backups[-1]
+        last_epoch = int(last_backup.split('__')[1].replace('.hdf5', ''))
+        initial_epoch = last_epoch
+        print("Previous backup model found.. ")
+        print("Restart from epoch ", last_epoch)
+
+    model.fit(x_data_train, y_dataset_train.values, validation_split=p_val_size, initial_epoch=initial_epoch, epochs=p_epochs, batch_size=p_batch_size, callbacks=callbacks_list)
 
     score = model.evaluate(x_data_test, y_dataset_test, batch_size=p_batch_size)
 
@@ -201,6 +217,12 @@ def main():
 
     perf_file_path = os.path.join(cfg.results_information_folder, cfg.csv_model_comparisons_filename)
 
+    # write header if necessary
+    if not os.path.exists(perf_file_path):
+        with open(perf_file_path, 'w') as f:
+            f.write(cfg.perf_train_header_file)
+            
+    # add information into file
     with open(perf_file_path, 'a') as f:
         line = p_output + ';' + str(len(dataset_train)) + ';' + str(len(dataset_test)) + ';' \
                         + str(final_df_train_size) + ';' + str(final_df_test_size) + ';' \