Browse Source

Prediction scripts added for CNN and LSTM

Jérôme BUISINE 3 months ago
parent
commit
a7545babe8
5 changed files with 455 additions and 37 deletions
  1. 4 1
      custom_config.py
  2. 201 0
      prediction/estimate_thresholds_cnn.py
  3. 208 0
      prediction/estimate_thresholds_lstm.py
  4. 33 22
      train_lstm_weighted.py
  5. 9 14
      train_model.py

+ 4 - 1
custom_config.py

@@ -13,14 +13,17 @@ output_data_folder              = 'data'
 output_data_generated           = os.path.join(output_data_folder, 'generated')
 output_datasets                 = os.path.join(output_data_folder, 'datasets')
 output_zones_learned            = os.path.join(output_data_folder, 'learned_zones')
+output_models                   = os.path.join(output_data_folder, 'saved_models')
+output_results_folder           = os.path.join(output_data_folder, 'results')
 
 ## noisy_folder                    = 'noisy'
 ## not_noisy_folder                = 'notNoisy'
-backup_model_folder             = 'models_backup'
+backup_model_folder             = os.path.join(output_data_folder, 'models_backup')
 
 # file or extensions
 
 perf_prediction_model_path      = 'predictions_models_results.csv'
+results_filename                = 'results.csv'
 ## post_image_name_separator       = '___'
 
 # variables

+ 201 - 0
prediction/estimate_thresholds_cnn.py

@@ -0,0 +1,201 @@
+# main imports
+import numpy as np
+import pandas as pd
+import sys, os, argparse
+
+# image processing
+from PIL import Image
+from ipfml import utils
+from ipfml.processing import transform, segmentation
+
+import matplotlib.pyplot as plt
+
+# model imports
+import joblib
+from keras.models import load_model
+from keras import backend as K
+
+# modules and config imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+import modules.utils.data as dt
+from modules.classes.Transformation import Transformation
+
+def write_progress(progress):
+    """Print a textual progress bar on stdout, then move the cursor back up.
+
+    After printing, the cursor is sent back to the previous line so that the
+    next call overwrites the bar instead of stacking a new one below it.
+
+    Args:
+        progress: completion ratio; values in [0, 1] map to an empty-to-full bar.
+    """
+    bar_width = 180
+
+    # Truncate to an integer cell index: compared against a float position, the
+    # `>` head was only drawn when `bar_width * progress` happened to be whole.
+    pos = int(bar_width * progress)
+
+    filled = max(0, min(pos, bar_width))
+    head = ">" if 0 <= pos < bar_width else ""
+    padding = " " * (bar_width - filled - len(head))
+
+    output_str = "[" + "=" * filled + head + padding + "] " + str(int(progress * 100.0)) + " %\r"
+    print(output_str)
+
+    # ANSI "cursor previous line" escape sequence
+    sys.stdout.write("\033[F")
+
+def _prepare_model_input(data):
+    """Reshape the stacked transformed blocks into a one-sample batch.
+
+    Args:
+        data: 3-axis array built from the list of transformed blocks; its
+            first axis holds one entry per transformation.
+
+    Returns:
+        The array arranged according to `K.image_data_format()`, with a
+        leading batch axis of size 1 added.
+    """
+    n_channels, _, _ = data.shape
+
+    # Keras reports 'channels_first' or 'channels_last'; the previous
+    # misspelled literal could never match, so this branch was dead code.
+    if K.image_data_format() == 'channels_first':
+        if n_channels > 1:
+            data = np.expand_dims(data, axis=0)
+    else:
+        data = data.transpose()
+        if n_channels > 1:
+            data = np.expand_dims(data, axis=0)
+
+    # leading batch axis expected by `model.predict`
+    return np.expand_dims(data, axis=0)
+
+
+def main():
+    """Estimate one threshold per zone of a scene using a saved Keras model.
+
+    Images of the scene folder are processed in sorted order and each one is
+    divided into (200, 200) blocks. For every block position, the threshold is
+    the value returned by `dt.get_scene_image_quality` for the first image at
+    which the model has predicted class 0 `--n_stop` times in a row. Zones
+    that never reach this point get the value of the last image. Results are
+    printed and, when `--save` is given, appended to that file as one
+    `;`-separated line starting with `--label`.
+    """
+
+    parser = argparse.ArgumentParser(description="Read and compute entropy data file")
+
+    # the model path is mandatory: nothing can be predicted without it
+    parser.add_argument('--model', type=str, help='model .h5 file', required=True)
+    parser.add_argument('--folder', type=str,
+                        help='folder where scene dataset is available',
+                        required=True)
+    parser.add_argument('--features', type=str,
+                                     help="list of features choice in order to compute data",
+                                     default='svd_reconstruction, ipca_reconstruction',
+                                     required=True)
+    parser.add_argument('--params', type=str,
+                                    help="list of specific param for each feature choice (See README.md for further information in 3D mode)",
+                                    default='100, 200 :: 50, 25',
+                                    required=True)
+    parser.add_argument('--size', type=str,
+                                help="specific size of image",
+                                default='100, 100',
+                                required=True)
+    parser.add_argument('--n_stop', type=int, help='number of detection to make sure to stop', default=1)
+    parser.add_argument('--save', type=str, help='filename where to save input data')
+    parser.add_argument('--label', type=str, help='label to use when saving thresholds')
+
+    args = parser.parse_args()
+
+    p_model    = args.model
+    p_folder   = args.folder
+    p_features = list(map(str.strip, args.features.split(',')))
+    p_params   = list(map(str.strip, args.params.split('::')))
+    p_size     = args.size
+    p_n_stop   = args.n_stop
+    p_save     = args.save
+    p_label    = args.label
+
+    # fail before any computation instead of crashing on `None + ';'`
+    # once all predictions are done
+    if p_save is not None and p_label is None:
+        parser.error('--label is required when --save is used')
+
+    # cheap check first: no need to load the model for an invalid scene
+    scene_path = p_folder
+
+    if not os.path.exists(scene_path):
+        print('Unvalid scene path:', scene_path)
+        # non-zero status so that calling scripts can detect the failure
+        sys.exit(1)
+
+    # 1. Load expected transformations
+    if len(p_params) < len(p_features):
+        raise ValueError("Each feature requires its own params group (groups are separated by `::`)")
+
+    transformations = []
+
+    for feature, params in zip(p_features, p_params):
+
+        if feature not in cfg.features_choices_labels or feature == 'static':
+            raise ValueError("Unknown feature, please select a correct feature (`static` excluded) : ", cfg.features_choices_labels)
+
+        transformations.append(Transformation(feature, params, p_size))
+
+    # 2. load model
+    # TODO : check kind of model
+    model = load_model(p_model)
+
+    # 3. construct zones folder names (only their count is used below)
+    zones_list = [cfg.zone_folder + str(index).zfill(2) for index in range(16)]
+
+    # 4. get estimated thresholds using model and specific method
+    images_path = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+    number_of_images = len(images_path)
+
+    if number_of_images == 0:
+        print('No scene image found in:', scene_path)
+        sys.exit(1)
+
+    image_indices = [ dt.get_scene_image_quality(img_path) for img_path in images_path ]
+
+    # one slot per zone: found threshold (or None) and current run of detections
+    estimated_thresholds = [None] * len(zones_list)
+    n_estimated_thresholds = [0] * len(zones_list)
+
+    for img_i, img_path in enumerate(images_path):
+
+        blocks = segmentation.divide_in_blocks(Image.open(img_path), (200, 200))
+
+        for index, block in enumerate(blocks):
+
+            # threshold already found for this zone
+            if estimated_thresholds[index] is not None:
+                continue
+
+            # compute data here
+            data = np.array([transformation.getTransformedImage(block) for transformation in transformations])
+
+            probs = model.predict(_prepare_model_input(data))[0]
+            prediction = int(np.argmax(probs))
+            #print(index, ':', image_indices[img_i], '=>', prediction)
+
+            if prediction == 0:
+                n_estimated_thresholds[index] += 1
+
+                # if same number of detection is attempted
+                if n_estimated_thresholds[index] >= p_n_stop:
+                    estimated_thresholds[index] = image_indices[img_i]
+            else:
+                # detections must be consecutive: restart the count
+                n_estimated_thresholds[index] = 0
+
+        # write progress bar
+        write_progress((img_i + 1) / number_of_images)
+
+    # 5. default label: zones without detection get the last image value
+    estimated_thresholds = [image_indices[-1] if t is None else t for t in estimated_thresholds]
+
+    # 6. save estimated thresholds into specific file
+    print('\nEstimated thresholds', estimated_thresholds)
+    if p_save is not None:
+        with open(p_save, 'a') as f:
+            f.write(p_label + ';')
+            f.write(''.join(str(t) + ';' for t in estimated_thresholds))
+            f.write('\n')
+
+
+if __name__ == "__main__":
+    main()

+ 208 - 0
prediction/estimate_thresholds_lstm.py

@@ -0,0 +1,208 @@
+# main imports
+import numpy as np
+import pandas as pd
+import sys, os, argparse
+
+# image processing
+from PIL import Image
+from ipfml import utils
+from ipfml.processing import transform, segmentation
+
+import matplotlib.pyplot as plt
+
+# model imports
+import joblib
+from keras.models import load_model
+from keras import backend as K
+
+# modules and config imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+import modules.utils.data as dt
+from modules.classes.Transformation import Transformation
+
+def write_progress(progress):
+    """Print a textual progress bar on stdout, then move the cursor back up.
+
+    After printing, the cursor is sent back to the previous line so that the
+    next call overwrites the bar instead of stacking a new one below it.
+
+    Args:
+        progress: completion ratio; values in [0, 1] map to an empty-to-full bar.
+    """
+    bar_width = 180
+
+    # Truncate to an integer cell index: compared against a float position, the
+    # `>` head was only drawn when `bar_width * progress` happened to be whole.
+    pos = int(bar_width * progress)
+
+    filled = max(0, min(pos, bar_width))
+    head = ">" if 0 <= pos < bar_width else ""
+    padding = " " * (bar_width - filled - len(head))
+
+    output_str = "[" + "=" * filled + head + padding + "] " + str(int(progress * 100.0)) + " %\r"
+    print(output_str)
+
+    # ANSI "cursor previous line" escape sequence
+    sys.stdout.write("\033[F")
+
+def main():
+    """Estimate one threshold per zone of a scene using a saved sequence model.
+
+    Images of the scene folder are processed in sorted order and each one is
+    divided into (200, 200) blocks. For every block position, a sliding window
+    of the last `--sequence` transformed blocks is given to the model. The
+    threshold is the value returned by `dt.get_scene_image_quality` for the
+    first image at which the model output has been below 0.5 `--n_stop` times
+    in a row. Zones that never reach this point get the value of the last
+    image. Results are printed and, when `--save` is given, appended to that
+    file as one `;`-separated line starting with `--label`.
+    """
+
+    parser = argparse.ArgumentParser(description="Read and compute entropy data file")
+
+    # the model path is mandatory: nothing can be predicted without it
+    parser.add_argument('--model', type=str, help='model .h5 file', required=True)
+    parser.add_argument('--folder', type=str,
+                        help='folder where scene dataset is available',
+                        required=True)
+    parser.add_argument('--features', type=str,
+                                     help="list of features choice in order to compute data",
+                                     default='svd_reconstruction, ipca_reconstruction',
+                                     required=True)
+    parser.add_argument('--params', type=str,
+                                    help="list of specific param for each feature choice (See README.md for further information in 3D mode)",
+                                    default='100, 200 :: 50, 25',
+                                    required=True)
+    parser.add_argument('--size', type=str,
+                                help="specific size of image",
+                                default='100, 100',
+                                required=True)
+    parser.add_argument('--sequence', type=int, help='sequence size expected', required=True, default=1)
+    parser.add_argument('--n_stop', type=int, help='number of detection to make sure to stop', default=1)
+    parser.add_argument('--save', type=str, help='filename where to save input data')
+    parser.add_argument('--label', type=str, help='label to use when saving thresholds')
+
+    args = parser.parse_args()
+
+    p_model    = args.model
+    p_folder   = args.folder
+    p_features = list(map(str.strip, args.features.split(',')))
+    p_params   = list(map(str.strip, args.params.split('::')))
+    p_size     = args.size
+    p_sequence = args.sequence
+    p_n_stop   = args.n_stop
+    p_save     = args.save
+    p_label    = args.label
+
+    # fail before any computation instead of crashing on `None + ';'`
+    # once all predictions are done
+    if p_save is not None and p_label is None:
+        parser.error('--label is required when --save is used')
+
+    # cheap check first: no need to load the model for an invalid scene
+    scene_path = p_folder
+
+    if not os.path.exists(scene_path):
+        print('Unvalid scene path:', scene_path)
+        # non-zero status so that calling scripts can detect the failure
+        sys.exit(1)
+
+    # 1. Load expected transformations
+    if len(p_params) < len(p_features):
+        raise ValueError("Each feature requires its own params group (groups are separated by `::`)")
+
+    transformations = []
+
+    for feature, params in zip(p_features, p_params):
+
+        if feature not in cfg.features_choices_labels or feature == 'static':
+            raise ValueError("Unknown feature, please select a correct feature (`static` excluded) : ", cfg.features_choices_labels)
+
+        transformations.append(Transformation(feature, params, p_size))
+
+    # 2. load model and compile it
+    # TODO : check kind of model
+    # NOTE(security): joblib.load unpickles the file, which can execute
+    # arbitrary code; only load model files coming from a trusted source.
+    model = joblib.load(p_model)
+    model.compile(loss='binary_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy'])
+
+    # 3. construct zones folder names (only their count is used below)
+    zones_list = [cfg.zone_folder + str(index).zfill(2) for index in range(16)]
+
+    # 4. get estimated thresholds using model and specific method
+    images_path = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+    number_of_images = len(images_path)
+
+    if number_of_images == 0:
+        print('No scene image found in:', scene_path)
+        sys.exit(1)
+
+    image_indices = [ dt.get_scene_image_quality(img_path) for img_path in images_path ]
+
+    # one slot per zone: found threshold (or None), current run of detections
+    # and sliding window of the latest transformed blocks
+    estimated_thresholds = [None] * len(zones_list)
+    n_estimated_thresholds = [0] * len(zones_list)
+    sequence_list_zones = [[] for _ in zones_list]
+
+    for img_i, img_path in enumerate(images_path):
+
+        blocks = segmentation.divide_in_blocks(Image.open(img_path), (200, 200))
+
+        for index, block in enumerate(blocks):
+
+            # threshold already found for this zone
+            if estimated_thresholds[index] is not None:
+                continue
+
+            sequence_list = sequence_list_zones[index]
+
+            # compute data here
+            data = np.array([transformation.getTransformedImage(block) for transformation in transformations])
+            sequence_list.append(data)
+
+            # wait until the window holds enough images
+            if len(sequence_list) < p_sequence:
+                continue
+
+            # add the batch axis expected by `model.predict`
+            input_data = np.expand_dims(np.array(sequence_list), axis=0)
+
+            prob = model.predict(input_data)[0]
+            #print(index, ':', image_indices[img_i], '=>', prob)
+
+            # if prob is now near to label `0` then image is not longer noisy
+            if prob < 0.5:
+                n_estimated_thresholds[index] += 1
+
+                # if same number of detection is attempted
+                if n_estimated_thresholds[index] >= p_n_stop:
+                    estimated_thresholds[index] = image_indices[img_i]
+            else:
+                # detections must be consecutive: restart the count
+                n_estimated_thresholds[index] = 0
+
+            # remove first image so the window slides by one
+            del sequence_list[0]
+
+        # write progress bar
+        write_progress((img_i + 1) / number_of_images)
+
+    # 5. default label: zones without detection get the last image value
+    estimated_thresholds = [image_indices[-1] if t is None else t for t in estimated_thresholds]
+
+    # 6. save estimated thresholds into specific file
+    print('\nEstimated thresholds', estimated_thresholds)
+    if p_save is not None:
+        with open(p_save, 'a') as f:
+            f.write(p_label + ';')
+            f.write(''.join(str(t) + ';' for t in estimated_thresholds))
+            f.write('\n')
+
+
+if __name__ == "__main__":
+    main()

+ 33 - 22
train_lstm_weighted.py

@@ -200,30 +200,30 @@ def main():
     model.summary()
 
     print("Fitting model with custom class_weight", class_weight)
-    history = model.fit(X_train, y_train, batch_size=16, epochs=50, validation_split = 0.30, verbose=1, shuffle=True, class_weight=class_weight)
+    history = model.fit(X_train, y_train, batch_size=16, epochs=3, validation_split = 0.30, verbose=1, shuffle=True, class_weight=class_weight)
 
     # list all data in history
-    print(history.history.keys())
-    # summarize history for accuracy
-    plt.plot(history.history['accuracy'])
-    plt.plot(history.history['val_accuracy'])
-    plt.title('model accuracy')
-    plt.ylabel('accuracy')
-    plt.xlabel('epoch')
-    plt.legend(['train', 'test'], loc='upper left')
-    plt.show()
-    # summarize history for loss
-    plt.plot(history.history['loss'])
-    plt.plot(history.history['val_loss'])
-    plt.title('model loss')
-    plt.ylabel('loss')
-    plt.xlabel('epoch')
-    plt.legend(['train', 'test'], loc='upper left')
-    plt.show()
-
-    train_score, train_acc = model.evaluate(X_train, y_train, batch_size=1)
-
-    print(train_acc)
+    # print(history.history.keys())
+    # # summarize history for accuracy
+    # plt.plot(history.history['accuracy'])
+    # plt.plot(history.history['val_accuracy'])
+    # plt.title('model accuracy')
+    # plt.ylabel('accuracy')
+    # plt.xlabel('epoch')
+    # plt.legend(['train', 'test'], loc='upper left')
+    # plt.show()
+    # # summarize history for loss
+    # plt.plot(history.history['loss'])
+    # plt.plot(history.history['val_loss'])
+    # plt.title('model loss')
+    # plt.ylabel('loss')
+    # plt.xlabel('epoch')
+    # plt.legend(['train', 'test'], loc='upper left')
+    # plt.show()
+
+    # train_score, train_acc = model.evaluate(X_train, y_train, batch_size=1)
+
+    # print(train_acc)
     y_train_predict = model.predict_classes(X_train)
     y_test_predict = model.predict_classes(X_test)
     y_all_predict = model.predict_classes(X_all)
@@ -256,6 +256,17 @@ def main():
     with open(results_filename, 'a') as f:
         f.write(p_output + ';' + str(acc_train) + ';' + str(auc_train) + ';' + str(acc_test) + ';' + str(auc_test) + '\n')
 
+    # save acc metric information
+    plt.plot(history.history['accuracy'])
+    plt.plot(history.history['val_accuracy'])
+    plt.title('model accuracy')
+    plt.ylabel('accuracy')
+    plt.xlabel('epoch')
+    plt.legend(['train', 'test'], loc='upper left')
+
+    model_history = os.path.join(cfg.output_results_folder, p_output + '.png')
+    plt.savefig(model_history)
+
     # save model using joblib
     if not os.path.exists(cfg.output_models):
         os.makedirs(cfg.output_models)

+ 9 - 14
train_model.py

@@ -203,7 +203,7 @@ def main():
 
         initial_epoch = max_last_epoch
         print("-------------------------------------------------")
-        print("Previous backup model found",  last_model_backup, "with already", initial_epoch, "done...")
+        print("Previous backup model found",  last_model_backup, "with already", initial_epoch, " epoch(s) done...")
         print("Resuming from epoch", str(initial_epoch + 1))
         print("-------------------------------------------------")
 
@@ -221,6 +221,7 @@ def main():
     y_data_categorical = to_categorical(y_data)
     #print(y_data_categorical)
 
+    print(x_data.shape)
     # validation split parameter will use the last `%` data, so here, data will really validate our model
     model.fit(x_data, y_data_categorical, validation_split=validation_split, initial_epoch=initial_epoch, epochs=p_epochs, batch_size=p_batch_size, callbacks=callbacks_list)
 
@@ -229,18 +230,12 @@ def main():
 
     print("Accuracy score on val dataset ", score)
 
-    if not os.path.exists(cfg.saved_models_folder):
-        os.makedirs(cfg.saved_models_folder)
+    if not os.path.exists(cfg.output_models):
+        os.makedirs(cfg.output_models)
 
     # save the model into HDF5 file
-    model_output_path = os.path.join(cfg.saved_models_folder, p_output + '.json')
-    json_model_content = model.to_json()
-
-    with open(model_output_path, 'w') as f:
-        print("Model saved into ", model_output_path)
-        json.dump(json_model_content, f, indent=4)
-
-    model.save_weights(model_output_path.replace('.json', '.h5'))
+    model_output_path = os.path.join(cfg.output_models, p_output + '.h5')
+    model.save(model_output_path)
 
     # Get results obtained from model
     y_train_prediction = model.predict(x_data_train)
@@ -268,10 +263,10 @@ def main():
     roc_val_score = roc_auc_score(y_dataset_val, y_val_prediction)
 
     # save model performance
-    if not os.path.exists(cfg.results_information_folder):
-        os.makedirs(cfg.results_information_folder)
+    if not os.path.exists(cfg.output_results_folder):
+        os.makedirs(cfg.output_results_folder)
 
-    perf_file_path = os.path.join(cfg.results_information_folder, cfg.csv_model_comparisons_filename)
+    perf_file_path = os.path.join(cfg.output_results_folder, cfg.csv_model_comparisons_filename)
 
     # write header if necessary
     if not os.path.exists(perf_file_path):