Browse Source

Update of train model process and modules

Jérôme BUISINE 7 months ago
parent
commit
288e5df499
7 changed files with 238 additions and 18 deletions
  1. 9 9
      cnn_models.py
  2. 42 0
      cross_run.sh
  3. 1 1
      custom_config.py
  4. 1 1
      modules
  5. 6 3
      prediction_model.py
  6. 163 0
      run_all.sh
  7. 16 4
      train_model.py

+ 9 - 9
cnn_models.py

@@ -40,10 +40,10 @@ def generate_model_2D(_input_shape, _weights_file=None):
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
 
-    model.add(Dense(120))
-    model.add(Activation('relu'))
-    model.add(BatchNormalization())
-    model.add(Dropout(0.5))
+    # model.add(Dense(120))
+    # model.add(Activation('sigmoid'))
+    # model.add(BatchNormalization())
+    # model.add(Dropout(0.5))
 
     model.add(Dense(80))
     model.add(Activation('relu'))
@@ -60,15 +60,15 @@ def generate_model_2D(_input_shape, _weights_file=None):
     model.add(BatchNormalization())
     model.add(Dropout(0.5))
 
-    model.add(Dense(1))
-    model.add(Activation('sigmoid'))
+    model.add(Dense(2))
+    model.add(Activation('softmax'))
 
     # reload weights if exists
     if _weights_file is not None:
         model.load_weights(_weights_file)
 
-    model.compile(loss='binary_crossentropy',
-                  optimizer='rmsprop',
+    model.compile(loss='categorical_crossentropy',
+                  optimizer='adam',
                   metrics=['accuracy', metrics.auc])
 
     return model
@@ -126,7 +126,7 @@ def generate_model_3D(_input_shape, _weights_file=None):
     if _weights_file is not None:
         model.load_weights(_weights_file)
 
-    model.compile(loss='binary_crossentropy',
+    model.compile(loss='categorical_crossentropy',
                   optimizer='rmsprop',
                   metrics=['accuracy', metrics.auc])
 

+ 42 - 0
cross_run.sh

@@ -0,0 +1,42 @@
+metric="min_diff_filter"
+scenes="A,B,D,G,H,I"
+
+all_scenes="A,B,C,D,E,F,G,H,I"
+
+# file which contains model names we want to use for simulation
+file_path="results/models_comparisons.csv"
+
+for window in {"3","5","7","9","11"}; do
+    echo python generate/generate_reconstructed_data.py --features ${metric} --params ${window},${window} --size 100,100 --scenes ${all_scenes}
+done
+
+for scene in {"A","B","D","G","H","I"}; do
+
+    # remove current scene test from dataset
+    s="${scenes//,${scene}}"
+    s="${s//${scene},}"
+
+    for zone in {10,11,12}; do
+        for balancing in {0,1}; do
+        
+            OUTPUT_DATA_FILE="${metric}_nb_zones_${zone}_W${width}_H${height}_balancing${balancing}_without_${scene}"
+            OUTPUT_DATA_FILE_TEST="${metric}_nb_zones_${zone}_W${width}_H${height}_balancing${balancing}_scene_${scene}"
+
+            if grep -q "${OUTPUT_DATA_FILE}" "${file_path}"; then
+            
+                echo "Model ${OUTPUT_DATA_FILE} already generated"
+
+            else
+
+                #echo "Run computation for SVD model ${OUTPUT_DATA_FILE}"
+                echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE_TEST} --features ${metric} --scenes ${scene} --params ${width},${height} --nb_zones ${zone} --random 1 --size 100,100     
+
+                echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${metric} --scenes ${s} --params ${width},${height} --nb_zones ${zone} --random 1 --size 100,100     
+                
+                echo python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} --balancing ${balancing}
+                echo python prediction_model.py --data data/${OUTPUT_DATA_FILE_TEST}.train --model saved_models/${OUTPUT_DATA_FILE}.json
+            fi 
+        done
+    done
+done
+

+ 1 - 1
custom_config.py

@@ -25,7 +25,7 @@ features_choices_labels         = ['static', 'svd_reconstruction', 'fast_ica_rec
 # parameters
 
 sub_image_size                  = (200, 200)
-keras_epochs                    = 100
+keras_epochs                    = 30
 ## keras_batch                     = 32
 ## val_dataset_size                = 0.2
 

+ 1 - 1
modules

@@ -1 +1 @@
-Subproject commit 6d2a431612e37ccb35cd05999ce6802320712e7f
+Subproject commit 2a16341c9e6ddd00a931cde34c56984bec4b167f

+ 6 - 3
prediction_model.py

@@ -17,6 +17,9 @@ from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, reca
 import cv2
 from sklearn.utils import shuffle
 
+import seaborn as sns
+import matplotlib.pyplot as plt
+
 # config imports
 sys.path.insert(0, '') # trick to enable import of main folder module
 
@@ -98,13 +101,13 @@ def main():
         model = model_from_json(json_model)
         model.load_weights(p_model_file.replace('.json', '.h5'))
 
-        model.compile(loss='binary_crossentropy',
-                    optimizer='rmsprop')
+        model.compile(loss='categorical_crossentropy',
+                    optimizer='adam')
 
     # Get results obtained from model
     y_data_prediction = model.predict(x_data)
 
-    y_prediction = [1 if x > 0.5 else 0 for x in y_data_prediction]
+    y_prediction = np.argmax(y_data_prediction, axis=1)
 
     acc_score = accuracy_score(y_dataset, y_prediction)
     f1_data_score = f1_score(y_dataset, y_prediction)

+ 163 - 0
run_all.sh

@@ -0,0 +1,163 @@
+#!/bin/bash
+
+erased=$1
+
+# file which contains model names we want to use for simulation
+file_path="results/models_comparisons.csv"
+
+if [ "${erased}" == "Y" ]; then
+    echo "Previous data file erased..."
+    rm ${file_path}
+    mkdir -p results
+    touch ${file_path}
+
+    # add of header
+    echo 'model_name; global_train_size; global_test_size; filtered_train_size; filtered_test_size; f1_train; f1_test; recall_train; recall_test; precision_train; precision_test; acc_train; acc_test; roc_auc_train; roc_auc_test;' >> ${file_path}
+fi
+
+renderer="all"
+all_scenes="A,B,C,D,E,F,G,H,I"
+scenes="A,B,D,G,H,I"
+test_scene="E"
+
+
+min_diff_metric="min_diff_filter"
+svd_metric="svd_reconstruction"
+ipca_metric="ipca_reconstruction"
+fast_ica_metric="fast_ica_reconstruction"
+
+all_features="${svd_metric},${ipca_metric},${fast_ica_metric}"
+
+for window in {"3","5","7","9","11"}; do
+     echo python generate/generate_reconstructed_data.py --features ${min_diff_metric} --params ${window},${window} --size 100,100 --scenes ${all_scenes}
+done
+
+# First compute svd_reconstruction
+
+for begin in {80,85,90,95,100,105,110}; do
+  for end in {150,160,170,180,190,200}; do
+    
+    echo python generate/generate_reconstructed_data.py --features ${svd_metric} --params ${begin},${end} --size 100,100 --scenes ${all_scenes}
+  
+
+    OUTPUT_DATA_FILE_TEST="${svd_metric}_scene_E_nb_zones_16_B${begin}_E${end}_test"
+    # echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${svd_metric} --scenes ${test_scene} --params ${begin},${end} --nb_zones 16 --random 1 --size 100,100
+
+    for zone in {10,11,12}; do
+      for balancing in {0,1}; do
+      
+        OUTPUT_DATA_FILE="${svd_metric}_nb_zones_${zone}_B${begin}_E${end}_balancing${balancing}"
+
+        if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+            
+            echo "SVD model ${OUTPUT_DATA_FILE} already generated"
+        
+        else
+        
+            echo "Run computation for SVD model ${OUTPUT_DATA_FILE}"
+
+            # echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${svd_metric} --scenes ${scenes} --params ${begin},${end} --nb_zones ${zone} --random 1 --size 100,100     
+            
+            # echo python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} --balancing ${balancing}
+            # echo python prediction_model.py --data data/${OUTPUT_DATA_FILE_TEST}.train --model saved_models/${OUTPUT_DATA_FILE}.json
+        fi
+      done
+    done
+  done
+done
+
+
+# computation of ipca_reconstruction
+ipca_batch_size=55
+
+for component in {10,15,20,25,30,35,45,50}; do
+
+  echo python generate/generate_reconstructed_data.py --features ${ipca_metric} --params ${component},${ipca_batch_size} --size 100,100 --scenes ${all_scenes}
+  
+  OUTPUT_DATA_FILE_TEST="${ipca_metric}_scene_E_nb_zones_16_B${begin}_E${end}_test"
+  # echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${ipca_metric} --scenes ${test_scene} --params ${component},${ipca_batch_size} --nb_zones 16 --random 1 --size 100,100
+
+  for zone in {10,11,12}; do
+    for balancing in {0,1}; do
+        OUTPUT_DATA_FILE="${ipca_metric}_nb_zones_${zone}_N${component}_BS${ipca_batch_size}_balancing${balancing}"
+
+        if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+        
+            echo "IPCA model ${OUTPUT_DATA_FILE} already generated"
+        
+        else
+        
+            echo "Run computation for IPCA model ${OUTPUT_DATA_FILE}"
+
+            # echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${ipca_metric} --scenes ${scenes} --params ${component},${ipca_batch_size} --nb_zones ${zone} --random 1 --size 100,100
+            
+            # echo python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} --balancing ${balancing}
+            # echo python prediction_model.py --data data/${OUTPUT_DATA_FILE_TEST}.train --model saved_models/${OUTPUT_DATA_FILE}.json
+
+        fi
+    done 
+  done
+done
+
+
+# computation of fast_ica_reconstruction
+
+for component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
+
+  echo python generate/generate_reconstructed_data.py --features ${fast_ica_metric} --params ${component} --size 100,100 --scenes ${all_scenes}
+  
+  OUTPUT_DATA_FILE_TEST="${fast_ica_metric}_scene_E_nb_zones_16_B${begin}_E${end}_test"
+  # echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${fast_ica_metric} --scenes ${test_scene} --params ${component} --nb_zones 16 --random 1 --size 100,100
+
+  for zone in {10,11,12}; do
+
+    OUTPUT_DATA_FILE="${fast_ica_metric}_nb_zones_${zone}_N${component}"
+
+    if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+      
+      echo "Fast ICA model ${OUTPUT_DATA_FILE} already generated"
+    
+    else
+    
+      echo "Run computation for Fast ICA model ${OUTPUT_DATA_FILE}"
+
+      # echo python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --features ${fast_ica_metric} --scenes ${scenes} --params ${component} --nb_zones ${zone} --random 1 --size 100,100
+      
+      # echo python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} --balancing ${balancing}
+      # echo python prediction_model.py --data data/${OUTPUT_DATA_FILE_TEST}.train --model saved_models/${OUTPUT_DATA_FILE}.json
+    fi
+  done
+done
+
+# RUN LATER
+# compute using all transformation methods
+ipca_batch_size=55
+
+: '
+for begin in {80,85,90,95,100,105,110}; do
+  for end in {150,160,170,180,190,200}; do
+    for ipca_component in {10,15,20,25,30,35,45,50}; do
+      for fast_ica_component in {50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}; do
+        for zone in {10,11,12}; do
+          OUTPUT_DATA_FILE="${svd_metric}_B${begin}_E${end}_${ipca_metric}__N${ipca_component}_BS${ipca_batch_size}_${fast_ica_metric}_N${fast_ica_component}_nb_zones_${zone}"
+
+          if grep -xq "${OUTPUT_DATA_FILE}" "${file_path}"; then
+            
+            echo "Transformation combination model ${OUTPUT_DATA_FILE} already generated"
+          
+          else
+          
+            echo "Run computation for Transformation combination model ${OUTPUT_DATA_FILE}"
+
+            params="${begin}, ${end} :: ${ipca_component}, ${ipca_batch_size} :: ${fast_ica_component}"
+
+            python generate/generate_dataset.py --output data/${OUTPUT_DATA_FILE} --metric ${all_features} --renderer ${renderer} --scenes ${scenes} --params "${params}" --nb_zones ${zone} --random 1 --size 100,100
+            
+            python train_model.py --data data/${OUTPUT_DATA_FILE} --output ${OUTPUT_DATA_FILE} &
+          fi
+        done
+      done
+    done
+  done
+done
+'

+ 16 - 4
train_model.py

@@ -11,6 +11,7 @@ import keras
 from keras import backend as K
 from keras.callbacks import ModelCheckpoint
 from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
+from keras.utils import to_categorical
 
 # image processing imports
 import cv2
@@ -184,6 +185,10 @@ def main():
                 max_last_epoch = last_epoch
                 last_model_backup = backup
 
+        if last_model_backup is None:
+            print("Epochs asked is already computed. Nothing to do")
+            sys.exit(1)
+
         initial_epoch = max_last_epoch
         print("-------------------------------------------------")
         print("Previous backup model found",  last_model_backup, "with already", initial_epoch, "done...")
@@ -200,10 +205,14 @@ def main():
     y_data = np.concatenate([y_dataset_train.values, y_dataset_val.values])
     x_data = np.concatenate([x_data_train, x_data_val])
 
+    y_data_categorical = to_categorical(y_data)
+    #print(y_data_categorical)
+
     # validation split parameter will use the last `%` data, so here, data will really validate our model
-    model.fit(x_data, y_data, validation_split=validation_split, initial_epoch=initial_epoch, epochs=p_epochs, batch_size=p_batch_size, callbacks=callbacks_list)
+    model.fit(x_data, y_data_categorical, validation_split=validation_split, initial_epoch=initial_epoch, epochs=p_epochs, batch_size=p_batch_size, callbacks=callbacks_list)
 
-    score = model.evaluate(x_data_val, y_dataset_val, batch_size=p_batch_size)
+    y_dataset_val_categorical = to_categorical(y_dataset_val)
+    score = model.evaluate(x_data_val, y_dataset_val_categorical, batch_size=p_batch_size)
 
     print("Accuracy score on val dataset ", score)
 
@@ -224,8 +233,11 @@ def main():
     y_train_prediction = model.predict(x_data_train)
     y_val_prediction = model.predict(x_data_val)
 
-    y_train_prediction = [1 if x > 0.5 else 0 for x in y_train_prediction]
-    y_val_prediction = [1 if x > 0.5 else 0 for x in y_val_prediction]
+    # y_train_prediction = [1 if x > 0.5 else 0 for x in y_train_prediction]
+    # y_val_prediction = [1 if x > 0.5 else 0 for x in y_val_prediction]
+
+    y_train_prediction = np.argmax(y_train_prediction, axis=1)
+    y_val_prediction = np.argmax(y_val_prediction, axis=1)
 
     acc_train_score = accuracy_score(y_dataset_train, y_train_prediction)
     acc_val_score = accuracy_score(y_dataset_val, y_val_prediction)