Browse source

Update of keras stats models

Jérôme BUISINE 5 years ago
Parent commit 4637cf865e

+ 1 - 2
.gitignore

@@ -14,8 +14,7 @@ fichiersSVD_light/*_min_max_values
 __pycache__

 # by default avoid model files and png files
-*.h5
+saved_models/*.h5
 *.png
-!saved_models/*.h5
 !saved_models/*.png
 .vscode

+ 2 - 2
deep_network_keras_svd.py

@@ -190,7 +190,7 @@ def main():

     model = generate_model(input_shape)
     model.summary()
-    model = KerasClassifier(build_fn=model, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, verbose=0)
+    #model = KerasClassifier(build_fn=model, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch, verbose=0)

     #######################
     # 3. Fit model : use of cross validation to fit model
@@ -200,7 +200,7 @@ def main():
     x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), p_vector_size, 1)
     x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), p_vector_size, 1)

-    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20)
+    model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)

     score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size)


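Note: the change above drops the scikit-learn KerasClassifier wrapper and trains the compiled Keras model directly, so epochs and batch_size move from the wrapper's constructor into fit(). A minimal sketch of the resulting pattern, with stand-in data and illustrative layer sizes (not the project's actual generate_model() architecture):

    import numpy as np
    from keras.models import Sequential
    from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

    # stand-in 1D feature vectors shaped (n_samples, vector_size, 1)
    n_samples, vector_size = 100, 20
    x_train = np.random.rand(n_samples, vector_size, 1)
    y_train = np.random.randint(0, 2, n_samples)

    # illustrative binary classifier; the real model comes from generate_model()
    model = Sequential()
    model.add(Conv1D(8, 3, activation='relu', input_shape=(vector_size, 1)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # epochs/batch_size are now passed to fit() rather than to KerasClassifier(...)
    model.fit(x_train, y_train, validation_split=0.20, epochs=5, batch_size=16)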
+ 43 - 4
prediction_scene.py

@@ -9,6 +9,7 @@ from keras.layers import Conv1D, MaxPooling1D
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
 from keras import backend as K
 from keras.models import model_from_json
+from keras.wrappers.scikit_learn import KerasClassifier

 import sys, os, getopt
 import json
@@ -44,6 +45,14 @@ def main():
         else:
             assert False, "unhandled option"

+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+        model_ext = '.joblib'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+        model_ext = '.json'
+
     if not os.path.exists(output_model_folder):
         os.makedirs(output_model_folder)

@@ -61,11 +70,41 @@ def main():
     y_not_noisy_dataset = not_noisy_dataset.ix[:, 0]
     x_not_noisy_dataset = not_noisy_dataset.ix[:, 1:]

-    model = joblib.load(p_model_file)
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
+                  metrics=['accuracy'])
+
+        _, vector_size = np.array(x_dataset).shape
+
+        # reshape all data
+        x_dataset = np.array(x_dataset).reshape(len(x_dataset), vector_size, 1)
+        x_noisy_dataset = np.array(x_noisy_dataset).reshape(len(x_noisy_dataset), vector_size, 1)
+        x_not_noisy_dataset = np.array(x_not_noisy_dataset).reshape(len(x_not_noisy_dataset), vector_size, 1)
+
+
+    if kind_model == 'sklearn':
+        model = joblib.load(p_model_file)
+
+    if kind_model == 'keras':
+        y_pred = model.predict_classes(x_dataset)
+        y_noisy_pred = model.predict_classes(x_noisy_dataset)
+        y_not_noisy_pred = model.predict_classes(x_not_noisy_dataset)
+
+    if kind_model == 'sklearn':
+        y_pred = model.predict(x_dataset)
+        y_noisy_pred = model.predict(x_noisy_dataset)
+        y_not_noisy_pred = model.predict(x_not_noisy_dataset)
+
+    print("Prediction done")

-    y_pred = model.predict(x_dataset)
-    y_noisy_pred = model.predict(x_noisy_dataset)
-    y_not_noisy_pred = model.predict(x_not_noisy_dataset)
+    with open('test_result.txt', 'w') as f:
+        f.write(str(y_pred))

     accuracy_global = accuracy_score(y_dataset, y_pred)
     accuracy_noisy = accuracy_score(y_noisy_dataset, y_noisy_pred)
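For reference, the Keras branch above rebuilds the network from its JSON dump, attaches the weights from the matching .h5 file, and recompiles before predicting. A standalone sketch of that load/predict pattern, assuming (as the code above implies) the architecture was stored as a JSON-encoded string next to its weights file; the path and input shape below are placeholders:

    import json
    import numpy as np
    from keras.models import model_from_json

    def load_keras_model(json_path):
        # the script stores model.to_json() as a JSON string, so json.load()
        # yields the architecture string that model_from_json() expects
        with open(json_path, 'r') as f:
            model = model_from_json(json.load(f))
        model.load_weights(json_path.replace('.json', '.h5'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    model = load_keras_model('saved_models/example_model.json')  # placeholder path
    x = np.random.rand(10, 20, 1)         # stand-in batch shaped (samples, vector_size, 1)
    labels = model.predict_classes(x)     # 0/1 labels (Sequential API in old Keras)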

+ 57 - 15
save_model_result_in_md_maxwell.py

@@ -8,6 +8,7 @@ from sklearn.model_selection import train_test_split
 from keras.models import Sequential
 from keras.layers import Conv1D, MaxPooling1D
 from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
+from keras.wrappers.scikit_learn import KerasClassifier
 from keras import backend as K
 from keras.models import model_from_json

@@ -39,6 +40,7 @@ current_dirpath = os.getcwd()
 def main():

     kind_model = 'keras'
+    model_ext = ''

     if len(sys.argv) <= 1:
         print('Run with default parameters...')
@@ -68,9 +70,7 @@ def main():
         else:
             assert False, "unhandled option"

-
     # call model and get global result in scenes
-
     begin, end = p_interval

     bash_cmd = "bash testModelByScene_maxwell.sh '" + str(begin) + "' '" + str(end) + "' '" + p_model_file + "' '" + p_mode + "' '" + p_metric + "'"
@@ -92,11 +92,13 @@ def main():

     if '.joblib' in p_model_file:
         kind_model = 'sklearn'
-        md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace('.joblib', '.md'))
+        model_ext = '.joblib'

     if '.json' in p_model_file:
         kind_model = 'keras'
-        md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace('.json', '.md'))
+        model_ext = '.json'
+
+    md_model_path = os.path.join(markdowns_folder, p_model_file.split('/')[-1].replace(model_ext, '.md'))

     with open(md_model_path, 'w') as f:
         f.write(output.decode("utf-8"))
@@ -125,9 +127,8 @@ def main():

         f.close()

-
     # Keep model information to compare
-    current_model_name = p_model_file.split('/')[-1].replace('.json', '')
+    current_model_name = p_model_file.split('/')[-1].replace(model_ext, '')

     # Prepare writing in .csv file
     output_final_file_path = os.path.join(markdowns_folder, final_csv_model_comparisons)
@@ -183,14 +184,35 @@ def main():
     # 2. Getting model
     #######################

-    model = joblib.load(p_model_file)
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        metrics=['accuracy'])
+
+        # reshape all input data
+        x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), end, 1)
+        x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), end, 1)
+
+
+    if kind_model == 'sklearn':
+        model = joblib.load(p_model_file)

     #######################
     # 3. Fit model : use of cross validation to fit model
     #######################
-    model.fit(x_dataset_train, y_dataset_train)

-    val_scores = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
+    if kind_model == 'keras':
+        model.fit(x_dataset_train, y_dataset_train, validation_split=0.20, epochs=cfg.keras_epochs, batch_size=cfg.keras_batch)
+
+    if kind_model == 'sklearn':
+        model.fit(x_dataset_train, y_dataset_train)
+
+        train_accuracy = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5).mean()

     ######################
     # 4. Test : Validation and test dataset from .test dataset
@@ -208,14 +230,23 @@ def main():

     X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)

-    y_test_model = model.predict(X_test)
-    y_val_model = model.predict(X_val)
+    if kind_model == 'keras':
+        y_test_model = model.predict_classes(X_test)
+        y_val_model = model.predict_classes(X_val)
+
+        y_train_model = model.predict_classes(x_dataset_train)
+
+        train_accuracy = accuracy_score(y_dataset_train, y_train_model)
+
+    if kind_model == 'sklearn':
+        y_test_model = model.predict(X_test)
+        y_val_model = model.predict(X_val)
+
+        y_train_model = model.predict(x_dataset_train)

     val_accuracy = accuracy_score(y_val, y_val_model)
     test_accuracy = accuracy_score(y_test, y_test_model)

-    y_train_model = model.predict(x_dataset_train)
-
     train_f1 = f1_score(y_dataset_train, y_train_model)
     train_recall = recall_score(y_dataset_train, y_train_model)
     train_roc_auc = roc_auc_score(y_dataset_train, y_train_model)
@@ -232,7 +263,18 @@ def main():
     all_x_data = pd.concat([x_dataset_train, X_test, X_val])
     all_y_data = pd.concat([y_dataset_train, y_test, y_val])

-    all_y_model = model.predict(all_x_data)
+    if kind_model == 'keras':
+        # stats of all dataset
+        all_x_data = pd.concat([pd.DataFrame.from_records(x_dataset_train), X_test, X_val])
+        all_y_data = pd.concat([y_dataset_train, y_test, y_val])
+        all_y_model = model.predict_classes(all_x_data)
+
+    if kind_model == 'sklearn':
+        # stats of all dataset
+        all_x_data = pd.concat([x_dataset_train, X_test, X_val])
+        all_y_data = pd.concat([y_dataset_train, y_test, y_val])
+        all_y_model = model.predict(all_x_data)
+
     all_accuracy = accuracy_score(all_y_data, all_y_model)
     all_f1_score = f1_score(all_y_data, all_y_model)
     all_recall_score = recall_score(all_y_data, all_y_model)
@@ -250,7 +292,7 @@ def main():
     model_scores.append(test_set_size / total_samples)

     # add of scores
-    #model_scores.append(val_scores.mean())
+    model_scores.append(train_accuracy)
     model_scores.append(val_accuracy)
     model_scores.append(test_accuracy)
     model_scores.append(all_accuracy)
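The scripts above repeat the same branch several times (predict_classes for the Keras model, predict for the scikit-learn one). A small helper like the hypothetical predict_labels below sketches one way to factor that out, assuming a binary Sequential model on the Keras side:

    def predict_labels(model, x, kind_model):
        # hypothetical helper: an old-Keras Sequential returns probabilities
        # from predict(), so class labels come from predict_classes();
        # scikit-learn estimators return labels from predict() directly
        if kind_model == 'keras':
            return model.predict_classes(x)
        return model.predict(x)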

BIN saved_models/deep_keras_N20_B0_E20_nb_zones_4_sub_blocks_stats_svd.h5

BIN saved_models/deep_keras_N20_B0_E20_nb_zones_4_sub_blocks_stats_svdn.h5