
Add specific output file comparisons

Jerome Buisine, 5 years ago
Parent commit: ceb944aeb2
2 files changed with 63 additions and 0 deletions
  1. runAll_maxwell.sh (+9 -0)
  2. save_model_result_in_md_maxwell.py (+54 -0)

runAll_maxwell.sh (+9 -0)

@@ -1,5 +1,14 @@
 #!/bin/bash
 
+# remove any previous "models_info/models_comparisons.csv" file and write a new header
+file_path='models_info/models_comparisons.csv'
+rm -f ${file_path}
+mkdir -p models_info
+touch ${file_path}
+
+# write the CSV header
+echo 'model_name; vector_size; start; end; nb_zones; metric; mode; train; test; global' >> ${file_path}
+
 for size in {"4","8","16","26","32","40"}; do
 
     for metric in {"lab","mscn","low_bits_4","low_bits_2"}; do

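The header written above defines the schema that save_model_result_in_md_maxwell.py (diffed below) appends to; since the Python script opens the file in append mode, the CSV must already exist with its header when it runs. As a minimal sketch, not part of this commit (path and column names are taken from the echo line above), a guard like the following could recreate the header if the file is missing:

# sketch: recreate models_info/models_comparisons.csv with its header if it is missing
import os

expected_header = 'model_name; vector_size; start; end; nb_zones; metric; mode; train; test; global'
csv_path = os.path.join('models_info', 'models_comparisons.csv')

if not os.path.isfile(csv_path):
    os.makedirs('models_info', exist_ok=True)
    with open(csv_path, 'w') as f:
        f.write(expected_header + '\n')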
save_model_result_in_md_maxwell.py (+54 -0)

@@ -1,6 +1,8 @@
 from sklearn.externals import joblib
+from sklearn.metrics import accuracy_score
 
 import numpy as np
+import pandas as pd
 
 from ipfml import image_processing
 from PIL import Image
@@ -15,6 +17,8 @@ threshold_map_folder = "threshold_map"
 threshold_map_file_prefix = "treshold_map_"
 
 markdowns_folder = "models_info"
+final_csv_model_comparisons = "models_comparisons.csv"
+models_name = ["svm_model","ensemble_model","ensemble_model_v2"]
 
 zones = np.arange(16)
 
@@ -96,6 +100,56 @@ def main():
                         f.write(line)
 
         f.close()
+    
+    # Keep model information to compare
+    current_model_name = p_model_file.split('/')[-1].replace('.joblib', '')
+
+    output_final_file_path = os.path.join(markdowns_folder, final_csv_model_comparisons)
+    output_final_file = open(output_final_file_path, "a")
+
+    print(current_model_name)
+    # reconstruct data filename 
+    for name in models_name:
+        if name in current_model_name:
+            current_data_file_path = os.path.join('data', current_model_name.replace(name, 'data_maxwell'))
+    
+    data_filenames = [current_data_file_path + '.train', current_data_file_path + '.test', 'all']
+
+    accuracy_scores = []
+
+    # iterate over each data file (train, test, then both combined)
+    for data_file in data_filenames:
+
+        if data_file == 'all':
+
+            dataset_train = pd.read_csv(data_filenames[0], header=None, sep=";")
+            dataset_test = pd.read_csv(data_filenames[1], header=None, sep=";")
+        
+            dataset = pd.concat([dataset_train, dataset_test])
+        else:
+            dataset = pd.read_csv(data_file, header=None, sep=";")
+
+        y_dataset = dataset.iloc[:,0]
+        x_dataset = dataset.iloc[:,1:]
+
+        model = joblib.load(p_model_file)
+
+        y_pred = model.predict(x_dataset)   
+
+        # store the obtained accuracy score for this dataset
+        accuracy_scores.append(accuracy_score(y_dataset, y_pred))
+
+    # TODO : improve...
+    # check if it's always the case...
+    nb_zones = data_filenames[0].split('_')[7]
+
+    final_file_line = current_model_name + '; ' + str(end - begin) + '; ' + str(begin) + '; ' + str(end) + '; ' + str(nb_zones) + '; ' + p_metric + '; ' + p_mode
+    
+    for s in accuracy_scores:
+        final_file_line += '; ' + str(s)
+
+    output_final_file.write(final_file_line + '\n')
+
 
 if __name__== "__main__":
     main()
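Once a few models have been evaluated, the rows appended to models_info/models_comparisons.csv can be compared directly. A minimal reading sketch, not part of this commit (it assumes pandas and the '; '-separated format written above):

# sketch: load the comparison file and rank models by the 'global' accuracy column
import pandas as pd

df = pd.read_csv('models_info/models_comparisons.csv', sep=';', skipinitialspace=True)
print(df.sort_values('global', ascending=False)[['model_name', 'train', 'test', 'global']])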