
Add possibility to run opti solution

Jérôme BUISINE 4 years ago
commit 93cdadcec4

+ 138 - 0
prediction/predict_noisy_image_svd_filters.py

@@ -0,0 +1,138 @@
+# main imports
+import sys, os, argparse, json
+import numpy as np
+
+# models imports
+from keras.models import model_from_json
+from sklearn.externals import joblib
+
+# image processing imports
+from ipfml import processing, utils
+from PIL import Image
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from data_attributes import get_image_features
+
+# variables and parameters
+path                  = cfg.dataset_path
+min_max_ext           = cfg.min_max_filename_extension
+features_choices      = cfg.features_choices_labels
+normalization_choices = cfg.normalization_choices
+
+custom_min_max_folder = cfg.min_max_custom_folder
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Script which detects whether an image is noisy or not using a specific model")
+
+    parser.add_argument('--image', type=str, help='Image path')
+    parser.add_argument('--solution', type=str, help='Binary solution string specifying which filters to use')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--mode', type=str, help='Kind of normalization level desired', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if renormalization of data is used', default=False)
+
+    args = parser.parse_args()
+
+    p_img_file   = args.image
+    p_model_file = args.model
+    p_solution   = list(map(int, args.solution.split(' ')))
+    p_mode       = args.mode
+    p_feature    = args.feature
+    p_custom     = args.custom
+
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+
+    if kind_model == 'sklearn':
+        # load of model file
+        model = joblib.load(p_model_file)
+
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        metrics=['accuracy'])
+
+    # load image
+    img = Image.open(p_img_file)
+
+    data = get_image_features(p_feature, img)
+
+    # get indices of filters data to use (filters selection from solution)
+    indices = []
+
+    for index, value in enumerate(p_solution): 
+        if value == 1: 
+            indices.append(index*2) 
+            indices.append(index*2+1) 
+
+    # check if custom min max file is used
+    if p_custom:
+        
+        test_data = data[indices]
+        
+        if p_mode == 'svdne':
+
+            # set min_max_filename if custom use
+            min_max_file_path = custom_min_max_folder + '/' +  p_custom
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            test_data = utils.normalize_arr_with_range(test_data, min_val, max_val)
+
+        if p_mode == 'svdn':
+            test_data = utils.normalize_arr(test_data)
+
+    else:
+
+        # check mode to normalize data
+        if p_mode == 'svdne':
+
+            # set min_max_filename if custom use
+            min_max_file_path = path + '/' + p_feature + min_max_ext
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            l_values = utils.normalize_arr_with_range(data, min_val, max_val)
+
+        elif p_mode == 'svdn':
+            l_values = utils.normalize_arr(data)
+        else:
+            l_values = data
+
+        # select the filter columns from the (possibly normalized) values
+        test_data = l_values[indices]
+
+
+    # get prediction of model
+    if kind_model == 'sklearn':
+        prediction = model.predict([test_data])[0]
+
+    if kind_model == 'keras':
+        test_data = np.asarray(test_data).reshape(1, len(test_data), 1)
+        prediction = model.predict_classes([test_data])[0][0]
+
+    # output expected by other scripts
+    print(prediction)
+
+if __name__== "__main__":
+    main()
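
Note (not part of the commit): the --solution argument is a space-separated binary string; for each bit set to 1, the script selects two consecutive columns of the feature vector (indices 2*i and 2*i+1). A minimal sketch of that mapping, using a hypothetical four-filter solution:

    solution = list(map(int, "1 0 1 1".split(' ')))
    indices = [j for i, v in enumerate(solution) if v == 1 for j in (i * 2, i * 2 + 1)]
    print(indices)  # [0, 1, 4, 5, 6, 7]

A hypothetical invocation (paths and values are placeholders) could then be:

    python prediction/predict_noisy_image_svd_filters.py --image /tmp/block.png --solution "1 0 1 1" --model saved_models/my_model.joblib --mode svdne --feature filters_statistics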

+ 0 - 2
prediction/predict_seuil_expe.py

@@ -209,8 +209,6 @@ def main():
         print("Scene " + str(id_scene + 1) + "/" + str(len(scenes)) + " Done..")
         print("------------------------")
 
-        time.sleep(1)
-
 
 if __name__== "__main__":
     main()

+ 0 - 2
prediction/predict_seuil_expe_maxwell.py

@@ -211,8 +211,6 @@ def main():
             print("Scene " + str(id_scene + 1) + "/" + str(len(scenes)) + " Done..")
             print("------------------------")
 
-            time.sleep(10)
-
 
 if __name__== "__main__":
     main()

+ 0 - 1
prediction/predict_seuil_expe_maxwell_curve.py

@@ -168,7 +168,6 @@ def main():
             print("------------------------")
 
             print("Model predictions are saved into %s" % map_filename)
-            time.sleep(10)
 
 
 if __name__== "__main__":

+ 174 - 0
prediction/predict_seuil_expe_maxwell_curve_filters.py

@@ -0,0 +1,174 @@
+# main imports
+import sys, os, argparse
+import subprocess
+import time
+import numpy as np
+
+# image processing imports
+from ipfml.processing import segmentation
+from PIL import Image
+
+# models imports
+from sklearn.externals import joblib
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+
+
+# variables and parameters
+scenes_path               = cfg.dataset_path
+min_max_filename          = cfg.min_max_filename_extension
+threshold_expe_filename   = cfg.seuil_expe_filename
+
+threshold_map_folder      = cfg.threshold_map_folder
+threshold_map_file_prefix = cfg.threshold_map_folder + "_"
+
+zones                     = cfg.zones_indices
+maxwell_scenes            = cfg.maxwell_scenes_names
+normalization_choices     = cfg.normalization_choices
+features_choices          = cfg.features_choices_labels
+
+simulation_curves_zones   = "simulation_curves_zones_"
+tmp_filename              = '/tmp/__model__img_to_predict.png'
+
+current_dirpath = os.getcwd()
+
+
+def main():
+
+    p_custom = False
+        
+    parser = argparse.ArgumentParser(description="Script which predicts threshold using a specific model")
+
+    parser.add_argument('--solution', type=str, help='Binary solution string specifying which filters to use')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--mode', type=str, help='Kind of normalization level desired', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    #parser.add_argument('--limit_detection', type=int, help='Specify number of same prediction to stop threshold prediction', default=2)
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if renormalization of data is used', default=False)
+
+    args = parser.parse_args()
+
+    # keep p_solution as a string (it is forwarded to the prediction script)
+    p_solution   = args.solution
+    p_model_file = args.model
+    p_mode       = args.mode
+    p_feature    = args.feature
+    #p_limit      = args.limit
+    p_custom     = args.custom
+
+    scenes = os.listdir(scenes_path)
+    scenes = [s for s in scenes if s in maxwell_scenes]
+
+    print(scenes)
+
+    # iterate over each scene
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take into consideration maxwell scenes
+        if folder_scene in maxwell_scenes:
+
+            print(folder_scene)
+
+            scene_path = os.path.join(scenes_path, folder_scene)
+
+            threshold_expes = []
+            threshold_expes_found = []
+            block_predictions_str = []
+
+            # get all images of folder
+            scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+
+            start_quality_image = dt.get_scene_image_quality(scene_images[0])
+            end_quality_image   = dt.get_scene_image_quality(scene_images[-1])
+            # using the first two images, find the quality step used
+            quality_step_image  = dt.get_scene_image_quality(scene_images[1]) - start_quality_image
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zone_folder = "zone"+index_str
+
+                threshold_path_file = os.path.join(os.path.join(scene_path, zone_folder), threshold_expe_filename)
+
+                with open(threshold_path_file) as f:
+                    threshold = int(f.readline())
+                    threshold_expes.append(threshold)
+
+                    # Initialize default data to get detected model threshold found
+                    threshold_expes_found.append(end_quality_image) # by default use max
+
+                block_predictions_str.append(index_str + ";" + p_model_file + ";" + str(threshold) + ";" + str(start_quality_image) + ";" + str(quality_step_image))
+
+
+            # for each image of the scene
+            for img_path in scene_images:
+
+                current_img = Image.open(img_path)
+                current_quality_image = dt.get_scene_image_quality(img_path)
+
+                img_blocks = segmentation.divide_in_blocks(current_img, (200, 200))
+
+                for id_block, block in enumerate(img_blocks):
+
+                    # check only if necessary for this scene (not already detected)
+                    #if not threshold_expes_detected[id_block]:
+
+                        tmp_file_path = tmp_filename.replace('__model__',  p_model_file.split('/')[-1].replace('.joblib', '_'))
+                        block.save(tmp_file_path)
+
+                        python_cmd_line = "python prediction/predict_noisy_image_svd_filters.py --image {0} --solution '{1}' --model {2} --mode {3} --feature {4}"
+                        python_cmd = python_cmd_line.format(tmp_file_path, p_solution, p_model_file, p_mode, p_feature) 
+
+                        # specify use of custom file for min max normalization
+                        if p_custom:
+                            python_cmd = python_cmd + ' --custom ' + p_custom
+
+                        ## call command ##
+                        p = subprocess.Popen(python_cmd, stdout=subprocess.PIPE, shell=True)
+
+                        (output, err) = p.communicate()
+
+                        ## Wait for result ##
+                        p_status = p.wait()
+
+                        prediction = int(output)
+
+                        # append this prediction to the block's line of results
+                        block_predictions_str[id_block] = block_predictions_str[id_block] + ";" + str(prediction)
+
+                        print(str(id_block) + " : " + str(current_quality_image) + "/" + str(threshold_expes[id_block]) + " => " + str(prediction))
+
+                print("------------------------")
+                print("Scene " + str(id_scene + 1) + "/" + str(len(scenes)))
+                print("------------------------")
+
+            # end of scene => display of results
+
+            # construct path using model name for saving threshold map folder
+            model_threshold_path = os.path.join(threshold_map_folder, p_model_file.split('/')[-1].replace('.joblib', ''))
+
+            # create threshold model path if necessary
+            if not os.path.exists(model_threshold_path):
+                os.makedirs(model_threshold_path)
+
+            map_filename = os.path.join(model_threshold_path, simulation_curves_zones + folder_scene)
+            f_map = open(map_filename, 'w')
+
+            for line in block_predictions_str:
+                f_map.write(line + '\n')
+            f_map.close()
+
+            print("Scene " + str(id_scene + 1) + "/" + str(len(maxwell_scenes)) + " Done..")
+            print("------------------------")
+
+            print("Model predictions are saved into %s" % map_filename)
+
+
+if __name__== "__main__":
+    main()
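
Note (not part of the commit): each line written to the simulation_curves_zones_<scene> file starts with the zone metadata and is then extended with one prediction per image, i.e. zone_index;model_file;expe_threshold;start_quality;quality_step;pred_1;pred_2;... A hedged sketch for reading such a line back, assuming that format and a hypothetical output path:

    with open('threshold_map/my_model/simulation_curves_zones_SceneA') as f:  # hypothetical path
        for line in f:
            fields = line.strip().split(';')
            zone, model_name, threshold = fields[0], fields[1], int(fields[2])
            start_quality, step = int(fields[3]), int(fields[4])
            predictions = list(map(int, fields[5:]))
            # the image quality associated with prediction k is start_quality + k * step
            print(zone, threshold, predictions[:5])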

+ 46 - 0
simulation/run_maxwell_simulation_filters_statistics_all.sh

@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models_all.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+size="26"
+
+feature="filters_statistics"
+
+for nb_zones in {4,6,8,10,12}; do
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+            for data in {"all","center","split"}; do
+
+                FILENAME="data/${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+                MODEL_NAME="${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+                CUSTOM_MIN_MAX_FILENAME="N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
+
+                # only compute if necessary (in case the server goes down)
+                if grep -q "${FILENAME}" "${simulate_models}"; then
+
+                    echo "Found ${FILENAME}"
+                    line=$(grep -n ${FILENAME} ${simulate_models})
+
+                    # extract solution
+                    IFS=\; read -a fields <<<"$line"
+
+                    SOLUTION=${fields[1]}
+
+                    echo "Run simulation for ${MODEL_NAME}... with ${SOLUTION}"
+
+                    # Use of already generated model
+                    python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "0,${size}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python train_model_filters.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model} --solution "${SOLUTION}"
+
+                    python prediction/predict_seuil_expe_maxwell_curve_filters.py --solution "${SOLUTION}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                    #python others/save_model_result_in_md_maxwell.py --solution "${SOLUTION}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
+                fi
+            done
+        done
+    done
+done
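
Note (not part of the commit): the script assumes each line of simulate_models_all.csv is semicolon-separated, with the data filename first and the binary solution string second. A hypothetical entry (names and bit count are placeholders) would look like:

    data/svm_model_N26_B0_E26_nb_zones_4_filters_statistics_svd_all;1 0 1 1 0 1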

+ 161 - 0
train_model_filters.py

@@ -0,0 +1,161 @@
+# main imports
+import numpy as np
+import pandas as pd
+import sys, os, argparse
+
+# models imports
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
+
+import sklearn.svm as svm
+from sklearn.utils import shuffle
+from sklearn.externals import joblib
+from sklearn.metrics import accuracy_score, f1_score
+from sklearn.model_selection import cross_val_score
+
+# modules and config imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+import models as mdl
+
+# variables and parameters
+saved_models_folder = cfg.saved_models_folder
+models_list         = cfg.models_names_list
+
+current_dirpath     = os.getcwd()
+output_model_folder = os.path.join(current_dirpath, saved_models_folder)
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Train SKLearn model and save it into .joblib file")
+
+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)')
+    parser.add_argument('--output', type=str, help='output file name desired for model (without .joblib extension)')
+    parser.add_argument('--choice', type=str, help='model choice from list of choices', choices=models_list)
+    parser.add_argument('--solution', type=str, help='Data of solution to specify filters to use')
+
+    args = parser.parse_args()
+
+    p_data_file = args.data
+    p_output    = args.output
+    p_choice    = args.choice
+    p_solution  = list(map(int, args.solution.split(' ')))
+
+    if not os.path.exists(output_model_folder):
+        os.makedirs(output_model_folder)
+
+    ########################
+    # 1. Get and prepare data
+    ########################
+    dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
+    dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
+
+    # default first shuffle of data
+    dataset_train = shuffle(dataset_train)
+    dataset_test = shuffle(dataset_test)
+
+    # get dataset with an equal number of class occurrences
+    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
+    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
+    nb_noisy_train = len(noisy_df_train.index)
+
+    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
+    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
+    nb_noisy_test = len(noisy_df_test.index)
+
+    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+
+    # shuffle data another time
+    final_df_train = shuffle(final_df_train)
+    final_df_test = shuffle(final_df_test)
+
+    final_df_train_size = len(final_df_train.index)
+    final_df_test_size = len(final_df_test.index)
+
+    # use of the whole data set for training
+    x_dataset_train = final_df_train.ix[:,1:]
+    x_dataset_test = final_df_test.ix[:,1:]
+
+    y_dataset_train = final_df_train.ix[:,0]
+    y_dataset_test = final_df_test.ix[:,0]
+
+    # get indices of filters data to use (filters selection from solution)
+    indices = []
+
+    print(p_solution)
+    for index, value in enumerate(p_solution): 
+        if value == 1: 
+            indices.append(index*2) 
+            indices.append(index*2+1)
+
+    print(indices)
+
+    x_dataset_train = x_dataset_train.iloc[:, indices]
+    x_dataset_test =  x_dataset_test.iloc[:, indices]
+
+    #######################
+    # 2. Construction of the model : Ensemble model structure
+    #######################
+
+    print("-------------------------------------------")
+    print("Train dataset size: ", final_df_train_size)
+    model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
+
+    #######################
+    # 3. Fit model : use of cross validation to fit model
+    #######################
+    val_scores = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
+    print("Accuracy: %0.2f (+/- %0.2f)" % (val_scores.mean(), val_scores.std() * 2))
+
+    ######################
+    # 4. Test : Validation and test dataset from .test dataset
+    ######################
+
+    # use one third of the training set size for the validation set and the same amount for the test set
+    val_set_size = int(final_df_train_size/3)
+    test_set_size = val_set_size
+
+    total_validation_size = val_set_size + test_set_size
+
+    if final_df_test_size > total_validation_size:
+        x_dataset_test = x_dataset_test[0:total_validation_size]
+        y_dataset_test = y_dataset_test[0:total_validation_size]
+
+    X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
+
+    y_test_model = model.predict(X_test)
+    y_val_model = model.predict(X_val)
+
+    val_accuracy = accuracy_score(y_val, y_val_model)
+    test_accuracy = accuracy_score(y_test, y_test_model)
+
+    val_f1 = f1_score(y_val, y_val_model)
+    test_f1 = f1_score(y_test, y_test_model)
+
+    ###################
+    # 5. Output : Print and write all information in csv
+    ###################
+
+    print("Validation dataset size ", val_set_size)
+    print("Validation: ", val_accuracy)
+    print("Validation F1: ", val_f1)
+    print("Test dataset size ", test_set_size)
+    print("Test: ", val_accuracy)
+    print("Test F1: ", test_f1)
+
+    ##################
+    # 6. Save model : create path if not exists
+    ##################
+
+    if not os.path.exists(saved_models_folder):
+        os.makedirs(saved_models_folder)
+
+    joblib.dump(model, output_model_folder + '/' + p_output + '.joblib')
+
+if __name__== "__main__":
+    main()
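
Note (not part of the commit): a hypothetical training invocation, assuming a dataset prefix generated beforehand (so that <prefix>.train and <prefix>.test exist) and a model choice listed in cfg.models_names_list:

    python train_model_filters.py --data data/my_dataset --output my_model --choice svm_model --solution "1 0 1 1 0 1"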