Browse Source

Merge branch 'release/v0.1.7'

Jérôme BUISINE 10 months ago
parent
commit
95654eed38

+ 138 - 0
prediction/predict_noisy_image_svd_filters.py

@@ -0,0 +1,138 @@
+# main imports
+import sys, os, argparse, json
+import numpy as np
+
+# models imports
+from keras.models import model_from_json
+from sklearn.externals import joblib
+
+# image processing imports
+from ipfml import processing, utils
+from PIL import Image
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from data_attributes import get_image_features
+
+# variables and parameters
+# (module-level aliases of values defined in the project `custom_config` module)
+
+# prefix of the default min/max file path used by main() in 'svdne' mode
+path                  = cfg.dataset_path
+# suffix appended to the feature name to build the default min/max file name
+min_max_ext           = cfg.min_max_filename_extension
+# accepted values of the --feature command line option
+features_choices      = cfg.features_choices_labels
+# accepted values of the --mode command line option
+normalization_choices = cfg.normalization_choices
+
+# folder of the custom min/max files selected with the --custom option
+custom_min_max_folder = cfg.min_max_custom_folder
+
+def main():
+
+    # getting all params
+    parser = argparse.ArgumentParser(description="Script which detects if an image is noisy or not using specific model")
+
+    parser.add_argument('--image', type=str, help='Image path')
+    parser.add_argument('--solution', type=str, help='Data of solution to specify filters to use')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    p_img_file   = args.image
+    p_model_file = args.model
+    p_solution   = list(map(int, args.solution.split(' ')))
+    p_mode       = args.mode
+    p_feature    = args.feature
+    p_custom     = args.custom
+
+    if '.joblib' in p_model_file:
+        kind_model = 'sklearn'
+
+    if '.json' in p_model_file:
+        kind_model = 'keras'
+
+    if kind_model == 'sklearn':
+        # load of model file
+        model = joblib.load(p_model_file)
+
+    if kind_model == 'keras':
+        with open(p_model_file, 'r') as f:
+            json_model = json.load(f)
+            model = model_from_json(json_model)
+            model.load_weights(p_model_file.replace('.json', '.h5'))
+
+            model.compile(loss='binary_crossentropy',
+                        optimizer='adam',
+                        features=['accuracy'])
+
+    # load image
+    img = Image.open(p_img_file)
+
+    data = get_image_features(p_feature, img)
+
+    # get indices of filters data to use (filters selection from solution)
+    indices = []
+
+    for index, value in enumerate(p_solution): 
+        if value == 1: 
+            indices.append(index*2) 
+            indices.append(index*2+1) 
+
+    # check if custom min max file is used
+    if p_custom:
+        
+        test_data = data[indices]
+        
+        if p_mode == 'svdne':
+
+            # set min_max_filename if custom use
+            min_max_file_path = custom_min_max_folder + '/' +  p_custom
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            test_data = utils.normalize_arr_with_range(test_data, min_val, max_val)
+
+        if p_mode == 'svdn':
+            test_data = utils.normalize_arr(test_data)
+
+    else:
+
+        # check mode to normalize data
+        if p_mode == 'svdne':
+
+            # set min_max_filename if custom use
+            min_max_file_path = path + '/' + p_feature + min_max_ext
+
+            # need to read min_max_file
+            file_path = os.path.join(os.path.dirname(__file__), min_max_file_path)
+            with open(file_path, 'r') as f:
+                min_val = float(f.readline().replace('\n', ''))
+                max_val = float(f.readline().replace('\n', ''))
+
+            l_values = utils.normalize_arr_with_range(data, min_val, max_val)
+
+        elif p_mode == 'svdn':
+            l_values = utils.normalize_arr(data)
+        else:
+            l_values = data
+
+        test_data = data[indices]
+
+
+    # get prediction of model
+    if kind_model == 'sklearn':
+        prediction = model.predict([test_data])[0]
+
+    if kind_model == 'keras':
+        test_data = np.asarray(test_data).reshape(1, len(test_data), 1)
+        prediction = model.predict_classes([test_data])[0][0]
+
+    # output expected from others scripts
+    print(prediction)
+
+if __name__== "__main__":
+    main()

+ 0 - 2
prediction/predict_seuil_expe.py

@@ -209,8 +209,6 @@ def main():
         print("Scene " + str(id_scene + 1) + "/" + str(len(scenes)) + " Done..")
         print("------------------------")
 
-        time.sleep(1)
-
 
 if __name__== "__main__":
     main()

+ 0 - 2
prediction/predict_seuil_expe_maxwell.py

@@ -211,8 +211,6 @@ def main():
             print("Scene " + str(id_scene + 1) + "/" + str(len(scenes)) + " Done..")
             print("------------------------")
 
-            time.sleep(10)
-
 
 if __name__== "__main__":
     main()

+ 0 - 1
prediction/predict_seuil_expe_maxwell_curve.py

@@ -168,7 +168,6 @@ def main():
             print("------------------------")
 
             print("Model predictions are saved into %s" % map_filename)
-            time.sleep(10)
 
 
 if __name__== "__main__":

+ 174 - 0
prediction/predict_seuil_expe_maxwell_curve_filters.py

@@ -0,0 +1,174 @@
+# main imports
+import sys, os, argparse
+import subprocess
+import time
+import numpy as np
+
+# image processing imports
+from ipfml.processing import segmentation
+from PIL import Image
+
+# models imports
+from sklearn.externals import joblib
+
+# modules imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+from modules.utils import data as dt
+
+
+# variables and parameters
+# (module-level aliases of values defined in the project `custom_config` module)
+
+# folder listed by main() to find the scene folders
+scenes_path               = cfg.dataset_path
+# NOTE(review): not referenced elsewhere in this script
+min_max_filename          = cfg.min_max_filename_extension
+# name of the file, inside each zone folder, whose first line is the expected threshold
+threshold_expe_filename   = cfg.seuil_expe_filename
+
+# root folder where the prediction files of each model are written
+threshold_map_folder      = cfg.threshold_map_folder
+# NOTE(review): not referenced elsewhere in this script
+threshold_map_file_prefix = cfg.threshold_map_folder + "_"
+
+# zone indices used to build the "zoneXX" folder names
+zones                     = cfg.zones_indices
+# names of the scene folders processed by this script
+maxwell_scenes            = cfg.maxwell_scenes_names
+# accepted values of the --mode command line option
+normalization_choices     = cfg.normalization_choices
+# accepted values of the --feature command line option
+features_choices          = cfg.features_choices_labels
+
+# prefix of the output file name (the scene folder name is appended)
+simulation_curves_zones   = "simulation_curves_zones_"
+# template of the temporary block image; '__model__' is replaced using the model file name
+tmp_filename              = '/tmp/__model__img_to_predict.png'
+
+# NOTE(review): not referenced elsewhere in this script
+current_dirpath = os.getcwd()
+
+
+def main():
+
+    p_custom = False
+        
+    parser = argparse.ArgumentParser(description="Script which predicts threshold using specific model")
+
+    parser.add_argument('--solution', type=str, help='Data of solution to specify filters to use')
+    parser.add_argument('--model', type=str, help='.joblib or .json file (sklearn or keras model)')
+    parser.add_argument('--mode', type=str, help='Kind of normalization level wished', choices=normalization_choices)
+    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
+    #parser.add_argument('--limit_detection', type=int, help='Specify number of same prediction to stop threshold prediction', default=2)
+    parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False)
+
+    args = parser.parse_args()
+
+    # keep p_interval as it is
+    p_solution   = args.solution
+    p_model_file = args.model
+    p_mode       = args.mode
+    p_feature    = args.feature
+    #p_limit      = args.limit
+    p_custom     = args.custom
+
+    scenes = os.listdir(scenes_path)
+    scenes = [s for s in scenes if s in maxwell_scenes]
+
+    print(scenes)
+
+    # go ahead each scenes
+    for id_scene, folder_scene in enumerate(scenes):
+
+        # only take in consideration maxwell scenes
+        if folder_scene in maxwell_scenes:
+
+            print(folder_scene)
+
+            scene_path = os.path.join(scenes_path, folder_scene)
+
+            threshold_expes = []
+            threshold_expes_found = []
+            block_predictions_str = []
+
+            # get all images of folder
+            scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
+
+            start_quality_image = dt.get_scene_image_quality(scene_images[0])
+            end_quality_image   = dt.get_scene_image_quality(scene_images[-1])
+            # using first two images find the step of quality used
+            quality_step_image  = dt.get_scene_image_quality(scene_images[1]) - start_quality_image
+
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
+                zone_folder = "zone"+index_str
+
+                threshold_path_file = os.path.join(os.path.join(scene_path, zone_folder), threshold_expe_filename)
+
+                with open(threshold_path_file) as f:
+                    threshold = int(f.readline())
+                    threshold_expes.append(threshold)
+
+                    # Initialize default data to get detected model threshold found
+                    threshold_expes_found.append(end_quality_image) # by default use max
+
+                block_predictions_str.append(index_str + ";" + p_model_file + ";" + str(threshold) + ";" + str(start_quality_image) + ";" + str(quality_step_image))
+
+
+            # for each images
+            for img_path in scene_images:
+
+                current_img = Image.open(img_path)
+                current_quality_image = dt.get_scene_image_quality(img_path)
+
+                img_blocks = segmentation.divide_in_blocks(current_img, (200, 200))
+
+                for id_block, block in enumerate(img_blocks):
+
+                    # check only if necessary for this scene (not already detected)
+                    #if not threshold_expes_detected[id_block]:
+
+                        tmp_file_path = tmp_filename.replace('__model__',  p_model_file.split('/')[-1].replace('.joblib', '_'))
+                        block.save(tmp_file_path)
+
+                        python_cmd_line = "python prediction/predict_noisy_image_svd_filters.py --image {0} --solution '{1}' --model {2} --mode {3} --feature {4}"
+                        python_cmd = python_cmd_line.format(tmp_file_path, p_solution, p_model_file, p_mode, p_feature) 
+
+                        # specify use of custom file for min max normalization
+                        if p_custom:
+                            python_cmd = python_cmd + ' --custom ' + p_custom
+
+                        ## call command ##
+                        p = subprocess.Popen(python_cmd, stdout=subprocess.PIPE, shell=True)
+
+                        (output, err) = p.communicate()
+
+                        ## Wait for result ##
+                        p_status = p.wait()
+
+                        prediction = int(output)
+
+                        # save here in specific file of block all the predictions done
+                        block_predictions_str[id_block] = block_predictions_str[id_block] + ";" + str(prediction)
+
+                        print(str(id_block) + " : " + str(current_quality_image) + "/" + str(threshold_expes[id_block]) + " => " + str(prediction))
+
+                print("------------------------")
+                print("Scene " + str(id_scene + 1) + "/" + str(len(scenes)))
+                print("------------------------")
+
+            # end of scene => display of results
+
+            # construct path using model name for saving threshold map folder
+            model_threshold_path = os.path.join(threshold_map_folder, p_model_file.split('/')[-1].replace('.joblib', ''))
+
+            # create threshold model path if necessary
+            if not os.path.exists(model_threshold_path):
+                os.makedirs(model_threshold_path)
+
+            map_filename = os.path.join(model_threshold_path, simulation_curves_zones + folder_scene)
+            f_map = open(map_filename, 'w')
+
+            for line in block_predictions_str:
+                f_map.write(line + '\n')
+            f_map.close()
+
+            print("Scene " + str(id_scene + 1) + "/" + str(len(maxwell_scenes)) + " Done..")
+            print("------------------------")
+
+            print("Model predictions are saved into %s" % map_filename)
+
+
+if __name__== "__main__":
+    main()

+ 46 - 0
simulation/run_maxwell_simulation_filters_statistics_all.sh

@@ -0,0 +1,46 @@
+#! bin/bash
+
+# file which contains model names we want to use for simulation
+simulate_models="simulate_models_all.csv"
+
+# selection of four scenes (only maxwell)
+scenes="A, D, G, H"
+
+size="26"
+
+feature="filters_statistics"
+
+for nb_zones in {4,6,8,10,12}; do
+    for mode in {"svd","svdn","svdne"}; do
+        for model in {"svm_model","ensemble_model","ensemble_model_v2"}; do
+            for data in {"all","center","split"}; do
+
+                FILENAME="data/${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+                MODEL_NAME="${model}_N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_${data}"
+                CUSTOM_MIN_MAX_FILENAME="N${size}_B0_E${size}_nb_zones_${nb_zones}_${feature}_${mode}_${data}_min_max"
+
+                # only compute if necessary (perhaps server will fall.. Just in case)
+                if grep -q "${FILENAME}" "${simulate_models}"; then
+
+                    echo "Found ${FILENAME}"
+                    line=$(grep -n ${FILENAME} ${simulate_models})
+
+                    # extract solution
+                    IFS=\; read -a fields <<<"$line"
+
+                    SOLUTION=${fields[1]}
+
+                    echo "Run simulation for ${MODEL_NAME}... with ${SOLUTION}"
+
+                    # Use of already generated model
+                    python generate/generate_data_model_random_${data}.py --output ${FILENAME} --interval "0,${size}" --kind ${mode} --feature ${feature} --scenes "${scenes}" --nb_zones "${nb_zones}" --percent 1 --renderer "maxwell" --step 10 --random 1 --custom ${CUSTOM_MIN_MAX_FILENAME}
+                    python train_model_filters.py --data ${FILENAME} --output ${MODEL_NAME} --choice ${model} --solution "${SOLUTION}"
+
+                    python prediction/predict_seuil_expe_maxwell_curve_filters.py --solution "${SOLUTION}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature} --custom ${CUSTOM_MIN_MAX_FILENAME}
+
+                    #python others/save_model_result_in_md_maxwell.py --solution "${SOLUTION}" --model "saved_models/${MODEL_NAME}.joblib" --mode "${mode}" --feature ${feature}
+                fi
+            done
+        done
+    done
+done

+ 161 - 0
train_model_filters.py

@@ -0,0 +1,161 @@
+# main imports
+import numpy as np
+import pandas as pd
+import sys, os, argparse
+
+# models imports
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
+
+import sklearn.svm as svm
+from sklearn.utils import shuffle
+from sklearn.externals import joblib
+from sklearn.metrics import accuracy_score, f1_score
+from sklearn.model_selection import cross_val_score
+
+# modules and config imports
+sys.path.insert(0, '') # trick to enable import of main folder module
+
+import custom_config as cfg
+import models as mdl
+
+# variables and parameters
+# (values taken from the project `custom_config` module)
+
+# name of the folder where trained models are stored
+saved_models_folder = cfg.saved_models_folder
+# accepted values of the --choice command line option
+models_list         = cfg.models_names_list
+
+# the models folder is resolved from the directory the script is launched from
+current_dirpath     = os.getcwd()
+output_model_folder = os.path.join(current_dirpath, saved_models_folder)
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Train SKLearn model and save it into .joblib file")
+
+    parser.add_argument('--data', type=str, help='dataset filename prefix (without .train and .test)')
+    parser.add_argument('--output', type=str, help='output file name desired for model (without .joblib extension)')
+    parser.add_argument('--choice', type=str, help='model choice from list of choices', choices=models_list)
+    parser.add_argument('--solution', type=str, help='Data of solution to specify filters to use')
+
+    args = parser.parse_args()
+
+    p_data_file = args.data
+    p_output    = args.output
+    p_choice    = args.choice
+    p_solution  = list(map(int, args.solution.split(' ')))
+
+    if not os.path.exists(output_model_folder):
+        os.makedirs(output_model_folder)
+
+    ########################
+    # 1. Get and prepare data
+    ########################
+    dataset_train = pd.read_csv(p_data_file + '.train', header=None, sep=";")
+    dataset_test = pd.read_csv(p_data_file + '.test', header=None, sep=";")
+
+    # default first shuffle of data
+    dataset_train = shuffle(dataset_train)
+    dataset_test = shuffle(dataset_test)
+
+    # get dataset with equal number of classes occurences
+    noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1]
+    not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0]
+    nb_noisy_train = len(noisy_df_train.index)
+
+    noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1]
+    not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0]
+    nb_noisy_test = len(noisy_df_test.index)
+
+    final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train])
+    final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test])
+
+    # shuffle data another time
+    final_df_train = shuffle(final_df_train)
+    final_df_test = shuffle(final_df_test)
+
+    final_df_train_size = len(final_df_train.index)
+    final_df_test_size = len(final_df_test.index)
+
+    # use of the whole data set for training
+    x_dataset_train = final_df_train.ix[:,1:]
+    x_dataset_test = final_df_test.ix[:,1:]
+
+    y_dataset_train = final_df_train.ix[:,0]
+    y_dataset_test = final_df_test.ix[:,0]
+
+    # get indices of filters data to use (filters selection from solution)
+    indices = []
+
+    print(p_solution)
+    for index, value in enumerate(p_solution): 
+        if value == 1: 
+            indices.append(index*2) 
+            indices.append(index*2+1)
+
+    print(indices)
+
+    x_dataset_train = x_dataset_train.iloc[:, indices]
+    x_dataset_test =  x_dataset_test.iloc[:, indices]
+
+    #######################
+    # 2. Construction of the model : Ensemble model structure
+    #######################
+
+    print("-------------------------------------------")
+    print("Train dataset size: ", final_df_train_size)
+    model = mdl.get_trained_model(p_choice, x_dataset_train, y_dataset_train)
+
+    #######################
+    # 3. Fit model : use of cross validation to fit model
+    #######################
+    val_scores = cross_val_score(model, x_dataset_train, y_dataset_train, cv=5)
+    print("Accuracy: %0.2f (+/- %0.2f)" % (val_scores.mean(), val_scores.std() * 2))
+
+    ######################
+    # 4. Test : Validation and test dataset from .test dataset
+    ######################
+
+    # we need to specify validation size to 20% of whole dataset
+    val_set_size = int(final_df_train_size/3)
+    test_set_size = val_set_size
+
+    total_validation_size = val_set_size + test_set_size
+
+    if final_df_test_size > total_validation_size:
+        x_dataset_test = x_dataset_test[0:total_validation_size]
+        y_dataset_test = y_dataset_test[0:total_validation_size]
+
+    X_test, X_val, y_test, y_val = train_test_split(x_dataset_test, y_dataset_test, test_size=0.5, random_state=1)
+
+    y_test_model = model.predict(X_test)
+    y_val_model = model.predict(X_val)
+
+    val_accuracy = accuracy_score(y_val, y_val_model)
+    test_accuracy = accuracy_score(y_test, y_test_model)
+
+    val_f1 = f1_score(y_val, y_val_model)
+    test_f1 = f1_score(y_test, y_test_model)
+
+    ###################
+    # 5. Output : Print and write all information in csv
+    ###################
+
+    print("Validation dataset size ", val_set_size)
+    print("Validation: ", val_accuracy)
+    print("Validation F1: ", val_f1)
+    print("Test dataset size ", test_set_size)
+    print("Test: ", val_accuracy)
+    print("Test F1: ", test_f1)
+
+    ##################
+    # 6. Save model : create path if not exists
+    ##################
+
+    if not os.path.exists(saved_models_folder):
+        os.makedirs(saved_models_folder)
+
+    joblib.dump(model, output_model_folder + '/' + p_output + '.joblib')
+
+if __name__== "__main__":
+    main()