Browse Source

Add 26-feature metric

Jérôme BUISINE 11 months ago
parent
commit
cb6026f2c7
6 changed files with 63 additions and 630 deletions
  1. 1 1
      .gitignore
  2. 3 2
      README.md
  3. 0 355
      analysis/.ipynb
  4. 3 3
      modules/utils/config.py
  5. 40 263
      modules/utils/data.py
  6. 16 6
      modules/utils/filters.py

+ 1 - 1
.gitignore

@@ -10,7 +10,7 @@ corr_indices/*
 
 # simulate_models.csv
 
-fichiersSVD_light
+dataset
 
 .python-version
 __pycache__

+ 3 - 2
README.md

@@ -5,6 +5,7 @@
 Noise detection on synthesis images with 26 attributes obtained using a few filters. 
 
 Filters list:
+- average
 - wiener
 - median
 - gaussian
@@ -27,7 +28,7 @@ python generate_all_data.py --metric all
 ### Multiple directories and scripts are available:
 
 
-- **fichiersSVD_light/\***: all scene files information (zones of each scene, SVD descriptor files information and so on...).
+- **dataset/\***: all scene files information (zones of each scene, SVD descriptor files information and so on...).
 - **train_model.py**: script used to run a specific available model.
 - **data/\***: folder which will contain all *.train* & *.test* files used to train a model.
 - **saved_models/*.joblib**: all scikit learn models saved.
@@ -57,7 +58,7 @@ Parameters explained:
 This is an example of how to train a model
 
 ```bash
-python train_model.py --data 'data/xxxxx.train' --output 'model_file_to_save' --choice 'model_choice'
+python train_model.py --data 'data/xxxx' --output 'model_file_to_save' --choice 'model_choice'
 ```
 
 Expected values for the **choice** parameter are ['svm_model', 'ensemble_model', 'ensemble_model_v2'].

File diff suppressed because it is too large
+ 0 - 355
analysis/.ipynb


+ 3 - 3
modules/utils/config.py

@@ -2,7 +2,7 @@ import numpy as np
 
 zone_folder                     = "zone"
 output_data_folder              = 'data'
-dataset_path                    = 'fichiersSVD_light'
+dataset_path                    = 'dataset'
 threshold_map_folder            = 'threshold_map'
 models_information_folder       = 'models_info'
 saved_models_folder             = 'saved_models'
@@ -35,7 +35,7 @@ cycle_scenes_indices            = ['E', 'I']
 normalization_choices           = ['svd', 'svdn', 'svdne']
 zones_indices                   = np.arange(16)
 
-metric_choices_labels           = ['lab', 'mscn', 'low_bits_2', 'low_bits_3', 'low_bits_4', 'low_bits_5', 'low_bits_6','low_bits_4_shifted_2', 'sub_blocks_stats', 'sub_blocks_area', 'sub_blocks_stats_reduced', 'sub_blocks_area_normed', 'mscn_var_4', 'mscn_var_16', 'mscn_var_64', 'mscn_var_16_max', 'mscn_var_64_max', 'ica_diff', 'svd_trunc_diff', 'ipca_diff', 'svd_reconstruct', 'highest_sv_std_filters', 'lowest_sv_std_filters']
+metric_choices_labels           = ['filters_statistics']
 
-keras_epochs                    = 500
+keras_epochs                    = 100
 keras_batch                     = 32

+ 40 - 263
modules/utils/data.py

@@ -1,5 +1,9 @@
 from ipfml import processing, metrics, utils
+
 from modules.utils.config import *
+from modules.utils.filters import w2d
+
+import cv2
 
 from PIL import Image
 from skimage import color
@@ -25,293 +29,66 @@ def get_svd_data(data_type, block):
     Method which returns the data type expected
     """
 
-    if data_type == 'lab':
-
-        block_file_path = '/tmp/lab_img.png'
-        block.save(block_file_path)
-        data = processing.get_LAB_L_SVD_s(Image.open(block_file_path))
-
-    if data_type == 'mscn':
-
-        img_mscn_revisited = processing.rgb_to_mscn(block)
-
-        # save tmp as img
-        img_output = Image.fromarray(img_mscn_revisited.astype('uint8'), 'L')
-        mscn_revisited_file_path = '/tmp/mscn_revisited_img.png'
-        img_output.save(mscn_revisited_file_path)
-        img_block = Image.open(mscn_revisited_file_path)
-
-        # extract from temp image
-        data = metrics.get_SVD_s(img_block)
-
-    """if data_type == 'mscn':
-
-        img_gray = np.array(color.rgb2gray(np.asarray(block))*255, 'uint8')
-        img_mscn = processing.calculate_mscn_coefficients(img_gray, 7)
-        img_mscn_norm = processing.normalize_2D_arr(img_mscn)
-
-        img_mscn_gray = np.array(img_mscn_norm*255, 'uint8')
-
-        data = metrics.get_SVD_s(img_mscn_gray)
-    """
-
-    if data_type == 'low_bits_6':
-
-        low_bits_6 = processing.rgb_to_LAB_L_low_bits(block, 6)
-        data = metrics.get_SVD_s(low_bits_6)
-
-    if data_type == 'low_bits_5':
-
-        low_bits_5 = processing.rgb_to_LAB_L_low_bits(block, 5)
-        data = metrics.get_SVD_s(low_bits_5)
-
-    if data_type == 'low_bits_4':
-
-        low_bits_4 = processing.rgb_to_LAB_L_low_bits(block, 4)
-        data = metrics.get_SVD_s(low_bits_4)
-
-    if data_type == 'low_bits_3':
-
-        low_bits_3 = processing.rgb_to_LAB_L_low_bits(block, 3)
-        data = metrics.get_SVD_s(low_bits_3)
-
-    if data_type == 'low_bits_2':
-
-        low_bits_2 = processing.rgb_to_LAB_L_low_bits(block, 2)
-        data = metrics.get_SVD_s(low_bits_2)
+    if 'filters_statistics' in data_type:
 
-    if data_type == 'low_bits_4_shifted_2':
+        img_width, img_height = 200, 200
 
-        data = metrics.get_SVD_s(processing.rgb_to_LAB_L_bits(block, (3, 6)))
-
-    if data_type == 'sub_blocks_stats':
-
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 4), int(height / 4)
-
-        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
-
-        data = []
-
-        for sub_b in sub_blocks:
-
-            # by default use the whole lab L canal
-            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
-
-            # get information we want from svd
-            data.append(np.mean(l_svd_data))
-            data.append(np.median(l_svd_data))
-            data.append(np.percentile(l_svd_data, 25))
-            data.append(np.percentile(l_svd_data, 75))
-            data.append(np.var(l_svd_data))
-
-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=100)
-            data.append(area_under_curve)
-
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
-
-    if data_type == 'sub_blocks_stats_reduced':
+        lab_img = metrics.get_LAB_L(block)
+        arr = np.array(lab_img)
 
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 4), int(height / 4)
+        # compute all filters statistics
+        def get_stats(arr, I_filter):
 
-        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+            e1       = np.abs(arr - I_filter)
+            L        = np.array(e1)
+            mu0      = np.mean(L)
+            A        = L - mu0
+            H        = A * A
+            E        = np.sum(H) / (img_width * img_height)
+            P        = np.sqrt(E)
 
-        data = []
+            return mu0, P
 
-        for sub_b in sub_blocks:
+        stats = []
 
-            # by default use the whole lab L canal
-            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+        kernel = np.ones((3,3),np.float32)/9
+        stats.append(get_stats(arr, cv2.filter2D(arr,-1,kernel)))
 
-            # get information we want from svd
-            data.append(np.mean(l_svd_data))
-            data.append(np.median(l_svd_data))
-            data.append(np.percentile(l_svd_data, 25))
-            data.append(np.percentile(l_svd_data, 75))
-            data.append(np.var(l_svd_data))
+        kernel = np.ones((5,5),np.float32)/25
+        stats.append(get_stats(arr, cv2.filter2D(arr,-1,kernel)))
 
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (3, 3), 0.5)))
 
-    if data_type == 'sub_blocks_area':
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (3, 3), 1)))
 
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 8), int(height / 8)
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (3, 3), 1.5)))
 
-        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
-
-        data = []
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (5, 5), 0.5)))
 
-        for sub_b in sub_blocks:
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (5, 5), 1)))
 
-            # by default use the whole lab L canal
-            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
+        stats.append(get_stats(arr, cv2.GaussianBlur(arr, (5, 5), 1.5)))
 
-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
-            data.append(area_under_curve)
+        stats.append(get_stats(arr, medfilt2d(arr, [3, 3])))
 
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
+        stats.append(get_stats(arr, medfilt2d(arr, [5, 5])))
 
-    if data_type == 'sub_blocks_area_normed':
+        stats.append(get_stats(arr, wiener(arr, [3, 3])))
 
-        block = np.asarray(block)
-        width, height, _= block.shape
-        sub_width, sub_height = int(width / 8), int(height / 8)
+        stats.append(get_stats(arr, wiener(arr, [5, 5])))
 
-        sub_blocks = processing.divide_in_blocks(block, (sub_width, sub_height))
+        wave = w2d(arr, 'db1', 2)
+        stats.append(get_stats(arr, np.array(wave, 'float64')))
 
         data = []
 
-        for sub_b in sub_blocks:
-
-            # by default use the whole lab L canal
-            l_svd_data = np.array(processing.get_LAB_L_SVD_s(sub_b))
-            l_svd_data = utils.normalize_arr(l_svd_data)
-
-            area_under_curve = utils.integral_area_trapz(l_svd_data, dx=50)
-            data.append(area_under_curve)
-
-        # convert into numpy array after computing all stats
-        data = np.asarray(data)
-
-    if data_type == 'mscn_var_4':
-
-        data = _get_mscn_variance(block, (100, 100))
+        for stat in stats:
+            data.append(stat[0])
 
-    if data_type == 'mscn_var_16':
-
-        data = _get_mscn_variance(block, (50, 50))
-
-    if data_type == 'mscn_var_64':
-
-        data = _get_mscn_variance(block, (25, 25))
-
-    if data_type == 'mscn_var_16_max':
-
-        data = _get_mscn_variance(block, (50, 50))
-        data = np.asarray(data)
-        size = int(len(data) / 4)
-        indices = data.argsort()[-size:][::-1]
-        data = data[indices]
-
-    if data_type == 'mscn_var_64_max':
-
-        data = _get_mscn_variance(block, (25, 25))
-        data = np.asarray(data)
-        size = int(len(data) / 4)
-        indices = data.argsort()[-size:][::-1]
-        data = data[indices]
-
-    if data_type == 'ica_diff':
-        current_image = metrics.get_LAB_L(block)
-
-        ica = FastICA(n_components=50)
-        ica.fit(current_image)
-
-        image_ica = ica.fit_transform(current_image)
-        image_restored = ica.inverse_transform(image_ica)
-
-        final_image = utils.normalize_2D_arr(image_restored)
-        final_image = np.array(final_image * 255, 'uint8')
-
-        sv_values = utils.normalize_arr(metrics.get_SVD_s(current_image))
-        ica_sv_values = utils.normalize_arr(metrics.get_SVD_s(final_image))
-
-        data = abs(np.array(sv_values) - np.array(ica_sv_values))
-
-    if data_type == 'svd_trunc_diff':
-
-        current_image = metrics.get_LAB_L(block)
-
-        svd = TruncatedSVD(n_components=30, n_iter=100, random_state=42)
-        transformed_image = svd.fit_transform(current_image)
-        restored_image = svd.inverse_transform(transformed_image)
-
-        reduced_image = (current_image - restored_image)
-
-        U, s, V = metrics.get_SVD(reduced_image)
-        data = s
-
-    if data_type == 'ipca_diff':
-
-        current_image = metrics.get_LAB_L(block)
-
-        transformer = IncrementalPCA(n_components=20, batch_size=25)
-        transformed_image = transformer.fit_transform(current_image)
-        restored_image = transformer.inverse_transform(transformed_image)
-
-        reduced_image = (current_image - restored_image)
-
-        U, s, V = metrics.get_SVD(reduced_image)
-        data = s
-
-    if data_type == 'svd_reconstruct':
-
-        reconstructed_interval = (90, 200)
-        begin, end = reconstructed_interval
-
-        lab_img = metrics.get_LAB_L(block)
-        lab_img = np.array(lab_img, 'uint8')
-
-        U, s, V = lin_svd(lab_img, full_matrices=True)
-
-        smat = np.zeros((end-begin, end-begin), dtype=complex)
-        smat[:, :] = np.diag(s[begin:end])
-        output_img = np.dot(U[:, begin:end],  np.dot(smat, V[begin:end, :]))
-
-        output_img = np.array(output_img, 'uint8')
-
-        data = metrics.get_SVD_s(output_img)
-
-    if 'sv_std_filters' in data_type:
-
-        # convert into lab by default to apply filters
-        lab_img = metrics.get_LAB_L(block)
-        arr = np.array(lab_img)
-        images = []
-        
-        # Apply list of filter on arr
-        images.append(medfilt2d(arr, [3, 3]))
-        images.append(medfilt2d(arr, [5, 5]))
-        images.append(wiener(arr, [3, 3]))
-        images.append(wiener(arr, [5, 5]))
-        
-        # By default computation of current block image
-        s_arr = metrics.get_SVD_s(arr)
-        sv_vector = [s_arr]
-
-        # for each new image apply SVD and get SV 
-        for img in images:
-            s = metrics.get_SVD_s(img)
-            sv_vector.append(s)
-            
-        sv_array = np.array(sv_vector)
-        
-        _, len = sv_array.shape
+        for stat in stats:
+            data.append(stat[1])
         
-        sv_std = []
-        
-        # normalize each SV vectors and compute standard deviation for each sub vectors
-        for i in range(len):
-            sv_array[:, i] = utils.normalize_arr(sv_array[:, i])
-            sv_std.append(np.std(sv_array[:, i]))
-        
-        indices = []
-
-        if 'lowest' in data_type:
-            indices = get_lowest_values(sv_std, 200)
-
-        if 'highest' in data_type:
-            indices = get_highest_values(sv_std, 200)
-
-        # data are arranged following std trend computed
-        data = s_arr[indices]
+        data = np.array(data)
 
     return data
 

+ 16 - 6
modules/utils/filters.py

@@ -1,12 +1,22 @@
-import cv2
+import cv2, pywt
 import numpy as np
 from scipy.signal import medfilt2d, wiener, cwt
 
+def w2d(arr, mode='haar', level=1):
+    #convert to float   
+    imArray = arr
+    imArray /= 255
 
-def get_filters(arr):
+    # compute coefficients 
+    coeffs=pywt.wavedec2(imArray, mode, level=level)
 
-    filters = []
+    #Process Coefficients
+    coeffs_H=list(coeffs)  
+    coeffs_H[0] *= 0
 
-    # TODO : get all needed filters and append to filters array
-    
-    return filters 
+    # reconstruction
+    imArray_H = pywt.waverec2(coeffs_H, mode);
+    imArray_H *= 255
+    imArray_H = np.uint8(imArray_H)
+
+    return imArray_H