Parcourir la source

Data generation from noisy images updates

Jérôme BUISINE il y a 5 ans
Parent
commit
3168f0eb83
3 fichiers modifiés avec 126 ajouts et 101 suppressions
  1. 99 77
      generate_all_data.py
  2. 26 24
      generate_data_model_random.py
  3. 1 0
      noise_computation.py

+ 99 - 77
generate_all_data.py

@@ -33,6 +33,7 @@ path                    = cfg.generated_folder
 zones                   = cfg.zones_indices
 seuil_expe_filename     = cfg.seuil_expe_filename
 
+noise_choices           = cfg.noise_labels
 metric_choices          = cfg.metric_choices_labels
 output_data_folder      = cfg.output_data_folder
 
@@ -44,7 +45,7 @@ picture_step            = 10
 # avoid calibration data ?
 calibration_folder      = 'calibration'
 
-def generate_data_svd(data_type, mode):
+def generate_data_svd(data_type, color, mode):
     """
     @brief Method which generates all .csv files from scenes
     @param data_type,  metric choice
@@ -72,102 +73,120 @@ def generate_data_svd(data_type, mode):
         print(folder_scene)
         scene_path = os.path.join(path, folder_scene)
 
-        # getting output filename
-        output_svd_filename = data_type + "_" + mode + generic_output_file_svd
 
-        # construct each zones folder name
-        zones_folder = []
-        svd_output_files = []
+        for noise in noise_choices:
 
-        # get zones list info
-        for index in zones:
-            index_str = str(index)
-            if len(index_str) < 2:
-                index_str = "0" + index_str
+            noise_path = os.path.join(scene_path, noise)
 
-            current_zone = "zone"+index_str
-            zones_folder.append(current_zone)
+            # getting output filename
+            if color:
+                output_svd_filename = data_type + "_color_" + mode + generic_output_file_svd
+            else:
+                output_svd_filename = data_type + "_" + mode + generic_output_file_svd
 
-            zone_path = os.path.join(scene_path, current_zone)
+            # construct each zones folder name
+            zones_folder = []
+            svd_output_files = []
 
-            if not os.path.exists(zone_path):
-                os.makedirs(zone_path)
+            # get zones list info
+            for index in zones:
+                index_str = str(index)
+                if len(index_str) < 2:
+                    index_str = "0" + index_str
 
-            svd_file_path = os.path.join(zone_path, output_svd_filename)
+                current_zone = "zone"+index_str
+                zones_folder.append(current_zone)
 
-            # add writer into list
-            svd_output_files.append(open(svd_file_path, 'w'))
+                zone_path = os.path.join(noise_path, current_zone)
 
-        counter_index = 1
+                if not os.path.exists(zone_path):
+                    os.makedirs(zone_path)
 
-        while(counter_index <= end_counter_index):
+                svd_file_path = os.path.join(zone_path, output_svd_filename)
 
-            if counter_index % picture_step == 0:
-                counter_index_str = str(counter_index)
+                # add writer into list
+                svd_output_files.append(open(svd_file_path, 'w'))
 
-                img_path = os.path.join(scene_path, forlder_scene + "_" + counter_index_str + ".png")
+            counter_index = 1
 
-                current_img = Image.open(img_path)
-                img_blocks = processing.divide_in_blocks(current_img, (200, 200))
+            while(counter_index < end_counter_index):
 
-                for id_block, block in enumerate(img_blocks):
+                if counter_index % picture_step == 0:
+                    counter_index_str = str(counter_index)
 
-                    ###########################
-                    # Metric computation part #
-                    ###########################
+                    if color:
+                        img_path = os.path.join(noise_path, folder_scene + "_" + noise + "_color_" + counter_index_str + ".png")
+                    else:
+                        img_path = os.path.join(noise_path, folder_scene + "_" + noise + "_" + counter_index_str + ".png")
 
-                    data = get_svd_data(data_type, block)
+                    current_img = Image.open(img_path)
+                    img_blocks = processing.divide_in_blocks(current_img, (200, 200))
 
-                    ##################
-                    # Data mode part #
-                    ##################
+                    for id_block, block in enumerate(img_blocks):
 
-                    # modify data depending mode
-                    if mode == 'svdne':
+                        ###########################
+                        # Metric computation part #
+                        ###########################
 
-                        # getting max and min information from min_max_filename
-                        with open(data_min_max_filename, 'r') as f:
-                            min_val = float(f.readline())
-                            max_val = float(f.readline())
+                        data = get_svd_data(data_type, block)
 
-                        data = processing.normalize_arr_with_range(data, min_val, max_val)
+                        ##################
+                        # Data mode part #
+                        ##################
 
-                    if mode == 'svdn':
-                        data = processing.normalize_arr(data)
+                        # modify data depending on mode
+                        if mode == 'svdne':
 
-                    # save min and max found from dataset in order to normalize data using whole data known
-                    if mode == 'svd':
+                            # getting max and min information from min_max_filename
+                            with open(data_min_max_filename, 'r') as f:
+                                min_val = float(f.readline())
+                                max_val = float(f.readline())
 
-                        current_min = data.min()
-                        current_max = data.max()
+                            data = processing.normalize_arr_with_range(data, min_val, max_val)
 
-                        if current_min < min_val_found:
-                            min_val_found = current_min
+                        if mode == 'svdn':
+                            data = processing.normalize_arr(data)
 
-                        if current_max > max_val_found:
-                            max_val_found = current_max
+                        # track the min and max found in the dataset so that data can be normalized using the whole known data
+                        if mode == 'svd':
 
-                    # now write data into current writer
-                    current_file = svd_output_files[id_block]
+                            current_min = data.min()
+                            current_max = data.max()
 
-                    # add of index
-                    current_file.write(counter_index_str + ';')
+                            if current_min < min_val_found:
+                                min_val_found = current_min
 
-                    for val in data:
-                        current_file.write(str(val) + ";")
+                            if current_max > max_val_found:
+                                max_val_found = current_max
 
-                    current_file.write('\n')
+                        # now write data into current writer
+                        current_file = svd_output_files[id_block]
 
-            start_index_image_int = int(start_index_image)
-            print(data_type + "_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((counter_index) / (end_counter_index)* 100.) + "%")
-            sys.stdout.write("\033[F")
+                        # write the image index first
+                        current_file.write(counter_index_str + ';')
 
-            counter_index += 1
+                        for val in data:
+                            current_file.write(str(val) + ";")
 
-        for f in svd_output_files:
-            f.close()
+                        current_file.write('\n')
+
+                if color:
+                    print(data_type + "_" + noise + "_color_" + mode + "_" + folder_scene + " - " + "{0:.2f}".format((counter_index) / (end_counter_index)* 100.) + "%")
+                else:
+                    print(data_type + "_" + noise + "_"+ mode + "_" + folder_scene + " - " + "{0:.2f}".format((counter_index) / (end_counter_index)* 100.) + "%")
+
+                sys.stdout.write("\033[F")
+
+                counter_index += 1
+
+            for f in svd_output_files:
+                f.close()
+
+            if color:
+                print(data_type + "_" + noise + "_color_" + mode + "_" + folder_scene + " - " + "Done...")
+            else:
+                print(data_type + "_" + noise + "_"+ mode + "_" + folder_scene + " - " + "Done...")
 
-        print('\n')
 
     # save current information about min file found
     if mode == 'svd':
@@ -182,25 +201,28 @@ def main():
 
     # default value of p_step
     p_step = 10
+    p_color = 0
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python generate_all_data.py --metric all')
-        print('python generate_all_data.py --metric lab')
-        print('python generate_all_data.py --metric lab --step 10')
+        print('python generate_all_data.py --metric all --color 0')
+        print('python generate_all_data.py --metric lab --color 0')
+        print('python generate_all_data.py --metric lab --color 1 --step 10')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "hms", ["help=", "metric=", "step="])
+        opts, args = getopt.getopt(sys.argv[1:], "hm:s:c", ["help=", "metric=", "step=", "color="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_all_data.py --metric all --step 10')
+        print('python generate_all_data.py --metric all --color 1 --step 10')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_all_data.py --metric all --step 10')
+            print('python generate_all_data.py --metric all --color 1 --step 10')
             sys.exit()
         elif o in ("-s", "--step"):
             p_step = int(a)
+        elif o in ("-c", "--color"):
+            p_color = int(a)
         elif o in ("-m", "--metric"):
             p_metric = a
 
@@ -218,13 +240,13 @@ def main():
     # generate all or specific metric data
     if p_metric == 'all':
         for m in metric_choices:
-            generate_data_svd(m, 'svd')
-            generate_data_svd(m, 'svdn')
-            generate_data_svd(m, 'svdne')
+            generate_data_svd(m, p_color, 'svd')
+            generate_data_svd(m, p_color, 'svdn')
+            generate_data_svd(m, p_color, 'svdne')
     else:
-        generate_data_svd(p_metric, 'svd')
-        generate_data_svd(p_metric, 'svdn')
-        generate_data_svd(p_metric, 'svdne')
+        generate_data_svd(p_metric, p_color, 'svd')
+        generate_data_svd(p_metric, p_color, 'svdn')
+        generate_data_svd(p_metric, p_color, 'svdne')
 
 if __name__== "__main__":
     main()

+ 26 - 24
generate_data_model_random.py

@@ -41,7 +41,7 @@ generic_output_file_svd = '_random.csv'
 min_value_interval = sys.maxsize
 max_value_interval = 0
 
-def construct_new_line(path_seuil, interval, line, norm, sep, index):
+def construct_new_line(path_seuil, interval, line, norm):
     begin, end = interval
 
     line_data = line.split(';')
@@ -63,15 +63,13 @@ def construct_new_line(path_seuil, interval, line, norm, sep, index):
         line = '0'
 
     for idx, val in enumerate(metrics):
-        if index:
-            line += " " + str(idx + 1)
-        line += sep
+        line += ';'
         line += str(val)
     line += '\n'
 
     return line
 
-def get_min_max_value_interval(_filename, _interval, _choice, _metric):
+def get_min_max_value_interval(_filename, _interval, _choice, _color, _metric):
 
     global min_value_interval, max_value_interval
 
@@ -103,7 +101,12 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
 
             for id_zone, zone_folder in enumerate(zones_folder):
                 zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+                if _color:
+                    data_filename = _metric + "_color_" + _choice + generic_output_file_svd
+                else:
+                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+
                 data_file_path = os.path.join(zone_path, data_filename)
 
                 # getting number of line and read randomly lines
@@ -132,7 +135,7 @@ def get_min_max_value_interval(_filename, _interval, _choice, _metric):
                     counter += 1
 
 
-def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _norm = False, _sep=':', _index=True):
+def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _color=False, _norm = False):
 
     output_train_filename = _filename + ".train"
     output_test_filename = _filename + ".test"
@@ -152,7 +155,6 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
     # remove min max file from scenes folder
     scenes = [s for s in scenes if min_max_filename not in s]
 
-
     for id_scene, folder_scene in enumerate(scenes):
 
         # only take care of maxwell scenes
@@ -171,11 +173,16 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
             # shuffle list of zones (=> randomly choose zones)
             random.shuffle(zones_folder)
 
-            path_seuil = os.path.join(zone_path, seuil_expe_filename)
+            path_seuil = os.path.join(scene_path, seuil_expe_filename)
 
             for id_zone, zone_folder in enumerate(zones_folder):
                 zone_path = os.path.join(scene_path, zone_folder)
-                data_filename = _metric + "_" + _choice + generic_output_file_svd
+
+                if _color:
+                    data_filename = _metric + "_color_" + _choice + generic_output_file_svd
+                else:
+                    data_filename = _metric + "_" + _choice + generic_output_file_svd
+
                 data_file_path = os.path.join(zone_path, data_filename)
 
                 # getting number of line and read randomly lines
@@ -191,7 +198,7 @@ def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes
                 counter = 0
                 # check if user select current scene and zone to be part of training data set
                 for index in lines_indexes:
-                    line = construct_new_line(path_seuil, _interval, lines[index], _norm, _sep, _index)
+                    line = construct_new_line(path_seuil, _interval, lines[index], _norm)
 
                     percent = counter / num_lines
 
@@ -214,17 +221,17 @@ def main():
 
     if len(sys.argv) <= 1:
         print('Run with default parameters...')
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --color 0 --custom min_max_filename')
         sys.exit(2)
     try:
-        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex=", "custom="])
+        opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r:c:c", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "color=", "custom="])
     except getopt.GetoptError:
         # print help information and exit:
-        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+        print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --color 0 --custom min_max_filename')
         sys.exit(2)
     for o, a in opts:
         if o == "-h":
-            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1 --custom min_max_filename')
+            print('python generate_data_model_random.py --output xxxx --interval 0,20  --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --color 0 --custom min_max_filename')
             sys.exit()
         elif o in ("-o", "--output"):
             p_filename = a
@@ -240,13 +247,8 @@ def main():
             p_nb_zones = int(a)
         elif o in ("-p", "--percent"):
             p_percent = float(a)
-        elif o in ("-s", "--sep"):
-            p_sep = a
-        elif o in ("-r", "--rowindex"):
-            if int(a) == 1:
-                p_rowindex = True
-            else:
-                p_rowindex = False
+        elif o in ("-c", "--color"):
+            p_color = int(a)
         elif o in ("-c", "--custom"):
             p_custom = a
         else:
@@ -261,7 +263,7 @@ def main():
 
     # find min max value if necessary to renormalize data
     if p_custom:
-        get_min_max_value_interval(p_filename, p_interval, p_kind, p_metric)
+        get_min_max_value_interval(p_filename, p_interval, p_kind, p_color, p_metric)
 
         # write new file to save
         if not os.path.exists(custom_min_max_folder):
@@ -275,7 +277,7 @@ def main():
             f.write(str(max_value_interval) + '\n')
 
     # create database using img folder (generate first time only)
-    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_custom, p_sep, p_rowindex)
+    generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_color, p_custom)
 
 if __name__== "__main__":
     main()

+ 1 - 0
noise_computation.py

@@ -88,6 +88,7 @@ def main():
 
         for i in range(1, p_n):
 
+            print(i)
             if i % p_step == 0:
                 p_filename = split_output[0] + "_" + str(i) + "." + filename_ext