generate_dataset.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Jun 19 11:47:42 2019
  5. @author: jbuisine
  6. """
  7. import sys, os, argparse
  8. import numpy as np
  9. import random
  10. import time
  11. import json
  12. from PIL import Image
  13. from ipfml.processing.segmentation import divide_in_blocks
  14. from skimage import color
  15. from modules.utils import config as cfg
  16. from modules.utils import data as dt
  17. from modules.classes.Transformation import Transformation
# getting configuration information
# (module-level aliases so the rest of the script reads shorter)
config_filename = cfg.config_filename
zone_folder = cfg.zone_folder
learned_folder = cfg.learned_zones_folder
min_max_filename = cfg.min_max_filename_extension

# define all scenes values
scenes_list = cfg.scenes_names
scenes_indexes = cfg.scenes_indices
choices = cfg.normalization_choices
dataset_path = cfg.dataset_path
zones = cfg.zones_indices          # NOTE: shared list — callers must copy before mutating
seuil_expe_filename = cfg.seuil_expe_filename

metric_choices = cfg.metric_choices_labels
output_data_folder = cfg.output_data_folder

# suffix used for randomly generated csv data files
generic_output_file_svd = '_random.csv'
  33. def generate_data_model(_scenes_list, _filename, _transformations, _scenes, _nb_zones = 4, _random=0, _only_noisy=0):
  34. output_train_filename = _filename + ".train"
  35. output_test_filename = _filename + ".test"
  36. if not '/' in output_train_filename:
  37. raise Exception("Please select filename with directory path to save data. Example : data/dataset")
  38. # create path if not exists
  39. if not os.path.exists(output_data_folder):
  40. os.makedirs(output_data_folder)
  41. train_file_data = []
  42. test_file_data = []
  43. scenes = os.listdir(dataset_path)
  44. # remove min max file from scenes folder
  45. scenes = [s for s in scenes if min_max_filename not in s]
  46. # go ahead each scenes
  47. for id_scene, folder_scene in enumerate(_scenes_list):
  48. scene_path = os.path.join(dataset_path, folder_scene)
  49. config_file_path = os.path.join(scene_path, config_filename)
  50. # only get last image path
  51. with open(config_file_path, "r") as config_file:
  52. last_image_name = config_file.readline().strip()
  53. ref_image_path = os.path.join(scene_path, last_image_name)
  54. zones_indices = zones
  55. # shuffle list of zones (=> randomly choose zones)
  56. # only in random mode
  57. if _random:
  58. random.shuffle(zones_indices)
  59. # store zones learned
  60. learned_zones_indices = zones_indices[:_nb_zones]
  61. # write into file
  62. folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
  63. if not os.path.exists(folder_learned_path):
  64. os.makedirs(folder_learned_path)
  65. file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
  66. with open(file_learned_path, 'w') as f:
  67. for i in learned_zones_indices:
  68. f.write(str(i) + ';')
  69. ref_image_blocks = divide_in_blocks(Image.open(ref_image_path), cfg.keras_img_size)
  70. for id_zone, index_folder in enumerate(zones_indices):
  71. index_str = str(index_folder)
  72. if len(index_str) < 2:
  73. index_str = "0" + index_str
  74. current_zone_folder = "zone" + index_str
  75. zone_path = os.path.join(scene_path, current_zone_folder)
  76. # path of zone of reference image
  77. # ref_image_block_path = os.path.join(zone_path, last_image_name)
  78. # compute augmented images for ref image
  79. current_ref_zone_image = ref_image_blocks[id_zone]
  80. ref_image_name_prefix = last_image_name.replace('.png', '')
  81. dt.augmented_data_image(current_ref_zone_image, zone_path, ref_image_name_prefix)
  82. # get list of all augmented ref images
  83. ref_augmented_images = [os.path.join(zone_path, f) for f in os.listdir(zone_path) if ref_image_name_prefix in f]
  84. # custom path for interval of reconstruction and metric
  85. metrics_path = []
  86. for transformation in _transformations:
  87. # check if it's a static content and create augmented images if necessary
  88. if transformation.getName() == 'static':
  89. # {sceneName}/zoneXX/static
  90. static_metric_path = os.path.join(zone_path, transformation.getName())
  91. # img.png
  92. image_name = transformation.getParam().split('/')[-1]
  93. # {sceneName}/zoneXX/static/img
  94. image_prefix_name = image_name.replace('.png', '')
  95. image_folder_path = os.path.join(static_metric_path, image_prefix_name)
  96. if not os.path.exists(image_folder_path):
  97. os.makedirs(image_folder_path)
  98. metrics_path.append(image_folder_path)
  99. # get image path to manage
  100. # {sceneName}/static/img.png
  101. transform_image_path = os.path.join(scene_path, transformation.getName(), image_name)
  102. static_transform_image = Image.open(transform_image_path)
  103. static_transform_image_block = divide_in_blocks(static_transform_image, cfg.keras_img_size)[id_zone]
  104. # generate augmented data
  105. dt.augmented_data_image(static_transform_image_block, image_folder_path, image_prefix_name)
  106. else:
  107. metric_interval_path = os.path.join(zone_path, transformation.getTransformationPath())
  108. metrics_path.append(metric_interval_path)
  109. # as labels are same for each metric
  110. for label in os.listdir(metrics_path[0]):
  111. if (label == cfg.not_noisy_folder and _only_noisy == 0) or label == cfg.noisy_folder:
  112. label_metrics_path = []
  113. for path in metrics_path:
  114. label_path = os.path.join(path, label)
  115. label_metrics_path.append(label_path)
  116. # getting images list for each metric
  117. metrics_images_list = []
  118. for index_metric, label_path in enumerate(label_metrics_path):
  119. if _transformations[index_metric].getName() == 'static':
  120. # by default append nothing..
  121. metrics_images_list.append([])
  122. else:
  123. images = sorted(os.listdir(label_path))
  124. metrics_images_list.append(images)
  125. # construct each line using all images path of each
  126. for index_image in range(0, len(metrics_images_list[0])):
  127. images_path = []
  128. # get information about rotation and flip from first transformation (need to be a not static transformation)
  129. current_post_fix = metrics_images_list[0][index_image].split(cfg.post_image_name_separator)[-1]
  130. # getting images with same index and hence name for each metric (transformation)
  131. for index_metric in range(0, len(metrics_path)):
  132. # custom behavior for static transformation (need to check specific image)
  133. if _transformations[index_metric].getName() == 'static':
  134. # add static path with selecting correct data augmented image
  135. image_name = _transformations[index_metric].getParam().split('/')[-1].replace('.png', '')
  136. img_path = os.path.join(metrics_path[index_metric], image_name + cfg.post_image_name_separator + current_post_fix)
  137. images_path.append(img_path)
  138. else:
  139. img_path = metrics_images_list[index_metric][index_image]
  140. images_path.append(os.path.join(label_metrics_path[index_metric], img_path))
  141. # get information about rotation and flip
  142. current_post_fix = images_path[0].split(cfg.post_image_name_separator)[-1]
  143. # get ref block which matchs we same information about rotation and flip
  144. augmented_ref_image_block_path = next(img for img in ref_augmented_images
  145. if img.split(cfg.post_image_name_separator)[-1] == current_post_fix)
  146. line = augmented_ref_image_block_path + ';'
  147. # compute line information with all images paths
  148. for id_path, img_path in enumerate(images_path):
  149. if id_path < len(images_path) - 1:
  150. line = line + img_path + '::'
  151. else:
  152. line = line + img_path
  153. line = line + '\n'
  154. if id_zone < _nb_zones and folder_scene in _scenes:
  155. train_file_data.append(line)
  156. else:
  157. test_file_data.append(line)
  158. train_file = open(output_train_filename, 'w')
  159. test_file = open(output_test_filename, 'w')
  160. random.shuffle(train_file_data)
  161. random.shuffle(test_file_data)
  162. for line in train_file_data:
  163. train_file.write(line)
  164. for line in test_file_data:
  165. test_file.write(line)
  166. train_file.close()
  167. test_file.close()
  168. def main():
  169. parser = argparse.ArgumentParser(description="Compute specific dataset for model using of metric")
  170. parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
  171. parser.add_argument('--metrics', type=str,
  172. help="list of metrics choice in order to compute data",
  173. default='svd_reconstruction, ipca_reconstruction',
  174. required=True)
  175. parser.add_argument('--params', type=str,
  176. help="list of specific param for each metric choice (See README.md for further information in 3D mode)",
  177. default='100, 200 :: 50, 25',
  178. required=True)
  179. parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
  180. parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
  181. parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
  182. parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
  183. parser.add_argument('--only_noisy', type=int, help='Only noisy will be used', choices=[0, 1])
  184. args = parser.parse_args()
  185. p_filename = args.output
  186. p_metrics = list(map(str.strip, args.metrics.split(',')))
  187. p_params = list(map(str.strip, args.params.split('::')))
  188. p_scenes = args.scenes.split(',')
  189. p_nb_zones = args.nb_zones
  190. p_renderer = args.renderer
  191. p_random = args.random
  192. p_only_noisy = args.only_noisy
  193. # create list of Transformation
  194. transformations = []
  195. for id, metric in enumerate(p_metrics):
  196. if metric not in metric_choices:
  197. raise ValueError("Unknown metric, please select a correct metric : ", metric_choices)
  198. transformations.append(Transformation(metric, p_params[id]))
  199. # list all possibles choices of renderer
  200. scenes_list = dt.get_renderer_scenes_names(p_renderer)
  201. scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
  202. # getting scenes from indexes user selection
  203. scenes_selected = []
  204. for scene_id in p_scenes:
  205. index = scenes_indices.index(scene_id.strip())
  206. scenes_selected.append(scenes_list[index])
  207. # create database using img folder (generate first time only)
  208. generate_data_model(scenes_list, p_filename, transformations, scenes_selected, p_nb_zones, p_random, p_only_noisy)
# script entry point
if __name__== "__main__":
    main()