generate_data_augmentation.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. # main imports
  2. import sys, os, argparse
  3. import numpy as np
  4. import time
  5. import random
  6. import math
  7. # image processing imports
  8. from PIL import Image
  9. from ipfml.processing import transform, segmentation
  10. from ipfml import utils
  11. # modules imports
  12. sys.path.insert(0, '') # trick to enable import of main folder module
  13. import custom_config as cfg
  14. from modules.utils import data as dt
  15. # getting configuration information
  16. zone_folder = cfg.zone_folder
  17. min_max_filename = cfg.min_max_filename_extension
  18. # define all scenes values
  19. scenes_list = cfg.scenes_names
  20. scenes_indexes = cfg.scenes_indices
  21. path = cfg.dataset_path
  22. zones = cfg.zones_indices
  23. seuil_expe_filename = cfg.seuil_expe_filename
  24. output_data_folder = cfg.output_data_folder
  25. image_scene_size = cfg.image_scene_size
  26. image_zone_size = cfg.image_zone_size
  27. possible_point_zone = cfg.possible_point_zone
  28. data_augmented_filename = cfg.data_augmented_filename
  29. def main():
  30. parser = argparse.ArgumentParser(description="Compute and prepare data augmentation of scenes")
  31. parser.add_argument('--output', type=str, help="output folder expected", required=True)
  32. parser.add_argument('--number', type=int, help="number of images for each sample of scene", required=True)
  33. parser.add_argument('--rotation', type=bool, help="", required=True, default=False)
  34. args = parser.parse_args()
  35. p_output = args.output
  36. p_number = args.number
  37. p_rotation = args.rotation
  38. scenes = os.listdir(path)
  39. # remove min max file from scenes folder
  40. scenes = [s for s in scenes if min_max_filename not in s]
  41. # getting image zone size and usefull information
  42. zone_width, zone_height = image_zone_size
  43. scene_width, scene_height = image_scene_size
  44. nb_x_parts = math.floor(scene_width / zone_width)
  45. output_dataset_filename_path = os.path.join(p_output, data_augmented_filename)
  46. # go ahead each scenes
  47. for folder_scene in scenes:
  48. scene_path = os.path.join(path, folder_scene)
  49. # build output scene path
  50. output_scene_path = os.path.join(p_output, folder_scene)
  51. if not os.path.exists(output_scene_path):
  52. os.makedirs(output_scene_path)
  53. # construct each zones folder name
  54. zones_folder = []
  55. zones_threshold = []
  56. # get zones list info
  57. for index in zones:
  58. index_str = str(index)
  59. if len(index_str) < 2:
  60. index_str = "0" + index_str
  61. current_zone = "zone"+index_str
  62. zones_folder.append(current_zone)
  63. zone_path = os.path.join(scene_path, current_zone)
  64. with open(os.path.join(zone_path, seuil_expe_filename)) as f:
  65. zones_threshold.append(int(f.readline()))
  66. possible_x, possible_y = possible_point_zone
  67. # get all images of folder
  68. scene_images = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if cfg.scene_image_extension in img])
  69. number_scene_image = len(scene_images)
  70. for id_img, img_path in enumerate(scene_images):
  71. current_img = Image.open(img_path)
  72. img = np.array(current_img)
  73. for generation in range(p_number):
  74. p_x, p_y = (random.randrange(possible_x), random.randrange(possible_y))
  75. # extract random zone into scene image
  76. extracted_img = img[p_y:(p_y + zone_height), p_x:(p_x + zone_width)]
  77. extracted_img.shape
  78. pil_extracted_img = Image.fromarray(extracted_img)
  79. # coordinate of specific zone, hence use threshold of zone
  80. if p_x % zone_width == 0 and p_y % zone_height == 0:
  81. zone_index = math.floor(p_x / zone_width) + math.floor(p_y / zone_height) * nb_x_parts
  82. final_threshold = int(zones_threshold[zone_index])
  83. else:
  84. # get zone identifiers of this new zones (from endpoints)
  85. p_top_left = (p_x, p_y)
  86. p_top_right = (p_x + zone_width, p_y)
  87. p_bottom_right = (p_x + zone_width, p_y + zone_height)
  88. p_bottom_left = (p_x, p_y + zone_height)
  89. points = [p_top_left, p_top_right, p_bottom_right, p_bottom_left]
  90. p_zones_indices = []
  91. # for each points get threshold information
  92. for p in points:
  93. x, y = p
  94. zone_index = math.floor(x / zone_width) + math.floor(y / zone_height) * nb_x_parts
  95. p_zones_indices.append(zone_index)
  96. p_thresholds = np.array(zones_threshold)[p_zones_indices]
  97. # get proportions of pixels of img into each zone
  98. overlaps = []
  99. p_x_max = p_x + zone_width
  100. p_y_max = p_y + zone_height
  101. for index, zone_index in enumerate(p_zones_indices):
  102. x_zone = (zone_index % nb_x_parts) * zone_width
  103. y_zone = (math.floor(zone_index / nb_x_parts)) * zone_height
  104. x_max_zone = x_zone + zone_width
  105. y_max_zone = y_zone + zone_height
  106. # computation of overlap
  107. # x_overlap = max(0, min(rect1.right, rect2.right) - max(rect1.left, rect2.left))
  108. # y_overlap = max(0, min(rect1.bottom, rect2.bottom) - max(rect1.top, rect2.top))
  109. x_overlap = max(0, min(x_max_zone, p_x_max) - max(x_zone, p_x))
  110. y_overlap = max(0, min(y_max_zone, p_y_max) - max(y_zone, p_y))
  111. overlapArea = x_overlap * y_overlap
  112. overlaps.append(overlapArea)
  113. overlapSum = sum(overlaps)
  114. proportions = [item / overlapSum for item in overlaps]
  115. final_threshold = 0
  116. for index, proportion in enumerate(proportions):
  117. final_threshold += proportion * p_thresholds[index]
  118. final_threshold = int(final_threshold)
  119. # save image into new scene folder
  120. current_image_postfix = dt.get_scene_image_postfix(img_path)
  121. # prepare output img name
  122. label_img = (int(current_image_postfix) < final_threshold)
  123. extracted_image_name = dt.get_scene_image_prefix(img_path) + '_' + str(generation) + '_x' + str(p_x) + '_y' + str(p_y) + '_label' + str(int(label_img))
  124. # if wished add of rotations images with same final threshold (increase data)
  125. # write new line into global .csv ('threshold', 'filepath')
  126. if p_rotation:
  127. # do rotations and save
  128. rotations = [0, 90, 180, 270]
  129. for rotation in rotations:
  130. rotated_img_name = extracted_image_name + 'rot' + str(rotation) + '_' + current_image_postfix + cfg.scene_image_extension
  131. rotated_img_path = os.path.join(output_scene_path, rotated_img_name)
  132. saved_rotated_img_path = os.path.join(folder_scene, rotated_img_name)
  133. rotated_img = pil_extracted_img.rotate(rotation)
  134. rotated_img.save(rotated_img_path)
  135. csv_line = folder_scene + ';' + str(final_threshold) + ';' + str(int(current_image_postfix)) + ';' + str(int(label_img)) + ';' + saved_rotated_img_path + '\n'
  136. with open(output_dataset_filename_path, 'a') as f:
  137. f.write(csv_line)
  138. else:
  139. extracted_image_name += current_image_postfix + cfg.scene_image_extension
  140. extracted_image_path = os.path.join(output_scene_path, extracted_image_name)
  141. saved_extracted_image_path = os.path.join(output_scene_path, extracted_image_name)
  142. pil_extracted_img.save(extracted_image_path)
  143. csv_line = folder_scene + ';' + str(final_threshold) + ';' + str(int(current_image_postfix)) + ';' + str(int(label_img)) + ';' + saved_extracted_image_path + '\n'
  144. with open(output_dataset_filename_path, 'a') as f:
  145. f.write(csv_line)
  146. print(folder_scene + " - " + "{0:.2f}".format(((id_img * p_number + generation) + 1) / (p_number * number_scene_image) * 100.) + "%")
  147. sys.stdout.write("\033[F")
  148. print('\n', folder_scene, 'done...')
  149. if __name__== "__main__":
  150. main()