get_specific_dataset.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. # main imports
  2. import os, sys
  3. import argparse
  4. import json
  5. import numpy as np
  6. import shutil
  7. # Rawls images
  8. from rawls.rawls import Rawls
  9. # others import
  10. from ipfml import utils
  11. from scipy.signal import savgol_filter
  12. '''
  13. Display progress information as progress bar
  14. '''
  15. def write_progress(progress):
  16. barWidth = 180
  17. output_str = "["
  18. pos = barWidth * progress
  19. for i in range(barWidth):
  20. if i < pos:
  21. output_str = output_str + "="
  22. elif i == pos:
  23. output_str = output_str + ">"
  24. else:
  25. output_str = output_str + " "
  26. output_str = output_str + "] " + str(int(progress * 100.0)) + " %\r"
  27. print(output_str)
  28. sys.stdout.write("\033[F")
  29. def extract_index(filepath):
  30. return int(filepath.split('-')[-1].split('.')[0])
  31. def extracts_linear_indices_rawls(images_path, n_expected=50, i_indices_step=20, o_indices_step=20, start_at=20, smooth_arr=False, gamma=False):
  32. default_add = start_at - 20
  33. # by default
  34. if i_indices_step == 1:
  35. default_add = 0
  36. n_start_images = int(start_at / i_indices_step)
  37. n_counter = 0
  38. # extract variance for each image path
  39. var_arr = []
  40. prev_rawls = None
  41. n_images = len(images_path)
  42. for p in sorted(images_path):
  43. if prev_rawls is None:
  44. temp = Rawls.load(p)
  45. if gamma:
  46. temp.gammaConvert()
  47. prev_rawls = temp
  48. else:
  49. temp = Rawls.load(p)
  50. if gamma:
  51. temp.gammaConvert()
  52. prev_rawls = Rawls.fusion(prev_rawls, temp)
  53. write_progress((n_counter + 1) / n_images)
  54. n_counter += 1
  55. if n_counter >= n_start_images:
  56. # only get center variance of image (800 per 800)
  57. width, heigth, _ = prev_rawls.shape
  58. n_w, n_h = (800, 800) # new expected size
  59. # get center of image
  60. middle_w = int(width / 2)
  61. middle_h = int(heigth / 2)
  62. # start coordinates
  63. s_w = middle_w - int(n_w / 2)
  64. s_h = middle_h - int(n_h / 2)
  65. # end coordinates
  66. e_w = middle_w + int(n_w / 2)
  67. e_h = middle_h + int(n_h / 2)
  68. var_arr.append(np.var(prev_rawls.data[s_w:e_w, s_h:e_h]))
  69. # normalize variance values
  70. norm_arr = np.array(utils.normalize_arr_with_range(var_arr))
  71. if smooth_arr:
  72. norm_arr = utils.normalize_arr_with_range(savgol_filter(norm_arr, 201, 3)) # window size 7, polynomial order 3
  73. # get expected linear step (using n_expectec output images)
  74. linear_steps = utils.normalize_arr_with_range((1 - (np.arange(n_expected) / n_expected)))
  75. # get image indices from variance convergence and linear
  76. # => when linear step is reached we store the index found from variance values
  77. indices_found = []
  78. for i in linear_steps:
  79. find_index = len(linear_steps) - 1
  80. for index, y in enumerate(norm_arr):
  81. if i <= y:
  82. find_index = index
  83. indices_found.append(find_index + 1)
  84. indices = np.array(indices_found) * i_indices_step
  85. # add tricks to avoid same indice
  86. # => when index is same as previous, then add number of samples expected by step
  87. # Example with step of 20 : [20, 20, 20, 100, 200] => [20, 40, 60, 100, 200]
  88. final_indices = []
  89. for index, i in enumerate(indices):
  90. value = indices[index]
  91. if index > 0:
  92. if i <= indices[index - 1]:
  93. value = indices[index - 1] + o_indices_step
  94. indices[index] = value
  95. final_indices.append(value)
  96. return np.array(final_indices) + default_add
  97. def main():
  98. """
  99. main function which is ran when launching script
  100. """
  101. parser = argparse.ArgumentParser(description="Compute new dataset scene")
  102. parser.add_argument('--file', type=str, help='file data extracted from `utils/extract_stats_freq_and_min.py` script', required=True)
  103. parser.add_argument('--rawls_folder', type=str, help='rawls dataset folder with scene', required=True)
  104. parser.add_argument('--png_folder', type=str, help='png dataset folder with scene', required=True)
  105. parser.add_argument('--users', type=int, help='min number of users required per scene', required=True, default=10)
  106. parser.add_argument('--samples', type=int, help='expected samples to get for this dataset', required=True, default=10000)
  107. parser.add_argument('--output', type=str, help='output images folder', required=True)
  108. args = parser.parse_args()
  109. p_file = args.file
  110. p_rawls_folder = args.rawls_folder
  111. p_png_folder = args.png_folder
  112. p_users = args.users
  113. p_samples = args.samples
  114. p_output = args.output
  115. with open(p_file, 'r') as f:
  116. for line in f.readlines():
  117. data = line.split(';')
  118. scene = data[0]
  119. n_users = int(data[1])
  120. min_index = int(data[2])
  121. # remove _partX from scene name
  122. scene_name = scene.split('_')[0] + '_' + scene.split('_')[1]
  123. if n_users >= p_users:
  124. print('Extract custom indices based on minimum index for', scene)
  125. rawls_folder_scene = os.path.join(p_rawls_folder, scene_name)
  126. png_folder_scene = os.path.join(p_png_folder, scene)
  127. if not os.path.exists(rawls_folder_scene):
  128. print(rawls_folder_scene, 'rawls folder does not exist')
  129. else:
  130. # get all rawls files
  131. rawls_files = [ os.path.join(rawls_folder_scene, p) for p in sorted(os.listdir(rawls_folder_scene)) ]
  132. # extract step from these files
  133. input_step = p_samples / len(rawls_files)
  134. print('Extract indices for', scene)
  135. # get indices using min index
  136. indices = extracts_linear_indices_rawls(rawls_files, n_expected=50, i_indices_step=input_step, o_indices_step=20, start_at=min_index, smooth_arr=True, gamma=True)
  137. print('Indices found are', indices)
  138. # create output directory
  139. output_scene_dir = os.path.join(p_output, scene)
  140. if not os.path.exists(output_scene_dir):
  141. os.makedirs(output_scene_dir)
  142. # get expected png image and move it
  143. for index in indices:
  144. str_index = str(index)
  145. while len(str_index) < 5:
  146. str_index = "0" + str_index
  147. image_name = scene + '_' + str_index + '.png'
  148. png_image_path = os.path.join(png_folder_scene, image_name)
  149. # create output filepath
  150. output_img_filepath = os.path.join(output_scene_dir, image_name)
  151. # copy expected image path
  152. shutil.copy2(png_image_path, output_img_filepath)
  153. else:
  154. print('Only', n_users, 'users who passed the experiment for', scene)
# Standard script guard: run main() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()