# generate_dataset.py
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Jun 19 11:47:42 2019
  5. @author: jbuisine
  6. """
  7. import sys, os, argparse
  8. import numpy as np
  9. import random
  10. import time
  11. import json
  12. from PIL import Image
  13. from ipfml import processing, metrics, utils
  14. from skimage import color
  15. from modules.utils import config as cfg
  16. from modules.utils import data as dt
  17. from preprocessing_functions import svd_reconstruction
  18. # getting configuration information
  19. config_filename = cfg.config_filename
  20. zone_folder = cfg.zone_folder
  21. learned_folder = cfg.learned_zones_folder
  22. min_max_filename = cfg.min_max_filename_extension
  23. # define all scenes values
  24. scenes_list = cfg.scenes_names
  25. scenes_indexes = cfg.scenes_indices
  26. choices = cfg.normalization_choices
  27. path = cfg.dataset_path
  28. zones = cfg.zones_indices
  29. seuil_expe_filename = cfg.seuil_expe_filename
  30. metric_choices = cfg.metric_choices_labels
  31. output_data_folder = cfg.output_data_folder
  32. generic_output_file_svd = '_random.csv'
  33. def generate_data_model(_scenes_list, _filename, _interval, _metric, _scenes, _nb_zones = 4, _random=0):
  34. output_train_filename = _filename + ".train"
  35. output_test_filename = _filename + ".test"
  36. if not '/' in output_train_filename:
  37. raise Exception("Please select filename with directory path to save data. Example : data/dataset")
  38. # create path if not exists
  39. if not os.path.exists(output_data_folder):
  40. os.makedirs(output_data_folder)
  41. train_file_data = []
  42. test_file_data = []
  43. scenes = os.listdir(path)
  44. # remove min max file from scenes folder
  45. scenes = [s for s in scenes if min_max_filename not in s]
  46. begin, end = _interval
  47. # go ahead each scenes
  48. for id_scene, folder_scene in enumerate(_scenes_list):
  49. scene_path = os.path.join(path, folder_scene)
  50. zones_indices = zones
  51. # shuffle list of zones (=> randomly choose zones)
  52. # only in random mode
  53. if _random:
  54. random.shuffle(zones_indices)
  55. # store zones learned
  56. learned_zones_indices = zones_indices[:_nb_zones]
  57. # write into file
  58. folder_learned_path = os.path.join(learned_folder, _filename.split('/')[1])
  59. if not os.path.exists(folder_learned_path):
  60. os.makedirs(folder_learned_path)
  61. file_learned_path = os.path.join(folder_learned_path, folder_scene + '.csv')
  62. with open(file_learned_path, 'w') as f:
  63. for i in learned_zones_indices:
  64. f.write(str(i) + ';')
  65. for id_zone, index_folder in enumerate(zones_indices):
  66. index_str = str(index_folder)
  67. if len(index_str) < 2:
  68. index_str = "0" + index_str
  69. current_zone_folder = "zone" + index_str
  70. zone_path = os.path.join(scene_path, current_zone_folder)
  71. # custom path for metric
  72. metric_path = os.path.join(zone_path, _metric)
  73. # custom path for interval of reconstruction and metric
  74. metric_interval_path = os.path.join(metric_path, str(begin) + "_" + str(end))
  75. for label in os.listdir(metric_interval_path):
  76. label_path = os.path.join(metric_interval_path, label)
  77. images = os.listdir(label_path)
  78. for img in images:
  79. img_path = os.path.join(label_path, img)
  80. line = label + ';' + img_path + '\n'
  81. if id_zone < _nb_zones and folder_scene in _scenes:
  82. train_file_data.append(line)
  83. else:
  84. test_file_data.append(line)
  85. train_file = open(output_train_filename, 'w')
  86. test_file = open(output_test_filename, 'w')
  87. random.shuffle(train_file_data)
  88. random.shuffle(test_file_data)
  89. for line in train_file_data:
  90. train_file.write(line)
  91. for line in test_file_data:
  92. test_file.write(line)
  93. train_file.close()
  94. test_file.close()
  95. def main():
  96. parser = argparse.ArgumentParser(description="Compute specific dataset for model using of metric")
  97. parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
  98. parser.add_argument('--metric', type=str,
  99. help="metric choice in order to compute data (use 'all' if all metrics are needed)",
  100. choices=metric_choices,
  101. required=True)
  102. parser.add_argument('--interval', type=str, help="interval choice if needed by the compression method", default='"100, 200"')
  103. parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
  104. parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
  105. parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
  106. parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
  107. args = parser.parse_args()
  108. p_filename = args.output
  109. p_metric = args.metric
  110. p_interval = list(map(int, args.interval.split(',')))
  111. p_scenes = args.scenes.split(',')
  112. p_nb_zones = args.nb_zones
  113. p_renderer = args.renderer
  114. p_random = args.random
  115. # list all possibles choices of renderer
  116. scenes_list = dt.get_renderer_scenes_names(p_renderer)
  117. scenes_indices = dt.get_renderer_scenes_indices(p_renderer)
  118. # getting scenes from indexes user selection
  119. scenes_selected = []
  120. for scene_id in p_scenes:
  121. index = scenes_indices.index(scene_id.strip())
  122. scenes_selected.append(scenes_list[index])
  123. # create database using img folder (generate first time only)
  124. generate_data_model(scenes_list, p_filename, p_interval, p_metric, scenes_selected, p_nb_zones, p_random)
  125. if __name__== "__main__":
  126. main()