generate_data_model_random.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Fri Sep 14 21:02:42 2018
  5. @author: jbuisine
  6. """
  7. from __future__ import print_function
  8. import sys, os, getopt
  9. import numpy as np
  10. import random
  11. import time
  12. import json
  13. from PIL import Image
  14. from ipfml import processing
  15. from ipfml import metrics
  16. from modules.utils import config as cfg
# getting configuration information
# (every value below is a plain alias of a setting from modules.utils.config)
config_filename = cfg.config_filename
zone_folder = cfg.zone_folder
# scene folder entries whose name contains this text are skipped when listing scenes
min_max_filename = cfg.min_max_filename_extension

# define all scenes values
# scenes_list and scenes_indexes are used as parallel sequences: main() looks a
# user-supplied identifier up in scenes_indexes and takes the matching position
scenes_list = cfg.scenes_names
scenes_indexes = cfg.scenes_indices
choices = cfg.normalization_choices
# root folder listed by generate_data_model to discover the scene folders
path = cfg.dataset_path
# zone numbers, turned into folder names "zone00", "zone01", ...
zones = cfg.zones_indices
# name of the per-zone file whose first line holds the integer threshold
seuil_expe_filename = cfg.seuil_expe_filename
metric_choices = cfg.metric_choices_labels
# folder created by generate_data_model when it does not exist yet
output_data_folder = cfg.output_data_folder
# NOTE(review): generate_data_model also reads `generic_output_file_svd`, which
# is not defined anywhere in the visible part of this file - confirm where that
# name is supposed to come from.
  30. def construct_new_line(path_seuil, interval, line, sep, index):
  31. begin, end = interval
  32. line_data = line.split(';')
  33. seuil = line_data[0]
  34. metrics = line_data[begin+1:end+1]
  35. with open(path_seuil, "r") as seuil_file:
  36. seuil_learned = int(seuil_file.readline().strip())
  37. if seuil_learned > int(seuil):
  38. line = '1'
  39. else:
  40. line = '0'
  41. for idx, val in enumerate(metrics):
  42. if index:
  43. line += " " + str(idx + 1)
  44. line += sep
  45. line += val
  46. line += '\n'
  47. return line
  48. def generate_data_model(_filename, _interval, _choice, _metric, _scenes = scenes_list, _nb_zones = 4, _percent = 1, _sep=':', _index=True):
  49. output_train_filename = _filename + ".train"
  50. output_test_filename = _filename + ".test"
  51. if not '/' in output_train_filename:
  52. raise Exception("Please select filename with directory path to save data. Example : data/dataset")
  53. # create path if not exists
  54. if not os.path.exists(output_data_folder):
  55. os.makedirs(output_data_folder)
  56. train_file = open(output_train_filename, 'w')
  57. test_file = open(output_test_filename, 'w')
  58. scenes = os.listdir(path)
  59. # remove min max file from scenes folder
  60. scenes = [s for s in scenes if min_max_filename not in s]
  61. for id_scene, folder_scene in enumerate(scenes):
  62. scene_path = os.path.join(path, folder_scene)
  63. zones_folder = []
  64. # create zones list
  65. for index in zones:
  66. index_str = str(index)
  67. if len(index_str) < 2:
  68. index_str = "0" + index_str
  69. zones_folder.append("zone"+index_str)
  70. # shuffle list of zones (=> randomly choose zones)
  71. random.shuffle(zones_folder)
  72. for id_zone, zone_folder in enumerate(zones_folder):
  73. zone_path = os.path.join(scene_path, zone_folder)
  74. data_filename = _metric + "_" + _choice + generic_output_file_svd
  75. data_file_path = os.path.join(zone_path, data_filename)
  76. # getting number of line and read randomly lines
  77. f = open(data_file_path)
  78. lines = f.readlines()
  79. num_lines = len(lines)
  80. lines_indexes = np.arange(num_lines)
  81. random.shuffle(lines_indexes)
  82. path_seuil = os.path.join(zone_path, seuil_expe_filename)
  83. counter = 0
  84. # check if user select current scene and zone to be part of training data set
  85. for index in lines_indexes:
  86. line = construct_new_line(path_seuil, _interval, lines[index], _sep, _index)
  87. percent = counter / num_lines
  88. if id_zone < _nb_zones and folder_scene in _scenes and percent <= _percent:
  89. train_file.write(line)
  90. else:
  91. test_file.write(line)
  92. counter += 1
  93. f.close()
  94. train_file.close()
  95. test_file.close()
  96. def main():
  97. if len(sys.argv) <= 1:
  98. print('Run with default parameters...')
  99. print('python generate_data_model_random.py --output xxxx --interval 0,20 --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
  100. sys.exit(2)
  101. try:
  102. opts, args = getopt.getopt(sys.argv[1:], "ho:i:k:s:n:p:r", ["help=", "output=", "interval=", "kind=", "metric=","scenes=", "nb_zones=", "percent=", "sep=", "rowindex="])
  103. except getopt.GetoptError:
  104. # print help information and exit:
  105. print('python generate_data_model_random.py --output xxxx --interval 0,20 --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
  106. sys.exit(2)
  107. for o, a in opts:
  108. if o == "-h":
  109. print('python generate_data_model_random.py --output xxxx --interval 0,20 --kind svdne --metric lab --scenes "A, B, D" --nb_zones 5 --percent 0.7 --sep : --rowindex 1')
  110. sys.exit()
  111. elif o in ("-o", "--output"):
  112. p_filename = a
  113. elif o in ("-i", "--interval"):
  114. p_interval = list(map(int, a.split(',')))
  115. elif o in ("-k", "--kind"):
  116. p_kind = a
  117. elif o in ("-m", "--metric"):
  118. p_metric = a
  119. elif o in ("-s", "--scenes"):
  120. p_scenes = a.split(',')
  121. elif o in ("-n", "--nb_zones"):
  122. p_nb_zones = int(a)
  123. elif o in ("-p", "--percent"):
  124. p_percent = float(a)
  125. elif o in ("-s", "--sep"):
  126. p_sep = a
  127. elif o in ("-r", "--rowindex"):
  128. if int(a) == 1:
  129. p_rowindex = True
  130. else:
  131. p_rowindex = False
  132. else:
  133. assert False, "unhandled option"
  134. # getting scenes from indexes user selection
  135. scenes_selected = []
  136. for scene_id in p_scenes:
  137. index = scenes_indexes.index(scene_id.strip())
  138. scenes_selected.append(scenes[index])
  139. # create database using img folder (generate first time only)
  140. generate_data_model(p_filename, p_interval, p_kind, p_metric, scenes_selected, p_nb_zones, p_percent, p_sep, p_rowindex)
# run the command line entry point only when this file is executed as a script
if __name__== "__main__":
    main()