make_dataset.py

import os
import sys
import argparse

import modules.config as cfg
from modules.features import compute_feature


def compute_files(_n, _feature_choice, _each_row, _each_column):
    """
    Read all scene folders and files in order to compute the output dataset.
    """
    output_dataset_filename = cfg.output_file_prefix + _feature_choice + '_' + _n + '_column_' + _each_column + '_row_' + _each_row + '.csv'
    output_dataset_filename = os.path.join(cfg.output_data_folder, output_dataset_filename)

    # create the output data folder if it does not exist yet
    if not os.path.exists(cfg.output_data_folder):
        os.makedirs(cfg.output_data_folder)

    output_file = open(output_dataset_filename, 'w')
    print('Preparing to store data into', output_dataset_filename)
    scenes = os.listdir(cfg.folder_scenes_path)

    # remove the min/max files from the scenes folder
    scenes = [s for s in scenes if s not in cfg.folder_and_files_filtered]
    scenes = [s for s in scenes if '.csv' not in s]  # do not keep generated .csv files

    # skip test scenes from the dataset
    scenes = [s for s in scenes if s not in cfg.test_scenes]

    counter = 0
    number_of_elements = len(scenes) * cfg.number_of_rows * cfg.number_of_columns
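    # scene layout on disk: <scene>/<column>/<scene>_<column>_<row>.dat,
    # one file per pixel location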
    for scene in scenes:
        scene_path = os.path.join(cfg.folder_scenes_path, scene)

        for id_column in range(cfg.number_of_columns):
            if id_column % int(_each_column) == 0:
                folder_path = os.path.join(scene_path, str(id_column))

                for id_row in range(cfg.number_of_rows):
                    if id_row % int(_each_row) == 0:
                        pixel_filename = scene + '_' + str(id_column) + '_' + str(id_row) + ".dat"
                        pixel_file_path = os.path.join(folder_path, pixel_filename)

                        saved_row = ''

                        # for each file, read its content, keep the `n` first
                        # values and compute the mean over all values
                        with open(pixel_file_path, 'r') as f:
                            lines = [float(l) / 255. for l in f.readlines()]

                            pixel_values = lines[0:int(_n)]
                            mean = sum(lines) / float(len(lines))

                            saved_row += str(mean)

                            data = compute_feature(_feature_choice, pixel_values)

                            for val in data:
                                saved_row += ';' + str(val)

                            saved_row += '\n'

                            # store mean and feature values as one .csv row
                            output_file.write(saved_row)

                    counter = counter + 1
            else:
                # the whole column is skipped: count all of its rows at once
                counter += cfg.number_of_rows

            # print progress, then move the cursor back up one line with the
            # ANSI escape code so the percentage is overwritten in place
            print("{0:.2f}%".format(counter / number_of_elements * 100))
            sys.stdout.write("\033[F")

    print('\n')
    output_file.close()


def main():
    parser = argparse.ArgumentParser(description="Compute .csv dataset file")

    parser.add_argument('--n', type=str, help='Number of first pixel values to keep', required=True)
    parser.add_argument('--feature', type=str, help='Feature choice to compute from samples', choices=cfg.features_list, required=True)

    # defaults are strings because these values are concatenated into the
    # output filename
    parser.add_argument('--each_row', type=str, help='Keep only every nth row', default='1')
    parser.add_argument('--each_column', type=str, help='Keep only every nth column', default='1')

    args = parser.parse_args()

    param_n = args.n
    param_feature = args.feature
    param_each_row = args.each_row
    param_each_column = args.each_column

    compute_files(param_n, param_feature, param_each_row, param_each_column)


if __name__ == "__main__":
    main()
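
# Example invocation (values and the feature name below are only illustrative;
# valid feature choices come from cfg.features_list in modules/config.py):
#
#   python make_dataset.py --n 100 --feature mean --each_row 2 --each_column 2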