run_openML_surrogate_multi.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  import argparse
  import itertools
  import os
  import shutil
  import subprocess
  import sys
  # Directory whose entries are each handled as one OpenML problem by main().
  open_ml_problems_folder = 'OpenML_datasets'

  # Directory where the pre-computed real-evaluation data files are read from
  # and copied to (one copy per surrogate run).
  surrogate_data_path = 'data/surrogate/data/'

  # Values forwarded to the surrogate script as `--k_division`.
  k_params = [100, 150, 200]

  # Values forwarded to the surrogate script as `--k_random`.
  k_random = [0, 1]

  # Values forwarded to the surrogate script as `--k_dynamic`.
  k_reinit = [0, 1]

  # Value forwarded to the surrogate script as `--every_ls`.
  every_ls = 50

  # Number of repeated runs launched when k_random == 1 and k_reinit == 0,
  # so that results can be averaged afterwards.
  n_times = 5
  # Script launched for both the data-generation step and every surrogate run.
  _SURROGATE_SCRIPT = 'find_best_attributes_surrogate_openML_multi.py'


  def _run_command(command):
      """Run ``command`` (a list of arguments) without a shell; return its exit status.

      A non-zero status is reported on stdout instead of being silently ignored.
      """
      status = subprocess.run(command).returncode
      if status != 0:
          print(f'Command exited with status {status}: {" ".join(command)}')
      return status


  def main():
      """Run the surrogate experiments for every entry of the OpenML folder.

      Command-line arguments:
          --ils: total number of iterations for the ILS algorithm (required).
          --ls: number of iterations for the local search algorithm (required).

      For each problem the real-evaluation data is generated once
      (`--generate_only 1`), then copied and reused for every combination of
      `k_params`, `k_random` and `k_reinit`. The combination with random
      enabled and re-initialisation disabled is launched `n_times`, each run
      with a zero-padded index suffix, so that a mean can be computed later.

      Problems whose real-evaluation data file is missing after the generation
      step are skipped; the script then exits with a non-zero status once all
      other problems have been processed.
      """
      parser = argparse.ArgumentParser(description="Find best features for each OpenML problems")
      parser.add_argument('--ils', type=int, help='number of total iteration for ils algorithm', required=True)
      parser.add_argument('--ls', type=int, help='number of iteration for Local Search algorithm', required=True)
      args = parser.parse_args()

      p_ils = args.ils
      p_ls = args.ls

      skipped_problems = []

      for ml_problem in sorted(os.listdir(open_ml_problems_folder)):
          ml_problem_name = ml_problem.replace('.csv', '')
          ml_problem_path = os.path.join(open_ml_problems_folder, ml_problem)

          # Arguments are passed as a list (no shell), so file names containing
          # spaces or shell metacharacters reach the child script unchanged.
          # sys.executable keeps the child in the same interpreter/environment.
          common_args = [
              sys.executable, _SURROGATE_SCRIPT,
              '--data', ml_problem_path,
              '--ils', str(p_ils),
              '--ls', str(p_ls),
          ]

          # for each problem prepare specific pre-computed real solution file
          print(f'Running extraction real evaluations data for {ml_problem_name}')
          _run_command(common_args + ['--output', ml_problem_name, '--generate_only', '1'])

          real_evaluation_data_file_path = os.path.join(surrogate_data_path, ml_problem_name)
          if not os.path.isfile(real_evaluation_data_file_path):
              # Without this file every copy below would fail; keep going with
              # the remaining problems and report the failure at the end.
              print(f'No real evaluation data found at {real_evaluation_data_file_path}, skipping {ml_problem_name}')
              skipped_problems.append(ml_problem_name)
              continue

          # for each multi param:
          # - copy precomputed real_evaluation_data_file
          # - run new instance using specific data
          for k, k_r, k_init in itertools.product(k_params, k_random, k_reinit):
              base_name = f'{ml_problem_name}_everyLS_{every_ls}_k{k}_random{k_r}_reinit{k_init}'

              # if not use of k_reinit and use of random, then run multiple
              # times this instance to do mean later
              if k_init == 0 and k_r == 1:
                  runs = [(f'{base_name}_{i:03d}', f', i: {i}') for i in range(n_times)]
              else:
                  runs = [(base_name, '')]

              for output_problem_name, run_label in runs:
                  # copy pre-computed real evaluation data for this instance
                  current_output_real_eval_path = os.path.join(surrogate_data_path, output_problem_name)
                  shutil.copy2(real_evaluation_data_file_path, current_output_real_eval_path)

                  print(f'Running extraction data for {ml_problem_name} with [ils: {p_ils}, ls: {p_ls}, k: {k}, k_r: {k_r}, k_reinit: {k_init}{run_label}]')
                  _run_command(common_args + [
                      '--every_ls', str(every_ls),
                      '--k_division', str(k),
                      '--k_random', str(k_r),
                      '--k_dynamic', str(k_init),
                      '--output', output_problem_name,
                  ])

      if skipped_problems:
          sys.exit(f'Missing real evaluation data for: {", ".join(skipped_problems)}')
  # Launch the batch only when this file is executed as a script.
  if __name__ == "__main__":
      main()