# run_all_openML.py

import argparse
import os
import subprocess
  2. params = {
  3. "variance_threshold": [
  4. "0.001",
  5. "0.01",
  6. "0.05",
  7. "0.1",
  8. ],
  9. "kbest": [
  10. "0.9",
  11. "0.8",
  12. "0.7",
  13. "0.6",
  14. ],
  15. "linearSVC": [
  16. "0.1",
  17. "1",
  18. "10",
  19. "100"
  20. ],
  21. "tree": [
  22. "10",
  23. "50",
  24. "100",
  25. "200",
  26. ],
  27. "rfecv": [
  28. "3",
  29. "4",
  30. "5"
  31. ]
  32. }
  33. open_ml_problems_folder = 'OpenML_datasets'
  34. def main():
  35. parser = argparse.ArgumentParser(description="Get features extraction from specific methods and params")
  36. parser.add_argument('--ntrain', type=int, help='number of training in order to keep mean of score', default=1)
  37. parser.add_argument('--output', type=str, help='output features selection results', required=True)
  38. args = parser.parse_args()
  39. p_ntrain = args.ntrain
  40. p_output = args.output
  41. open_ml_problems = sorted(os.listdir(open_ml_problems_folder))
  42. for ml_problem in open_ml_problems:
  43. ml_problem_name = ml_problem.replace('.csv', '')
  44. ml_problem_path = os.path.join(open_ml_problems_folder, ml_problem)
  45. for key, values in params.items():
  46. for param in values:
  47. print(f'Run features selection for OpenML `{ml_problem_name}` problem with {{method: {key}, params: {param}, ntrain: {p_ntrain}}}')
  48. command_str = f'python features_selection/run_method_openML.py ' \
  49. f'--data {ml_problem_path} ' \
  50. f'--method {key} ' \
  51. f'--params {param} ' \
  52. f'--ntrain {p_ntrain} ' \
  53. f'--output {p_output}'
  54. os.system(command_str)
  55. if __name__ == "__main__":
  56. main()