1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
import argparse
import os
import subprocess
import sys
# Grid of candidate settings, keyed by feature-selection method name.
# main() forwards each key as `--method` and each value as `--params` to the
# runner script; values stay as strings because they are placed on a command
# line verbatim.
params = {
    "variance_threshold": ["0.001", "0.01", "0.05", "0.1"],
    "kbest": ["0.9", "0.8", "0.7", "0.6"],
    "linearSVC": ["0.1", "1", "10", "100"],
    "tree": ["10", "50", "100", "200"],
    "rfecv": ["3", "4", "5"],
}

# Directory (relative to the working directory) whose entries are each treated
# as one dataset to process.
open_ml_problems_folder = 'OpenML_datasets'
def _build_command(data_path, method, param, ntrain, output):
    """Return the argv list that launches one feature-selection run.

    The command is built as a list (not a shell string) so that paths or
    output names containing spaces or shell metacharacters reach the child
    process as single, uninterpreted arguments.
    """
    return [
        # Reuse the interpreter running this script so the child sees the
        # same environment / installed packages.
        sys.executable,
        'features_selection/run_method_openML.py',
        '--data', data_path,
        '--method', method,
        '--params', param,
        '--ntrain', str(ntrain),
        '--output', output,
    ]


def main():
    """Run every (dataset, method, param) combination through the runner script.

    Command-line arguments:
        --ntrain  int, forwarded to the runner (default 1).
        --output  str, required, forwarded to the runner.

    Every entry of ``open_ml_problems_folder`` (in sorted order) is combined
    with every method/value pair of ``params``; one child process is started
    per combination and awaited before the next one starts. A child that
    exits with a non-zero status is reported on stderr and the sweep goes on.

    Raises:
        OSError: if ``open_ml_problems_folder`` cannot be listed.
    """
    parser = argparse.ArgumentParser(description="Get features extraction from specific methods and params")
    parser.add_argument('--ntrain', type=int, help='number of training in order to keep mean of score', default=1)
    parser.add_argument('--output', type=str, help='output features selection results', required=True)

    args = parser.parse_args()

    p_ntrain = args.ntrain
    p_output = args.output

    open_ml_problems = sorted(os.listdir(open_ml_problems_folder))

    for ml_problem in open_ml_problems:

        # Strip only a trailing ".csv"; str.replace would also remove the
        # substring from the middle of a file name.
        if ml_problem.endswith('.csv'):
            ml_problem_name = ml_problem[:-len('.csv')]
        else:
            ml_problem_name = ml_problem
        ml_problem_path = os.path.join(open_ml_problems_folder, ml_problem)

        for key, values in params.items():

            for param in values:

                # Flush so this line appears before the child's own output
                # when stdout is redirected to a file or pipe.
                print(f'Run features selection for OpenML `{ml_problem_name}` problem with {{method: {key}, params: {param}, ntrain: {p_ntrain}}}', flush=True)

                command = _build_command(ml_problem_path, key, param, p_ntrain, p_output)
                completed = subprocess.run(command)

                # Keep the sweep best-effort, but do not hide failed runs.
                if completed.returncode != 0:
                    print(f'Features selection run failed for `{ml_problem_name}` '
                          f'(method: {key}, params: {param}): exited with status {completed.returncode}',
                          file=sys.stderr)
# Start the sweep only when this file is executed as a script; importing the
# module must not launch any runs.
if __name__ == '__main__':
    main()
|