from keras.preprocessing.image import ImageDataGenerator from keras.models import Sequential from keras.layers import Conv1D, MaxPooling1D from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization from keras import backend as K import matplotlib.pyplot as plt from sklearn.utils import shuffle import numpy as np import pandas as pd from ipfml import image_processing from PIL import Image import sys, os, getopt import subprocess import time vector_size = 100 epochs = 100 batch_size = 24 input_shape = (vector_size, 1) filename = "svd_model" def f1(y_true, y_pred): def recall(y_true, y_pred): """Recall metric. Only computes a batch-wise average of recall. Computes the recall, a metric for multi-label classification of how many relevant items are selected. """ true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) recall = true_positives / (possible_positives + K.epsilon()) return recall def precision(y_true, y_pred): """Precision metric. Only computes a batch-wise average of precision. Computes the precision, a metric for multi-label classification of how many selected items are relevant. """ true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) precision = true_positives / (predicted_positives + K.epsilon()) return precision precision = precision(y_true, y_pred) recall = recall(y_true, y_pred) return 2*((precision*recall)/(precision+recall+K.epsilon())) def generate_model(): model = Sequential() #model.add(Conv1D(128, (10), input_shape=input_shape)) #model.add(Activation('relu')) #model.add(Conv1D(128, (10))) #model.add(Activation('relu')) #model.add(Conv1D(128, (10))) #model.add(Activation('relu')) #model.add(MaxPooling1D(pool_size=(2))) #model.add(Conv1D(64, (10))) #model.add(Activation('relu')) #model.add(Conv1D(64, (10))) #model.add(Activation('relu')) #model.add(Conv1D(64, (10))) #model.add(Activation('relu')) #model.add(MaxPooling1D(pool_size=(2))) #model.add(Conv1D(32, (10))) #model.add(Activation('relu')) #model.add(Conv1D(32, (10))) #model.add(Activation('relu')) #model.add(Conv1D(32, (10))) #model.add(Activation('relu')) #model.add(MaxPooling1D(pool_size=(2))) model.add(Flatten(input_shape=input_shape)) #model.add(Dense(2048)) #model.add(Activation('relu')) #model.add(BatchNormalization()) #model.add(Dropout(0.3)) model.add(Dense(1024)) model.add(Activation('relu')) model.add(BatchNormalization()) model.add(Dropout(0.4)) model.add(Dense(512)) model.add(Activation('relu')) model.add(BatchNormalization()) model.add(Dropout(0.4)) model.add(Dense(256)) model.add(Activation('relu')) model.add(BatchNormalization()) model.add(Dropout(0.4)) model.add(Dense(128)) model.add(Activation('relu')) model.add(BatchNormalization()) model.add(Dropout(0.4)) model.add(Dense(20)) model.add(Activation('relu')) model.add(BatchNormalization()) model.add(Dropout(0.4)) model.add(Dense(1)) model.add(Activation('sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1]) return model def main(): if len(sys.argv) <= 1: print('Run with default parameters...') print('python save_model_result_in_md.py --data filename') sys.exit(2) try: opts, args = getopt.getopt(sys.argv[1:], "hd", ["help=", "data="]) except getopt.GetoptError: # print help information and exit: print('python save_model_result_in_md.py --data filename') sys.exit(2) for o, a in opts: if o == "-h": print('python save_model_result_in_md.py --data filename') sys.exit() elif o in ("-d", "--data"): p_datafile = a else: assert False, "unhandled option" ########################### # 1. Get and prepare data ########################### dataset_train = pd.read_csv(p_datafile + '.train', header=None, sep=";") dataset_test = pd.read_csv(p_datafile + '.test', header=None, sep=";") # default first shuffle of data dataset_train = shuffle(dataset_train) dataset_test = shuffle(dataset_test) # get dataset with equal number of classes occurences noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 1] not_noisy_df_train = dataset_train[dataset_train.ix[:, 0] == 0] nb_noisy_train = len(noisy_df_train.index) noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 1] not_noisy_df_test = dataset_test[dataset_test.ix[:, 0] == 0] nb_noisy_test = len(noisy_df_test.index) final_df_train = pd.concat([not_noisy_df_train[0:nb_noisy_train], noisy_df_train]) final_df_test = pd.concat([not_noisy_df_test[0:nb_noisy_test], noisy_df_test]) # shuffle data another time final_df_train = shuffle(final_df_train) final_df_test = shuffle(final_df_test) final_df_train_size = len(final_df_train.index) final_df_test_size = len(final_df_test.index) # use of the whole data set for training x_dataset_train = final_df_train.ix[:,1:] x_dataset_test = final_df_test.ix[:,1:] y_dataset_train = final_df_train.ix[:,0] y_dataset_test = final_df_test.ix[:,0] ####################### # 2. Getting model ####################### model = generate_model() model.summary() ####################### # 3. Fit model : use of cross validation to fit model ####################### # reshape input data x_dataset_train = np.array(x_dataset_train).reshape(len(x_dataset_train), vector_size, 1) x_dataset_test = np.array(x_dataset_test).reshape(len(x_dataset_test), vector_size, 1) model.fit(x_dataset_train, y_dataset_train, epochs=epochs, batch_size=batch_size, validation_split=0.20) score = model.evaluate(x_dataset_test, y_dataset_test, batch_size=batch_size) print(score) if __name__== "__main__": main()