Преглед на файлове

Merge branch 'release/v0.0.4'

jbuisine преди 6 години
родител
ревизия
f37ea802a1

+ 7 - 0
.gitignore

@@ -1,3 +1,10 @@
 # project data
 data
 .python-version
+__pycache__
+
+# by default avoid model files and png files
+*.h5
+*.png
+!saved_models/*.h5
+!saved_models/*.png

+ 16 - 2
README.md

@@ -8,20 +8,34 @@ pip install -r requirements.txt
 
 ## How to use
 
-Generate dataset (run only once time) :
+Generate the dataset (run this only once, or clean the data folder beforehand):
 ```
 python generate_dataset.py
 ```
 
 It will split scenes and generate all data you need for your neural network.
-You can specify the number of sub images you want in the script by modifying NUMBER_SUB_IMAGES variables.
+You can specify the number of sub-images you want by modifying the **_NUMBER_SUB_IMAGES_** variable in the script.
 
+There are 3 kinds of Neural Networks :
+- **classification_cnn_keras.py** : *based on cropped images*
+- **classification_cnn_keras_cross_validation.py** : *based on cropped images which are randomly split for training*
+- **classification_cnn_keras_svd.py** : *based on SVD metrics of the image*
+
+Note that the image input size needs to be changed if you used a specific size for your cropped images.
 
 After your built your neural network in classification_cnn_keras.py, you just have to run it :
 ```
 python classification_cnn_keras.py
 ```
 
+## Modules
+
+This project contains modules :
+- **modules/image_metrics** : *where all metric computation functions are implemented*
+- **modules/model_helper** : *contains helper functions to save or display model information and performance*
+
+All these modules will be enhanced during the development of the project.
+
 ## How to contribute
 
 This git project uses [git-flow](https://danielkummer.github.io/git-flow-cheatsheet/) implementation. You are free to contribute to it.

+ 147 - 65
classification_cnn_keras.py

@@ -23,12 +23,16 @@ data/
             ...
 ```
 '''
+import sys, os, getopt
 
 from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
 from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
 from keras.layers import Activation, Dropout, Flatten, Dense
 from keras import backend as K
+from keras.utils import plot_model
+
+from modules.model_helper import plot_info
 
 
 # dimensions of our images.
@@ -46,68 +50,146 @@ if K.image_data_format() == 'channels_first':
 else:
     input_shape = (img_width, img_height, 3)
 
-model = Sequential()
-model.add(Conv2D(60, (2, 2), input_shape=input_shape))
-model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-
-model.add(Conv2D(40, (2, 2)))
-model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-
-model.add(Conv2D(20, (2, 2)))
-model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-
-model.add(Conv2D(10, (2, 2)))
-model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-
-model.add(Flatten())
-model.add(Dense(60))
-model.add(Activation('relu'))
-model.add(Dropout(0.4))
-
-model.add(Dense(30))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-
-model.add(Dense(1))
-model.add(Activation('sigmoid'))
-
-model.compile(loss='binary_crossentropy',
-              optimizer='rmsprop',
-              metrics=['accuracy'])
-
-# this is the augmentation configuration we will use for training
-train_datagen = ImageDataGenerator(
-    rescale=1. / 255,
-    shear_range=0.2,
-    zoom_range=0.2,
-    horizontal_flip=True)
-
-# this is the augmentation configuration we will use for testing:
-# only rescaling
-test_datagen = ImageDataGenerator(rescale=1. / 255)
-
-train_generator = train_datagen.flow_from_directory(
-    train_data_dir,
-    target_size=(img_width, img_height),
-    batch_size=batch_size,
-    class_mode='binary')
-
-validation_generator = test_datagen.flow_from_directory(
-    validation_data_dir,
-    target_size=(img_width, img_height),
-    batch_size=batch_size,
-    class_mode='binary')
-
-model.summary()
-model.fit_generator(
-    train_generator,
-    steps_per_epoch=nb_train_samples // batch_size,
-    epochs=epochs,
-    validation_data=validation_generator,
-    validation_steps=nb_validation_samples // batch_size)
-
-model.save_weights('noise_classification_img100.h5')
+def generate_model():
+    '''
+    Method which builds and compiles the model to train.
+
+    The network stacks four Conv2D (2x2 kernel) + ReLU + MaxPooling2D stages
+    with 60, 40, 20 and 10 filters, then two dense layers (60 and 30 units)
+    with dropout, and ends with a single sigmoid unit. It reads the
+    module-level `input_shape` and is compiled with binary cross-entropy,
+    the rmsprop optimizer and the accuracy metric.
+
+    @return : compiled keras Sequential model
+    '''
+    model = Sequential()
+
+    # convolutional stages; only the first layer declares the input shape
+    model.add(Conv2D(60, (2, 2), input_shape=input_shape))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    for nb_filters in (40, 20, 10):
+        model.add(Conv2D(nb_filters, (2, 2)))
+        model.add(Activation('relu'))
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    # dense classifier head
+    model.add(Flatten())
+    for nb_units, dropout_rate in ((60, 0.4), (30, 0.2)):
+        model.add(Dense(nb_units))
+        model.add(Activation('relu'))
+        model.add(Dropout(dropout_rate))
+
+    # one sigmoid unit: binary output matching class_mode='binary'
+    model.add(Dense(1))
+    model.add(Activation('sigmoid'))
+
+    model.compile(loss='binary_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy'])
+
+    return model
+
+def load_train_data():
+    '''
+    Method which loads train data from the module-level `train_data_dir`.
+
+    Images are rescaled by 1/255 and augmented (shear, zoom, horizontal
+    flip), resized to (img_width, img_height) and served in batches of
+    `batch_size` with binary labels.
+
+    @return : DirectoryIterator
+    '''
+    # augmentation is applied to training images only
+    augmented_datagen = ImageDataGenerator(
+        rescale=1. / 255,
+        shear_range=0.2,
+        zoom_range=0.2,
+        horizontal_flip=True)
+
+    return augmented_datagen.flow_from_directory(
+        train_data_dir,
+        target_size=(img_width, img_height),
+        batch_size=batch_size,
+        class_mode='binary')
+
+def load_validation_data():
+    '''
+    Method which loads validation data from the module-level
+    `validation_data_dir`.
+
+    Images are only rescaled by 1/255 (no augmentation), resized to
+    (img_width, img_height) and served in batches of `batch_size` with
+    binary labels.
+
+    @return : DirectoryIterator
+    '''
+    # validation images are only rescaled, never augmented
+    rescale_datagen = ImageDataGenerator(rescale=1. / 255)
+
+    return rescale_datagen.flow_from_directory(
+        validation_data_dir,
+        target_size=(img_width, img_height),
+        batch_size=batch_size,
+        class_mode='binary')
+
+def main():
+    '''
+    Command line entry point: parses options, trains the model and, when an
+    output name is given, saves history plots, model diagram and weights.
+
+    Options :
+    -o, --output     : base name of the generated files
+    -d, --directory  : folder where the generated files are written
+    -b, --batch_size : overrides the module-level `batch_size`
+    -e, --epochs     : overrides the module-level `epochs`
+    -h, --help       : prints usage and exits
+    '''
+    global batch_size
+    global epochs
+
+    usage = 'classification_cnn_keras.py --output xxxxx [--directory xxxxx] [--batch_size xx] [--epochs xx]'
+
+    # both options may be omitted: bind the names so later tests cannot fail
+    # with an UnboundLocalError
+    filename = None
+    directory = None
+
+    if len(sys.argv) <= 1:
+        print('No output file defined...')
+        print(usage)
+        sys.exit(2)
+    try:
+        # 'd:' needs its colon so that -d takes an argument like --directory
+        opts, args = getopt.getopt(sys.argv[1:], "ho:b:e:d:", ["help", "directory=", "output=", "batch_size=", "epochs="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print(usage)
+        sys.exit(2)
+    for o, a in opts:
+        if o in ("-h", "--help"):
+            print(usage)
+            sys.exit()
+        elif o in ("-o", "--output"):
+            filename = a
+        elif o in ("-b", "--batch_size"):
+            batch_size = int(a)
+        elif o in ("-e", "--epochs"):
+            epochs = int(a)
+        elif o in ("-d", "--directory"):
+            directory = a
+        else:
+            assert False, "unhandled option"
+
+
+    # load of model
+    model = generate_model()
+    model.summary()
+
+    # resolve the output location before training, so that a folder problem
+    # shows up immediately instead of after the whole fit
+    if filename and directory:
+        print('Your model information will be saved into %s...' % directory)
+
+        # create folder if necessary
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        filename = os.path.join(directory, filename)
+
+    history = model.fit_generator(
+        load_train_data(),
+        steps_per_epoch=nb_train_samples // batch_size,
+        epochs=epochs,
+        validation_data=load_validation_data(),
+        validation_steps=nb_validation_samples // batch_size)
+
+    # if user needs output files
+    if filename:
+
+        # save plot file history
+        plot_info.save(history, filename)
+
+        plot_model(model, to_file='%s.png' % filename)
+        model.save_weights('%s.h5' % filename)
+
+
+if __name__ == "__main__":
+    main()

+ 66 - 19
classification_cnn_keras_cross_validation.py

@@ -23,15 +23,16 @@ data/
             ...
 ```
 '''
+import sys, os, getopt
 
 from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
 from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
 from keras.layers import Activation, Dropout, Flatten, Dense
 from keras import backend as K
-from sklearn.cross_validation import StratifiedKFold
 from keras.utils import plot_model
 
+from modules.model_helper import plot_info
 
 # dimensions of our images.
 img_width, img_height = 100, 100
@@ -50,7 +51,11 @@ else:
 
 
 
-def create_model():
+'''
+Method which returns model to train
+@return : compiled keras Sequential model
+'''
+def generate_model():
     # create your model using this function
     model = Sequential()
     model.add(Conv2D(60, (2, 2), input_shape=input_shape))
@@ -98,8 +103,6 @@ def create_model():
                   optimizer='rmsprop',
                   metrics=['accuracy'])
 
-    model.summary()
-    plot_model(model, to_file='noise_classification_img100.png', show_shapes=True)
     return model
 
 def load_data():
@@ -111,10 +114,6 @@ def load_data():
         zoom_range=0.2,
         horizontal_flip=True)
 
-    # this is the augmentation configuration we will use for testing:
-    # only rescaling
-    test_datagen = ImageDataGenerator(rescale=1. / 255)
-
     train_generator = train_datagen.flow_from_directory(
         train_data_dir,
         target_size=(img_width, img_height),
@@ -123,15 +122,9 @@ def load_data():
 
     return train_generator
 
-    #validation_generator = test_datagen.flow_from_directory(
-    #    validation_data_dir,
-    #    target_size=(img_width, img_height),
-    #    batch_size=batch_size,
-    #    class_mode='binary')
-
 def train_and_evaluate_model(model, data_train, data_test):
 
-    model.fit_generator(
+    return model.fit_generator(
         data_train,
         steps_per_epoch=nb_train_samples // batch_size,
         epochs=epochs,
@@ -139,7 +132,41 @@ def train_and_evaluate_model(model, data_train, data_test):
         validation_data=data_test,
         validation_steps=nb_validation_samples // batch_size)
 
-if __name__ == "__main__":
+def main():
+
+    global batch_size
+    global epochs
+
+    if len(sys.argv) <= 1:
+        print('No output file defined...')
+        print('classification_cnn_keras_svd.py --output xxxxx')
+        sys.exit(2)
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "ho:b:e:d", ["help", "directory=", "output=", "batch_size=", "epochs="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print('classification_cnn_keras_svd.py --output xxxxx')
+        sys.exit(2)
+    for o, a in opts:
+        if o == "-h":
+            print('classification_cnn_keras_svd.py --output xxxxx')
+            sys.exit()
+        elif o in ("-o", "--output"):
+            filename = a
+        elif o in ("-b", "--batch_size"):
+            batch_size = int(a)
+        elif o in ("-e", "--epochs"):
+            epochs = int(a)
+        elif o in ("-d", "--directory"):
+            directory = a
+        else:
+            assert False, "unhandled option"
+
+
+    # load of model
+    model = generate_model()
+    model.summary()
+
     n_folds = 10
 
     data_generator = ImageDataGenerator(rescale=1./255, validation_split=0.33)
@@ -151,7 +178,27 @@ if __name__ == "__main__":
     validation_generator = data_generator.flow_from_directory(train_data_dir, target_size=(img_width, img_height), shuffle=True, seed=13,
                                                          class_mode='binary', batch_size=batch_size, subset="validation")
 
-    model = create_model()
-    train_and_evaluate_model(model, train_generator, validation_generator)
+    # now run model
+    history = train_and_evaluate_model(model, train_generator, validation_generator)
+
+    print("directory %s " % directory)
+    if(directory):
+        print('Your model information will be saved into %s...' % directory)
+    # if user needs output files
+    if(filename):
+
+        # update filename by folder
+        if(directory):
+            # create folder if necessary
+            if not os.path.exists(directory):
+                os.makedirs(directory)
+            filename = directory + "/" + filename
 
-    model.save_weights('noise_classification_img100.h5')
+        # save plot file history
+        plot_info.save(history, filename)
+
+        plot_model(model, to_file=str(('%s.png' % filename)))
+        model.save_weights(str('%s.h5' % filename))
+
+if __name__ == "__main__":
+    main()

+ 166 - 95
classification_cnn_keras_svd.py

@@ -23,6 +23,7 @@ data/
             ...
 ```
 '''
+import sys, os, getopt
 
 from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
@@ -31,17 +32,20 @@ from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
 from keras.optimizers import Adam
 from keras.regularizers import l2
 from keras import backend as K
-from numpy.linalg import svd
+from keras.utils import plot_model
+
+import matplotlib.pyplot as plt
+
 import tensorflow as tf
 import numpy as np
-from PIL import Image
 
-from scipy import misc
-import matplotlib.pyplot as plt
-import keras as k
+from modules.model_helper import plot_info
+from modules.image_metrics import svd_metric
 
+
+# configuration
 # dimensions of our images.
-img_width, img_height = int(100), 1
+img_width, img_height = 100, 1
 
 train_data_dir = 'data/train'
 validation_data_dir = 'data/validation'
@@ -50,97 +54,164 @@ nb_validation_samples = 3600
 epochs = 200
 batch_size = 30
 
-# configuration
-config = tf.ConfigProto(intra_op_parallelism_threads=6, inter_op_parallelism_threads=6, \
-                        allow_soft_placement=True, device_count = {'CPU': 6})
-session = tf.Session(config=config)
-K.set_session(session)
-
-def svd_singular(image):
-    U, s, V = svd(image, full_matrices=False)
-    s = s[0:img_width]
-    result = s.reshape([img_width, 1, 1]) # one shape per canal
-    return result
-
 if K.image_data_format() == 'channels_first':
     input_shape = (3, img_width, img_height)
 else:
     input_shape = (img_width, img_height, 3)
 
-model = Sequential()
-
-model.add(Conv2D(100, (2, 1), input_shape=input_shape))
-model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(2, 1)))
-
-model.add(Conv2D(80, (2, 1)))
-model.add(Activation('relu'))
-model.add(AveragePooling2D(pool_size=(2, 1)))
-
-model.add(Conv2D(50, (2, 1)))
-model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(2, 1)))
-
-model.add(Flatten())
-model.add(BatchNormalization())
-model.add(Dense(300, kernel_regularizer=l2(0.01)))
-model.add(Activation('relu'))
-model.add(Dropout(0.4))
-
-model.add(Dense(30, kernel_regularizer=l2(0.01)))
-model.add(BatchNormalization())
-model.add(Activation('relu'))
-model.add(Dropout(0.3))
-
-model.add(Dense(100, kernel_regularizer=l2(0.01)))
-model.add(BatchNormalization())
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-
-model.add(Dense(20, kernel_regularizer=l2(0.01)))
-model.add(BatchNormalization())
-model.add(Activation('relu'))
-model.add(Dropout(0.1))
-
-model.add(Dense(1))
-model.add(Activation('sigmoid'))
-
-model.compile(loss='binary_crossentropy',
-              optimizer='rmsprop',
-              metrics=['accuracy'])
-
-# this is the augmentation configuration we will use for training
-train_datagen = ImageDataGenerator(
-    #rescale=1. / 255,
-    #shear_range=0.2,
-    #zoom_range=0.2,
-    #horizontal_flip=True,
-    preprocessing_function=svd_singular)
-# this is the augmentation configuration we will use for testing:
-# only rescaling
-test_datagen = ImageDataGenerator(
-    #rescale=1. / 255,
-    preprocessing_function=svd_singular)
-
-train_generator = train_datagen.flow_from_directory(
-    train_data_dir,
-    target_size=(img_width, img_height),
-    batch_size=batch_size,
-    class_mode='binary')
-
-validation_generator = test_datagen.flow_from_directory(
-    validation_data_dir,
-    target_size=(img_width, img_height),
-    batch_size=batch_size,
-    class_mode='binary')
-
-
-model.summary()
-model.fit_generator(
-    train_generator,
-    steps_per_epoch=nb_train_samples // batch_size,
-    epochs=epochs,
-    validation_data=validation_generator,
-    validation_steps=nb_validation_samples // batch_size)
-
-model.save_weights('noise_classification_img100.h5')
+def generate_model():
+    '''
+    Method which builds and compiles the model to train on SVD features.
+
+    Three Conv2D (2x1 kernel) + ReLU + pooling stages (100, 80 and 50
+    filters; the middle stage uses average pooling) are followed by four
+    L2-regularized dense layers (300, 30, 100 and 20 units) with batch
+    normalization and dropout, and a single sigmoid unit. It reads the
+    module-level `input_shape` and is compiled with binary cross-entropy,
+    the rmsprop optimizer and the accuracy metric.
+
+    @return : compiled keras Sequential model
+    '''
+    model = Sequential()
+
+    # convolutional stages; only the first layer declares the input shape
+    model.add(Conv2D(100, (2, 1), input_shape=input_shape))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 1)))
+
+    model.add(Conv2D(80, (2, 1)))
+    model.add(Activation('relu'))
+    model.add(AveragePooling2D(pool_size=(2, 1)))
+
+    model.add(Conv2D(50, (2, 1)))
+    model.add(Activation('relu'))
+    model.add(MaxPooling2D(pool_size=(2, 1)))
+
+    # first dense block: normalization comes before the dense layer here
+    model.add(Flatten())
+    model.add(BatchNormalization())
+    model.add(Dense(300, kernel_regularizer=l2(0.01)))
+    model.add(Activation('relu'))
+    model.add(Dropout(0.4))
+
+    # remaining dense blocks: dense, normalization, activation, dropout
+    for nb_units, dropout_rate in ((30, 0.3), (100, 0.2), (20, 0.1)):
+        model.add(Dense(nb_units, kernel_regularizer=l2(0.01)))
+        model.add(BatchNormalization())
+        model.add(Activation('relu'))
+        model.add(Dropout(dropout_rate))
+
+    # one sigmoid unit: binary output matching class_mode='binary'
+    model.add(Dense(1))
+    model.add(Activation('sigmoid'))
+
+    model.compile(loss='binary_crossentropy',
+                  optimizer='rmsprop',
+                  metrics=['accuracy'])
+
+    return model
+
+def load_train_data():
+    '''
+    Method which loads train data from the module-level `train_data_dir`.
+
+    The generator is configured with a 1/255 rescale and with
+    `svd_metric.get_s_model_data` as preprocessing function; images are
+    resized to (img_width, img_height) and served in batches of
+    `batch_size` with binary labels.
+
+    @return : DirectoryIterator
+    '''
+    # geometric augmentations (shear, zoom, horizontal flip) are left
+    # disabled in this SVD based version
+    svd_datagen = ImageDataGenerator(
+        rescale=1. / 255,
+        preprocessing_function=svd_metric.get_s_model_data)
+
+    return svd_datagen.flow_from_directory(
+        train_data_dir,
+        target_size=(img_width, img_height),
+        batch_size=batch_size,
+        class_mode='binary')
+
+def load_validation_data():
+    '''
+    Method which loads validation data from the module-level
+    `validation_data_dir`.
+
+    The generator uses the same configuration as the training one (1/255
+    rescale and `svd_metric.get_s_model_data` as preprocessing function);
+    images are resized to (img_width, img_height) and served in batches of
+    `batch_size` with binary labels.
+
+    @return : DirectoryIterator
+    '''
+    # same rescale and SVD preprocessing as for the training data
+    svd_datagen = ImageDataGenerator(
+        rescale=1. / 255,
+        preprocessing_function=svd_metric.get_s_model_data)
+
+    return svd_datagen.flow_from_directory(
+        validation_data_dir,
+        target_size=(img_width, img_height),
+        batch_size=batch_size,
+        class_mode='binary')
+
+def main():
+    '''
+    Command line entry point: parses options, trains the model and, when an
+    output name is given, saves history plots, model diagram and weights.
+
+    Options :
+    -o, --output     : base name of the generated files
+    -d, --directory  : folder where the generated files are written
+    -b, --batch_size : overrides the module-level `batch_size`
+    -e, --epochs     : overrides the module-level `epochs`
+    -h, --help       : prints usage and exits
+    '''
+    global batch_size
+    global epochs
+
+    usage = 'classification_cnn_keras_svd.py --output xxxxx [--directory xxxxx] [--batch_size xx] [--epochs xx]'
+
+    # both options may be omitted: bind the names so later tests cannot fail
+    # with an UnboundLocalError
+    filename = None
+    directory = None
+
+    if len(sys.argv) <= 1:
+        print('No output file defined...')
+        print(usage)
+        sys.exit(2)
+    try:
+        # 'd:' needs its colon so that -d takes an argument like --directory
+        opts, args = getopt.getopt(sys.argv[1:], "ho:b:e:d:", ["help", "directory=", "output=", "batch_size=", "epochs="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print(usage)
+        sys.exit(2)
+    for o, a in opts:
+        if o in ("-h", "--help"):
+            print(usage)
+            sys.exit()
+        elif o in ("-o", "--output"):
+            filename = a
+        elif o in ("-b", "--batch_size"):
+            batch_size = int(a)
+        elif o in ("-e", "--epochs"):
+            epochs = int(a)
+        elif o in ("-d", "--directory"):
+            directory = a
+        else:
+            assert False, "unhandled option"
+
+
+    # load of model
+    model = generate_model()
+    model.summary()
+
+    # resolve the output location before training, so that a folder problem
+    # shows up immediately instead of after the whole fit
+    if filename and directory:
+        print('Your model information will be saved into %s...' % directory)
+
+        # create folder if necessary
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        filename = os.path.join(directory, filename)
+
+    history = model.fit_generator(
+        load_train_data(),
+        steps_per_epoch=nb_train_samples // batch_size,
+        epochs=epochs,
+        validation_data=load_validation_data(),
+        validation_steps=nb_validation_samples // batch_size)
+
+    # if user needs output files
+    if filename:
+
+        # save plot file history
+        plot_info.save(history, filename)
+
+        plot_model(model, to_file='%s.png' % filename)
+        model.save_weights('%s.h5' % filename)
+
+
+if __name__ == "__main__":
+    main()

+ 0 - 0
modules/__init__.py


+ 0 - 0
modules/image_metrics/__init__.py


+ 30 - 0
modules/image_metrics/svd_metric.py

@@ -0,0 +1,30 @@
+# module file which contains all image metrics used in project
+
+from numpy.linalg import svd
+from PIL import Image
+from scipy import misc
+
+def get_s_model_data(image):
+    '''
+    Method which extracts SVD features from image and returns the 's'
+    vector reshaped with two trailing axes of size 1.
+
+    @param image : array accepted by numpy.linalg.svd
+    @return 's' vector reshaped to (len(s), 1, 1)
+    '''
+    # only the singular values are used: do not compute U and V
+    s = svd(image, compute_uv=False)
+    return s.reshape([len(s), 1, 1]) # one value per channel entry
+
+def get(image):
+    '''
+    Method which computes the reduced SVD of image (full_matrices=False).
+
+    @param image : array accepted by numpy.linalg.svd
+    @return decomposition exactly as returned by numpy.linalg.svd
+    '''
+    decomposition = svd(image, full_matrices=False)
+    return decomposition
+
+def get_s(image):
+    '''
+    Method which returns the singular values of image.
+
+    @param image : array accepted by numpy.linalg.svd
+    @return 's' vector, as returned by numpy.linalg.svd
+    '''
+    # U and V are not needed here: ask numpy for the singular values only
+    return svd(image, compute_uv=False)
+
+def get_U(image):
+    '''
+    Method which returns the first factor (U) of the reduced SVD of image.
+
+    @param image : array accepted by numpy.linalg.svd
+    @return 'U' matrix
+    '''
+    return svd(image, full_matrices=False)[0]
+
+def get_V(image):
+    '''
+    Method which returns the third factor of the reduced SVD of image.
+
+    The value is returned exactly as numpy.linalg.svd gives it; the numpy
+    documentation names this factor 'vh'.
+
+    @param image : array accepted by numpy.linalg.svd
+    @return third factor of the decomposition
+    '''
+    return svd(image, full_matrices=False)[2]

+ 0 - 0
modules/model_helper/__init__.py


+ 47 - 0
modules/model_helper/plot_info.py

@@ -0,0 +1,47 @@
+# module file which contains helpful display functions
+
+import matplotlib.pyplot as plt
+
+def _accuracy_keys(history):
+    # older Keras releases log 'acc' / 'val_acc'; newer ones are expected to
+    # log 'accuracy' / 'val_accuracy' for metrics=['accuracy']
+    if 'acc' in history.history:
+        return 'acc', 'val_acc'
+    return 'accuracy', 'val_accuracy'
+
+def _plot_curves(history, train_key, test_key, name):
+    # draws the train and validation curves of one metric on the current figure
+    plt.plot(history.history[train_key])
+    plt.plot(history.history[test_key])
+    plt.title('model %s' % name)
+    plt.ylabel(name)
+    plt.xlabel('epoch')
+    plt.legend(['train', 'test'], loc='upper left')
+
+def save(history, filename):
+    '''
+    Function which saves data from neural network model
+
+    Two images are written : '<filename>_accuracy.png' and
+    '<filename>_loss.png'.
+
+    @param history : object exposing a `history` dict of per-epoch values
+    @param filename : path prefix of the generated images
+    '''
+    train_acc, test_acc = _accuracy_keys(history)
+
+    # summarize history for accuracy
+    _plot_curves(history, train_acc, test_acc, 'accuracy')
+    plt.savefig('%s_accuracy.png' % filename)
+
+    # clear plt history
+    plt.gcf().clear()
+
+    # summarize history for loss
+    _plot_curves(history, 'loss', 'val_loss', 'loss')
+    plt.savefig('%s_loss.png' % filename)
+
+    # clear again so that a later call does not draw over the loss curves
+    plt.gcf().clear()
+
+def show(history, filename):
+    '''
+    Function which displays accuracy then loss curves of the model
+
+    @param history : object exposing a `history` dict of per-epoch values
+    @param filename : not used, kept so that the signature matches `save`
+    '''
+    train_acc, test_acc = _accuracy_keys(history)
+
+    # summarize history for accuracy
+    _plot_curves(history, train_acc, test_acc, 'accuracy')
+    plt.show()
+
+    # summarize history for loss
+    _plot_curves(history, 'loss', 'val_loss', 'loss')
+    plt.show()