Monday, March 23, 2020

Beginning Deep Learning, Working with the Boston Housing Dataset

This code is part of my deep learning journey and, as always, is being placed here so I can revisit it as I continue to expand my knowledge of this topic.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python3


'''
 Beginning my deep learning journey -
 This part of the journey focuses on a regression problem:
 learning to predict house prices in the Boston Housing dataset from the
 numeric features supplied for each record


 File: dlBoston.py
 Author: Nik Alleyne
 Author Blog: www.securitynik.com
 Date: 2020-02-04
'''

import numpy as np
from keras.datasets import boston_housing
from keras import (models, layers)
from matplotlib import pyplot as plt


def build_model(X_train):
    '''
    Assemble and compile a small fully connected network for regression.

    Only X_train.shape[1] is read from the argument; it fixes the input
    width of the first layer. The compiled Sequential model is returned.
    '''
    feature_count = X_train.shape[1]

    # Two hidden ReLU layers followed by a single-unit output layer.
    # The output layer is given no activation, so the network emits an
    # unconstrained linear value rather than a squashed one.
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(feature_count,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])

    # Mean Squared Error (mse) is the training loss; Mean Absolute Error
    # (mae) is tracked as a metric to monitor the absolute difference
    # between the predictions and the targets.
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network




def main():
    '''
    Load the Boston Housing data, standardise the features, and run a
    k-fold cross validation of the network produced by build_model().

    For every fold a fresh model is trained on the remaining folds and
    evaluated on the held-out fold. The validation MAE of each fold is
    collected in all_scores and the per-epoch training MAE of each fold is
    collected in mae_histories; both are printed at the end.
    '''
    # Divide the data into training and testing sets
    (X_train, y_train), (X_test, y_test) = boston_housing.load_data()

    # Get the shape of both the training and testing set
    print('\n[*] X_train shape: {}'.format(X_train.shape))
    print('[*] y_train shape: {}'.format(y_train.shape))
    print('[*] X_test shape: {}'.format(X_test.shape))
    print('[*] y_test shape: {}'.format(y_test.shape))

    print('\n[*] Sample records from X_train\n {}'.format(X_train))
    print('\n[*] Sample record from y_train\n {}'.format(y_train))

    # Centre every feature column on zero (in place)
    mean = X_train.mean(axis=0)
    X_train -= mean
    print('\n[*] X_train after finding the mean \n{}'.format(X_train))

    # Scale every feature column to unit standard deviation (in place)
    std_deviation = X_train.std(axis=0)
    X_train /= std_deviation
    print('\n[*] X_train after finding the standard deviation \n{}'.format(X_train))

    # The test set is normalised with the statistics computed on the
    # training set, so no information from the test data is used here.
    X_test -= mean
    X_test /= std_deviation
    print('\n[*] X_test after finding the mean \n{}'.format(X_test))

    # Setting up cross validation
    k = 4
    num_validation_samples = len(X_train) // k
    print('[*] Num Validation samples {}'.format(num_validation_samples))

    num_epochs = 2
    all_scores = []
    mae_histories = []

    for i in range(k):
        print('[*] Proessing fold: {}'.format(i))

        # Bounds of the slice held out for validation in this fold
        val_start = i * num_validation_samples
        val_end = (i + 1) * num_validation_samples

        X_train_val = X_train[val_start:val_end]
        y_train_val = y_train[val_start:val_end]

        # Everything outside the held-out slice is used for training
        X_train_partial = np.concatenate([X_train[:val_start], X_train[val_end:]], axis=0)
        y_train_partial = np.concatenate([y_train[:val_start], y_train[val_end:]], axis=0)

        # A fresh model per fold, so no weights carry over between folds
        model = build_model(X_train)
        nn_history = model.fit(X_train_partial, y_train_partial, epochs=num_epochs, batch_size=1, verbose=1)
        _val_mse, val_mae = model.evaluate(X_train_val, y_train_val, verbose=1)
        all_scores.append(val_mae)

        # Keep this fold's per-epoch training MAE. Previously the history was
        # read once after the loop, so only the final fold was recorded.
        mae_histories.append(nn_history.history['mae'])

    # nn_history, X_train_val and y_train_val still refer to the last fold here
    print('[*] History information from nn_history.history \n{}'.format(nn_history.history))

    print('\n[*] X_train Validation samples\n{}'.format(X_train_val))
    print('\n[*] y_train validation samples\n{}'.format(y_train_val))

    print('[*] All scores \n{}'.format(all_scores))
    print('[*] Here is the mean of all scores {}'.format(np.mean(all_scores)))
    print('[*] Here is the mae scores {}'.format(mae_histories))
    print('[*] Here is the mae mean scores {}'.format(np.mean(mae_histories)))


# Run the workflow only when this file is executed directly as a script
if __name__ == '__main__':
    main()


'''
References:
https://www.manning.com/books/deep-learning-with-python

'''

No comments:

Post a Comment