Monday, March 23, 2020
Beginning Deep Learning Working With the Reuters Dataset

This code is all part of my deep learning journey and, as always, it is being placed here so I can revisit it as I continue to expand my learning of this topic.

#!/usr/bin/env python3
'''
Beginning my deep learning journey - this part of the journey focuses on
the multiclass, single-label classification problem: learning to classify
the Reuters newswire dataset into one of 46 mutually exclusive topics
based on text content.

File: dlReuters.py
Author: Nik Alleyne
Author Blog: www.securitynik.com
Date: 2020-02-04
'''

import numpy as np
from keras import models, layers
from keras.datasets import reuters
from keras.utils.np_utils import to_categorical
from matplotlib import pyplot as plt
from keras.utils.vis_utils import plot_model


# Vectorize the data, turning each sequence of word indices
# into a multi-hot vector of 1s and 0s
def vectorize_data(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1
    return results


def main():
    '''
    Split the dataset into training and testing sets, keeping only
    the 10,000 most frequently occurring words
    '''
    (X_train, y_train), (X_test, y_test) = reuters.load_data(num_words=10000)

    # Get the shape of the training and testing data
    print('\n[*] X_train shape: {}'.format(X_train.shape))
    print('[*] y_train shape: {}'.format(y_train.shape))
    print('[*] X_test shape: {}'.format(X_test.shape))
    print('[*] y_test shape: {}'.format(y_test.shape))

    # Each sample record is a list of integers (word indices)
    print('[*] Sample record from X_train: \n{}'.format(X_train[0]))

    # Vectorize the training and testing data
    X_train = vectorize_data(X_train).astype('float32')
    X_test = vectorize_data(X_test).astype('float32')

    # One-hot encode the labels
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    print('[*] Sample y_train data after one-hot encoding \n{}'.format(y_train))

    # Set apart the first 1,000 samples as a validation set and train on
    # the remaining samples, so the two sets do not overlap
    X_train_val = X_train[:1000]
    y_train_val = y_train[:1000]
    X_train_partial = X_train[1000:]
    y_train_partial = y_train[1000:]

    # Build the 3 layer neural network
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(46, activation='softmax'))

    # Compile the model
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['acc'])

    # Fit the model on the partial training set, validating on the held-out set
    nn_history = model.fit(X_train_partial, y_train_partial, epochs=20,
                           batch_size=512,
                           validation_data=(X_train_val, y_train_val))
    print('\n[*] Here is the content of nn_history.history \n{}'.format(nn_history.history))

    # Plot the loss and accuracy for the training and validation sets
    val_loss = nn_history.history['val_loss']
    train_loss = nn_history.history['loss']
    print('[*] Length of validation loss is {}'.format(len(val_loss)))
    print('[*] Length of training loss is {}'.format(len(train_loss)))

    val_acc = nn_history.history['val_acc']
    train_acc = nn_history.history['acc']

    epochs = range(1, len(val_loss) + 1)
    plt.figure(figsize=(8, 8))
    plt.plot(epochs, val_loss, color='green', marker='+', label='Validation Loss',
             linestyle='dashed', linewidth=2, markersize=15)
    plt.plot(epochs, train_loss, color='red', marker='.', label='Training Loss',
             linestyle='dashed', linewidth=2, markersize=15)
    plt.plot(epochs, val_acc, color='blue', marker='*', label='Validation Accuracy',
             linestyle='dashed', linewidth=2, markersize=15)
    plt.plot(epochs, train_acc, color='orange', marker='d', label='Training Accuracy',
             linestyle='dashed', linewidth=2, markersize=15)
    plt.xlabel('Epoch')
    plt.ylabel('Loss/Accuracy')
    plt.title('Training/Validation Loss and Accuracy')
    plt.legend()
    plt.show()
    plt.close('all')

    # Evaluate the model on the test data
    results = model.evaluate(X_test, y_test)
    print('[*] Loss on the test data is: {}'.format(results[0]))
    print('[*] Accuracy on the test data is: {}'.format(results[1]))

    # Perform a prediction on the test set
    reuters_predict = model.predict(X_test)
    print('[*] Here are your predictions \n{}'.format(reuters_predict))

    # model.summary() prints the summary itself and returns None,
    # so call it directly rather than wrapping it in print()
    print('[*] Here is the model summary')
    model.summary()
    print('[*] Here is the shape for predictions: {}'.format(reuters_predict[0].shape))

    # Visualize the model via a graph
    plot_model(model, to_file='/tmp/reuters-model.png', show_shapes=True,
               show_layer_names=True)

    # The index of the largest entry in the softmax output is the predicted topic
    print('[*] Largest prediction entry (predicted topic) for the first test sample: {}'.format(np.argmax(reuters_predict[0])))


if __name__ == '__main__':
    main()

'''
References:
https://www.manning.com/books/deep-learning-with-python
https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.plot.html#matplotlib.pyplot.plot
https://matplotlib.org/3.1.0/api/markers_api.html#module-matplotlib.markers
'''
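As an aside, if you are curious what one of these encoded newswires actually says, here is a minimal sketch (not part of the original script) that decodes a sample back into words using reuters.get_word_index(). The indices are offset by 3 because 0, 1 and 2 are reserved for the padding, start-of-sequence and unknown markers.

from keras.datasets import reuters

(X_train, y_train), _ = reuters.load_data(num_words=10000)

# Map of words -> integer indices, inverted to map indices -> words
word_index = reuters.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}

# Subtract 3 from each index to undo the reserved-marker offset;
# unknown indices decode to '?'
decoded_newswire = ' '.join(reverse_word_index.get(i - 3, '?') for i in X_train[0])
print(decoded_newswire)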
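For a quick sanity check of what vectorize_data() produces, here is a toy run over a 5-word vocabulary (the input values are made up purely for illustration). Note that a repeated word index still yields a single 1, so this multi-hot encoding discards word counts.

import numpy as np

def vectorize_data(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1
    return results

# Two toy 'documents' over a 5-word vocabulary
print(vectorize_data([[0, 2], [1, 3, 3]], dimension=5))
# [[1. 0. 1. 0. 0.]
#  [0. 1. 0. 1. 0.]]   <- the repeated index 3 still produces a single 1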
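Finally, since model.predict() returns a 46-way probability distribution for each sample, here is a short sketch, assuming the reuters_predict and y_test variables from the script above, showing that the per-sample argmax recovers the predicted topic and reproduces the accuracy reported by model.evaluate().

import numpy as np

# reuters_predict: (num_samples, 46) softmax outputs from model.predict(X_test)
# y_test: one-hot encoded true labels
predicted_topics = np.argmax(reuters_predict, axis=1)
true_topics = np.argmax(y_test, axis=1)

# Fraction of samples where the most probable topic matches the true topic;
# this should agree with the accuracy returned by model.evaluate()
manual_accuracy = np.mean(predicted_topics == true_topics)
print('[*] Manually computed test accuracy: {}'.format(manual_accuracy))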