#!/usr/bin/env python3

'''
    Continuing my machine learning journey
    Author: Nik Alleyne
    Author Blog: www.securitynik.com
    file: payment_fraud.csv
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, classification_report, confusion_matrix)


def main():
    '''
    Train a logistic regression classifier on ./payment_fraud.csv and
    classify one record typed in by the user.

    Steps, in order:
      1. Load the CSV and print sample records.
      2. Encode the 'paymentMethod' column as integers.
      3. Use the 'label' column as the target and every other column
         as a feature, then split into training and testing sets.
      4. Fit LogisticRegression, print accuracy, confusion matrix and
         classification report for the test set.
      5. Prompt for comma separated feature values and print the
         predicted label and class probabilities.

    Raises:
        ValueError: if 'paymentMethod' contains a value that has no
            numeric code assigned below.
    '''
    train_test_size = 0.33

    # Numeric code assigned to each known payment method
    payment_method_codes = {'paypal': 0, 'creditcard': 1, 'storecredit': 2}

    # import dataset
    # ('verbose' is deprecated in pandas 2.2, so it is no longer passed)
    fraud_df = pd.read_csv('./payment_fraud.csv')
    print('[*] First 10 records \n{}'.format(fraud_df.head(10)))
    print('\n[*] Here is another 10 sample records \n{}'.format(fraud_df.sample(10)))

    print('\n[*] Converting the payment method from categorical variable to numeric ...')
    # Assign the converted column back to the frame. An in-place replace on
    # 'fraud_df.paymentMethod' acts on a temporary object once pandas
    # Copy-on-Write is active and would leave the frame unchanged.
    encoded_method = fraud_df['paymentMethod'].map(payment_method_codes)
    if encoded_method.isna().any():
        unknown = fraud_df.loc[encoded_method.isna(), 'paymentMethod'].unique()
        raise ValueError('Unexpected paymentMethod value(s): {}'.format(list(unknown)))
    fraud_df['paymentMethod'] = encoded_method
    print('[*] Here is another 10 sample records after conversion \n{}'.format(fraud_df.sample(10)))

    # Create the X axis
    X_axis = fraud_df.drop('label', axis=1)
    print('[*] Sample 10 records from the X_axis \n{}'.format(X_axis.sample(10)))

    # Create the Y axis
    y_axis = fraud_df['label']
    print('\n[*] Sample 10 records from the y_axis \n{}'.format(y_axis.sample(10)))

    print('[*] Shape of the X_axis is: {}'.format(X_axis.shape))
    print('[*] Shape of the y_axis is: {}'.format(y_axis.shape))

    # Split the data into training and testing set
    print('[*] Splitting the data. Testing size is: {}%'.format(train_test_size*100))
    X_train, X_test, y_train, y_test = train_test_split(
        X_axis, y_axis, test_size=train_test_size, shuffle=True)

    print('\n[*] Shape of the X_train is: {}'.format(X_train.shape))
    print('[*] Shape of the X_test is: {}'.format(X_test.shape))
    print('\n[*] Shape of the y_train is: {}'.format(y_train.shape))
    print('[*] Shape of the y_test is: {}'.format(y_test.shape))

    # First use logistic regression classifier
    # The solver was set to 'lbfgs' because of a warning which was produced
    # when the classifier was created without it
    lr_clf = LogisticRegression(solver='lbfgs')
    lr_clf.fit(X_train, y_train)
    print('\n[*] Here is the lr_clf aftering being fitted \n{}'.format(lr_clf))

    # Making a prediction on the test data
    predict_fraud = lr_clf.predict(X_test)
    print('\n[*] Here are your prediction on possible fraudlent transactions \n{}'.format(list(predict_fraud)))

    # Test the accuracy
    # All three metrics take (y_true, y_pred) in that order
    print('\n[*] Accuracy Score: {}'.format(accuracy_score(y_test, predict_fraud)))
    print('[*] Confusion Matrix: \n{}'.format(confusion_matrix(y_test, predict_fraud)))
    print('[*] Classification report on your prediction \n{}'.format(classification_report(y_test, predict_fraud)))

    # Let's make a prediction on a user's input
    print('[*] Enter your 5 feature values as comma separated')
    print('[*] Feature order: {}'.format(', '.join(X_axis.columns)))
    raw_values = input('[*] Example: 1,2,3,4,5: ')

    # float() rather than int() so that decimal feature values are accepted
    try:
        user_input = [float(value) for value in raw_values.split(',')]
    except ValueError:
        print('[!] Every feature value must be a number')
        return

    if len(user_input) != X_axis.shape[1]:
        print('[!] Expected {} values but received {}'.format(X_axis.shape[1], len(user_input)))
        return

    print('[*] You Entered: {}'.format(user_input))

    # Reuse the training column names so the classifier receives input shaped
    # like the data it was fitted on
    user_record = pd.DataFrame([user_input], columns=X_axis.columns)
    print('[*] Here is your label:{} \n 0-Not Fraud \n 1-Fraud'.format(lr_clf.predict(user_record)))
    print('[*] Here is the probability score: {}'.format(lr_clf.predict_proba(user_record)))


if __name__ == '__main__':
    main()


'''
Reference:
    https://raw.githubusercontent.com/oreilly-mlsec/book-resources/master/chapter2/datasets/payment_fraud.csv
    https://www.amazon.com/Machine-Learning-Security-Protecting-Algorithms-dp-1491979909/dp/1491979909/
    https://stackoverflow.com/questions/23307301/replacing-column-values-in-a-pandas-dataframe
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.get_dummies.html
'''
Monday, March 23, 2020
Beginning Machine Learning - Detecting Fraudulent Transactions
This code is all part of my deep learning journey and, as always, is being placed here so I can revisit it as I continue to expand my learning of this topic.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment