#!/usr/bin/env python3

'''
    This code is based on me learning more about Linear Regression
    This is part of me expanding my knowledge on machine learning
    This version of the code uses scikit-learn

    Author: Nik Alleyne
    blog: www.securitynik.com
    filename: linearRegresAlgo_v3.py
'''

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


def main(csv_path='./headbrain.csv', test_size=0.30, random_state=10):
    '''
    Fit a simple linear regression of brain weight on head size and report it.

    Reads the CSV at csv_path, prints a preview and summary statistics,
    fits a LinearRegression on a train split, then prints one prediction
    (for a head size of 4234) and an R2 score.

    Parameters:
        csv_path: path of the comma-separated dataset. It must contain the
            columns 'Head Size(cm^3)' and 'Brain Weight(grams)', and every
            column must be parseable as int64.
        test_size: fraction of the rows held out by train_test_split.
        random_state: seed passed to train_test_split so the split is
            reproducible.

    Returns:
        None. All results are printed to stdout.
    '''
    print('*[*] Beginning Linear regresion ...')

    # Reading Data - This file was downloaded from GitHub.
    # See the reference section for the URL
    # NOTE(review): the `verbose` keyword appears to be deprecated in newer
    # pandas releases - confirm against the installed version before
    # upgrading. It is kept here because it produces the parser timing lines.
    df = pd.read_csv(csv_path, sep=',', dtype='int64', verbose=True)

    print('[*] First 10 records \n {}' .format(df.head(10)))
    print('[*] Quick description of the dataframe: \n{}'.format(df.describe()))
    print('[*] {} rows, columns '.format(df.shape))

    # Let's now create the X and Y axis.
    # reshape(-1, 1) turns the feature into a single-column 2-D array, which
    # is the layout the estimator is fitted and queried with below.
    X = np.array(df['Head Size(cm^3)'].values).reshape(-1, 1)
    Y = np.array(df['Brain Weight(grams)'].values)

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)

    lr = LinearRegression()
    lr.fit(X_train, y_train)

    print('[*] When X is 4234 the predicted value of y is{}'.format(lr.predict([[4234]])))

    # NOTE(review): the score is computed over the full dataset (X, Y), which
    # includes the rows the model was trained on; X_test / y_test are not
    # used. Scoring on (X_test, y_test) would give a held-out estimate, but
    # would change the reported number.
    r_sqr_score = lr.score(X, Y)
    print('[*] The R2 score is {}'.format(r_sqr_score))


if __name__ == '__main__':
    main()
The output from the above code is as follows:
root@securitynik:~/ML# ./linearRegresAlgo_v3.py *[*] Beginning Linear regresion ... Tokenization took: 0.06 ms Type conversion took: 0.21 ms Parser memory cleanup took: 0.00 ms [*] First 10 records Gender Age Range Head Size(cm^3) Brain Weight(grams) 0 1 1 4512 1530 1 1 1 3738 1297 2 1 1 4261 1335 3 1 1 3777 1282 4 1 1 4177 1590 5 1 1 3585 1300 6 1 1 3785 1400 7 1 1 3559 1255 8 1 1 3613 1355 9 1 1 3982 1375 [*] Quick description of the dataframe: Gender Age Range Head Size(cm^3) Brain Weight(grams) count 237.000000 237.000000 237.000000 237.000000 mean 1.434599 1.535865 3633.991561 1282.873418 std 0.496753 0.499768 365.261422 120.340446 min 1.000000 1.000000 2720.000000 955.000000 25% 1.000000 1.000000 3389.000000 1207.000000 50% 1.000000 2.000000 3614.000000 1280.000000 75% 2.000000 2.000000 3876.000000 1350.000000 max 2.000000 2.000000 4747.000000 1635.000000 [*] (237, 4) rows, columns [*] When X is 4234 the predicted value of y is[1441.04828161] [*] The R2 score is 0.6388174521966088
No comments:
Post a Comment