Thursday, December 12, 2019

Pandas DataFrame Basics

* While the Pandas Series is like an array, the Pandas DataFrame is like a spreadsheet.
* A DataFrame has both rows and columns, which are generally labeled.
* The row labels make up the index.
* A DataFrame has two axes. These are "axis=0" and "axis=1".
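* Axis=0 is the row (index) axis. An operation with "axis=0" runs down the rows, so if you wish to work on all rows for a specific column (for example, a per-column sum), you should use "axis=0"
* Axis=1 is the column axis. An operation with "axis=1" runs across the columns, so if you wish to work on all columns for a given row (for example, a per-row sum), you use "axis=1"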


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3

import numpy as np
import pandas as pd


def main():
    """Walk through basic pandas DataFrame operations, printing each result.

    Builds a small DataFrame of users from a dict and then demonstrates, in
    order: inspecting the index/columns/values, adding a column, promoting
    that column to the index, selecting columns, rows and single cells,
    transposing, deriving a boolean column, appending a row, describing the
    data and resetting the index.

    Takes no arguments and returns None; every result is written to stdout.
    """
    # Each dict key becomes a column; each list holds that column's rows.
    my_data = {
            'User-1': [10, 'M', 'Cricketer'],
            'User-2': [30, 'F', 'BasketBall' ],
            'User-3': [15, 'F', 'Table Tennis'],
            'User-4': [100, 'M', 'History'],
            'User-5': [50, 'F', 'Soccer']
            }

    users_df = pd.DataFrame(my_data)
    print('\n[*] Current view of the dataframe \n {}'.format(users_df))
    print('\n[*] Here are your indexes \n {}'.format(users_df.index))
    print('\n[*] Here are your columns \n {}'.format(users_df.columns))
    print('\n[*] Here are your values \n {}'.format(users_df.values))

    # Add a new column holding the labels we want to use for the rows
    users_df['new_index'] = ['Num', 'Sex', 'Sports']
    print('[*] The new dataframe \n {}'.format(users_df))

    # Change the index to the newly created column and make the change on the existing dataframe
    users_df.set_index('new_index', inplace=True)

    # Add a name to the columns axis (the index itself keeps the name 'new_index')
    users_df.columns.name = 'New Index'
    print('\n[*] users_df with new index column \n{}'.format(users_df))

    # To access a single column
    print('\n[*] Print information on User-2 \n {}'.format(users_df['User-2']))

    # To access multiple columns, leverage a list
    print('\n[*] Print information on User-2 and User-5 \n {}'.format(users_df[['User-2', 'User-5']]))

    # Access information for the entire 'Num' row, i.e. one value per user
    print('\n[*] Print the Num row for all users \n {}'.format(users_df.loc['Num']))

    # To figure out the type of data returned
    print('\n[*] Type for the return column \n {}'.format(type(users_df.loc['Num'])))

    # Print information for User-2 and Sports. Notice the usage of '.at'. Also this has to be row,column
    print('\n[*] Print information on User 2 sports  \n {}'.format(users_df.at['Sports', 'User-2']))

    # Let's now transpose our dataframe. That is make the columns rows and the rows into columns
    users_transpose_df = users_df.T
    print('\n[*] Here we transpose the dataframe. We made the columns into rows and the rows into columns \n {}'.format(users_transpose_df))

    # Find everyone whose Num is less than 50
    print('\n[*] Here is everyone whose age is less than 50 \n {}'.format((users_transpose_df.Num < 50))) 

    # Create a new column based on the information just returned
    users_transpose_df['derived_num_lt_50'] = users_transpose_df.Num < 50

    print('\n[*] Here is your new dataframe with its derived column \n {}'.format(users_transpose_df))

    # Let's now add a row and print it out. The derived column is boolean,
    # so store False (70 is not < 50) rather than the integer 0.
    users_transpose_df.loc['User-6'] = [70, 'M', 'Volleyball', False]
    print('\n[*] New row added for User-6\n {}'.format(users_transpose_df))

    # Let's now describe the dataframe
    print('\n[*] Describing the dataframe \n {}'.format(users_transpose_df.describe()))

    # We can also describe a specific column. In this case the Num.
    # Num is object dtype after the transpose, so convert it to a numeric
    # dtype first to get mean/std/quartiles instead of count/unique/top/freq.
    print('\n[*] Describing the Num column \n {}'.format(pd.to_numeric(users_transpose_df.Num).describe()))

    # Whereas the index was set above, we can reset the index
    print('\n[*] Index reset. Note the new index to the left with the incrementing numbers \n {}'.format(users_transpose_df.reset_index()))
    

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

No comments:

Post a Comment