* Have both rows and columns which are generally labeled
* The rows represent the index
* A DataFrame has two axes: "axis=0" and "axis=1".
* Axis=0 is the row (index) axis. An operation along "axis=0" runs down the rows, so if you wish to work with all rows for a specific column, you should use "axis=0"
* Axis=1 is the column axis. An operation along "axis=1" runs across the columns, so if you wish to work with all columns for a given row, you use "axis=1"
#!/usr/bin/env python3
import numpy as np
import pandas as pd


def main():
    """Walk through basic pandas DataFrame operations, printing each step.

    Builds a small DataFrame of users (one column per user), then shows:
    inspecting index/columns/values, adding a column and promoting it to
    the index, selecting columns/rows/single cells, transposing, deriving
    a boolean column, appending a row, describing the data and resetting
    the index. The function takes no arguments and returns None; its only
    effect is the text written to stdout.
    """
    # Each key becomes a column; the list positions become rows 0, 1, 2.
    my_data = {
        'User-1': [10, 'M', 'Cricketer'],
        'User-2': [30, 'F', 'BasketBall'],
        'User-3': [15, 'F', 'Table Tennis'],
        'User-4': [100, 'M', 'History'],
        'User-5': [50, 'F', 'Soccer'],
    }

    users_df = pd.DataFrame(my_data)
    print('\n[*] Current view of the dataframe \n {}'.format(users_df))
    print('\n[*] Here are your indexes \n {}'.format(users_df.index))
    print('\n[*] Here are your columns \n {}'.format(users_df.columns))
    print('\n[*] Here are your values \n {}'.format(users_df.values))

    # Add a new column holding a label for each row
    users_df['new_index'] = ['Num', 'Sex', 'Sports']
    print('[*] The new dataframe \n {}'.format(users_df))

    # Change the index to the newly created column and make the change
    # on the existing dataframe
    users_df.set_index('new_index', inplace=True)

    # Give the columns axis a name (the index is already named
    # 'new_index' by set_index above)
    users_df.columns.name = 'New Index'
    print('\n[*] users_df with new index column \n{}'.format(users_df))

    # To access a single column
    print('\n[*] Print information on User-2 \n {}'.format(users_df['User-2']))

    # To access multiple columns, leverage a list
    print('\n[*] Print information on User-2 and User-5 \n {}'.format(
        users_df[['User-2', 'User-5']]))

    # Access information for the entire 'Num' row
    print('\n[*] Print information on the Num row \n {}'.format(
        users_df.loc['Num']))

    # To figure out the type of data returned
    print('\n[*] Type for the return column \n {}'.format(
        type(users_df.loc['Num'])))

    # Print information for User-2 and Sports. Notice the usage of '.at'.
    # Also this has to be row, column
    print('\n[*] Print information on User 2 sports \n {}'.format(
        users_df.at['Sports', 'User-2']))

    # Let's now transpose our dataframe.
    # That is, make the columns rows and the rows into columns
    users_transpose_df = users_df.T
    print('\n[*] Here we transpose the dataframe. We made the columns into rows and the rows into columns \n {}'.format(
        users_transpose_df))

    # Find everyone whose Num is less than 50
    print('\n[*] Here is everyone whose age is less than 50 \n {}'.format(
        (users_transpose_df.Num < 50)))

    # Create a new column based on the information just returned
    users_transpose_df['derived_num_lt_50'] = users_transpose_df.Num < 50
    print('\n[*] Here is your new dataframe with its derived column \n {}'.format(
        users_transpose_df))

    # Let's now add a row and print it out. The derived flag is computed
    # with the same rule as the column above so it stays a boolean rather
    # than a hard-coded integer.
    new_user_num = 70
    users_transpose_df.loc['User-6'] = [
        new_user_num, 'M', 'Volleyball', new_user_num < 50,
    ]
    print('\n[*] New row added for User-6\n {}'.format(users_transpose_df))

    # Let's now describe the dataframe
    print('\n[*] Describing the dataframe \n {}'.format(
        users_transpose_df.describe()))

    # We can also describe a specific column. In this case the Num
    print('\n[*] Describing the Num column \n {}'.format(
        users_transpose_df.Num.describe()))

    # Whereas the index was set above, we can reset the index
    print('\n[*] Index reset. Note the new index to the left with the incrementing numbers \n {}'.format(
        users_transpose_df.reset_index()))


if __name__ == '__main__':
    main()
No comments:
Post a Comment