#!/usr/bin/env python3

'''
Using the iris dataset to learn more about pandas groupby.

Loads the iris CSV, groups the rows by the ``species`` column and prints
several views of the resulting GroupBy object.
'''

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


def main(data_path: str = './iris.data') -> None:
    '''
    Load the iris dataset and print a walkthrough of grouping by species.

    Args:
        data_path: Path of the CSV file to read. The file must provide a
            ``species`` column, because the rows are grouped on it.
            Defaults to ``'./iris.data'``.

    Note:
        The final step looks up the ``'Iris-setosa'`` group by name, so the
        data is expected to contain that species label.
    '''
    iris_df = pd.read_csv(data_path)
    print('[*] First 10 records \n {}'.format(iris_df.head(10)))

    # Get the column names
    print('\n[*] Column names \n{}'.format(iris_df.columns))

    # Determine the different species within the dataset. The set is built
    # once and reused for both the count and the listing.
    unique_species = set(iris_df['species'])
    print('\n[*] The number of unique species is: {}'.format(len(unique_species)))
    print('[*] The unique species in the dataset are: \n{}'.format(unique_species))

    # Let's now group these by species
    group_by_species = iris_df.groupby('species')

    # Get the groups and their indices
    print('\n[*] Iris dataset now grouped by species \n {}'.format(group_by_species.indices))

    # Let's get the keys for above
    print('\n[*] Iris dataset keys \n {}'.format(group_by_species.indices.keys()))

    # Let's get the values for above
    print('\n[*] Iris dataset values \n {}'.format(group_by_species.indices.values()))

    # Iterating through the groups: each item is a (group key, sub-frame) pair
    for key, value in group_by_species:
        print('\n \\//-->> Group Starts Here <<--\\//')
        print('\n [*]{0} {1} \n'.format(key, value))
        print('\n \\//-->> Group Ends Here <<--\\// \n')

    # Rather than iterating, we could have just viewed the contents as a list
    print('\n\n[*] List view - Datasets group by species \n {}'.format(list(group_by_species)))

    # Get a specific group
    print('\n[*] Data for the Iris-setosa group \n {}'.format(group_by_species.get_group('Iris-setosa')))


if __name__ == '__main__':
    main()
Posts in this series:
Beginning Numpy
Beginning Pandas
Pandas String Operations, etc.
No comments:
Post a Comment