Learning by practicing: Pandas GroupBy

Thursday, December 12, 2019

Pandas GroupBy

Learning about Pandas GroupBy from the perspective of the Iris Dataset

#!/usr/bin/env python3

'''
    Using the iris dataset to learn more about groupby
'''

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

def main():
    '''
        Walk through pandas groupby using the iris dataset.

        Reads ./iris.data from the current working directory, groups the
        rows on the `species` column and prints, in order: the first ten
        records, the column names, the distinct species, the grouping's
        indices (whole mapping, keys, values), every group one at a time,
        the groups as a list, and the rows of the 'Iris-setosa' group.
    '''
    frame = pd.read_csv('./iris.data')
    print('[*] First 10 records \n {}'.format(frame.head(10)))

    # Show which columns were loaded
    print('\n[*] Column names \n{}'.format(frame.columns))

    # Collapse the species column down to its distinct values
    distinct_species = set(frame.species)
    print('\n[*] The numer of unique species is: {}'.format(len(distinct_species)))
    print('[*] The unique species in the dataset are:  \n{}'.format(distinct_species))

    # One group per distinct value in the species column
    by_species = frame.groupby('species')

    # Mapping of group label -> positions of the rows in that group
    index_map = by_species.indices
    print('\n[*] Iris dataset now grouped by species \n {}'.format(index_map))

    # Only the group labels
    print('\n[*] Iris dataset keys \n {}'.format(index_map.keys()))

    # Only the row positions
    print('\n[*] Iris dataset values \n {}'.format(index_map.values()))

    # Walk the groups: each step yields the label and that group's rows
    opening_banner = '\n \\//-->>   Group Starts Here   <<--\\//'
    closing_banner = '\n \\//-->>   Group Ends Here   <<--\\// \n'
    for label, members in by_species:
        print(opening_banner)
        print('\n [*]{0} {1} \n'.format(label, members))
        print(closing_banner)

    # The same (label, rows) pairs, materialised as a list in one go
    label_rows_pairs = list(by_species)
    print('\n\n[*] List view - Datasets group by species \n {}'.format(label_rows_pairs))

    # Pull out a single group by its label
    setosa_rows = by_species.get_group('Iris-setosa')
    print('\n[*] Data for the Iris-setosa group \n {}'.format(setosa_rows))


# Run the walkthrough only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Posts in this series:
Beginning Numpy
Beginning Pandas
Pandas String Operations, etc.

Pandas DataFrame Basics

Pandas GroupBy

Wireless Security Analysis with Pandas

Learning by practicing

Thursday, December 12, 2019

Pandas GroupBy

No comments:

Post a Comment