Thursday, December 12, 2019

Pandas String Operations, etc.

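Still learning about Pandas. The script below builds two random Series (age and income) indexed by the letters a-z, then walks through boolean filtering, all()/any() checks, type conversions, unique values and value counts, and min/max/mean.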


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3

'''
    Pandas strings, etc

'''

import pandas as pd
import numpy as np
import string

def main():
    '''
        Build two random Series indexed by the letters a-z (one named 'age',
        one named 'Income') and print a tour of basic Series operations:
        boolean masks and filtering, all()/any(), conversions to
        string/list/dict/JSON, unique()/value_counts(), and min/max/mean.

        Takes no arguments and returns None; all results go to stdout.
    '''
    letters = list(string.ascii_lowercase)
    sample_size = len(letters)

    # Ages are drawn before incomes so the random stream is consumed in a fixed order
    ages = pd.Series(np.random.randint(1, 50, sample_size), index=letters, name='age')
    ages.index.name = 'Name'

    incomes = pd.Series(np.random.randint(100000, 500000, sample_size), index=letters, name='Income')
    incomes.index.name = 'Name'

    # Boolean mask reused for the raw True/False view, the filtered view and the all() check
    above_400k = incomes > 400000

    # Each entry pairs a format template with the value to render; order is the print order
    report = [
        # The two series themselves
        ('[*] Content of series_name_age \n{}', ages),
        ('\n[*] Content of series_name_income \n{}', incomes),

        # The mask on its own, then the mask used as a filter to get the actual values
        ('\n[*] Here are the list of people making above 400K \n {}', above_400k),
        ('\n[*] Actual income values \n{}', incomes[above_400k]),

        # Whole-series truth tests
        ('\n[*] Does everyone make above 100000? \n{}', (incomes > 100000).all()),
        ('\n[*] Does everyone make above 400000? \n{}', above_400k.all()),
        ('\n[*] Does anyone make above 450000? \n{}', (incomes > 450000).any()),

        # Conversions of the income series to other representations
        ('\n[*] Series_name_income as String \n{}', incomes.to_string()),
        ('\n[*] Series_name_income as List \n{}', incomes.to_list()),
        ('\n[*] Series_name_income as Dict \n{}', incomes.to_dict()),
        ('\n[*] Series_name_income as Json \n{}', incomes.to_json()),

        # Distinct values, then how often each value occurs
        ('\n[*] These are the unique values for age: \n{}', ages.unique()),
        ('\n[*] These are the unique values for income: \n{}', incomes.unique()),
        ('\n[*] Age values usage and their occurrences: \n{}', ages.value_counts()),
        ('\n[*] Income value usage and their occurrences: \n{}', incomes.value_counts()),

        # Summary statistics
        ('\n[*] The minimum value for age: \n{}', ages.min()),
        ('\n[*] The minimum value for income: \n{}', incomes.min()),
        ('\n[*] The max value for age: \n{}', ages.max()),
        ('\n[*] The max value for income: \n{}', incomes.max()),
        ('\n[*] The mean value for age to two decimals: \n{:.2f}', ages.mean()),
        ('\n[*] The mean value for income to two decimals: \n{:.2f}', incomes.mean()),
    ]

    for template, value in report:
        print(template.format(value))


# Run the demo only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()

Posts in this series:
Beginning Numpy
Beginning Pandas
Pandas String Operations, etc.

No comments:

Post a Comment