How to find highest and lowest value and aggregate into a string in Pandas, Pysimplegui

217 Views Asked by At

I have a dataframe in pandas

This code is part of a function in a GUI and I'm trying to create one line of string that would mention the highest count of COVID cases in a country within a continent whereas the continent is selected from the user.

This is the dataset I am using: https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv

Once user selects, a continent, for e.g. Asia

My graph would show all the total number of case for all the countries in Asia. I'm trying to add one more of line at plt.xlabel that summarizes the country with highest number of case and country with lowest number of case.

expected output: In Asia, X country has the highest number of cases with xx,xxx cases and X country has the lowest number of cases with x,xxx cases.

Here is my code:

import PySimpleGUI as sg
import matplotlib.pyplot as plt


def graphUI(dfIn):
    # getting the continent and location to use as value for dropdown search
    df_continent = dfIn[1].drop_duplicates()
    df_location = dfIn[2].drop_duplicates()
    #for LinePlot Options
    linePlotList = ['New Case', 'Total Death', 'New Death']
    layout = [
        [sg.Text('Display Graph')],
        [sg.Text('Continent:'),
         sg.Combo(df_continent.values[1:-1].tolist(), default_value="Continent", key="-continent-",change_submits=True)],
        [sg.Text('Location: '), sg.Combo(df_location.values[1:].tolist(), default_value="Location", key="-location-")],
        [sg.Text('Only For Line Plot: '), sg.Combo(linePlotList, default_value="New Case", key="-linePlot-")],
        [sg.Button('Bar plot', key='bar', tooltip='View Graph'),
         sg.Button('Line plot', key='line', tooltip='View Graph'),
         sg.Button('Cancel')]
    ]
    window = sg.Window('Search and Filter', layout)
    while True:
        event, values = window.read()
        #on combo continent changes value, it will run the code below
        if event == "-continent-":
            if values['-continent-'] != df_continent.values[0]:
                #run checkBoxUpdate function to update the list of country inside the selected continent
                formUpdate = checkBoxupdate(dfIn, values['-continent-'])
                #update the window by finding the element of location combo and update the latest country value
                window.FindElement('-location-').Update(values=formUpdate.values[1:].tolist())
        # Once user press Ok button, get all values and compare to df
        if event == "bar":
            searchedDf = dfIn[::]
            if values['-continent-'] != df_continent.values[0]:
                barchart(searchedDf,values)
        if event == "line":
            searchedDf = dfIn[::]
            if values['-location-'] != df_continent.values[1]:
                selectedLineChoice = values['-linePlot-']
                linePlot(searchedDf,values,selectedLineChoice)
        elif event == "Cancel" or event is None:
            window.close()
            return dfIn


def barchart(searchedDf,values) :
    # getting the continent and location to use as value for dropdown search
    searchedDf = searchedDf[searchedDf.isin([values['-continent-']]).any(axis=1)]

    #drop duplicates country and keep latest
    searchedDf = searchedDf.drop_duplicates(subset=[2], keep='last')

    allcountry = list(searchedDf[2])
    highestInfected = list(map(int, searchedDf[4]))

    # Access the values which were entered and store in lists
    plt.figure(figsize=(10, 5))

    plt.barh(allcountry, highestInfected)
    #set axist label to smaller size
    plt.tick_params(axis='y', which='major', labelsize=6)
    plt.suptitle('Total Case of ' + values['-continent-'])
    plt.xlabel('In ' + values['-continent-'] + 'has the most number of cases.' )
    plt.show()

def linePlot(searchedDf, values,selectedLineChoice):
    # getting the continent and location to use as value for dropdown search
    searchedDf = searchedDf[searchedDf.isin([values['-location-']]).any(axis=1)]
    eachDate = list(searchedDf[3])
    if selectedLineChoice == 'New Case':
        selectedLineChoiceValues = list(map(int, searchedDf[5]))
    if selectedLineChoice == 'Total Death':
        selectedLineChoiceValues = list(map(int, searchedDf[6]))
    if selectedLineChoice == 'New Death':
        selectedLineChoiceValues = list(map(int, searchedDf[7]))

    #set frequency of the date on x axis to appear on lower freq
    frequency = 50
    plt.plot(eachDate , selectedLineChoiceValues)

    plt.xticks(eachDate[::frequency])
    plt.xticks(rotation=45)
    plt.tick_params(axis='x', which='major', labelsize=6)
    plt.suptitle('Total New Case of ' + values['-location-'])

    plt.ylabel(selectedLineChoice, fontsize=10)
    plt.show()


def checkBoxupdate(dfIn, input):
    #search the DF for the selected continents
    searchedDf = dfIn[dfIn.isin([input]).any(axis=1)]
    #drop duplicates country of the selected continenets and return
    df_location = searchedDf[2].drop_duplicates()
    return df_location
1

There are 1 best solutions below

2
On BEST ANSWER

Understanding that the subject of this question is to display the maximum and minimum values on the x-axis labels for the selected continent, I created the following code.

import matplotlib.pyplot as plt
import pandas as pd
import requests

url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
df = pd.read_csv(url, sep=',')
df.fillna(0, inplace=True)

continent = 'Asia'
country = 'Indonesia'
searchedDf = df.copy()

searchedDf = searchedDf[searchedDf.isin([continent]).any(axis=1)]
# total_cases -> max, min 
casesDf = searchedDf.copy()
cases_ = casesDf.groupby(['location'])[['date','total_cases']].last().reset_index()
cases_max_df = cases_[cases_['total_cases'] == max(cases_['total_cases'])]
cases_min_df = cases_[cases_['total_cases'] == min(cases_['total_cases'])]

searchedDf = searchedDf[searchedDf.isin([country]).any(axis=1)]
#drop duplicates country and keep latest
searchedDf = searchedDf.drop_duplicates(subset=['continent'], keep='last')
# print(searchedDf)

allcountry = list(searchedDf['location'])
highestInfected = list(map(int, searchedDf['total_cases']))

# Access the values which were entered and store in lists
plt.figure(figsize=(10, 5))

plt.barh(allcountry, highestInfected)
#set axist label to smaller size
plt.tick_params(axis='y', which='major', labelsize=16)
plt.suptitle('Total Case of ' + continent)
labels = ('In ' + continent + ' has the most number of cases.\n'
          + str(cases_max_df['location'].values[0]) + ':' + str(cases_max_df['total_cases'].values[0]) + '\n'
          + str(cases_min_df['location'].values[0]) + ':' + str(cases_min_df['total_cases'].values[0]))

plt.xlabel(labels, fontsize=18)

plt.show()

enter image description here

import datetime
searchedDf['date'] = pd.to_datetime(searchedDf['date'])
searchedDf['yyyy-mm'] = str(searchedDf['date'].dt.year) + '-' + str(searchedDf['date'].dt.month)
month_gb = searchedDf.groupby('yyyy-mm')['total-cases'].sum()