如何在 Pandas 中找到最高值和最低值并聚合成一个字符串,Pysimplegui

How to find highest and lowest value and aggregate into a string in Pandas, Pysimplegui

我在 pandas

中有一个数据框

此代码是 GUI 中的一个函数的一部分,我正在尝试创建一行字符串,该字符串将提及一个大陆内某个国家/地区的 COVID 病例数最多,而该大陆是从用户中选择的。

这是我正在使用的数据集:https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv

用户选择大陆后,例如亚洲

我的图表将显示亚洲所有国家/地区的所有病例总数。 我正在尝试在 plt.xlabel 处再添加一行,总结病例数最多的国家和病例数最少的国家。

预期输出: 在亚洲,X国病例数最多,有xx,xxx例,X国病例数最少,有x,xxx例。

这是我的代码:

import PySimpleGUI as sg
import matplotlib.pyplot as plt


def graphUI(dfIn):
    # getting the continent and location to use as value for dropdown search
    df_continent = dfIn[1].drop_duplicates()
    df_location = dfIn[2].drop_duplicates()
    #for LinePlot Options
    linePlotList = ['New Case', 'Total Death', 'New Death']
    layout = [
        [sg.Text('Display Graph')],
        [sg.Text('Continent:'),
         sg.Combo(df_continent.values[1:-1].tolist(), default_value="Continent", key="-continent-",change_submits=True)],
        [sg.Text('Location: '), sg.Combo(df_location.values[1:].tolist(), default_value="Location", key="-location-")],
        [sg.Text('Only For Line Plot: '), sg.Combo(linePlotList, default_value="New Case", key="-linePlot-")],
        [sg.Button('Bar plot', key='bar', tooltip='View Graph'),
         sg.Button('Line plot', key='line', tooltip='View Graph'),
         sg.Button('Cancel')]
    ]
    window = sg.Window('Search and Filter', layout)
    while True:
        event, values = window.read()
        #on combo continent changes value, it will run the code below
        if event == "-continent-":
            if values['-continent-'] != df_continent.values[0]:
                #run checkBoxUpdate function to update the list of country inside the selected continent
                formUpdate = checkBoxupdate(dfIn, values['-continent-'])
                #update the window by finding the element of location combo and update the latest country value
                window.FindElement('-location-').Update(values=formUpdate.values[1:].tolist())
        # Once user press Ok button, get all values and compare to df
        if event == "bar":
            searchedDf = dfIn[::]
            if values['-continent-'] != df_continent.values[0]:
                barchart(searchedDf,values)
        if event == "line":
            searchedDf = dfIn[::]
            if values['-location-'] != df_continent.values[1]:
                selectedLineChoice = values['-linePlot-']
                linePlot(searchedDf,values,selectedLineChoice)
        elif event == "Cancel" or event is None:
            window.close()
            return dfIn


def barchart(searchedDf,values) :
    # getting the continent and location to use as value for dropdown search
    searchedDf = searchedDf[searchedDf.isin([values['-continent-']]).any(axis=1)]

    #drop duplicates country and keep latest
    searchedDf = searchedDf.drop_duplicates(subset=[2], keep='last')

    allcountry = list(searchedDf[2])
    highestInfected = list(map(int, searchedDf[4]))

    # Access the values which were entered and store in lists
    plt.figure(figsize=(10, 5))

    plt.barh(allcountry, highestInfected)
    #set axist label to smaller size
    plt.tick_params(axis='y', which='major', labelsize=6)
    plt.suptitle('Total Case of ' + values['-continent-'])
    plt.xlabel('In ' + values['-continent-'] + 'has the most number of cases.' )
    plt.show()

def linePlot(searchedDf, values,selectedLineChoice):
    # getting the continent and location to use as value for dropdown search
    searchedDf = searchedDf[searchedDf.isin([values['-location-']]).any(axis=1)]
    eachDate = list(searchedDf[3])
    if selectedLineChoice == 'New Case':
        selectedLineChoiceValues = list(map(int, searchedDf[5]))
    if selectedLineChoice == 'Total Death':
        selectedLineChoiceValues = list(map(int, searchedDf[6]))
    if selectedLineChoice == 'New Death':
        selectedLineChoiceValues = list(map(int, searchedDf[7]))

    #set frequency of the date on x axis to appear on lower freq
    frequency = 50
    plt.plot(eachDate , selectedLineChoiceValues)

    plt.xticks(eachDate[::frequency])
    plt.xticks(rotation=45)
    plt.tick_params(axis='x', which='major', labelsize=6)
    plt.suptitle('Total New Case of ' + values['-location-'])

    plt.ylabel(selectedLineChoice, fontsize=10)
    plt.show()


def checkBoxupdate(dfIn, input):
    #search the DF for the selected continents
    searchedDf = dfIn[dfIn.isin([input]).any(axis=1)]
    #drop duplicates country of the selected continenets and return
    df_location = searchedDf[2].drop_duplicates()
    return df_location

了解到此问题的主题是在所选大洲的 x-axis 标签上显示最大值和最小值,因此我创建了以下代码。

import matplotlib.pyplot as plt
import pandas as pd
import requests

url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
df = pd.read_csv(url, sep=',')
df.fillna(0, inplace=True)

continent = 'Asia'
country = 'Indonesia'
searchedDf = df.copy()

searchedDf = searchedDf[searchedDf.isin([continent]).any(axis=1)]
# total_cases -> max, min 
casesDf = searchedDf.copy()
cases_ = casesDf.groupby(['location'])[['date','total_cases']].last().reset_index()
cases_max_df = cases_[cases_['total_cases'] == max(cases_['total_cases'])]
cases_min_df = cases_[cases_['total_cases'] == min(cases_['total_cases'])]

searchedDf = searchedDf[searchedDf.isin([country]).any(axis=1)]
#drop duplicates country and keep latest
searchedDf = searchedDf.drop_duplicates(subset=['continent'], keep='last')
# print(searchedDf)

allcountry = list(searchedDf['location'])
highestInfected = list(map(int, searchedDf['total_cases']))

# Access the values which were entered and store in lists
plt.figure(figsize=(10, 5))

plt.barh(allcountry, highestInfected)
#set axist label to smaller size
plt.tick_params(axis='y', which='major', labelsize=16)
plt.suptitle('Total Case of ' + continent)
labels = ('In ' + continent + ' has the most number of cases.\n'
          + str(cases_max_df['location'].values[0]) + ':' + str(cases_max_df['total_cases'].values[0]) + '\n'
          + str(cases_min_df['location'].values[0]) + ':' + str(cases_min_df['total_cases'].values[0]))

plt.xlabel(labels, fontsize=18)

plt.show()

import datetime
searchedDf['date'] = pd.to_datetime(searchedDf['date'])
searchedDf['yyyy-mm'] = str(searchedDf['date'].dt.year) + '-' + str(searchedDf['date'].dt.month)
month_gb = searchedDf.groupby('yyyy-mm')['total-cases'].sum()