当风向是分类值时如何绘制风玫瑰图

How to plot a windrose when the wind direction is a categorical value

根据数据集 Australia Rainfall,我正在尝试预测 RainTomorrow。下面是我的代码:

使用 opendatasets 库直接从 Kaggle 下载数据集

import opendatasets as od  
# URL of the Kaggle dataset page; it is passed unchanged to od.download.
# NOTE(review): the later cells read from './weather-dataset-rattle-package',
# so the download presumably lands in a folder of that name — confirm.
dataset_url = 'https://www.kaggle.com/jsphyg/weather-dataset-rattle-package'
od.download(dataset_url)

导入必要的库

import os
import pandas as pd
import numpy as np

import plotly.express as px
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Global look-and-feel for every figure drawn afterwards: seaborn's dark grid
# theme plus a few matplotlib defaults, applied in a single update.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 14,
    'figure.figsize': (10, 6),
    'figure.facecolor': '#00000000',  # RGBA hex with alpha 00
})

正在加载数据集

# Folder the dataset is read from, and the CSV file inside it.
data_dir = './weather-dataset-rattle-package'
# Listing the folder fails early if it does not exist; the listing itself
# is not stored.
os.listdir(data_dir)
train_csv = f'{data_dir}/weatherAUS.csv'
# Raw, unprocessed observations as a DataFrame.
raw_df = pd.read_csv(train_csv)

探索 WindGustDir 变量

# Work on the column once instead of re-selecting it for every statement.
gust_dir = raw_df['WindGustDir']

# Distinct values of the column; unique() keeps NaN, so a missing value
# counts as one label here.
gust_labels = gust_dir.unique()
print('WindGustDir contains', len(gust_labels), 'labels')
gust_labels

# Number of rows per direction.
gust_dir.value_counts()

# One-hot encoding with the first category dropped and an extra NaN column,
# previewed and then summed per column.
gust_dummies = pd.get_dummies(gust_dir, drop_first=True, dummy_na=True)
gust_dummies.head()
gust_dummies.sum(axis=0)

绘制 Windrose

from windrose import WindroseAxes

# NOTE: this is the attempt that fails with the TypeError shown in the
# traceback below ("'<' not supported between instances of 'float' and 'str'"):
# WindGustDir is passed as the direction argument while it still holds
# string labels rather than numbers.
ax = WindroseAxes.from_ax()
ax.bar(raw_df.WindGustDir, raw_df.Rainfall, normed=True, opening=0.8, 
edgecolor='white')
ax.set_legend()

我无法弄清楚应该把哪一列与 WindGustDir 一起使用,或者是否有其他方法可以比较 RainTomorrow 和 WindGustDir。

错误信息

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     57     try:
---> 58         return bound(*args, **kwds)
     59     except TypeError:

TypeError: '<' not supported between instances of 'float' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-253-1a1f0fa6bf7a> in <module>
      1 ax = WindroseAxes.from_ax()
----> 2 ax.bar(direction=df.WindGustDir, var=df.Rainfall, normed=True, opening=0.8, edgecolor='white')
      3 ax.set_legend()

e:\Anaconda3\lib\site-packages\windrose\windrose.py in bar(self, direction, var, **kwargs)
    547         """
    548 
--> 549         bins, nbins, nsector, colors, angles, kwargs = self._init_plot(
    550             direction, var, **kwargs
    551         )

e:\Anaconda3\lib\site-packages\windrose\windrose.py in _init_plot(self, direction, var, **kwargs)
    359 
    360         # Set the global information dictionnary
--> 361         self._info["dir"], self._info["bins"], self._info["table"] = histogram(
    362             direction, var, bins, nsector, normed, blowto
    363         )

e:\Anaconda3\lib\site-packages\windrose\windrose.py in histogram(direction, var, bins, nsector, normed, blowto)
    746         direction[direction >= 360.] = direction[direction >= 360.] - 360
    747 
--> 748     table = histogram2d(x=var, y=direction, bins=[var_bins, dir_bins], normed=False)[0]
    749     # add the last value to the first to have the table of North winds
    750     table[:, 0] = table[:, 0] + table[:, -1]

<__array_function__ internals> in histogram2d(*args, **kwargs)

e:\Anaconda3\lib\site-packages\numpy\lib\twodim_base.py in histogram2d(x, y, bins, range, normed, weights, density)
    742         xedges = yedges = asarray(bins)
    743         bins = [xedges, yedges]
--> 744     hist, edges = histogramdd([x, y], bins, range, normed, weights, density)
    745     return hist, edges[0], edges[1]
    746 

<__array_function__ internals> in histogramdd(*args, **kwargs)

e:\Anaconda3\lib\site-packages\numpy\lib\histograms.py in histogramdd(sample, bins, range, normed, weights, density)
   1071 
   1072     # Compute the bin number each sample falls into.
-> 1073     Ncount = tuple(
   1074         # avoid np.digitize to work around gh-11022
   1075         np.searchsorted(edges[i], sample[:, i], side='right')

e:\Anaconda3\lib\site-packages\numpy\lib\histograms.py in <genexpr>(.0)
   1073     Ncount = tuple(
   1074         # avoid np.digitize to work around gh-11022
-> 1075         np.searchsorted(edges[i], sample[:, i], side='right')
   1076         for i in _range(D)
   1077     )

<__array_function__ internals> in searchsorted(*args, **kwargs)

e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in searchsorted(a, v, side, sorter)
   1346 
   1347     """
-> 1348     return _wrapfunc(a, 'searchsorted', v, side=side, sorter=sorter)
   1349 
   1350 

e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     65         # Call _wrapit from within the except clause to ensure a potential
     66         # exception has a traceback chain.
---> 67         return _wrapit(obj, method, *args, **kwds)
     68 
     69 

e:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapit(obj, method, *args, **kwds)
     42     except AttributeError:
     43         wrap = None
---> 44     result = getattr(asarray(obj), method)(*args, **kwds)
     45     if wrap:
     46         if not isinstance(result, mu.ndarray):

TypeError: '<' not supported between instances of 'float' and 'str'
  • 看来 direction 参数必须是数字。
  • 创建一个 dict,其中每个 key 是 'WindGustDir' 中的一个方向,对应的 value 是以度为单位的浮点数。
  • 使用 .map 将该 dict 映射到 df.WindGustDir,然后绘图。
  • 或者,创建并绘制一个新列
    • df.insert(loc=8, column='WindGustDirDeg', value=df.WindGustDir.map(wind_dir_deg))
import pandas as pd
from windrose import WindroseAxes
import numpy as np

# Load the downloaded data; rows without a gust direction cannot be mapped
# to a bearing, so they are dropped up front.
df = pd.read_csv('weatherAUS/weatherAUS.csv').dropna(subset=['WindGustDir'])

# Map every 16-point compass label to a bearing in degrees.
# windrose expects bearings with 0 degrees = North, increasing clockwise
# (90 = East), so the labels are listed clockwise starting at 'N'. Each label
# must appear exactly once: a duplicate silently overwrites its dict entry
# and a missing label turns into NaN when the column is mapped.
wind_dir = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE',
            'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
degrees = np.arange(0, 360, 22.5)  # 16 sectors of 22.5 degrees each
wind_dir_deg = dict(zip(wind_dir, degrees))

# Convert the categorical directions to numeric bearings and draw the rose,
# binning by rainfall amount.
ax = WindroseAxes.from_ax()
ax.bar(direction=df.WindGustDir.map(wind_dir_deg), var=df.Rainfall, normed=True, opening=0.8, edgecolor='white')
ax.set_legend()