Building a pandas DataFrame from Cloudant data, error: If using all scalar values, you must pass an index
Building a pandas DataFrame from Cloudant data, error: If using all scalar values, you must pass an index
我刚开始学习 pandas。针对这条错误消息我找到的所有答案都没能解决我的问题。我正在尝试把 IBM Cloudant 查询返回的字典构建成数据框(DataFrame)。我使用的是 Jupyter Notebook。具体的报错信息为:If using all scalar values, you must pass an index
我认为我的错误所在的代码部分在此处:
# Question excerpt: query today's readings for `location` from the Cloudant
# "temps" database, build a DataFrame one row at a time, and return a
# (message, df) tuple.
# NOTE(review): indentation was lost in this paste; the loop body presumably
# covers the three statements that follow `for row in temp_dict:`.
def read_high_low_temp(location):
USERNAME = "*************"
PASSWORD = "*************"
client = Cloudant(USERNAME,PASSWORD, url = "https://**********" )
# The connection is opened here; nothing in this block closes it.
client.connect()
my_database = client["temps"]
# Match documents whose 'l' equals `location` and whose 'd' equals today's
# date formatted MM-DD-YYYY; return only temp/t/d, sorted by temp descending.
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
# This empty dict is overwritten by the very next statement.
temp_dict={}
# NOTE(review): with the descending temp sort, skip=5 presumably drops the
# five highest readings — confirm this is intended.
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
# Makes the (still empty) 'Time' column the index, in place.
df.set_index('Time', inplace= True)
for row in temp_dict:
# value_list is never created in this function, so this raises NameError
# unless a global with that name already exists in the notebook.
value_list.append(row['temp'])
# All three values are scalars, hence the explicit index. index=['Time']
# labels the single row with the literal string "Time", which is why every
# row of the printed output carries the label "Time".
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
# NOTE(review): DataFrame.append copies the frame on every call and is not
# available in pandas 2.x — confirm the pandas version in use.
df=df.append(temp_df)
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
# Returns a tuple (summary string, DataFrame).
return message, df
我的数据(Jupyter 的输出)如下所示:
Temperature Time Date
Time 51.6 05:07:18 12-31-2020
Time 51.6 04:59:00 12-31-2020
Time 51.5 04:50:31 12-31-2020
Time 51.5 05:15:38 12-31-2020
Time 51.5 05:03:09 12-31-2020
... ... ... ...
Time 45.3 11:56:34 12-31-2020
Time 45.3 11:52:22 12-31-2020
Time 45.3 11:14:15 12-31-2020
Time 45.2 10:32:05 12-31-2020
Time 45.2 10:36:22 12-31-2020
[164 rows x 3 columns]
我的完整代码如下:
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, re, sys, sqlite3, logging
#import RPi.GPIO as GPIO
from datetime import datetime
import datetime as dt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
# Question's full version: query today's readings for `location` from the
# Cloudant "temps" database, build a DataFrame one row at a time, and return a
# (message, df) tuple.
# NOTE(review): indentation was lost in this paste; the loop body presumably
# covers the two statements that follow `for row in temp_dict:`.
def read_high_low_temp(location):
USERNAME = "******"
PASSWORD = "******"
client = Cloudant(USERNAME,PASSWORD, url = "********" )
# The connection is opened here; nothing in this block closes it.
client.connect()
# location='Backyard'
my_database = client["temps"]
# Match documents whose 'l' equals `location` and whose 'd' equals today's
# date formatted MM-DD-YYYY; return only temp/t/d, sorted by temp descending.
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
# This empty dict is overwritten by the very next statement.
temp_dict={}
# NOTE(review): with the descending temp sort, skip=5 presumably drops the
# five highest readings — confirm this is intended.
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
# The result of set_index is discarded (no inplace=True, no assignment), so
# this statement leaves df unchanged.
df.set_index('Time')
for row in temp_dict:
# All three values are scalars, hence the explicit index. index=['Time']
# labels the single row with the literal string "Time".
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
# NOTE(review): DataFrame.append copies the frame on every call and is not
# available in pandas 2.x — confirm the pandas version in use.
df=df.append(temp_df)
# value_list is neither defined nor filled anywhere in this function, so this
# raises NameError unless a global with that name already exists.
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
# Returns a tuple (summary string, DataFrame).
return message, df
# Driver cell of the question: run the query, print the results, then plot.
print ("Cloudant Jupyter Query test\nThe hour = ",dt.datetime.now().hour)
# read_high_low_temp returns (message, DataFrame); unpack both parts.
msg1, values=read_high_low_temp("Backyard")
print (msg1)
print(values)
# `values` is passed positionally. The FutureWarning in the traceback shows
# seaborn binds it to `x` rather than `data`, and the ValueError is raised
# while seaborn builds its plot frame from that argument.
# NOTE(review): presumably this should name the frame and columns explicitly,
# e.g. sns.lineplot(data=values, x=..., y=...) — confirm column names.
sns.lineplot(values)
来自 Jupyter 的完整错误消息是:
C:\Users\ustl02870\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-34956d8dafb0> in <module>
53
54 #df = sns.load_dataset(values)
---> 55 sns.lineplot(values)
56 #print (values)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
686 data=data, variables=variables,
687 estimator=estimator, ci=ci, n_boot=n_boot, seed=seed,
--> 688 sort=sort, err_style=err_style, err_kws=err_kws, legend=legend,
689 )
690
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in __init__(self, data, variables, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend)
365 )
366
--> 367 super().__init__(data=data, variables=variables)
368
369 self.estimator = estimator
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in __init__(self, data, variables)
602 def __init__(self, data=None, variables={}):
603
--> 604 self.assign_variables(data, variables)
605
606 for var, cls in self._semantic_mappings.items():
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in assign_variables(self, data, variables)
666 self.input_format = "long"
667 plot_data, variables = self._assign_variables_longform(
--> 668 data, **variables,
669 )
670
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in _assign_variables_longform(self, data, **kwargs)
924 # Construct a tidy plot DataFrame. This will convert a number of
925 # types automatically, aligning on index in case of pandas objects
--> 926 plot_data = pd.DataFrame(plot_data)
927
928 # Reduce the variables dictionary to fields with valid data
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
527
528 elif isinstance(data, dict):
--> 529 mgr = init_dict(data, index, columns, dtype=dtype)
530 elif isinstance(data, ma.MaskedArray):
531 import numpy.ma.mrecords as mrecords
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
285 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
286 ]
--> 287 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
288
289
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
78 # figure out the index, if necessary
79 if index is None:
---> 80 index = extract_index(arrays)
81 else:
82 index = ensure_index(index)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in extract_index(data)
389
390 if not indexes and not raw_lengths:
--> 391 raise ValueError("If using all scalar values, you must pass an index")
392
393 if have_series:
ValueError: If using all scalar values, you must pass an index
问题在于你到处都把 'Time' 指定为索引。请在 seaborn.lineplot 文档中查看数据框应有的结构:https://seaborn.pydata.org/generated/seaborn.lineplot.html
你能试试去掉 df.set_index('Time') 这一部分吗?
在 @Ena 的帮助和指点下,我解决了这个问题。结果发现我犯了好几个错误。通俗地说:1) 我试图绘制的是一个元组,而它应该是一个数据框;2) 我的数据在字典里,我却逐条遍历它试图构建一个元组,而我本应使用 pandas 的内置工具直接从字典构建数据框;3) 我的代码应该写成不使用标量值的形式,这样就不需要索引;最后 4) 我把元组当作 seaborn 绘图的数据,而它应该是一个数据框。下面是现在可以正常运行的代码。
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, sys
from datetime import datetime
import datetime as dt
from matplotlib import pyplot as plt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
def read_high_low_temp(location):
    """Fetch today's temperature readings for *location* from Cloudant.

    Queries the ``temps`` database for documents whose ``l`` field equals
    *location* and whose ``d`` field equals today's date (``MM-DD-YYYY``),
    ordered by the ``t`` field ascending.

    Args:
        location: Value matched against the ``l`` field, e.g. ``"Backyard"``.

    Returns:
        A ``(message, df)`` tuple. *message* is a summary string naming the
        highest and lowest ``temp`` value, or a notice when the query returned
        no documents. *df* is a DataFrame with the columns ``temp``, ``t``
        and ``d``, one row per returned document.
    """
    USERNAME = "****************"
    PASSWORD = "*****************"
    client = Cloudant(USERNAME, PASSWORD, url="**************************")
    client.connect()
    try:
        my_database = client["temps"]
        query = Query(
            my_database,
            selector={
                '_id': {'$gt': 0},
                'l': location,
                'd': dt.datetime.now().strftime("%m-%d-%Y"),
            },
            fields=['temp', 't', 'd'],
            sort=[{'t': 'asc'}],
        )
        # NOTE(review): skip=5 presumably drops the first five matches of the
        # day — confirm this is intended.
        docs = query(limit=1000, skip=5)['docs']
    finally:
        # Close the session even when the query fails; the documents are
        # already in memory at this point.
        client.disconnect()

    # Naming the columns keeps 'temp', 't' and 'd' present even when the
    # query matched nothing, so callers can index them unconditionally.
    df = pd.DataFrame(docs, columns=['temp', 't', 'd'])
    if not docs:
        # max()/min() raise ValueError on an empty sequence.
        return "no temperature readings found for " + location + " today", df

    temps = [doc['temp'] for doc in docs]
    message = (
        "the highest temp in the " + location + " is: " + str(max(temps))
        + " the lowest " + str(min(temps))
    )
    return message, df
# Fetch today's readings and draw them as a bar chart, one bar per reading.
msg1, values = read_high_low_temp("Backyard")
g = sns.catplot(x='t', y='temp', data=values, kind='bar', color="darkblue",
                height=8.27, aspect=11.7/8.27)
print("the minimum temp is:", values['temp'].min(), " the maximum temp is:", values['temp'].max())
g.set(xlabel='Time', ylabel='Temperature')
# Zoom the y axis to one unit beyond the observed range so small differences
# between bars stay visible.
plt.ylim(values['temp'].min()-1, values['temp'].max()+1)
# Keep every 10th tick label and rotate it. This has to run before savefig,
# otherwise the saved image still carries every label.
g.set_xticklabels(step=10, rotation=45)
plt.savefig("2021-01-01-temperature graph.png")
我刚开始学习 pandas。针对这条错误消息我找到的所有答案都没能解决我的问题。我正在尝试把 IBM Cloudant 查询返回的字典构建成数据框(DataFrame)。我使用的是 Jupyter Notebook。具体的报错信息为:If using all scalar values, you must pass an index
我认为我的错误所在的代码部分在此处:
# Question excerpt: query today's readings for `location` from the Cloudant
# "temps" database, build a DataFrame one row at a time, and return a
# (message, df) tuple.
# NOTE(review): indentation was lost in this paste; the loop body presumably
# covers the three statements that follow `for row in temp_dict:`.
def read_high_low_temp(location):
USERNAME = "*************"
PASSWORD = "*************"
client = Cloudant(USERNAME,PASSWORD, url = "https://**********" )
# The connection is opened here; nothing in this block closes it.
client.connect()
my_database = client["temps"]
# Match documents whose 'l' equals `location` and whose 'd' equals today's
# date formatted MM-DD-YYYY; return only temp/t/d, sorted by temp descending.
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
# This empty dict is overwritten by the very next statement.
temp_dict={}
# NOTE(review): with the descending temp sort, skip=5 presumably drops the
# five highest readings — confirm this is intended.
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
# Makes the (still empty) 'Time' column the index, in place.
df.set_index('Time', inplace= True)
for row in temp_dict:
# value_list is never created in this function, so this raises NameError
# unless a global with that name already exists in the notebook.
value_list.append(row['temp'])
# All three values are scalars, hence the explicit index. index=['Time']
# labels the single row with the literal string "Time", which is why every
# row of the printed output carries the label "Time".
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
# NOTE(review): DataFrame.append copies the frame on every call and is not
# available in pandas 2.x — confirm the pandas version in use.
df=df.append(temp_df)
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
# Returns a tuple (summary string, DataFrame).
return message, df
我的数据(Jupyter 的输出)如下所示:
Temperature Time Date
Time 51.6 05:07:18 12-31-2020
Time 51.6 04:59:00 12-31-2020
Time 51.5 04:50:31 12-31-2020
Time 51.5 05:15:38 12-31-2020
Time 51.5 05:03:09 12-31-2020
... ... ... ...
Time 45.3 11:56:34 12-31-2020
Time 45.3 11:52:22 12-31-2020
Time 45.3 11:14:15 12-31-2020
Time 45.2 10:32:05 12-31-2020
Time 45.2 10:36:22 12-31-2020
[164 rows x 3 columns]
我的完整代码如下:
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, re, sys, sqlite3, logging
#import RPi.GPIO as GPIO
from datetime import datetime
import datetime as dt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
# Question's full version: query today's readings for `location` from the
# Cloudant "temps" database, build a DataFrame one row at a time, and return a
# (message, df) tuple.
# NOTE(review): indentation was lost in this paste; the loop body presumably
# covers the two statements that follow `for row in temp_dict:`.
def read_high_low_temp(location):
USERNAME = "******"
PASSWORD = "******"
client = Cloudant(USERNAME,PASSWORD, url = "********" )
# The connection is opened here; nothing in this block closes it.
client.connect()
# location='Backyard'
my_database = client["temps"]
# Match documents whose 'l' equals `location` and whose 'd' equals today's
# date formatted MM-DD-YYYY; return only temp/t/d, sorted by temp descending.
query = Query(my_database,selector= {'_id': {'$gt': 0}, 'l':location, 'd':dt.datetime.now().strftime("%m-%d-%Y")}, fields=['temp','t','d'],sort=[{'temp': 'desc'}])
# This empty dict is overwritten by the very next statement.
temp_dict={}
# NOTE(review): with the descending temp sort, skip=5 presumably drops the
# five highest readings — confirm this is intended.
temp_dict=query(limit=1000, skip=5)['docs']
df = pd.DataFrame(columns = ['Temperature','Time','Date'])
# The result of set_index is discarded (no inplace=True, no assignment), so
# this statement leaves df unchanged.
df.set_index('Time')
for row in temp_dict:
# All three values are scalars, hence the explicit index. index=['Time']
# labels the single row with the literal string "Time".
temp_df=pd.DataFrame({'Temperature':row['temp'],'Time':row['t'], 'Date':row['d']}, index=['Time'])
# NOTE(review): DataFrame.append copies the frame on every call and is not
# available in pandas 2.x — confirm the pandas version in use.
df=df.append(temp_df)
# value_list is neither defined nor filled anywhere in this function, so this
# raises NameError unless a global with that name already exists.
message="the highest temp in the " + location + " is: " + str(max(value_list)) + " the lowest " + str(min(value_list))
# Returns a tuple (summary string, DataFrame).
return message, df
# Driver cell of the question: run the query, print the results, then plot.
print ("Cloudant Jupyter Query test\nThe hour = ",dt.datetime.now().hour)
# read_high_low_temp returns (message, DataFrame); unpack both parts.
msg1, values=read_high_low_temp("Backyard")
print (msg1)
print(values)
# `values` is passed positionally. The FutureWarning in the traceback shows
# seaborn binds it to `x` rather than `data`, and the ValueError is raised
# while seaborn builds its plot frame from that argument.
# NOTE(review): presumably this should name the frame and columns explicitly,
# e.g. sns.lineplot(data=values, x=..., y=...) — confirm column names.
sns.lineplot(values)
来自 Jupyter 的完整错误消息是:
C:\Users\ustl02870\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-34956d8dafb0> in <module>
53
54 #df = sns.load_dataset(values)
---> 55 sns.lineplot(values)
56 #print (values)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in lineplot(x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, units, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend, ax, **kwargs)
686 data=data, variables=variables,
687 estimator=estimator, ci=ci, n_boot=n_boot, seed=seed,
--> 688 sort=sort, err_style=err_style, err_kws=err_kws, legend=legend,
689 )
690
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\relational.py in __init__(self, data, variables, estimator, ci, n_boot, seed, sort, err_style, err_kws, legend)
365 )
366
--> 367 super().__init__(data=data, variables=variables)
368
369 self.estimator = estimator
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in __init__(self, data, variables)
602 def __init__(self, data=None, variables={}):
603
--> 604 self.assign_variables(data, variables)
605
606 for var, cls in self._semantic_mappings.items():
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in assign_variables(self, data, variables)
666 self.input_format = "long"
667 plot_data, variables = self._assign_variables_longform(
--> 668 data, **variables,
669 )
670
~\AppData\Local\Programs\Python\Python37\lib\site-packages\seaborn\_core.py in _assign_variables_longform(self, data, **kwargs)
924 # Construct a tidy plot DataFrame. This will convert a number of
925 # types automatically, aligning on index in case of pandas objects
--> 926 plot_data = pd.DataFrame(plot_data)
927
928 # Reduce the variables dictionary to fields with valid data
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
527
528 elif isinstance(data, dict):
--> 529 mgr = init_dict(data, index, columns, dtype=dtype)
530 elif isinstance(data, ma.MaskedArray):
531 import numpy.ma.mrecords as mrecords
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
285 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
286 ]
--> 287 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
288
289
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
78 # figure out the index, if necessary
79 if index is None:
---> 80 index = extract_index(arrays)
81 else:
82 index = ensure_index(index)
~\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\core\internals\construction.py in extract_index(data)
389
390 if not indexes and not raw_lengths:
--> 391 raise ValueError("If using all scalar values, you must pass an index")
392
393 if have_series:
ValueError: If using all scalar values, you must pass an index
问题在于你到处都把 'Time' 指定为索引。请在 seaborn.lineplot 文档中查看数据框应有的结构:https://seaborn.pydata.org/generated/seaborn.lineplot.html
你能试试去掉 df.set_index('Time') 这一部分吗?
在 @Ena 的帮助和指点下,我解决了这个问题。结果发现我犯了好几个错误。通俗地说:1) 我试图绘制的是一个元组,而它应该是一个数据框;2) 我的数据在字典里,我却逐条遍历它试图构建一个元组,而我本应使用 pandas 的内置工具直接从字典构建数据框;3) 我的代码应该写成不使用标量值的形式,这样就不需要索引;最后 4) 我把元组当作 seaborn 绘图的数据,而它应该是一个数据框。下面是现在可以正常运行的代码。
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import pandas as pd
import seaborn as sns
import os, shutil, glob, time, subprocess, sys
from datetime import datetime
import datetime as dt
from matplotlib import pyplot as plt
import cloudant
from cloudant.client import Cloudant
from cloudant.query import Query
from cloudant.result import QueryResult
from cloudant.error import ResultException
import seaborn as sns
def read_high_low_temp(location):
    """Fetch today's temperature readings for *location* from Cloudant.

    Queries the ``temps`` database for documents whose ``l`` field equals
    *location* and whose ``d`` field equals today's date (``MM-DD-YYYY``),
    ordered by the ``t`` field ascending.

    Args:
        location: Value matched against the ``l`` field, e.g. ``"Backyard"``.

    Returns:
        A ``(message, df)`` tuple. *message* is a summary string naming the
        highest and lowest ``temp`` value, or a notice when the query returned
        no documents. *df* is a DataFrame with the columns ``temp``, ``t``
        and ``d``, one row per returned document.
    """
    USERNAME = "****************"
    PASSWORD = "*****************"
    client = Cloudant(USERNAME, PASSWORD, url="**************************")
    client.connect()
    try:
        my_database = client["temps"]
        query = Query(
            my_database,
            selector={
                '_id': {'$gt': 0},
                'l': location,
                'd': dt.datetime.now().strftime("%m-%d-%Y"),
            },
            fields=['temp', 't', 'd'],
            sort=[{'t': 'asc'}],
        )
        # NOTE(review): skip=5 presumably drops the first five matches of the
        # day — confirm this is intended.
        docs = query(limit=1000, skip=5)['docs']
    finally:
        # Close the session even when the query fails; the documents are
        # already in memory at this point.
        client.disconnect()

    # Naming the columns keeps 'temp', 't' and 'd' present even when the
    # query matched nothing, so callers can index them unconditionally.
    df = pd.DataFrame(docs, columns=['temp', 't', 'd'])
    if not docs:
        # max()/min() raise ValueError on an empty sequence.
        return "no temperature readings found for " + location + " today", df

    temps = [doc['temp'] for doc in docs]
    message = (
        "the highest temp in the " + location + " is: " + str(max(temps))
        + " the lowest " + str(min(temps))
    )
    return message, df
# Fetch today's readings and draw them as a bar chart, one bar per reading.
msg1, values = read_high_low_temp("Backyard")
g = sns.catplot(x='t', y='temp', data=values, kind='bar', color="darkblue",
                height=8.27, aspect=11.7/8.27)
print("the minimum temp is:", values['temp'].min(), " the maximum temp is:", values['temp'].max())
g.set(xlabel='Time', ylabel='Temperature')
# Zoom the y axis to one unit beyond the observed range so small differences
# between bars stay visible.
plt.ylim(values['temp'].min()-1, values['temp'].max()+1)
# Keep every 10th tick label and rotate it. This has to run before savefig,
# otherwise the saved image still carries every label.
g.set_xticklabels(step=10, rotation=45)
plt.savefig("2021-01-01-temperature graph.png")