如何将 Exchange 的 FileAttachments 传递给 pd.read_excel?
How to pass exchange FileAttachments to pd.read_excel?
我用 exchangelib 创建了一个过滤器,用来获取多封带有 .xlsx 附件的电子邮件。下一步是把这些附件读入一个 pd.DataFrame。
我在遍历过滤结果时尝试调用 pd.read_excel(),但无法把 attachment.content 传给它。
我尝试了几种写法,例如 pd.read_excel(attachment.content)、pd.read_excel(open(attachment.content,'rb'))。
下面是我最后一次使用 io.BytesIO 的尝试:
import pandas as pd
import exchangelib
from exchangelib import EWSTimeZone,EWSDateTime,FileAttachment,HTMLBody
import datetime
from dateutil.parser import parse
from ipywidgets import interact
from ipywidgets import interact_manual
import io
# Query the 'Delay' subfolder of the inbox for mails received between
# filterstart and filterende and try to load every attachment with
# pd.read_excel.
#   filterstart, filterende: strings parsed with the "%d.%m.%Y %H:%M" format.
#   location: accepted but not referenced anywhere in the visible body.
# No return statement is visible; the result only lives in the local `outages`.
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# it is reproduced exactly as posted.
def get_outages(filterstart,filterende,location):
credentials = exchangelib.Credentials('my.user@provider.com', 'passwd')
account = exchangelib.Account('my.user@provider.com', credentials=credentials, autodiscover=True)
# Time zone object used below to localize both filter bounds.
tz = EWSTimeZone.localzone()
myfolder_delay = account.inbox/'Delay'
# NOTE(review): this binds the DataFrame class itself, not an empty instance
# (that would be pd.DataFrame()).
outages=pd.DataFrame
filterstart=datetime.datetime.strptime(filterstart,"%d.%m.%Y %H:%M")
filterende=datetime.datetime.strptime(filterende,"%d.%m.%Y %H:%M")
# Only the end bound is shifted one hour later; the matching shift of the
# start bound is commented out.
#filterstart=filterstart+datetime.timedelta(hours=1)
filterende=filterende+datetime.timedelta(hours=1)
# NOTE(review): the parentheses on the next line are unbalanced as posted;
# presumably the two localized bounds were meant to be wrapped in a single
# tuple for datetime_received__range - confirm against the asker's real code.
# The name `filter` also shadows the Python builtin.
filter = myfolder_delay.filter(datetime_received__range=tz.localize(EWSDateTime(filterstart.year, filterstart.month, filterstart.day, filterstart.hour, filterstart.minute)), tz.localize(EWSDateTime(filterende.year, filterende.month, filterende.day, filterende.hour, filterende.minute))))
for item in filter:
print(item.subject)
for attachment in item.attachments:
stream_str = io.BytesIO(attachment.content)
# NOTE(review): getvalue() hands the raw bytes back, so read_excel receives
# bytes rather than the BytesIO buffer built on the previous line; this is the
# call that leads to the ValueError quoted below.
# `outages` is rebound on every pass, so only the last attachment read is kept.
outages=pd.read_excel(stream_str.getvalue(),engine='xlrd')
# Wrap get_outages in an ipywidgets interact_manual control, seeding the three
# parameters with a one-day window on 11.07.2018 and a placeholder location.
interact_manual(get_outages, filterstart='11.07.2018 00:00',
filterende='11.07.2018 23:59',location='Location')
**ValueError**
.
.
.
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\excel.py in __init__(self, io, **kwds)
394 self.book = xlrd.open_workbook(self._io)
395 else:
--> 396 raise ValueError('Must explicitly set engine if not passing in'
397 ' buffer or path for io.')
398
ValueError: Must explicitly set engine if not passing in buffer or path for io.
read_excel() 需要一个类文件对象或文件路径,但 attachment.content 是一个 bytes 对象。
您可以先将内容写入文件,再把该文件的路径传给 read_excel();或者将内容包装成 BytesIO 对象后直接传入。
类似下面的代码应该可以工作(未经测试):
from io import BytesIO
import pandas as pd
# Wrap the attachment's raw bytes in an in-memory binary buffer so that
# read_excel is given a file-like object instead of a bytes value.
pd.read_excel(BytesIO(attachment.content))
我用 exchangelib 创建了一个过滤器,用来获取多封带有 .xlsx 附件的电子邮件。下一步是把这些附件读入一个 pd.DataFrame。
我在遍历过滤结果时尝试调用 pd.read_excel(),但无法把 attachment.content 传给它。
我尝试了几种写法,例如 pd.read_excel(attachment.content)、pd.read_excel(open(attachment.content,'rb'))。
下面是我最后一次使用 io.BytesIO 的尝试:
import pandas as pd
import exchangelib
from exchangelib import EWSTimeZone,EWSDateTime,FileAttachment,HTMLBody
import datetime
from dateutil.parser import parse
from ipywidgets import interact
from ipywidgets import interact_manual
import io
# Query the 'Delay' subfolder of the inbox for mails received between
# filterstart and filterende and try to load every attachment with
# pd.read_excel.
#   filterstart, filterende: strings parsed with the "%d.%m.%Y %H:%M" format.
#   location: accepted but not referenced anywhere in the visible body.
# No return statement is visible; the result only lives in the local `outages`.
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# it is reproduced exactly as posted.
def get_outages(filterstart,filterende,location):
credentials = exchangelib.Credentials('my.user@provider.com', 'passwd')
account = exchangelib.Account('my.user@provider.com', credentials=credentials, autodiscover=True)
# Time zone object used below to localize both filter bounds.
tz = EWSTimeZone.localzone()
myfolder_delay = account.inbox/'Delay'
# NOTE(review): this binds the DataFrame class itself, not an empty instance
# (that would be pd.DataFrame()).
outages=pd.DataFrame
filterstart=datetime.datetime.strptime(filterstart,"%d.%m.%Y %H:%M")
filterende=datetime.datetime.strptime(filterende,"%d.%m.%Y %H:%M")
# Only the end bound is shifted one hour later; the matching shift of the
# start bound is commented out.
#filterstart=filterstart+datetime.timedelta(hours=1)
filterende=filterende+datetime.timedelta(hours=1)
# NOTE(review): the parentheses on the next line are unbalanced as posted;
# presumably the two localized bounds were meant to be wrapped in a single
# tuple for datetime_received__range - confirm against the asker's real code.
# The name `filter` also shadows the Python builtin.
filter = myfolder_delay.filter(datetime_received__range=tz.localize(EWSDateTime(filterstart.year, filterstart.month, filterstart.day, filterstart.hour, filterstart.minute)), tz.localize(EWSDateTime(filterende.year, filterende.month, filterende.day, filterende.hour, filterende.minute))))
for item in filter:
print(item.subject)
for attachment in item.attachments:
stream_str = io.BytesIO(attachment.content)
# NOTE(review): getvalue() hands the raw bytes back, so read_excel receives
# bytes rather than the BytesIO buffer built on the previous line; this is the
# call that leads to the ValueError quoted below.
# `outages` is rebound on every pass, so only the last attachment read is kept.
outages=pd.read_excel(stream_str.getvalue(),engine='xlrd')
# Wrap get_outages in an ipywidgets interact_manual control, seeding the three
# parameters with a one-day window on 11.07.2018 and a placeholder location.
interact_manual(get_outages, filterstart='11.07.2018 00:00',
filterende='11.07.2018 23:59',location='Location')
**ValueError**
.
.
.
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\excel.py in __init__(self, io, **kwds)
394 self.book = xlrd.open_workbook(self._io)
395 else:
--> 396 raise ValueError('Must explicitly set engine if not passing in'
397 ' buffer or path for io.')
398
ValueError: Must explicitly set engine if not passing in buffer or path for io.
read_excel() 需要一个类文件对象或文件路径,但 attachment.content 是一个 bytes 对象。
您可以先将内容写入文件,再把该文件的路径传给 read_excel();或者将内容包装成 BytesIO 对象后直接传入。
类似下面的代码应该可以工作(未经测试):
from io import BytesIO
import pandas as pd
# Wrap the attachment's raw bytes in an in-memory binary buffer so that
# read_excel is given a file-like object instead of a bytes value.
pd.read_excel(BytesIO(attachment.content))