处理数千封电子邮件时 Outlook 内存不足,如何释放其 COM 对象占用的内存?
How to free memory of COM objects in Outlook, which runs out of memory when processing several thousand emails?
我有一个 Python 脚本可以获取收件箱文件夹中所有电子邮件的电子邮件 ID。
但是,当达到几千封电子邮件时,Outlook 会抛出内存不足的异常。
异常:
Printing emails...
Traceback (most recent call last):
File "print_emails.py", line 53, in main
print_emails()
File "print_emails.py", line 43, in print_emails
primary_emails, primary_email_ids = get_emails_and_ids(primary_source_folder)
File "print_emails.py", line 29, in get_emails_and_ids
property_accessor = item.PropertyAccessor
File "C:\Program Files\Python38\lib\site-packages\win32com\client\__init__.py", line 474, in __getattr__
return self._ApplyTypes_(*args)
File "C:\Program Files\Python38\lib\site-packages\win32com\client\__init__.py", line 467, in _ApplyTypes_
self._oleobj_.InvokeTypes(dispid, 0, wFlags, retType, argTypes, *args),
pywintypes.com_error: (-2147352567, 'Exception occurred.', (4096, 'Microsoft Outlook', 'Out of memory or system resources. Close some windows or programs and try again.', None, 0, -2147024882), None)
Press enter to exit...
我尝试了两种不同的方法:只迭代一次(get_emails_and_ids),以及迭代两次(先 get_emails,再 get_email_ids)。
问题似乎与数千次访问 PropertyAccessor 有关。如果我只用 get_emails 获取电子邮件,它可以很好地处理 38,000 封电子邮件;但是当我开始通过 PropertyAccessor 数千次获取 ID 时,Outlook 就会耗尽内存。我是否必须释放用过的属性访问器?
安装:
pip install -U pypiwin32
代码:
#!/usr/bin/env python
from typing import Any, List, Tuple, Set
import traceback
import win32com.client
# MAPI property tag for the Internet Message-ID, in the schema-URL form that
# is passed to PropertyAccessor.GetProperty() by the functions below.
PidTagInternetMessageId = "http://schemas.microsoft.com/mapi/proptag/0x1035001F"
# Key used to look up the account's root folder in namespace.Folders.
primary_account_email = "user@domain.tld"
# Name of the folder inside that account whose items are scanned.
primary_source_folder_name = "Inbox"
def get_emails(folder) -> List:
    """Return every item of ``folder.Items`` whose type name contains ``_MailItem``.

    Items whose type string lacks ``_MailItem`` are skipped. The returned
    list keeps a reference to each matching item.
    """
    mail_items = []
    for candidate in folder.Items:
        type_name = str(type(candidate))
        if "_MailItem" in type_name:
            mail_items.append(candidate)
    return mail_items
def get_email_ids(emails) -> Set[str]:
    """Collect the non-empty Internet Message-IDs of ``emails`` into a set.

    Each element's ``PropertyAccessor.GetProperty`` is queried with
    ``PidTagInternetMessageId``; results of length zero are left out.
    """
    message_ids = set()
    for email in emails:
        message_id = email.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        if len(message_id) > 0:
            message_ids.add(message_id)
    return message_ids
def get_emails_and_ids(folder) -> Tuple[List, Set[str]]:
    """Collect the mail items of ``folder`` and their message IDs in one pass.

    Args:
        folder: Object exposing an iterable ``Items`` collection.

    Returns:
        ``(emails, email_ids)``: the list of items whose type name contains
        ``_MailItem``, and the set of their non-empty Internet Message-IDs.

    Note:
        Every matching item stays referenced by the returned list.
    """
    emails = []
    email_ids = set()
    for item in folder.Items:
        if "_MailItem" not in str(type(item)):
            continue
        emails.append(item)
        email_id = item.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        # Skip empty IDs so the set agrees with what get_email_ids() builds.
        if email_id:
            email_ids.add(email_id)
    return emails, email_ids
def print_emails() -> None:
    """Print the set of message IDs found in the primary account's source folder.

    Obtains the Outlook application object through ``win32com``, opens its
    MAPI namespace, walks ``primary_account_email`` ->
    ``primary_source_folder_name`` and prints the ID set returned by
    ``get_emails_and_ids``.
    """
    application = win32com.client.gencache.EnsureDispatch("Outlook.Application")
    mapi = application.GetNamespace("MAPI")
    account_root = mapi.Folders[primary_account_email]
    account_folders = account_root.Folders
    source_folder = account_folders[primary_source_folder_name]
    # Single-pass variant; the two-pass alternative is get_emails() followed
    # by get_email_ids().
    _emails, message_ids = get_emails_and_ids(source_folder)
    print(message_ids)
def main(*args: Any) -> None:
    """Run ``print_emails()``, report any failure, then wait for Enter.

    Args:
        *args: Accepted for call compatibility and ignored.
    """
    try:
        print("Printing emails...")
        print_emails()
        print()
        print("Done.")
    except Exception:
        # Top-level boundary: show the traceback and still reach the prompt
        # below instead of terminating immediately.
        traceback.print_exc()

    # Runs after success and failure alike.
    print()
    print("Press enter to exit...")
    input()


if __name__ == "__main__":
    main()
尝试把“for”循环替换为从 1 到 Items.Count 的索引循环(用 Items(i) 取得每个项目)。我不确定 Python 的情况,但在其他语言中,“foreach”循环往往会一直持有集合中所有已访问项目的引用,直到循环退出。
我的解决方案是不把所有电子邮件(MailItem 对象)存储在一个列表中。如果确实需要把电子邮件放进列表,就应该在处理完每封邮件后立即用 list.pop() 或其他方式把它从列表中移除。既使用 PropertyAccessor 又把电子邮件保留在列表中,会使 Outlook 一直把这些对象留在内存里,最终导致 Outlook 内存不足。
我去掉了 get_emails 和 get_emails_and_ids 函数,并重写了 get_email_ids 函数,使它只存储电子邮件的消息 ID,而不把电子邮件对象存进列表:
def get_email_ids(folder) -> Tuple[Set[str], int]:
    """Gather the non-empty Internet Message-IDs of the mail items in ``folder``.

    Only the ID strings are kept; no item is stored. A progress line is
    printed after every 500 mail items.

    Returns:
        ``(ids, count)`` where ``count`` is the number of mail items seen,
        including those whose ID was empty.
    """
    collected = set()
    entries = folder.Items
    mail_count = 0
    for entry in entries:
        if "_MailItem" not in str(type(entry)):
            continue
        mail_count += 1
        accessor = entry.PropertyAccessor
        message_id = accessor.GetProperty(PidTagInternetMessageId)
        if len(message_id) > 0:
            collected.add(message_id)
        if mail_count % 500 == 0:
            print(f" Retrieved {mail_count} email IDs.")
    return collected, mail_count
我写的其他脚本现在快多了,大约 10 分钟就能完成。以前每秒只能处理几封电子邮件,需要几个小时。
我有一个 Python 脚本可以获取收件箱文件夹中所有电子邮件的电子邮件 ID。 但是,当达到几千封电子邮件时,Outlook 会抛出内存不足的异常。
异常:
Printing emails...
Traceback (most recent call last):
File "print_emails.py", line 53, in main
print_emails()
File "print_emails.py", line 43, in print_emails
primary_emails, primary_email_ids = get_emails_and_ids(primary_source_folder)
File "print_emails.py", line 29, in get_emails_and_ids
property_accessor = item.PropertyAccessor
File "C:\Program Files\Python38\lib\site-packages\win32com\client\__init__.py", line 474, in __getattr__
return self._ApplyTypes_(*args)
File "C:\Program Files\Python38\lib\site-packages\win32com\client\__init__.py", line 467, in _ApplyTypes_
self._oleobj_.InvokeTypes(dispid, 0, wFlags, retType, argTypes, *args),
pywintypes.com_error: (-2147352567, 'Exception occurred.', (4096, 'Microsoft Outlook', 'Out of memory or system resources. Close some windows or programs and try again.', None, 0, -2147024882), None)
Press enter to exit...
我尝试了两种不同的方法:只迭代一次(get_emails_and_ids),以及迭代两次(先 get_emails,再 get_email_ids)。
问题似乎与数千次访问 PropertyAccessor 有关。如果我只用 get_emails 获取电子邮件,它可以很好地处理 38,000 封电子邮件;但是当我开始通过 PropertyAccessor 数千次获取 ID 时,Outlook 就会耗尽内存。我是否必须释放用过的属性访问器?
安装:
pip install -U pypiwin32
代码:
#!/usr/bin/env python
from typing import Any, List, Tuple, Set
import traceback
import win32com.client
# MAPI property tag for the Internet Message-ID, in the schema-URL form that
# is passed to PropertyAccessor.GetProperty() by the functions below.
PidTagInternetMessageId = "http://schemas.microsoft.com/mapi/proptag/0x1035001F"
# Key used to look up the account's root folder in namespace.Folders.
primary_account_email = "user@domain.tld"
# Name of the folder inside that account whose items are scanned.
primary_source_folder_name = "Inbox"
def get_emails(folder) -> List:
    """Return every item of ``folder.Items`` whose type name contains ``_MailItem``.

    Items whose type string lacks ``_MailItem`` are skipped. The returned
    list keeps a reference to each matching item.
    """
    mail_items = []
    for candidate in folder.Items:
        type_name = str(type(candidate))
        if "_MailItem" in type_name:
            mail_items.append(candidate)
    return mail_items
def get_email_ids(emails) -> Set[str]:
    """Collect the non-empty Internet Message-IDs of ``emails`` into a set.

    Each element's ``PropertyAccessor.GetProperty`` is queried with
    ``PidTagInternetMessageId``; results of length zero are left out.
    """
    message_ids = set()
    for email in emails:
        message_id = email.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        if len(message_id) > 0:
            message_ids.add(message_id)
    return message_ids
def get_emails_and_ids(folder) -> Tuple[List, Set[str]]:
    """Collect the mail items of ``folder`` and their message IDs in one pass.

    Args:
        folder: Object exposing an iterable ``Items`` collection.

    Returns:
        ``(emails, email_ids)``: the list of items whose type name contains
        ``_MailItem``, and the set of their non-empty Internet Message-IDs.

    Note:
        Every matching item stays referenced by the returned list.
    """
    emails = []
    email_ids = set()
    for item in folder.Items:
        if "_MailItem" not in str(type(item)):
            continue
        emails.append(item)
        email_id = item.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        # Skip empty IDs so the set agrees with what get_email_ids() builds.
        if email_id:
            email_ids.add(email_id)
    return emails, email_ids
def print_emails() -> None:
    """Print the set of message IDs found in the primary account's source folder.

    Obtains the Outlook application object through ``win32com``, opens its
    MAPI namespace, walks ``primary_account_email`` ->
    ``primary_source_folder_name`` and prints the ID set returned by
    ``get_emails_and_ids``.
    """
    application = win32com.client.gencache.EnsureDispatch("Outlook.Application")
    mapi = application.GetNamespace("MAPI")
    account_root = mapi.Folders[primary_account_email]
    account_folders = account_root.Folders
    source_folder = account_folders[primary_source_folder_name]
    # Single-pass variant; the two-pass alternative is get_emails() followed
    # by get_email_ids().
    _emails, message_ids = get_emails_and_ids(source_folder)
    print(message_ids)
def main(*args: Any) -> None:
    """Run ``print_emails()``, report any failure, then wait for Enter.

    Args:
        *args: Accepted for call compatibility and ignored.
    """
    try:
        print("Printing emails...")
        print_emails()
        print()
        print("Done.")
    except Exception:
        # Top-level boundary: show the traceback and still reach the prompt
        # below instead of terminating immediately.
        traceback.print_exc()

    # Runs after success and failure alike.
    print()
    print("Press enter to exit...")
    input()


if __name__ == "__main__":
    main()
尝试把“for”循环替换为从 1 到 Items.Count 的索引循环(用 Items(i) 取得每个项目)。我不确定 Python 的情况,但在其他语言中,“foreach”循环往往会一直持有集合中所有已访问项目的引用,直到循环退出。
我的解决方案是不把所有电子邮件(MailItem 对象)存储在一个列表中。如果确实需要把电子邮件放进列表,就应该在处理完每封邮件后立即用 list.pop() 或其他方式把它从列表中移除。既使用 PropertyAccessor 又把电子邮件保留在列表中,会使 Outlook 一直把这些对象留在内存里,最终导致 Outlook 内存不足。
我去掉了 get_emails 和 get_emails_and_ids 函数,并重写了 get_email_ids 函数,使它只存储电子邮件的消息 ID,而不把电子邮件对象存进列表:
def get_email_ids(folder) -> Tuple[Set[str], int]:
    """Gather the non-empty Internet Message-IDs of the mail items in ``folder``.

    Only the ID strings are kept; no item is stored. A progress line is
    printed after every 500 mail items.

    Returns:
        ``(ids, count)`` where ``count`` is the number of mail items seen,
        including those whose ID was empty.
    """
    collected = set()
    entries = folder.Items
    mail_count = 0
    for entry in entries:
        if "_MailItem" not in str(type(entry)):
            continue
        mail_count += 1
        accessor = entry.PropertyAccessor
        message_id = accessor.GetProperty(PidTagInternetMessageId)
        if len(message_id) > 0:
            collected.add(message_id)
        if mail_count % 500 == 0:
            print(f" Retrieved {mail_count} email IDs.")
    return collected, mail_count
我写的其他脚本现在快多了,大约 10 分钟就能完成。以前每秒只能处理几封电子邮件,需要几个小时。