Python 代码运行太慢了,我需要想办法为 Python 进程分配更多内存吗?
Python code is running too slow, do I need to find a way to allocate more memory to python process?
我在一台 64 位 Windows 10 笔记本电脑(12 GB 内存)上使用 64 位的 Python 3.6.3。
我有一段用于解压压缩包 (tar.gz) 的 Python 代码。用这段代码解压需要很长时间(约 1.5 小时),但如果直接用 7zip 解压,只需要不到 5 分钟,所以我猜测是有什么东西限制了 Python 的处理能力。
我正在尝试运行这份代码:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/udacity/1_notmnist.ipynb
为了方便,这里给出解压的具体命令。
import sys
import tarfile

# Flush pending output first: the extraction below can run for a long time.
sys.stdout.flush()
# The context manager closes the archive even if extraction raises.
with tarfile.open(filename) as tar:
    tar.extractall(data_root)
这是完整的代码。
from __future__ import print_function
import os
import sys
import tarfile
from six.moves.urllib.request import urlretrieve
# NOTE(review): a comment here used to mention configuring the matplotlib
# backend for inline plotting in IPython, but no such statement is present in
# this listing.
# Base URL; archive file names are appended to it when downloading.
url = 'https://commondatastorage.googleapis.com/books1000/'
# Last integer percentage written by download_progress_hook (None until the
# first report).
last_percent_reported = None
data_root = '.' # Change me to store data elsewhere
def download_progress_hook(count, blockSize, totalSize):
    """Report download progress on stdout; intended as a ``urlretrieve`` reporthook.

    Mostly useful for users with slow internet connections. Writes the
    percentage whenever it is a multiple of 5 and a single dot for every other
    newly reached percentage. Nothing is written when the integer percentage
    is unchanged since the previous call.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block in bytes.
        totalSize: Total download size in bytes. Values <= 0 are treated as
            "size unknown" and produce no output.
    """
    global last_percent_reported

    if totalSize <= 0:
        # urlretrieve may pass -1 when the server does not report a size;
        # 0 would divide by zero. Either way no percentage can be computed.
        return

    # The final block can overshoot the real size, so cap the value at 100.
    percent = min(100, int(count * blockSize * 100 / totalSize))

    if last_percent_reported != percent:
        if percent % 5 == 0:
            sys.stdout.write("%s%%" % percent)
        else:
            sys.stdout.write(".")
        sys.stdout.flush()
        last_percent_reported = percent
def maybe_download(filename, expected_bytes, force=False):
    """Download a file into ``data_root`` if not present, then verify its size.

    Args:
        filename: File name, appended to ``url`` for the download and joined
            onto ``data_root`` for the local path.
        expected_bytes: Exact size in bytes the local file must have.
        force: When true, download even if the local file already exists.

    Returns:
        The local path of the verified file.

    Raises:
        Exception: If the local file size differs from ``expected_bytes``.
    """
    dest_filename = os.path.join(data_root, filename)

    if force or not os.path.exists(dest_filename):
        print('Attempting to download:', filename)
        # The return value is not needed: the destination path is already known.
        urlretrieve(url + filename, dest_filename, reporthook=download_progress_hook)
        print('\nDownload Complete!')

    if os.path.getsize(dest_filename) != expected_bytes:
        raise Exception(
            'Failed to verify ' + dest_filename + '. Can you get to it with a browser?')

    print('Found and verified', dest_filename)
    return dest_filename
# Fetch the training and test archives (the download is skipped when the file
# is already on disk) and check each against its expected size in bytes.
train_filename = maybe_download('notMNIST_large.tar.gz', 247336696)
test_filename = maybe_download('notMNIST_small.tar.gz', 8458043)
# Number of class folders maybe_extract expects inside each extracted archive.
num_classes = 10
def maybe_extract(filename, force=False):
    """Extract a ``.tar.gz`` archive into ``data_root`` and list its class folders.

    The target directory name is ``filename`` with its two extensions removed.
    Extraction is skipped when that directory already exists, unless ``force``
    is true.

    Args:
        filename: Path of the ``.tar.gz`` archive.
        force: When true, extract even if the target directory exists.

    Returns:
        Sorted list of paths of the sub-directories found in the target
        directory.

    Raises:
        Exception: If the number of sub-directories differs from
            ``num_classes``.
    """
    root = os.path.splitext(os.path.splitext(filename)[0])[0]  # remove .tar.gz
    if os.path.isdir(root) and not force:
        # You may override by setting force=True.
        print('%s already present - Skipping extraction of %s.' % (root, filename))
    else:
        print('Extracting data for %s. This may take a while. Please wait.' % root)
        # Make the message visible before the long-running extraction starts.
        sys.stdout.flush()
        # The context manager closes the archive even if extraction raises.
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; only use it on archives from a trusted source.
        with tarfile.open(filename) as tar:
            tar.extractall(data_root)

    data_folders = [
        os.path.join(root, d) for d in sorted(os.listdir(root))
        if os.path.isdir(os.path.join(root, d))]
    if len(data_folders) != num_classes:
        raise Exception(
            'Expected %d folders, one per class. Found %d instead.' % (
                num_classes, len(data_folders)))
    print(data_folders)
    return data_folders
# Unpack both archives (skipped when the target directory already exists) and
# collect the per-class folder paths.
train_folders = maybe_extract(train_filename)
test_folders = maybe_extract(test_filename)
我在一台 64 位 Windows 10 笔记本电脑(12 GB 内存)上使用 64 位的 Python 3.6.3。
tarfile 模块是用纯 Python 实现的,而 7zip 是用 C++ 实现的。
Python 的 tarfile 比 7zip 慢 60/5 = 12 倍。
解压大量文件通常都很慢。
老实说,tarfile 已经做得不错了:要解压的文件超过 500000 个。
我在一台 64 位 Windows 10 笔记本电脑(12 GB 内存)上使用 64 位的 Python 3.6.3。
我有一段用于解压压缩包 (tar.gz) 的 Python 代码。用这段代码解压需要很长时间(约 1.5 小时),但如果直接用 7zip 解压,只需要不到 5 分钟,所以我猜测是有什么东西限制了 Python 的处理能力。
我正在尝试运行这份代码:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/udacity/1_notmnist.ipynb
为了方便,这里给出解压的具体命令。
import sys
import tarfile

# Flush pending output first: the extraction below can run for a long time.
sys.stdout.flush()
# The context manager closes the archive even if extraction raises.
with tarfile.open(filename) as tar:
    tar.extractall(data_root)
这是完整的代码。
from __future__ import print_function
import os
import sys
import tarfile
from six.moves.urllib.request import urlretrieve
# NOTE(review): a comment here used to mention configuring the matplotlib
# backend for inline plotting in IPython, but no such statement is present in
# this listing.
# Base URL; archive file names are appended to it when downloading.
url = 'https://commondatastorage.googleapis.com/books1000/'
# Last integer percentage written by download_progress_hook (None until the
# first report).
last_percent_reported = None
data_root = '.' # Change me to store data elsewhere
def download_progress_hook(count, blockSize, totalSize):
    """Report download progress on stdout; intended as a ``urlretrieve`` reporthook.

    Mostly useful for users with slow internet connections. Writes the
    percentage whenever it is a multiple of 5 and a single dot for every other
    newly reached percentage. Nothing is written when the integer percentage
    is unchanged since the previous call.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block in bytes.
        totalSize: Total download size in bytes. Values <= 0 are treated as
            "size unknown" and produce no output.
    """
    global last_percent_reported

    if totalSize <= 0:
        # urlretrieve may pass -1 when the server does not report a size;
        # 0 would divide by zero. Either way no percentage can be computed.
        return

    # The final block can overshoot the real size, so cap the value at 100.
    percent = min(100, int(count * blockSize * 100 / totalSize))

    if last_percent_reported != percent:
        if percent % 5 == 0:
            sys.stdout.write("%s%%" % percent)
        else:
            sys.stdout.write(".")
        sys.stdout.flush()
        last_percent_reported = percent
def maybe_download(filename, expected_bytes, force=False):
    """Download a file into ``data_root`` if not present, then verify its size.

    Args:
        filename: File name, appended to ``url`` for the download and joined
            onto ``data_root`` for the local path.
        expected_bytes: Exact size in bytes the local file must have.
        force: When true, download even if the local file already exists.

    Returns:
        The local path of the verified file.

    Raises:
        Exception: If the local file size differs from ``expected_bytes``.
    """
    dest_filename = os.path.join(data_root, filename)

    if force or not os.path.exists(dest_filename):
        print('Attempting to download:', filename)
        # The return value is not needed: the destination path is already known.
        urlretrieve(url + filename, dest_filename, reporthook=download_progress_hook)
        print('\nDownload Complete!')

    if os.path.getsize(dest_filename) != expected_bytes:
        raise Exception(
            'Failed to verify ' + dest_filename + '. Can you get to it with a browser?')

    print('Found and verified', dest_filename)
    return dest_filename
# Fetch the training and test archives (the download is skipped when the file
# is already on disk) and check each against its expected size in bytes.
train_filename = maybe_download('notMNIST_large.tar.gz', 247336696)
test_filename = maybe_download('notMNIST_small.tar.gz', 8458043)
# Number of class folders maybe_extract expects inside each extracted archive.
num_classes = 10
def maybe_extract(filename, force=False):
    """Extract a ``.tar.gz`` archive into ``data_root`` and list its class folders.

    The target directory name is ``filename`` with its two extensions removed.
    Extraction is skipped when that directory already exists, unless ``force``
    is true.

    Args:
        filename: Path of the ``.tar.gz`` archive.
        force: When true, extract even if the target directory exists.

    Returns:
        Sorted list of paths of the sub-directories found in the target
        directory.

    Raises:
        Exception: If the number of sub-directories differs from
            ``num_classes``.
    """
    root = os.path.splitext(os.path.splitext(filename)[0])[0]  # remove .tar.gz
    if os.path.isdir(root) and not force:
        # You may override by setting force=True.
        print('%s already present - Skipping extraction of %s.' % (root, filename))
    else:
        print('Extracting data for %s. This may take a while. Please wait.' % root)
        # Make the message visible before the long-running extraction starts.
        sys.stdout.flush()
        # The context manager closes the archive even if extraction raises.
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; only use it on archives from a trusted source.
        with tarfile.open(filename) as tar:
            tar.extractall(data_root)

    data_folders = [
        os.path.join(root, d) for d in sorted(os.listdir(root))
        if os.path.isdir(os.path.join(root, d))]
    if len(data_folders) != num_classes:
        raise Exception(
            'Expected %d folders, one per class. Found %d instead.' % (
                num_classes, len(data_folders)))
    print(data_folders)
    return data_folders
# Unpack both archives (skipped when the target directory already exists) and
# collect the per-class folder paths.
train_folders = maybe_extract(train_filename)
test_folders = maybe_extract(test_filename)
我在一台 64 位 Windows 10 笔记本电脑(12 GB 内存)上使用 64 位的 Python 3.6.3。
tarfile 模块是用纯 Python 实现的,而 7zip 是用 C++ 实现的。
Python 的 tarfile 比 7zip 慢 60/5 = 12 倍。
解压大量文件通常都很慢。
老实说,tarfile 已经做得不错了:要解压的文件超过 500000 个。