Unexpected error when loading the model: problem in predictor - ModuleNotFoundError: No module named 'torchvision'
Unexpected error when loading the model: problem in predictor - ModuleNotFoundError: No module named 'torchvision'
我一直在尝试通过虚拟机实例上的控制台将我的模型部署到 AI 平台进行预测,但收到了错误消息:“(gcloud.beta.ai-platform.versions.create) 创建版本失败。检测到模型有误,错误信息为:"Failed to load model: Unexpected error when loading the model: problem in predictor - ModuleNotFoundError: No module named 'torchvision' (Error code: 0)"”
我需要同时包含 torch
和 torchvision
。我按照这个问题中的步骤操作,但无法获取用户 gogasca 指向的文件。我尝试从 PyTorch 网站下载这个 .whl 文件并上传到我的云存储,但得到了同样的错误:没有模块 torchvision
,尽管这个版本应该同时包含 torch 和 torchvision。我还尝试使用这里列出的 Cloud AI 兼容包,但它们不包括 torchvision
。
我尝试在 --package-uris
参数中为 torch
和 torchvision
分别指定两个单独的 .whl 文件(它们都位于我的云存储中),但随后收到了超出内存容量的错误。这很奇怪,因为它们的总大小只有约 130 MB。导致缺少 torchvision
这一错误的命令示例如下所示:
# Create version `version_1` of model `online_pred_1` using a custom prediction class.
# Note: --package-uris lists the custom code package and a CPU torch wheel only;
# no torchvision wheel is passed.
gcloud beta ai-platform versions create version_1 \
--model online_pred_1 \
--runtime-version 1.15 \
--python-version 3.7 \
--origin gs://BUCKET/model-dir \
--package-uris gs://BUCKET/staging-dir/my_package-0.1.tar.gz,gs://BUCKET/torchvision-dir/torch-1.4.0+cpu-cp37-cp37m-linux_x86_64.whl \
--prediction-class predictor.MyPredictor
我已经尝试了从不同来源获得的 .whl 文件的各种组合,但要么出现找不到模块的错误,要么内存不足。我不明白在这种情况下各模块是如何交互的,也不明白为什么编译器认为没有这样的模块。我该如何解决这个问题?或者,我该如何自己编译一个同时包含 torch
和 torchvision
的包。能否请您给出详细的答案,因为我对包管理和 bash 脚本不是很熟悉。
这是我使用的代码,torch_model.py
:
from torch import nn
class EthnicityClassifier44(nn.Module):
    """Small convolutional classifier with four conv/pool stages.

    Each stage is a size-preserving ``Conv2d`` followed by ReLU and a 2x2
    max-pool that halves the spatial size.  The flattened features then go
    through three fully connected layers.  ``fc1`` expects ``8 * 8 * 128``
    input features, i.e. an 8x8 map after the four pools (for example a
    128x128 input image).

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Attribute names are part of the saved state (state_dict keys /
        # pickled model), so they are kept exactly as they were.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=1, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv22 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU(inplace=False)
        self.fc1 = nn.Linear(8 * 8 * 128, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw (pre-softmax) logits of shape ``(batch, num_classes)``.

        Args:
            x: Batch of 3-channel images.  The spatial size must reduce to
                8x8 after four halvings so that the flattened features match
                ``fc1`` (e.g. ``(batch, 3, 128, 128)``).
        """
        x = self.maxpool1(self.relu(self.conv1(x)))
        x = self.maxpool2(self.relu(self.conv22(x)))
        x = self.maxpool3(self.relu(self.conv3(x)))
        x = self.maxpool4(self.relu(self.conv4(x)))
        # Flatten everything except the batch dimension for the dense layers.
        x = x.view(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc4(x)
这是 predictor.py
:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
import torch
import torchvision
from torchvision import transforms
from torch.nn import functional as F
from PIL import Image
from sklearn.externals import joblib
import numpy as np
import os
import torch_model
class MyPredictor(object):
    """Custom prediction routine: locate a face, crop it and classify it.

    Instances are meant to be created through :meth:`from_path`, which loads
    the classifier, the face detector and the torch device from joblib files.
    """

    def __init__(self, model, preprocessor, device):
        """Stores artifacts for prediction. Only initialized via `from_path`.

        Args:
            model: Callable classifier applied to the cropped face batch.
            preprocessor: Callable face detector; called with a PIL image and
                expected to return a ``(boxes, _)`` pair.
            device: Device the face batch is moved to before classification.
        """
        self._resnet = model
        self._mtcnn_mult = preprocessor
        self._device = device
        # Casts to float32, then to uint8, then converts to a tensor.
        self.get_std_tensor = transforms.Compose([
            np.float32,
            np.uint8,
            transforms.ToTensor(),
        ])
        self.tensor2pil = transforms.ToPILImage(mode='RGB')
        # Resizes the crop to 100x100, casts to float32, converts to a tensor.
        # NOTE(review): confirm 100x100 matches the input size the classifier
        # was trained with.
        self.trans_resnet = transforms.Compose([
            transforms.Resize((100, 100)),
            np.float32,
            transforms.ToTensor(),
        ])

    def predict(self, instances, **kwargs):
        """Classify the first detected face in one image.

        Args:
            instances: Array-like pixel data for a single image, convertible
                with ``np.asarray`` and ``PIL.Image.fromarray``.
                NOTE(review): presumably uint8 HxWx3 — confirm with the caller.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The softmax probabilities as a flat list of floats.

        Raises:
            ValueError: If the face detector returns no bounding boxes.
        """
        image = Image.fromarray(np.asarray(instances)).convert('RGB')
        pil_im_512 = transforms.Resize((512, 512))(image)

        boxes, _ = self._mtcnn_mult(pil_im_512)
        # Fail with a clear message instead of an opaque TypeError/IndexError
        # from `boxes[0]` when nothing was detected.
        if boxes is None or len(boxes) == 0:
            raise ValueError('no face detected in the input image')

        face_tensor = extract_face(pil_im_512, boxes[0], margin=40)
        # Channels-first -> channels-last before the numpy/PIL round trip.
        std_tensor = self.get_std_tensor(face_tensor.permute(1, 2, 0))
        cropped_pil_im = self.tensor2pil(std_tensor)
        face_tensor4d = self.trans_resnet(cropped_pil_im).unsqueeze(0)
        face_tensor4d = face_tensor4d.to(self._device)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            prediction = self._resnet(face_tensor4d)
        # `.cpu()` keeps the numpy conversion valid when the device is not CPU.
        preds = F.softmax(prediction, dim=1).detach().cpu().numpy().reshape(-1)
        print('probability of (class1, class2) = ({:.4f}, {:.4f})'.format(preds[0], preds[1]))
        return preds.tolist()

    @classmethod
    def from_path(cls, model_dir):
        """Build a predictor from the joblib artifacts stored in ``model_dir``.

        Args:
            model_dir: Directory containing the classifier, detector and
                device joblib files.

        Returns:
            A new ``MyPredictor`` instance.
        """
        # SECURITY: joblib.load unpickles arbitrary objects; only point this
        # at a model directory you trust.
        classifier = joblib.load(
            os.path.join(model_dir, 'class44_M40RefinedExtra_bin_no_norm_7860.joblib'))
        mtcnn_mult = joblib.load(os.path.join(model_dir, 'mtcnn_mult.joblib'))
        device = joblib.load(os.path.join(model_dir, 'device_cpu.joblib'))
        return cls(classifier, mtcnn_mult, device)
和setup.py
:
from setuptools import setup
# Install-time requirements declared for this package.
# NOTE(review): predictor.py imports torch and torchvision, but neither is listed here.
REQUIRED_PACKAGES = ['opencv-python-headless', 'facenet-pytorch']
# Packages predictor.py and torch_model.py as `my_package` version 0.1.
setup(
name="my_package",
version="0.1",
include_package_data=True,
scripts=["predictor.py", "torch_model.py"],
install_requires=REQUIRED_PACKAGES
)
解决方案是将以下包放入自定义预测代码的 setup.py
文件中:
REQUIRED_PACKAGES = ['torchvision==0.5.0', 'torch @ https://download.pytorch.org/whl/cpu/torch-1.4.0%2Bcpu-cp37-cp37m-linux_x86_64.whl', 'opencv-python', 'facenet-pytorch']
之后我又遇到了另一个与自定义类(class)实例化有关的问题,不过这篇文章对此解释得很好。最终我成功地将模型部署到了 AI 平台进行预测。
我一直在尝试通过虚拟机实例上的控制台将我的模型部署到 AI 平台进行预测,但收到了错误消息:“(gcloud.beta.ai-platform.versions.create) 创建版本失败。检测到模型有误,错误信息为:"Failed to load model: Unexpected error when loading the model: problem in predictor - ModuleNotFoundError: No module named 'torchvision' (Error code: 0)"”
我需要同时包含 torch
和 torchvision
。我按照这个问题中的步骤操作,但无法获取用户 gogasca 指向的文件。我尝试从 PyTorch 网站下载这个 .whl 文件并上传到我的云存储,但得到了同样的错误:没有模块 torchvision
,尽管这个版本应该同时包含 torch 和 torchvision。我还尝试使用这里列出的 Cloud AI 兼容包,但它们不包括 torchvision
。
我尝试在 --package-uris
参数中为 torch
和 torchvision
分别指定两个单独的 .whl 文件(它们都位于我的云存储中),但随后收到了超出内存容量的错误。这很奇怪,因为它们的总大小只有约 130 MB。导致缺少 torchvision
这一错误的命令示例如下所示:
# Create version `version_1` of model `online_pred_1` using a custom prediction class.
# Note: --package-uris lists the custom code package and a CPU torch wheel only;
# no torchvision wheel is passed.
gcloud beta ai-platform versions create version_1 \
--model online_pred_1 \
--runtime-version 1.15 \
--python-version 3.7 \
--origin gs://BUCKET/model-dir \
--package-uris gs://BUCKET/staging-dir/my_package-0.1.tar.gz,gs://BUCKET/torchvision-dir/torch-1.4.0+cpu-cp37-cp37m-linux_x86_64.whl \
--prediction-class predictor.MyPredictor
我已经尝试了从不同来源获得的 .whl 文件的各种组合,但要么出现找不到模块的错误,要么内存不足。我不明白在这种情况下各模块是如何交互的,也不明白为什么编译器认为没有这样的模块。我该如何解决这个问题?或者,我该如何自己编译一个同时包含 torch
和 torchvision
的包。能否请您给出详细的答案,因为我对包管理和 bash 脚本不是很熟悉。
这是我使用的代码,torch_model.py
:
from torch import nn
class EthnicityClassifier44(nn.Module):
    """Small convolutional classifier with four conv/pool stages.

    Each stage is a size-preserving ``Conv2d`` followed by ReLU and a 2x2
    max-pool that halves the spatial size.  The flattened features then go
    through three fully connected layers.  ``fc1`` expects ``8 * 8 * 128``
    input features, i.e. an 8x8 map after the four pools (for example a
    128x128 input image).

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Attribute names are part of the saved state (state_dict keys /
        # pickled model), so they are kept exactly as they were.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=1, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv22 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU(inplace=False)
        self.fc1 = nn.Linear(8 * 8 * 128, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw (pre-softmax) logits of shape ``(batch, num_classes)``.

        Args:
            x: Batch of 3-channel images.  The spatial size must reduce to
                8x8 after four halvings so that the flattened features match
                ``fc1`` (e.g. ``(batch, 3, 128, 128)``).
        """
        x = self.maxpool1(self.relu(self.conv1(x)))
        x = self.maxpool2(self.relu(self.conv22(x)))
        x = self.maxpool3(self.relu(self.conv3(x)))
        x = self.maxpool4(self.relu(self.conv4(x)))
        # Flatten everything except the batch dimension for the dense layers.
        x = x.view(x.shape[0], -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc4(x)
这是 predictor.py
:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
import torch
import torchvision
from torchvision import transforms
from torch.nn import functional as F
from PIL import Image
from sklearn.externals import joblib
import numpy as np
import os
import torch_model
class MyPredictor(object):
    """Custom prediction routine: locate a face, crop it and classify it.

    Instances are meant to be created through :meth:`from_path`, which loads
    the classifier, the face detector and the torch device from joblib files.
    """

    def __init__(self, model, preprocessor, device):
        """Stores artifacts for prediction. Only initialized via `from_path`.

        Args:
            model: Callable classifier applied to the cropped face batch.
            preprocessor: Callable face detector; called with a PIL image and
                expected to return a ``(boxes, _)`` pair.
            device: Device the face batch is moved to before classification.
        """
        self._resnet = model
        self._mtcnn_mult = preprocessor
        self._device = device
        # Casts to float32, then to uint8, then converts to a tensor.
        self.get_std_tensor = transforms.Compose([
            np.float32,
            np.uint8,
            transforms.ToTensor(),
        ])
        self.tensor2pil = transforms.ToPILImage(mode='RGB')
        # Resizes the crop to 100x100, casts to float32, converts to a tensor.
        # NOTE(review): confirm 100x100 matches the input size the classifier
        # was trained with.
        self.trans_resnet = transforms.Compose([
            transforms.Resize((100, 100)),
            np.float32,
            transforms.ToTensor(),
        ])

    def predict(self, instances, **kwargs):
        """Classify the first detected face in one image.

        Args:
            instances: Array-like pixel data for a single image, convertible
                with ``np.asarray`` and ``PIL.Image.fromarray``.
                NOTE(review): presumably uint8 HxWx3 — confirm with the caller.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The softmax probabilities as a flat list of floats.

        Raises:
            ValueError: If the face detector returns no bounding boxes.
        """
        image = Image.fromarray(np.asarray(instances)).convert('RGB')
        pil_im_512 = transforms.Resize((512, 512))(image)

        boxes, _ = self._mtcnn_mult(pil_im_512)
        # Fail with a clear message instead of an opaque TypeError/IndexError
        # from `boxes[0]` when nothing was detected.
        if boxes is None or len(boxes) == 0:
            raise ValueError('no face detected in the input image')

        face_tensor = extract_face(pil_im_512, boxes[0], margin=40)
        # Channels-first -> channels-last before the numpy/PIL round trip.
        std_tensor = self.get_std_tensor(face_tensor.permute(1, 2, 0))
        cropped_pil_im = self.tensor2pil(std_tensor)
        face_tensor4d = self.trans_resnet(cropped_pil_im).unsqueeze(0)
        face_tensor4d = face_tensor4d.to(self._device)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            prediction = self._resnet(face_tensor4d)
        # `.cpu()` keeps the numpy conversion valid when the device is not CPU.
        preds = F.softmax(prediction, dim=1).detach().cpu().numpy().reshape(-1)
        print('probability of (class1, class2) = ({:.4f}, {:.4f})'.format(preds[0], preds[1]))
        return preds.tolist()

    @classmethod
    def from_path(cls, model_dir):
        """Build a predictor from the joblib artifacts stored in ``model_dir``.

        Args:
            model_dir: Directory containing the classifier, detector and
                device joblib files.

        Returns:
            A new ``MyPredictor`` instance.
        """
        # SECURITY: joblib.load unpickles arbitrary objects; only point this
        # at a model directory you trust.
        classifier = joblib.load(
            os.path.join(model_dir, 'class44_M40RefinedExtra_bin_no_norm_7860.joblib'))
        mtcnn_mult = joblib.load(os.path.join(model_dir, 'mtcnn_mult.joblib'))
        device = joblib.load(os.path.join(model_dir, 'device_cpu.joblib'))
        return cls(classifier, mtcnn_mult, device)
和setup.py
:
from setuptools import setup
# Install-time requirements declared for this package.
# NOTE(review): predictor.py imports torch and torchvision, but neither is listed here.
REQUIRED_PACKAGES = ['opencv-python-headless', 'facenet-pytorch']
# Packages predictor.py and torch_model.py as `my_package` version 0.1.
setup(
name="my_package",
version="0.1",
include_package_data=True,
scripts=["predictor.py", "torch_model.py"],
install_requires=REQUIRED_PACKAGES
)
解决方案是将以下包放入自定义预测代码的 setup.py
文件中:
REQUIRED_PACKAGES = ['torchvision==0.5.0', 'torch @ https://download.pytorch.org/whl/cpu/torch-1.4.0%2Bcpu-cp37-cp37m-linux_x86_64.whl', 'opencv-python', 'facenet-pytorch']
之后我又遇到了另一个与自定义类(class)实例化有关的问题,不过这篇文章对此解释得很好。最终我成功地将模型部署到了 AI 平台进行预测。