在 pytorch 中使用可分离的 2D 卷积实现 3D 高斯模糊
Implementing a 3D gaussian blur using separable 2D convolutions in pytorch
我正在尝试在 pytorch 中实现类似高斯的 3D 体积模糊。我可以很容易地通过与 2D 高斯核进行卷积来对 2D 图像进行 2D 模糊处理,同样的方法似乎也适用于具有 3D 高斯核的 3D。但是,它在 3D 中非常慢(尤其是 sigmas/kernel 尺寸较大时)。我知道这也可以通过与 2D 内核卷积 3 次来完成,这应该更快,但我无法让它工作。我的测试用例如下。
import torch
import torch.nn.functional as F
VOL_SIZE = 21
def make_gaussian_kernel(sigma):
    """Build a normalized 1D Gaussian kernel for the given sigma.

    The kernel length is about 5 * sigma, forced to be odd so the kernel
    has a single, well-defined center tap.

    Args:
        sigma: standard deviation of the Gaussian, in voxels.

    Returns:
        1D float tensor of odd length whose entries sum to 1.
    """
    ks = int(sigma * 5)
    if ks % 2 == 0:
        ks += 1  # force an odd length so the kernel is centered
    # Sample the Gaussian at integer offsets -(ks // 2) .. ks // 2.
    # The original used linspace(-ks // 2, ks // 2 + 1, ks), which — because
    # floor division pushes the negative bound to -(ks // 2) - 1 — produced a
    # stretched, non-integer sample grid.
    ts = torch.arange(ks, dtype=torch.float32) - ks // 2
    gauss = torch.exp(-(ts / sigma) ** 2 / 2)
    return gauss / gauss.sum()
def test_3d_gaussian_blur(blur_sigma=2):
    """Compare a dense 3D Gaussian convolution against three 2D passes.

    NOTE(review): the two results do NOT match by construction — each of
    the three grouped 2D passes blurs two axes at once, so after three
    passes every axis has effectively been blurred twice.
    """
    # Test volume: a single unit impulse at the center.
    volume = torch.zeros([VOL_SIZE] * 3)
    center = VOL_SIZE // 2
    volume[center, center, center] = 1

    # Reference: one dense 3D convolution with the outer-product kernel.
    kern1d = make_gaussian_kernel(blur_sigma)
    kern3d = torch.einsum('i,j,k->ijk', kern1d, kern1d, kern1d)
    kern3d = kern3d / kern3d.sum()
    reference = F.conv3d(
        volume.reshape(1, 1, *volume.shape),
        kern3d.reshape(1, 1, *kern3d.shape),
        stride=1,
        padding=len(kern1d) // 2,
    )

    # Attempted separable variant: three grouped 2D convolutions, cycling
    # the volume axes between passes via permute.
    kern2d = torch.einsum('i,j->ij', kern1d, kern1d)
    kern2d = kern2d / kern2d.sum()
    kern2d = kern2d.expand(VOL_SIZE, 1, *kern2d.shape)
    work = volume.reshape(1, *volume.shape)
    for _ in range(3):
        work = work.permute(0, 3, 1, 2)
        work = F.conv2d(work, kern2d, stride=1,
                        padding=len(kern1d) // 2, groups=VOL_SIZE)

    torch.allclose(reference, work)  # --> False
非常感谢任何帮助!
理论上您可以使用三个 2d 卷积来计算 3d 高斯卷积,但这意味着您必须减小 2d 核的大小,因为这样每个方向实际上都被卷积了两次。
但在计算上更高效(也是通常的做法)是分离成一维核。我修改了你函数的第二部分来实现这一点。(另外我必须说,我很喜欢你基于置换(permute)的方法!)由于你处理的是 3d 体积,conv2d
或 conv1d
函数,所以最好的是真的仅使用 conv3d
即使您只是计算一维卷积。
请注意,allclose
使用了 1e-8
的阈值,而此方法无法达到该阈值,这可能是由于浮点抵消误差(cancellation error)。
def test_3d_gaussian_blur(blur_sigma=2):
    """Check that three 1D Gaussian passes match one dense 3D convolution."""
    # Random test volume (an impulse alone could hide accumulation errors).
    volume = torch.randn([VOL_SIZE] * 3)
    center = VOL_SIZE // 2
    volume[center, center, center] = 1

    # Reference: dense 3D convolution with the full outer-product kernel.
    kern1d = make_gaussian_kernel(blur_sigma)
    kern3d = torch.einsum('i,j,k->ijk', kern1d, kern1d, kern1d)
    kern3d = kern3d / kern3d.sum()
    reference = F.conv3d(
        volume.reshape(1, 1, *volume.shape),
        kern3d.reshape(1, 1, *kern3d.shape),
        stride=1,
        padding=len(kern1d) // 2,
    )

    # Separable variant: one 1D kernel applied along a single axis per pass,
    # cycling the three spatial axes of the 5D tensor between passes.
    # Re-normalizing is unnecessary here: kern1d already sums to one.
    taps = kern1d[None, None, :, None, None]  # shape (1, 1, ks, 1, 1)
    separable = volume[None, None, ...]
    for _ in range(3):
        separable = separable.permute(0, 1, 4, 2, 3)
        separable = F.conv3d(separable, taps, stride=1,
                             padding=(len(kern1d) // 2, 0, 0))

    print((reference - separable).abs().max())  # something ~1e-7
    print(torch.allclose(reference, separable))  # allclose checks if it is around 1e-8
附录:如果你真的想滥用conv2d
来处理你可以尝试的卷
# separate 3d kernel into 1d + 2d
vol_in = vol[None, None, ...]  # add batch and channel dims: (1, 1, D, H, W)
k2d = torch.einsum('i,j->ij', k, k)  # outer product -> 2D Gaussian
# One copy of the 2D kernel per depth slice, used with groups=VOL_SIZE below.
k2d = k2d.expand(VOL_SIZE, 1, len(k), len(k))
# k2d = k2d / k2d.sum() # not necessary if the kernel already sums to one, check:
# print(f'{k2d.sum()=}')
k1d = k[None, None, :, None, None]  # 1D kernel oriented along the depth axis
# Blur along depth only: padding is applied to the first spatial dim alone.
vol_in = F.conv3d(vol_in, k1d, stride=1, padding=(len(k) // 2, 0, 0))
vol_in = vol_in[0, ...]  # drop the batch dim; depth now acts as the channel dim
# abuse conv2d-groups argument for volume dimension, works only for 1 channel volumes
vol_in = F.conv2d(vol_in, k2d, stride=1, padding=(len(k) // 2, len(k) // 2), groups=VOL_SIZE)
vol_3d_sep = vol_in
或者单独使用 conv2d
你可以这样做:
# separate 3d kernel into 1d + 2d
vol_in = vol[None, ...]  # (1, D, H, W): depth acts as the channel dim for conv2d
# 1d kernel
k1d = k[None, None, :, None]
k1d = k1d.expand(VOL_SIZE, 1, len(k), 1)  # one copy per slice for groups=VOL_SIZE
# 2d kernel
k2d = torch.einsum('i,j->ij', k, k)  # outer product -> 2D Gaussian
k2d = k2d.expand(VOL_SIZE, 1, len(k), len(k))
# Rotate the depth axis into a spatial position, blur along it, rotate back.
vol_in = vol_in.permute(0, 2, 1, 3)
vol_in = F.conv2d(vol_in, k1d, stride=1, padding=(len(k) // 2, 0), groups=VOL_SIZE)
vol_in = vol_in.permute(0, 2, 1, 3)
# Grouped 2D blur over the remaining two axes (one group per depth slice).
vol_in = F.conv2d(vol_in, k2d, stride=1, padding=(len(k) // 2, len(k) // 2), groups=VOL_SIZE)
vol_3d_sep = vol_in
这些应该仍然比 三个 连续的 2d 卷积更快。
任何发现此问题的人。以前的最佳答案仍然使用 F.conv3d 来完成这项工作。在我的例子中,使用 F.conv1d 重写更快,使这个卷积真正分离成 1d。
import torch
import torch.nn.functional as F
import cv2
VOL_SIZE = (10, 20, 30)
KS = 5


def test_3d_gaussian_blur(ks=5, blur_sigma=2):
    """Compare a dense 3D Gaussian convolution against three 1D passes.

    Blurs a random volume once with the full ks**3 kernel via F.conv3d and
    once with a single 1D kernel applied along each axis via F.conv1d (the
    truly separable formulation), then checks the two results agree.

    Args:
        ks: kernel size (should be odd so the kernel is centered).
        blur_sigma: Gaussian standard deviation.

    Returns:
        True if the two results agree within atol=1e-6 (also printed).
    """
    # Make a test volume
    vol = torch.randn(VOL_SIZE)  # using something other than zeros

    # Normalized 1D Gaussian taps at integer offsets around the center.
    # This reproduces cv2.getGaussianKernel(ks, blur_sigma) exactly,
    # removing the OpenCV dependency the original carried for one call.
    ts = torch.arange(ks, dtype=torch.float32) - (ks - 1) / 2
    k = torch.exp(-(ts ** 2) / (2 * blur_sigma ** 2))
    k = k / k.sum()

    # Reference: dense 3D convolution with the outer-product kernel.
    vol_in = vol.reshape(1, 1, *vol.shape)
    k3d = torch.einsum('i,j,k->ijk', k, k, k)
    k3d = k3d / k3d.sum()
    vol_3d = F.conv3d(vol_in, k3d.reshape(1, 1, *k3d.shape),
                      stride=1, padding=len(k) // 2)

    # Truly separable: conv1d along the last axis, then cycle the axes so
    # each of the three dimensions takes a turn as the last one.
    k1d = k.view(1, 1, -1)
    for _ in range(3):
        vol = F.conv1d(vol.reshape(-1, 1, vol.size(2)), k1d,
                       padding=ks // 2).view(*vol.shape)
        vol = vol.permute(2, 0, 1)

    print((vol_3d - vol).abs().max())  # something ~1e-7
    ok = torch.allclose(vol_3d, vol, atol=1e-6)
    print(ok)
    return ok


test_3d_gaussian_blur()
我正在尝试在 pytorch 中实现类似高斯的 3D 体积模糊。我可以很容易地通过与 2D 高斯核进行卷积来对 2D 图像进行 2D 模糊处理,同样的方法似乎也适用于具有 3D 高斯核的 3D。但是,它在 3D 中非常慢(尤其是 sigmas/kernel 尺寸较大时)。我知道这也可以通过与 2D 内核卷积 3 次来完成,这应该更快,但我无法让它工作。我的测试用例如下。
import torch
import torch.nn.functional as F
VOL_SIZE = 21
def make_gaussian_kernel(sigma):
    """Build a normalized 1D Gaussian kernel for the given sigma.

    The kernel length is about 5 * sigma, forced to be odd so the kernel
    has a single, well-defined center tap.

    Args:
        sigma: standard deviation of the Gaussian, in voxels.

    Returns:
        1D float tensor of odd length whose entries sum to 1.
    """
    ks = int(sigma * 5)
    if ks % 2 == 0:
        ks += 1  # force an odd length so the kernel is centered
    # Sample the Gaussian at integer offsets -(ks // 2) .. ks // 2.
    # The original used linspace(-ks // 2, ks // 2 + 1, ks), which — because
    # floor division pushes the negative bound to -(ks // 2) - 1 — produced a
    # stretched, non-integer sample grid.
    ts = torch.arange(ks, dtype=torch.float32) - ks // 2
    gauss = torch.exp(-(ts / sigma) ** 2 / 2)
    return gauss / gauss.sum()
def test_3d_gaussian_blur(blur_sigma=2):
    """Compare a dense 3D Gaussian convolution against three 2D passes.

    NOTE(review): the two results do NOT match by construction — each of
    the three grouped 2D passes blurs two axes at once, so after three
    passes every axis has effectively been blurred twice.
    """
    # Test volume: a single unit impulse at the center.
    volume = torch.zeros([VOL_SIZE] * 3)
    center = VOL_SIZE // 2
    volume[center, center, center] = 1

    # Reference: one dense 3D convolution with the outer-product kernel.
    kern1d = make_gaussian_kernel(blur_sigma)
    kern3d = torch.einsum('i,j,k->ijk', kern1d, kern1d, kern1d)
    kern3d = kern3d / kern3d.sum()
    reference = F.conv3d(
        volume.reshape(1, 1, *volume.shape),
        kern3d.reshape(1, 1, *kern3d.shape),
        stride=1,
        padding=len(kern1d) // 2,
    )

    # Attempted separable variant: three grouped 2D convolutions, cycling
    # the volume axes between passes via permute.
    kern2d = torch.einsum('i,j->ij', kern1d, kern1d)
    kern2d = kern2d / kern2d.sum()
    kern2d = kern2d.expand(VOL_SIZE, 1, *kern2d.shape)
    work = volume.reshape(1, *volume.shape)
    for _ in range(3):
        work = work.permute(0, 3, 1, 2)
        work = F.conv2d(work, kern2d, stride=1,
                        padding=len(kern1d) // 2, groups=VOL_SIZE)

    torch.allclose(reference, work)  # --> False
非常感谢任何帮助!
理论上您可以使用三个 2d 卷积来计算 3d 高斯卷积,但这意味着您必须减小 2d 核的大小,因为这样每个方向实际上都被卷积了两次。
但在计算上更高效(也是通常的做法)是分离成一维核。我修改了你函数的第二部分来实现这一点。(另外我必须说,我很喜欢你基于置换(permute)的方法!)由于你处理的是 3d 体积,conv2d
或 conv1d
函数,所以最好的是真的仅使用 conv3d
即使您只是计算一维卷积。
请注意,allclose
使用了 1e-8
的阈值,而此方法无法达到该阈值,这可能是由于浮点抵消误差(cancellation error)。
def test_3d_gaussian_blur(blur_sigma=2):
    """Check that three 1D Gaussian passes match one dense 3D convolution."""
    # Random test volume (an impulse alone could hide accumulation errors).
    volume = torch.randn([VOL_SIZE] * 3)
    center = VOL_SIZE // 2
    volume[center, center, center] = 1

    # Reference: dense 3D convolution with the full outer-product kernel.
    kern1d = make_gaussian_kernel(blur_sigma)
    kern3d = torch.einsum('i,j,k->ijk', kern1d, kern1d, kern1d)
    kern3d = kern3d / kern3d.sum()
    reference = F.conv3d(
        volume.reshape(1, 1, *volume.shape),
        kern3d.reshape(1, 1, *kern3d.shape),
        stride=1,
        padding=len(kern1d) // 2,
    )

    # Separable variant: one 1D kernel applied along a single axis per pass,
    # cycling the three spatial axes of the 5D tensor between passes.
    # Re-normalizing is unnecessary here: kern1d already sums to one.
    taps = kern1d[None, None, :, None, None]  # shape (1, 1, ks, 1, 1)
    separable = volume[None, None, ...]
    for _ in range(3):
        separable = separable.permute(0, 1, 4, 2, 3)
        separable = F.conv3d(separable, taps, stride=1,
                             padding=(len(kern1d) // 2, 0, 0))

    print((reference - separable).abs().max())  # something ~1e-7
    print(torch.allclose(reference, separable))  # allclose checks if it is around 1e-8
附录:如果你真的想滥用conv2d
来处理你可以尝试的卷
# separate 3d kernel into 1d + 2d
vol_in = vol[None, None, ...]  # add batch and channel dims: (1, 1, D, H, W)
k2d = torch.einsum('i,j->ij', k, k)  # outer product -> 2D Gaussian
# One copy of the 2D kernel per depth slice, used with groups=VOL_SIZE below.
k2d = k2d.expand(VOL_SIZE, 1, len(k), len(k))
# k2d = k2d / k2d.sum() # not necessary if the kernel already sums to one, check:
# print(f'{k2d.sum()=}')
k1d = k[None, None, :, None, None]  # 1D kernel oriented along the depth axis
# Blur along depth only: padding is applied to the first spatial dim alone.
vol_in = F.conv3d(vol_in, k1d, stride=1, padding=(len(k) // 2, 0, 0))
vol_in = vol_in[0, ...]  # drop the batch dim; depth now acts as the channel dim
# abuse conv2d-groups argument for volume dimension, works only for 1 channel volumes
vol_in = F.conv2d(vol_in, k2d, stride=1, padding=(len(k) // 2, len(k) // 2), groups=VOL_SIZE)
vol_3d_sep = vol_in
或者单独使用 conv2d
你可以这样做:
# separate 3d kernel into 1d + 2d
vol_in = vol[None, ...]  # (1, D, H, W): depth acts as the channel dim for conv2d
# 1d kernel
k1d = k[None, None, :, None]
k1d = k1d.expand(VOL_SIZE, 1, len(k), 1)  # one copy per slice for groups=VOL_SIZE
# 2d kernel
k2d = torch.einsum('i,j->ij', k, k)  # outer product -> 2D Gaussian
k2d = k2d.expand(VOL_SIZE, 1, len(k), len(k))
# Rotate the depth axis into a spatial position, blur along it, rotate back.
vol_in = vol_in.permute(0, 2, 1, 3)
vol_in = F.conv2d(vol_in, k1d, stride=1, padding=(len(k) // 2, 0), groups=VOL_SIZE)
vol_in = vol_in.permute(0, 2, 1, 3)
# Grouped 2D blur over the remaining two axes (one group per depth slice).
vol_in = F.conv2d(vol_in, k2d, stride=1, padding=(len(k) // 2, len(k) // 2), groups=VOL_SIZE)
vol_3d_sep = vol_in
这些应该仍然比 三个 连续的 2d 卷积更快。
任何发现此问题的人。以前的最佳答案仍然使用 F.conv3d 来完成这项工作。在我的例子中,使用 F.conv1d 重写更快,使这个卷积真正分离成 1d。
import torch
import torch.nn.functional as F
import cv2
VOL_SIZE = (10, 20, 30)
KS = 5


def test_3d_gaussian_blur(ks=5, blur_sigma=2):
    """Compare a dense 3D Gaussian convolution against three 1D passes.

    Blurs a random volume once with the full ks**3 kernel via F.conv3d and
    once with a single 1D kernel applied along each axis via F.conv1d (the
    truly separable formulation), then checks the two results agree.

    Args:
        ks: kernel size (should be odd so the kernel is centered).
        blur_sigma: Gaussian standard deviation.

    Returns:
        True if the two results agree within atol=1e-6 (also printed).
    """
    # Make a test volume
    vol = torch.randn(VOL_SIZE)  # using something other than zeros

    # Normalized 1D Gaussian taps at integer offsets around the center.
    # This reproduces cv2.getGaussianKernel(ks, blur_sigma) exactly,
    # removing the OpenCV dependency the original carried for one call.
    ts = torch.arange(ks, dtype=torch.float32) - (ks - 1) / 2
    k = torch.exp(-(ts ** 2) / (2 * blur_sigma ** 2))
    k = k / k.sum()

    # Reference: dense 3D convolution with the outer-product kernel.
    vol_in = vol.reshape(1, 1, *vol.shape)
    k3d = torch.einsum('i,j,k->ijk', k, k, k)
    k3d = k3d / k3d.sum()
    vol_3d = F.conv3d(vol_in, k3d.reshape(1, 1, *k3d.shape),
                      stride=1, padding=len(k) // 2)

    # Truly separable: conv1d along the last axis, then cycle the axes so
    # each of the three dimensions takes a turn as the last one.
    k1d = k.view(1, 1, -1)
    for _ in range(3):
        vol = F.conv1d(vol.reshape(-1, 1, vol.size(2)), k1d,
                       padding=ks // 2).view(*vol.shape)
        vol = vol.permute(2, 0, 1)

    print((vol_3d - vol).abs().max())  # something ~1e-7
    ok = torch.allclose(vol_3d, vol, atol=1e-6)
    print(ok)
    return ok


test_3d_gaussian_blur()