OPENCV:实施 Frankot-Chellappa 算法时遇到问题(法线深度)
OPENCV: Trouble implementing Frankot-Chellappa Algorithm (depth from normal)
我正在尝试使用 frankot/chellappa 算法从法线重建 surface/depthmap。
行和列是我要为其重建深度的 img 的大小。
我这样获得法向量:
rows, cols = imglist[0].shape
def find_NormAlbedo(sources, imglist, rows, cols):
'''
:param sources: a list of light source coordinates as [x,y,z] coordinates per light source
(shape (20,3) for 20 sources)
:param imglist: a list of all images for one object
:param rows: shape[0] of every image
:param cols: shape[1] of every image
:return: returns normals and albedo's for an object
'''
normal = np.zeros_like(imglist[0], dtype=np.ndarray)
albedo = np.zeros_like(imglist[0])
# for every pixel
for x in range(rows):
for y in range(cols):
I = [] # intensity matrix of pixel x,y per image
S = [] # lightsources
for i in range(len(imglist)):
img = imglist[i]
I.append([img[x][y]])
S.append(sources[i])
# Least squares solution if S is invertible
# pseudoinverse
pseudoS = np.linalg.pinv(S)
ntilde = pseudoS @ I
p = np.linalg.norm(ntilde, ord=2)
if p != 0.:
n = ntilde / p
n = n.flatten()
# print(n)
# print(n.shape)
else:
n = np.zeros_like(ntilde.flatten())
normal[x][y] = n
albedo[x][y] = p
return normal, albedo
但怀疑这是错误的,因为我的反照率看起来与我在示例中看到的完全不同,但不知道我的错误在哪里...
然后我尝试使用 wavepy 函数从中获取表面 surface_to_grad:
def depthfromgradient(normalmap):
'''
:param normalmap: Previously obtained normals per pixel
:return: Surface/Depth map from normalmap
'''
surfacep = np.zeros_like(normalmap)
surfaceq = np.zeros_like(normalmap)
for row in range(rows):
for x in range(cols):
#print(x)
a, b, c = normalmap[row][x]
#print(a, b, c)
if c !=0:
p = -a / c # p=dZ/dx
q = -b / c # q=dZ/dy
surfacep[row][x] = p
surfaceq[row][x] = q
return surface_from_grad.frankotchellappa(surfacep, surfaceq, reflec_pad=True)
我的目标是使用 cv.imshow() 可视化深度贴图和法线贴图,但我不确定哪里出错了。这些是我 questions/ideas 出错的地方:
-反照率图是否合理?如果不是,我想我误解了这个算法的一部分。
-我的深度图有复数,这正常吗?这些从哪里来?
-我看了法线贴图、反照率贴图和深度贴图的形状,它们的形状都是 (640, 500),但我只能看到反照率贴图,其他的给我以下错误,这里有什么问题?:
cv2.imshow('DepthMap', surface)
TypeError: Expected cv::UMat for argument 'mat'
欢迎任何有助于缩小此问题范围的帮助。
注意:在使用 imshow() 之前,我尝试将所有内容都转换为 np 数组。
问题与我在第一条评论中描述的完全一样。您为 p
(成为您的 albedo
图像)获得的值范围从 0 到 1998.34。当你将它存储在一个字节中时,你只是得到低位 8 位,它环绕。
如果你改变这个:
albedo[x][y] = p
对此:
albedo[x,y] = p/8
您会发现生成的图像看起来很棒。
顺便说一句,您可以进行多项优化。在 xxx[x][y]
和 numpy 数组的地方,改为 xxx[x,y]
。当您构建光源时,而不是
I = [] # intensity matrix of pixel x,y per image
S = [] # lightsources
for i in range(len(imglist)):
img = imglist[i]
I.append([img[x][y]])
S.append(sources[i])
做
I = [img[x,y] for img in imglist]
S = sources[:len(imglist),:]
您的 obtainData
函数可以通过以下方式提高可读性:
# Read all images
paths = (
fr".\PSData\PSData\{things[i]}\Objects\Image_01.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_02.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_03.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_04.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_05.png"
)
imgs = [cv2.imread(p,0) for p in paths]
...
# Apply masks to images: cv2.bitwise
imglist = [cv2.bitwise_or(img, img, mask=mask(img, threshold)) for img in imgs]
根据wavepy.surface_from_grad.frankotchellappa()
的文档,复杂的部分可以忽略。在此基础上,它们可以用 matplotlib 显示以获得像这样的图像:
import matplotlib.pyplot as plt
from matplotlib.ticker import LinearLocator
import numpy as np
ax = plt.figure().add_subplot(projection='3d')
surface = [[-2.19312825e+00-1.62679906e-02j,-1.76625653e+00-1.62093005e-02j ,-1.21359325e+00-1.63381200e-02j,-5.91501045e-01-1.45226036e-02j ,-9.24685295e-02-1.73373776e-03j, 1.59549135e-01+8.17785543e-05j , 2.90806910e-01-4.70409288e-05j, 3.47604091e-01+1.16492161e-05j],[-2.40280993e+00+1.62679906e-02j,-1.66061279e+00+1.62093005e-02j ,-1.11510109e+00+1.63381200e-02j,-3.08374007e-01+1.45226036e-02j , 1.57978453e-01+1.73373776e-03j, 2.59650686e-01-8.17785543e-05j , 3.63995725e-01+4.70409288e-05j, 4.01138015e-01-1.16492161e-05j],[-2.23283386e+00-1.62679906e-02j,-1.37689420e+00-1.62093005e-02j ,-7.79238609e-01-1.63381200e-02j, 6.13042608e-02-1.45226036e-02j , 4.21894421e-01-1.73373776e-03j, 4.22562434e-01+8.17785543e-05j , 4.81126228e-01-4.70409288e-05j, 4.97191034e-01+1.16492161e-05j],[-2.03380248e+00+1.62679906e-02j,-1.15214942e+00+1.62093005e-02j ,-5.31293338e-01+1.63381200e-02j, 3.06448040e-01+1.45226036e-02j ,6.43854839e-01+1.73373776e-03j, 5.97558594e-01-8.17785543e-05j ,6.18487416e-01+4.70409288e-05j, 6.16102854e-01-1.16492161e-05j],[-1.78298688e+00-1.62679906e-02j,-8.95377575e-01-1.62093005e-02j ,2.76063262e-01-1.63381200e-02j, 5.45799539e-01-1.45226036e-02j ,8.50153479e-01-1.73373776e-03j, 7.66200038e-01+8.17785543e-05j ,7.57462265e-01-4.70409288e-05j, 7.39255327e-01+1.16492161e-05j],[-1.48865585e+00+1.62679906e-02j,-6.06732343e-01+1.62093005e-02j ,2.25981613e-03+1.63381200e-02j, 7.88696843e-01+1.45226036e-02j ,1.04351159e+00+1.73373776e-03j, 9.19975122e-01-8.17785543e-05j ,8.83235485e-01+4.70409288e-05j, 8.50665864e-01-1.16492161e-05j],[-1.20317686e+00-1.62679906e-02j,-3.29863821e-01-1.62093005e-02j ,2.50911348e-01-1.63381200e-02j, 9.99471222e-01-1.45226036e-02j ,1.21789476e+00-1.73373776e-03j, 1.04762150e+00+8.17785543e-05j ,9.81522674e-01-4.70409288e-05j, 9.36002572e-01+1.16492161e-05j],[-7.74950625e-01+1.62679906e-02j, 9.04024675e-02+1.62093005e-02j
,6.51399438e-01+1.63381200e-02j, 1.32951041e+00+1.45226036e-02j
,1.36765625e+00+1.73373776e-03j, 1.12552107e+00-8.17785543e-05j
,1.03744750e+00+4.70409288e-05j, 9.82554438e-01-1.16492161e-05j]]
surface=np.array(surface)
X = np.arange(0,8)
Y = np.arange(0,8)
Z = [surface[x,y].real for x in X for y in Y]
xx, yy = np.meshgrid(X, Y)
Z= np.array(Z)
Z= np.reshape(Z, xx.shape)
colortuple = ('g', 'cyan')
colors = np.empty(xx.shape, dtype=str)
for y in range(len(Y)):
for x in range(len(Y)):
colors[y, x] = colortuple[(x + y) % len(colortuple)]
surf = ax.plot_surface(xx, yy, Z, facecolors=colors, linewidth=0)
ax.zaxis.set_major_locator(LinearLocator(6))
plt.show()
我正在尝试使用 frankot/chellappa 算法从法线重建 surface/depthmap。 行和列是我要为其重建深度的 img 的大小。
我这样获得法向量:
rows, cols = imglist[0].shape
def find_NormAlbedo(sources, imglist, rows, cols):
'''
:param sources: a list of light source coordinates as [x,y,z] coordinates per light source
(shape (20,3) for 20 sources)
:param imglist: a list of all images for one object
:param rows: shape[0] of every image
:param cols: shape[1] of every image
:return: returns normals and albedo's for an object
'''
normal = np.zeros_like(imglist[0], dtype=np.ndarray)
albedo = np.zeros_like(imglist[0])
# for every pixel
for x in range(rows):
for y in range(cols):
I = [] # intensity matrix of pixel x,y per image
S = [] # lightsources
for i in range(len(imglist)):
img = imglist[i]
I.append([img[x][y]])
S.append(sources[i])
# Least squares solution if S is invertible
# pseudoinverse
pseudoS = np.linalg.pinv(S)
ntilde = pseudoS @ I
p = np.linalg.norm(ntilde, ord=2)
if p != 0.:
n = ntilde / p
n = n.flatten()
# print(n)
# print(n.shape)
else:
n = np.zeros_like(ntilde.flatten())
normal[x][y] = n
albedo[x][y] = p
return normal, albedo
但怀疑这是错误的,因为我的反照率看起来与我在示例中看到的完全不同,但不知道我的错误在哪里...
然后我尝试使用 wavepy 函数从中获取表面 surface_to_grad:
def depthfromgradient(normalmap):
'''
:param normalmap: Previously obtained normals per pixel
:return: Surface/Depth map from normalmap
'''
surfacep = np.zeros_like(normalmap)
surfaceq = np.zeros_like(normalmap)
for row in range(rows):
for x in range(cols):
#print(x)
a, b, c = normalmap[row][x]
#print(a, b, c)
if c !=0:
p = -a / c # p=dZ/dx
q = -b / c # q=dZ/dy
surfacep[row][x] = p
surfaceq[row][x] = q
return surface_from_grad.frankotchellappa(surfacep, surfaceq, reflec_pad=True)
我的目标是使用 cv.imshow() 可视化深度贴图和法线贴图,但我不确定哪里出错了。这些是我 questions/ideas 出错的地方:
-反照率图是否合理?如果不是,我想我误解了这个算法的一部分。
-我的深度图有复数,这正常吗?这些从哪里来?
-我看了法线贴图、反照率贴图和深度贴图的形状,它们的形状都是 (640, 500),但我只能看到反照率贴图,其他的给我以下错误,这里有什么问题?:
cv2.imshow('DepthMap', surface)
TypeError: Expected cv::UMat for argument 'mat'
欢迎任何有助于缩小此问题范围的帮助。
注意:在使用 imshow() 之前,我尝试将所有内容都转换为 np 数组。
问题与我在第一条评论中描述的完全一样。您为 p
(成为您的 albedo
图像)获得的值范围从 0 到 1998.34。当你将它存储在一个字节中时,你只是得到低位 8 位,它环绕。
如果你改变这个:
albedo[x][y] = p
对此:
albedo[x,y] = p/8
您会发现生成的图像看起来很棒。
顺便说一句,您可以进行多项优化。在 xxx[x][y]
和 numpy 数组的地方,改为 xxx[x,y]
。当您构建光源时,而不是
I = [] # intensity matrix of pixel x,y per image
S = [] # lightsources
for i in range(len(imglist)):
img = imglist[i]
I.append([img[x][y]])
S.append(sources[i])
做
I = [img[x,y] for img in imglist]
S = sources[:len(imglist),:]
您的 obtainData
函数可以通过以下方式提高可读性:
# Read all images
paths = (
fr".\PSData\PSData\{things[i]}\Objects\Image_01.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_02.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_03.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_04.png",
fr".\PSData\PSData\{things[i]}\Objects\Image_05.png"
)
imgs = [cv2.imread(p,0) for p in paths]
...
# Apply masks to images: cv2.bitwise
imglist = [cv2.bitwise_or(img, img, mask=mask(img, threshold)) for img in imgs]
根据wavepy.surface_from_grad.frankotchellappa()
的文档,复杂的部分可以忽略。在此基础上,它们可以用 matplotlib 显示以获得像这样的图像:
import matplotlib.pyplot as plt
from matplotlib.ticker import LinearLocator
import numpy as np
ax = plt.figure().add_subplot(projection='3d')
surface = [[-2.19312825e+00-1.62679906e-02j,-1.76625653e+00-1.62093005e-02j ,-1.21359325e+00-1.63381200e-02j,-5.91501045e-01-1.45226036e-02j ,-9.24685295e-02-1.73373776e-03j, 1.59549135e-01+8.17785543e-05j , 2.90806910e-01-4.70409288e-05j, 3.47604091e-01+1.16492161e-05j],[-2.40280993e+00+1.62679906e-02j,-1.66061279e+00+1.62093005e-02j ,-1.11510109e+00+1.63381200e-02j,-3.08374007e-01+1.45226036e-02j , 1.57978453e-01+1.73373776e-03j, 2.59650686e-01-8.17785543e-05j , 3.63995725e-01+4.70409288e-05j, 4.01138015e-01-1.16492161e-05j],[-2.23283386e+00-1.62679906e-02j,-1.37689420e+00-1.62093005e-02j ,-7.79238609e-01-1.63381200e-02j, 6.13042608e-02-1.45226036e-02j , 4.21894421e-01-1.73373776e-03j, 4.22562434e-01+8.17785543e-05j , 4.81126228e-01-4.70409288e-05j, 4.97191034e-01+1.16492161e-05j],[-2.03380248e+00+1.62679906e-02j,-1.15214942e+00+1.62093005e-02j ,-5.31293338e-01+1.63381200e-02j, 3.06448040e-01+1.45226036e-02j ,6.43854839e-01+1.73373776e-03j, 5.97558594e-01-8.17785543e-05j ,6.18487416e-01+4.70409288e-05j, 6.16102854e-01-1.16492161e-05j],[-1.78298688e+00-1.62679906e-02j,-8.95377575e-01-1.62093005e-02j ,2.76063262e-01-1.63381200e-02j, 5.45799539e-01-1.45226036e-02j ,8.50153479e-01-1.73373776e-03j, 7.66200038e-01+8.17785543e-05j ,7.57462265e-01-4.70409288e-05j, 7.39255327e-01+1.16492161e-05j],[-1.48865585e+00+1.62679906e-02j,-6.06732343e-01+1.62093005e-02j ,2.25981613e-03+1.63381200e-02j, 7.88696843e-01+1.45226036e-02j ,1.04351159e+00+1.73373776e-03j, 9.19975122e-01-8.17785543e-05j ,8.83235485e-01+4.70409288e-05j, 8.50665864e-01-1.16492161e-05j],[-1.20317686e+00-1.62679906e-02j,-3.29863821e-01-1.62093005e-02j ,2.50911348e-01-1.63381200e-02j, 9.99471222e-01-1.45226036e-02j ,1.21789476e+00-1.73373776e-03j, 1.04762150e+00+8.17785543e-05j ,9.81522674e-01-4.70409288e-05j, 9.36002572e-01+1.16492161e-05j],[-7.74950625e-01+1.62679906e-02j, 9.04024675e-02+1.62093005e-02j
,6.51399438e-01+1.63381200e-02j, 1.32951041e+00+1.45226036e-02j
,1.36765625e+00+1.73373776e-03j, 1.12552107e+00-8.17785543e-05j
,1.03744750e+00+4.70409288e-05j, 9.82554438e-01-1.16492161e-05j]]
surface=np.array(surface)
X = np.arange(0,8)
Y = np.arange(0,8)
Z = [surface[x,y].real for x in X for y in Y]
xx, yy = np.meshgrid(X, Y)
Z= np.array(Z)
Z= np.reshape(Z, xx.shape)
colortuple = ('g', 'cyan')
colors = np.empty(xx.shape, dtype=str)
for y in range(len(Y)):
for x in range(len(Y)):
colors[y, x] = colortuple[(x + y) % len(colortuple)]
surf = ax.plot_surface(xx, yy, Z, facecolors=colors, linewidth=0)
ax.zaxis.set_major_locator(LinearLocator(6))
plt.show()