在 Python OpenGL 中使用图像中的 2D 点获取空间中的 3D 点

get 3d point in space using 2d point in image in python opengl

我正在尝试模拟房间中的深度相机。我的相机能够在世界中移动和旋转,房间被模拟为以 (0,0,0) 为中心的 3D 立方体。单击按钮时,我想对图像中的 N 个随机点进行采样,并获取这些点与相机的距离("real world" 中的距离)。到目前为止,我已经成功创建了包含可移动相机和立方体的场景 (Example)

我试过 gluUnProject 来获取 3d 点

# Snapshot of the current matrices and viewport; note that none of these
# three values is passed to the gluUnProject call below.
model_view = np.array(glGetDoublev(GL_MODELVIEW_MATRIX))
proj = np.array(glGetDoublev(GL_PROJECTION_MATRIX))
view = np.array(glGetDoublev(GL_VIEWPORT))

# The window depth is hard-coded to 0.0 here instead of being read from the
# depth buffer. (Result name changed from `3d_point`, which is not a valid
# Python identifier because it starts with a digit.)
point_3d = gluUnProject(x,y, 0.0)

其中 x,y 是图像中像素的坐标,但是当我检查我知道它们位置(立方体角)的像素时,我得到了随机结果。

我对 openGL 很陌生,所以我可能会遗漏一些东西,从数学角度来说,我想做的就是在像素坐标上应用投影和视图矩阵的逆,但这不起作用。

我在下面附上房间模拟的代码。

提前致谢。

import pygame
from pygame.locals import *
import numpy as np
import random
from OpenGL.GL import *
from OpenGL.GLU import *
display = (800, 600)
import math

def get_cube_information():
    """Return the static description of the cube.

    Returns:
        A 4-tuple ``(vertices, edges, surfaces, colors)`` where
        ``vertices`` holds the eight (x, y, z) corners (every coordinate is
        +1 or -1), ``edges`` holds twelve pairs of vertex indices,
        ``surfaces`` holds six quads given as four vertex indices each, and
        ``colors`` holds six (r, g, b) triples.
    """
    # Corner positions; the index of each entry is what edges/faces refer to.
    corner_positions = (
        (1, -1, -1), (1, 1, -1), (-1, 1, -1), (-1, -1, -1),
        (1, -1, 1), (1, 1, 1), (-1, -1, 1), (-1, 1, 1),
    )

    # Wireframe segments as (start_index, end_index) into corner_positions.
    edge_pairs = (
        (0, 1), (0, 3), (0, 4),
        (2, 1), (2, 3), (2, 7),
        (6, 3), (6, 4), (6, 7),
        (5, 1), (5, 4), (5, 7),
    )

    # Faces as four corner indices each.
    quad_faces = (
        (0, 1, 2, 3),
        (3, 2, 7, 6),
        (6, 7, 5, 4),
        (4, 5, 1, 0),
        (1, 5, 7, 2),
        (4, 0, 3, 6),
    )

    # RGB triples with components in the 0..1 range.
    face_colors = (
        (1.000, 0.920, 0.000),
        (0.000, 0.860, 0.000),
        (1.000, 0.480, 0.000),
        (1.000, 1.000, 1.000),
        (0.900, 0.000, 0.000),
        (0.000, 0.000, 0.950),
    )

    return corner_positions, edge_pairs, quad_faces, face_colors


def Cube():
    """Draw the cube in immediate mode: six filled quads, then twelve edges.

    The geometry comes from get_cube_information(). Face ``i`` is filled
    with ``colors[i]``. No colour is set for the edge pass, so the lines use
    whatever colour is current at that point (the last face colour set in
    the quad pass).
    """
    # Fetch the geometry before opening a glBegin/glEnd pair so that only
    # vertex/colour commands are issued between them.
    vertices, edges, surfaces, colors = get_cube_information()

    glBegin(GL_QUADS)
    for surface, color in zip(surfaces, colors):
        # The colour is constant for a whole face, so set it once per face
        # rather than once per vertex.
        glColor3fv(color)
        for vertex in surface:
            glVertex3fv(vertices[vertex])
    glEnd()

    glBegin(GL_LINES)
    for edge in edges:
        for vertex in edge:
            glVertex3fv(vertices[vertex])
    glEnd()


def main():
    """Open the OpenGL window and run the interactive camera loop forever.

    Keys A/D and W/S set a per-frame translation step along x and z, and the
    arrow keys set a per-frame rotation step about the y and x axes. Every
    frame that step is multiplied onto the accumulated matrix ``view_mat``,
    the cube is drawn with the result, and the buffers are flipped. The
    function only leaves through ``quit()`` (window close or Escape).
    """
    pygame.init()
    # Per-frame motion steps: set on key-down, reset to zero on key-up.
    tx = 0
    # ty is never modified below, so no vertical translation is ever applied.
    ty = 0
    tz = 0
    ry = 0
    rx = 0
    pygame.display.set_mode(display, DOUBLEBUF|OPENGL|RESIZABLE)

    # Perspective projection using the aspect ratio of the `display` size.
    glMatrixMode(GL_PROJECTION)
    gluPerspective(45, (display[0] / display[1]), 0.1, 50.0)

    # Accumulated camera matrix; starts as a 4x4 float32 identity.
    view_mat = np.matrix(np.identity(4), copy=False, dtype='float32')

    # Initialise the model-view matrix (translation by zero) and pass
    # view_mat as the destination of the read-back, then reset to identity.
    glMatrixMode(GL_MODELVIEW)
    glLoadIdentity()
    glTranslatef(0, 0, 0)
    glGetFloatv(GL_MODELVIEW_MATRIX, view_mat)
    glLoadIdentity()

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    pygame.quit()
                    quit()
                # Key pressed: start moving/rotating by a fixed step per frame.
                if event.key == pygame.K_a:
                    tx = 0.05
                elif event.key == pygame.K_d:
                    tx = -0.05
                elif event.key == pygame.K_w:
                    tz = 0.05
                elif event.key == pygame.K_s:
                    tz = -0.05
                elif event.key == pygame.K_RIGHT:
                    ry = 1.0
                elif event.key == pygame.K_LEFT:
                    ry = -1.0
                elif event.key == pygame.K_UP:
                    rx = -1.0
                elif event.key == pygame.K_DOWN:
                    rx = 1.0
                elif event.key == pygame.K_SPACE:
                    # No action is bound to Space; move on to the next event.
                    continue
            elif event.type == pygame.KEYUP:
                # Key released: stop the motion only if it is still the one
                # this key started (the sign check guards against clearing a
                # step set by the opposite key).
                if event.key == pygame.K_a and tx > 0:
                    tx = 0
                elif event.key == pygame.K_d and tx < 0:
                    tx = 0
                elif event.key == pygame.K_w and tz > 0:
                    tz = 0
                elif event.key == pygame.K_s and tz < 0:
                    tz = 0
                elif event.key == pygame.K_RIGHT and ry > 0:
                    ry = 0.0
                elif event.key == pygame.K_LEFT and ry < 0:
                    ry = 0.0
                elif event.key == pygame.K_DOWN and rx > 0:
                    rx = 0.0
                elif event.key == pygame.K_UP and rx < 0:
                    rx = 0.0
            elif event.type == pygame.MOUSEBUTTONDOWN:
                #here I want to sample the points and return their (x,y) in the image and their distance from the camera.
                # Placeholder: nothing is sampled yet.
                continue

        # Build this frame's incremental motion on a fresh identity matrix.
        glPushMatrix()
        glLoadIdentity()
        glTranslatef(tx, ty, tz)
        glRotatef(ry, 0, 1, 0)
        glRotatef(rx, 1, 0, 0)

        # Multiply the accumulated matrix onto the increment and read the
        # product back with view_mat as the destination, so motion
        # accumulates from frame to frame.
        glMultMatrixf(view_mat)
        glGetFloatv(GL_MODELVIEW_MATRIX, view_mat)
        # NOTE(review): the depth buffer is cleared here, but this function
        # never calls glEnable(GL_DEPTH_TEST).
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        Cube()
        glPopMatrix()
        pygame.display.flip()
        pygame.time.wait(10)
# Runs the demo as soon as this file is executed.
main()

要找到视口上某个点的世界位置,您必须知道该点的深度值。

x 和 y 屏幕位置以及深度必须转换为 [-1, 1] 范围内的标准化设备坐标。为此,必须知道视口矩形:

ndc = [2.0* x/vp_width - 1.0, 1.0 - 2.0*y/vp_height, depth*2.0 - 1.0]; 

标准化设备空间坐标必须通过逆投影矩阵转换到视图空间(最后必须执行透视除法)。

使用逆视图矩阵,可以将视图空间坐标转换为世界空间坐标。

gluUnProject 为您完成所有这些,但您必须知道片段的深度。片段的深度可以通过 glReadPixels 读取:
# get mouse position (pygame measures y downward from the top of the window)
x, y = pygame.mouse.get_pos()

# get projection matrix, view matrix and the viewport rectangle
model_view = np.array(glGetDoublev(GL_MODELVIEW_MATRIX))
proj = np.array(glGetDoublev(GL_PROJECTION_MATRIX))
view = np.array(glGetIntegerv(GL_VIEWPORT))

# OpenGL window coordinates have their origin at the bottom-left corner, so
# flip y using the viewport height (view is [x, y, width, height]). This
# assumes the viewport covers the whole window.
win_y = int(view[3]) - 1 - y

# get the fragment depth
depth = glReadPixels(x, win_y, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT)

# unproject the point
point = gluUnProject(x, win_y, depth, model_view, proj, view)
print( point )

请注意,您必须启用深度测试 (Depth Test),否则深度缓冲区不会被写入。这样做还有一个好处:前面的多边形会遮挡位于它们后面 ("behind") 的多边形:

# Turn on depth testing before the cube is drawn.
glEnable(GL_DEPTH_TEST)
Cube()

当然,在通过 glGetDoublev(GL_PROJECTION_MATRIX) 和 glGetDoublev(GL_MODELVIEW_MATRIX) 读取矩阵的值时,投影矩阵和模型视图矩阵必须已经正确设置。

这意味着必须在设置好视图矩阵之后再读取它:

# Build this frame's incremental camera motion on a fresh identity matrix.
glPushMatrix()
glLoadIdentity()
glTranslatef(tx, ty, tz)
glRotatef(ry, 0, 1, 0)
glRotatef(rx, 1, 0, 0)

# Multiply the accumulated view matrix onto it and read the product back,
# passing view_mat as the destination.
glMultMatrixf(view_mat)
glGetFloatv(GL_MODELVIEW_MATRIX, view_mat)

# Read the model-view matrix only now, after it has been fully set up.
model_view = np.array(glGetDoublev(GL_MODELVIEW_MATRIX))

注意,如果gluUnProject的第4个参数(model)使用单位矩阵,那么gluUnProject不计算世界坐标,而是计算视图坐标。