Conway's game of life in 3D FPS问题

Conway's game of life in 3D FPS problems

我正在尝试在 3D 中实现 conway's game of life。基本上,我正在用一个额外的维度来试验它。

我在游戏开始时实例化了一个立方体列表,并给每个立方体一个索引,该索引与一个逻辑对象相关联。在由 requestAnimationFrame 调用的渲染函数中,如果某个立方体是活的,我就对它调用 twgl.drawObjectList,否则就跳过它。

问题是当我制作一个 50*50*50(125000 个立方体)游戏时,FPS 下降到 1 以下。这是正常的吗?我的做法是否正确?

编辑:

/**
 * Start a new game: pick the board dimensions, build the game-logic object,
 * create one renderable record per cell and schedule the first frame.
 *
 * @param {number} xDimV - Board width, used only when no saved game is given.
 * @param {number} yDimV - Board depth, used only when no saved game is given.
 * @param {number} zDimV - Number of layers, used only when no saved game is given.
 * @param {Array|boolean} [gameSelected=false] - Saved game indexed as
 *     [z][y][x]; when truthy its own dimensions override the three arguments.
 */
function newGame (xDimV, yDimV, zDimV, gameSelected = false) {
    if (gameSelected) {
        // A loaded game dictates its own dimensions.
        xDim = gameSelected[0][0].length;
        yDim = gameSelected[0].length;
        zDim = gameSelected.length;
    } else {
        xDim = xDimV;
        yDim = yDimV;
        zDim = zDimV;
    }

    myGame = Object.create(game);
    // NOTE(review): "consutructor" looks like a misspelling, but it must match
    // the method name on `game`, which is defined outside this block.
    myGame.consutructor(xDim, yDim, zDim, gameSelected);

    objects = [];
    for (var layer = 0; layer < zDim; layer++) {
        for (var row = 0; row < yDim; row++) {
            for (var col = 0; col < xDim; col++) {
                // Random colour per cube: hue within 120 degrees of baseHue.
                var hue = emod(baseHue + rand(0, 120), 360);
                var saturation = rand(0.5, 1);
                var brightness = rand(0.5, 1);

                var cubeUniforms = {
                    u_colorMult: chroma.hsv(hue, saturation, brightness).gl(),
                    u_world: m4.identity(),
                    u_worldInverseTranspose: m4.identity(),
                    u_worldViewProjection: m4.identity(),
                };

                // Single-entry list in the shape twgl.drawObjectList expects.
                var cubeDrawList = [{
                    programInfo: programInfo,
                    bufferInfo: cubeBufferInfo,
                    uniforms: cubeUniforms,
                }];

                objects.push({
                    // The layer index maps to the vertical (second) component;
                    // the other two axes are centred around the origin.
                    translation: [(col*scale)-xDim*scale/2, (layer*scale), (row*scale)-yDim*scale/2],
                    scale: scale,
                    uniforms: cubeUniforms,
                    bufferInfo: cubeBufferInfo,
                    programInfo: programInfo,
                    drawObject: cubeDrawList,
                    index: [layer, row, col],
                });
            }
        }
    }

    requestAnimationFrame(render);
}

// Timestamp (in seconds) of the previously rendered frame.
var then = 0;

/**
 * Per-frame callback: clears the canvas, builds the view-projection matrix,
 * draws every live cube, advances the simulation when the generation timer
 * has expired, and schedules the next frame.
 *
 * @param {number} time - Timestamp passed by requestAnimationFrame; it is
 *     converted from milliseconds to seconds below.
 */
function render(time) {
    time *= 0.001;
    var elapsed = time - then;
    then = time;

    twgl.resizeCanvasToDisplaySize(gl.canvas);
    gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);

    gl.enable(gl.DEPTH_TEST);
    gl.enable(gl.CULL_FACE);
    // clearColor only sets state used by later clear() calls, so it has to
    // come first. Components are clamped to [0, 1], so the former 255 meant 1.
    gl.clearColor(1, 1, 0, 0.1);
    gl.clear(gl.COLOR_BUFFER_BIT | gl.DEPTH_BUFFER_BIT);

    var fovy = 30 * Math.PI / 180;
    var projection = m4.perspective(fovy, gl.canvas.clientWidth / gl.canvas.clientHeight, 0.5, 10000);

    var eye = [cameraX, cameraY, cameraZ];
    var target = [cameraX, cameraY, 10];
    var up = [0, 1, 0];

    var camera = m4.lookAt(eye, target, up);
    var view = m4.inverse(camera);
    var viewProjection = m4.multiply(projection, view);
    viewProjection = m4.rotateX(viewProjection, phi);
    viewProjection = m4.rotateY(viewProjection, theta);
    targetTimer -= elapsed;

    var life = myGame.life;
    objects.forEach(function(obj) {
        // Skip dead cells before doing any matrix work: their uniforms are
        // only consumed by the draw call, so computing them is wasted effort.
        var cell = obj.index;
        if (life[cell[0]][cell[1]][cell[2]] !== 1) {
            return;
        }

        var uni = obj.uniforms;
        var world = uni.u_world;
        m4.identity(world);
        m4.translate(world, obj.translation, world);
        m4.scale(world, [obj.scale, obj.scale, obj.scale], world);
        m4.transpose(m4.inverse(world, uni.u_worldInverseTranspose), uni.u_worldInverseTranspose);
        m4.multiply(viewProjection, uni.u_world, uni.u_worldViewProjection);

        twgl.drawObjectList(gl, obj.drawObject);
    });

    // Advance one generation once the countdown has run out (unless paused).
    if (targetTimer <= 0 && !paused) {
        targetTimer = targetChangeInterval / speed;
        myGame.nextGen();
        setGameStatus();
        myGame.resetStatus();
    }
    requestAnimationFrame(render);
}

提前致谢。

fps 下降最有可能来自两件事:

  1. 每个 tick 执行 125k 矩阵运算的开销。
  2. 执行 125k 绘制调用的开销。

你可以看看实例化 http://blog.tojicode.com/2013/07/webgl-instancing-with.html?m=1

并可能将矩阵内容移动到着色器中

125k 个立方体非常多。典型的 AAA 游戏每帧总共通常只进行 1000 到 5000 次绘制调用。网上有对各种游戏引擎的剖析文章,其中列出了它们生成一帧所需的绘制调用次数。

这里有一个介绍了几种方法的演讲(a talk with several methods)。其中一种方法是把所有立方体放进一个巨大的网格中,并在 JavaScript 中移动它们的顶点,这样实际上只需要一次绘制调用。

如果是我,我会这样做:制作一张纹理,每个立方体对应其中一个像素。所以对于 125k 个立方体,纹理大约是 356x356,不过我可能会选择更贴合立方体尺寸的大小,比如 500x300(因为每个切片是 50x50)。每个立方体的每个顶点都带有一个属性,其 UV 指向该纹理中对应的像素。换句话说,第一个立方体的 36 个顶点使用同一个 UV(重复 36 次),第二个立方体使用一个新的 UV,同样重复 36 次,依此类推。

 // Per-vertex texture coordinate selecting this cube's pixel in the life texture.
 attribute vec2 cubeUV;

然后我可以使用 cubeUV 在纹理中查找对应的像素,从而判断该立方体应该显示还是隐藏

 // Sketch of the vertex shader: read this cube's on/off state from the
 // red channel of the life texture at the cube's UV.
 attribute vec2 cubeUV;
 uniform sampler2D lifeTexture;     

 void main() {
   float cubeOn = texture2D(lifeTexture, cubeUV).r;
 }

我可以很容易地用下面的代码把立方体裁剪掉:

   // Fragment that goes inside main(): a cube whose sampled state is below
   // 0.5 has its vertex moved outside clip space and the shader exits early.
   if (cubeOn < 0.5) {
     gl_Position = vec4(2, 2, 2, 1);  // outside clip space
     return;
   }

   // otherwise do the calcs for a cube

在这种情况下,立方体不需要移动,所以 JavaScript 每一帧要做的只是在某个 Uint8Array 中计算生命状态,然后调用

// Upload the whole life-state array as a single-channel (LUMINANCE),
// one-byte-per-pixel texture of width x height pixels.
gl.bindTexture(gl.TEXTURE_2D, lifeTexture);
gl.texImage2D(gl.TEXTURE_2D, 0, gl.LUMINANCE, width, height, 0,
              gl.LUMINANCE, gl.UNSIGNED_BYTE, lifeStatusUint8Array);

来上传纹理,并且每一帧只进行一次绘制调用。

注意:您可以在 here 看到一个实际使用此类着色器的示例,只不过该着色器(shader)读取的不是保存生命游戏状态的纹理,而是一张包含 4 秒音频数据的纹理。它还根据 vertexId 生成 cubeId,并根据 vertexId 生成立方体的顶点和法线。这会比把这些数据放在属性中慢,但它是一个根据纹理中的数据来定位或绘制立方体的示例。

// Vertex shader source. It samples the red channel of u_lifeTex at the
// vertex's cubeUV; when the sample is below 0.5 the vertex is placed at
// (20, 20, 20, 1) and the shader returns early, otherwise the position is
// transformed by u_matrix and the normal is passed on to the fragment shader.
const vs = `
attribute vec4 position;
attribute vec3 normal;
attribute vec2 cubeUV;

uniform mat4 u_matrix;
uniform sampler2D u_lifeTex;

varying vec3 v_normal;

void main() {
  float on = texture2D(u_lifeTex, cubeUV).r;
  if (on < .5) {
     gl_Position = vec4(20, 20, 20, 1);
     return;
  }
  gl_Position = u_matrix * position;  
  v_normal = normal;
}
`;

// Fragment shader source. It colours each fragment from the interpolated
// normal, remapped with * .5 + .5, with alpha fixed at 1.
const fs = `
precision mediump float;

varying vec3 v_normal;

void main() {
  gl_FragColor = vec4(v_normal * .5 + .5, 1);
}
`;

// The six 2D corner offsets (two triangles) of one square cube face.
// makeCube adds the pair to the two in-plane coordinates of each face vertex.
const oneFace = [
  [ -1, -1, ],
  [  1, -1, ],
  [ -1,  1, ],
  [ -1,  1, ],
  [  1, -1, ],
  [  1,  1, ],
];

// One-time setup: WebGL context, shader program, life texture and the
// merged vertex buffers holding every cube.
const m4 = twgl.m4;
// Uses the first <canvas> element found in the document.
const gl = document.querySelector("canvas").getContext("webgl");

// compiles shaders, links program, looks up locations
const programInfo = twgl.createProgramInfo(gl, [vs, fs]);

// Number of cubes along each edge of the grid.
const cubeSize = 50;
// CPU-side byte buffer (plus its 2D layout) with one byte per cube.
const texBuf = makeCubeTexBuffer(gl, cubeSize);
// Single-channel texture backed by texBuf; NEAREST filtering and
// CLAMP_TO_EDGE wrapping are requested. Only the width is passed here.
// NOTE(review): the height is presumably derived by twgl from the array
// length - confirm against the twgl version in use.
const tex = twgl.createTexture(gl, {
  src: texBuf.buffer,
  width: texBuf.width,
  format: gl.LUMINANCE,
  wrap: gl.CLAMP_TO_EDGE,
  minMag: gl.NEAREST,
});

// position / normal / cubeUV arrays for all cubeSize^3 cubes
const arrays = makeCubes(cubeSize, texBuf);
// calls gl.createBuffer, gl.bindBuffer, gl.bufferData for each array
const bufferInfo = twgl.createBufferInfoFromArrays(gl, arrays);

/**
 * Per-frame callback for the merged-mesh demo: orbits the camera around the
 * origin, randomly toggles some cells, re-uploads the life texture and draws
 * every cube with a single draw call, then schedules the next frame.
 *
 * @param {number} time - Timestamp passed by requestAnimationFrame; it is
 *     converted from milliseconds to seconds below.
 */
function render(time) {
  time *= 0.001; // seconds
  twgl.resizeCanvasToDisplaySize(gl.canvas);

  gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
  gl.enable(gl.DEPTH_TEST);
  // NOTE(review): back-face culling was left disabled in the original,
  // presumably because not every generated face is guaranteed to share one
  // winding - confirm before enabling gl.CULL_FACE here.

  const fov = Math.PI * .25;
  const aspect = gl.canvas.clientWidth / gl.canvas.clientHeight;
  const zNear = .01;
  const zFar  = 1000;
  const projection = m4.perspective(fov, aspect, zNear, zFar);

  // Camera orbits the origin at a distance proportional to the grid size.
  const radius = cubeSize * 2.5;
  const angle = time * .1;
  const position = [
    Math.sin(angle) * radius,
    Math.sin(angle * .7) * radius * .7,
    Math.cos(angle) * radius,
  ];
  const target = [0, 0, 0];
  const up = [0, 1, 0];
  const camera = m4.lookAt(position, target, up);

  const view = m4.inverse(camera);

  const mat = m4.multiply(projection, view);

  // do life
  // (well, randomly turn on/off cubes)
  for (let i = 0; i < 100; ++i) {
    texBuf.buffer[Math.random() * texBuf.buffer.length | 0] = Math.random() > .5 ? 255 : 0;
  }

  gl.bindTexture(gl.TEXTURE_2D, tex);
  // The buffer is tightly packed, one byte per texel. WebGL's default
  // UNPACK_ALIGNMENT of 4 would expect padded rows whenever the width is not
  // a multiple of 4, so set it explicitly instead of relying on whatever
  // earlier code left behind.
  gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
  gl.texImage2D(gl.TEXTURE_2D, 0, gl.LUMINANCE, texBuf.width, texBuf.height,
                0, gl.LUMINANCE, gl.UNSIGNED_BYTE, texBuf.buffer);

  gl.useProgram(programInfo.program);

  // calls gl.bindBuffer, gl.enableVertexAttribArray, gl.vertexAttribPointer
  twgl.setBuffersAndAttributes(gl, programInfo, bufferInfo);

  twgl.setUniforms(programInfo, {
    u_matrix: mat,
    u_lifeTex: tex,
  });

  // calls gl.drawArrays or gl.drawElements
  twgl.drawBufferInfo(gl, bufferInfo);

  requestAnimationFrame(render);
}
// Schedule the first frame; render re-schedules itself after that.
requestAnimationFrame(render);

// generate cubes
/**
 * Write the 36 vertices (6 faces x 2 triangles x 3 vertices) of one cube of
 * half-size 1 into the shared attribute arrays.
 *
 * Every triangle is emitted counter-clockwise as seen from outside the cube,
 * i.e. its geometric normal agrees with the stored normal, so the mesh also
 * renders correctly with back-face culling enabled.
 *
 * @param {number} vertOffset - Index of the first vertex to write.
 * @param {number[]} off - Cube centre as [x, y, z].
 * @param {number[]} uv - The [u, v] pair stored for every vertex of the cube.
 * @param {{position: Float32Array, normal: Float32Array, cubeUV: Float32Array}} arrays
 *     Destination arrays (3, 3 and 2 floats per vertex respectively).
 * @returns {number} The vertex index following the last one written.
 */
function makeCube(vertOffset, off, uv, arrays) {
  const positions = arrays.position;
  const normals = arrays.normal;
  const cubeUV = arrays.cubeUV;

  for (let f = 0; f < 6; ++f) {
    // Faces come in +/- pairs per axis: 0,1 -> x; 2,3 -> y; 4,5 -> z.
    const axis = f / 2 | 0;
    const sign = f % 2 ? -1 : 1;
    // The two in-plane axes, in cyclic order so that major x minor = +axis.
    const major = (axis + 1) % 3;
    const minor = (axis + 2) % 3;

    for (let i = 0; i < 6; ++i) {
      const offset2 = vertOffset * 2;
      const offset3 = vertOffset * 3;
      positions[offset3 + axis ] = off[axis]  + sign;
      // Mirroring one in-plane coordinate on the negative face flips the
      // triangle winding, keeping it counter-clockwise when seen from outside.
      positions[offset3 + major] = off[major] + oneFace[i][0] * sign;
      positions[offset3 + minor] = off[minor] + oneFace[i][1];
      normals[offset3 + axis ] = sign;
      normals[offset3 + major] = 0;
      normals[offset3 + minor] = 0;

      cubeUV[offset2 + 0] = uv[0];
      cubeUV[offset2 + 1] = uv[1];
      ++vertOffset;
    }
  }
  return vertOffset;
}

/**
 * Build the merged vertex arrays for a size x size x size grid of cubes.
 *
 * Each cube gets a world-space centre and the UV of the centre of its texel
 * in the life texture: z-slices of size x size texels are laid out left to
 * right, wrapping to a new row of slices whenever the texture width is used up.
 *
 * @param {number} size - Number of cubes along each edge.
 * @param {{width: number, height: number}} texBuf - Life texture dimensions.
 * @returns {{position: Float32Array, normal: Float32Array,
 *     cubeUV: {numComponents: number, data: Float32Array}}} Arrays in the
 *     form passed to twgl.createBufferInfoFromArrays.
 */
function makeCubes(size, texBuf) {
  const numCubes = size * size * size;
  const numVertsPerCube = 36;
  const numVerts = numCubes * numVertsPerCube;
  const slicesAcross = texBuf.width / size | 0;
  const arrays = {
    position: new Float32Array(numVerts * 3),
    normal: new Float32Array(numVerts * 3),
    cubeUV: new Float32Array(numVerts * 2),
  };

  const spacing = size * 1.2;
  // Maps a cell index to a coordinate in [-spacing, spacing]. A one-cell grid
  // is centred on the origin instead of evaluating 0 / 0 (which gives NaN).
  const cellOffset = (i) => (size > 1 ? (i / (size - 1) * 2 - 1) * spacing : 0);

  let vertOffset = 0;
  for (let z = 0; z < size; ++z) {
    const zoff = cellOffset(z);
    // Position of this z-slice inside the texture, in slice units.
    const sx = z % slicesAcross;
    const sy = z / slicesAcross | 0;
    for (let y = 0; y < size; ++y) {
      const yoff = cellOffset(y);
      for (let x = 0; x < size; ++x) {
        const xoff = cellOffset(x);
        // + 0.5 addresses the texel centre.
        const uv = [
          (sx * size + x + 0.5) / texBuf.width,
          (sy * size + y + 0.5) / texBuf.height,
        ];
        vertOffset = makeCube(vertOffset, [xoff, yoff, zoff], uv, arrays);
      }
    }
  }
  // cubeUV has 2 components per vertex; state that explicitly in the result.
  arrays.cubeUV = {
    numComponents: 2,
    data: arrays.cubeUV,
  };
  return arrays;
}

/**
 * Allocate the CPU-side buffer backing the life texture and work out its
 * 2D layout: cubeSize slices of cubeSize x cubeSize texels, placed left to
 * right and wrapped onto additional rows of slices when one row is full.
 *
 * @param {WebGLRenderingContext} gl - Context queried for MAX_TEXTURE_SIZE.
 * @param {number} cubeSize - Number of cubes along each edge (positive integer).
 * @returns {{buffer: Uint8Array, slicesAcross: number, slicesDown: number,
 *     width: number, height: number}} One byte per texel plus the layout.
 * @throws {RangeError} If cubeSize is not a positive integer or the layout
 *     does not fit within the texture size limit.
 */
function makeCubeTexBuffer(gl, cubeSize) {
  if (!Number.isInteger(cubeSize) || cubeSize < 1) {
    throw new RangeError(`cubeSize must be a positive integer, got ${cubeSize}`);
  }
  // Never go beyond 2048 texels per side, even if the device allows more.
  const maxTextureSize = Math.min(gl.getParameter(gl.MAX_TEXTURE_SIZE), 2048);
  const maxSlicesAcross = maxTextureSize / cubeSize | 0;
  const slicesAcross = Math.min(cubeSize, maxSlicesAcross);
  // Previously a slice wider than the limit gave slicesAcross === 0 and a
  // silent Infinity/NaN layout; fail loudly instead.
  if (slicesAcross < 1) {
    throw new RangeError(
      `cubeSize ${cubeSize} exceeds the texture size limit of ${maxTextureSize}`);
  }
  const slicesDown = Math.ceil(cubeSize / slicesAcross);
  const width = slicesAcross * cubeSize;
  const height = slicesDown * cubeSize;
  if (height > maxTextureSize) {
    throw new RangeError(
      `a ${width}x${height} life texture exceeds the texture size limit of ${maxTextureSize}`);
  }
  const buffer = new Uint8Array(width * height);
  return {
    buffer: buffer,
    slicesAcross: slicesAcross,
    slicesDown: slicesDown,
    width: width,
    height: height,
  };
}
/* Remove the page margin and stretch the canvas over the whole viewport. */
body { margin: 0; }
canvas { width: 100vw; height: 100vh; display: block; }
<!-- Loads the twgl 3.x full build and provides the canvas the script draws into. -->
<script src="https://twgljs.org/dist/3.x/twgl-full.min.js"></script>
<canvas></canvas> 

从下面的评论中得知,使用大型合并网格似乎比使用实例化绘图快 1.3 倍。这里有 3 个示例

  1. big mesh using texture uvs(同上)
  2. instanced using texture uvs(数据较少,着色器相同)
  3. instanced no texture(不使用纹理,生命数据放在 buffer/attribute 中)

对我来说,在我的机器上 #1 可以 60fps 处理 60x60x60 立方体 (216000),而 #2 和 #3 只能以 60fps 处理 56x56x56 立方体 (175616)。当然其他 GPUs/system/browsers 可能会有所不同。