如何从非常稀疏的纹理中快速输出点?
How can I quickly output points from a very sparse texture?
我基本上有一个 512x512x512 WebGLTexture 对象,它是 0。除了大约 3 个体素,它是 1.. 我需要尽快打印出这 3 个体素的 xyz 坐标,以进行科学与我的研究相关的计算应用程序,但我能做的最好的事情是在通过笨重的 WebGL2 方法链传递对象后使用 [parallel] 'for' 循环。有谁知道获取这些坐标的更快方法?有没有办法将 vec3 基元从 fragmentShader 推送到数组?
我一直在寻找有用的扩展,但没有成功。
我通过以下方式在每个时间步将 tbl.compressedTable 推送到一个数组:
var tbl = new Abubu.RgbaCompressedDataFromTexture({
target : env.stipt,
threshold : env.fthrsh,
compressionThresholdChannel : 'r',
});
this.timeSeries.push(time) ;
this.lastRecordedTime = time ;
this.samples.push([tbl.compressedTable]) ;
最后一行是杀手。我正在使用 class 原型:
class RgbaCompressedDataFromTexture extends RgbaCompressedData{
constructor( options={} ){
if ( options.target == undefined &&
options.texture == undefined ) return null ;
var texture ;
texture = readOption(options.target, null ) ;
texture = readOption(options.texture, options.target ) ;
var ttbond = new Float32TextureTableBond({ target : texture } ) ;
ttbond.tex2tab() ;
var table = ttbond.table ;
var width = ttbond.width ;
var height = ttbond.height ;
var op = options ;
op.width = width ;
op.height = height ;
super( table, op ) ;
this.ttbond = ttbond ;
this.texture = texture ;
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* CONSTRUCTOR ENDS
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
扩展 class:
class RgbaCompressedData{
constructor( data, options={}){
if (data == undefined){
log( 'You need to provide data source for compression!') ;
return null ;
}
this.data = new Float32Array(data) ;
this.width = readOption( options.width, data.length/4 ) ;
this.height = readOption( options.height, 1 ) ;
if ( (this.width == (data.length/4)) && height != 1 ){
this.width = (data.length/this.height)/4 ;
}
this.threshold = readOption( options.threshold, 0 ) ;
this.threshold = readOption( options.compressionThreshold,
this.threshold ) ;
this.compressionThresholdChannel
= readOption( options.channel, 'r' ) ;
switch (this.compressionThresholdChannel){
case 'r' :
this.channel = 0 ;
break ;
case 'g' :
this.channel = 1 ;
break ;
case 'b' :
this.channel = 2 ;
break ;
case 'a' :
this.channel = 3 ;
break ;
default :
this.channel = 0 ;
break ;
}
this.compThresholdData = new Float32Array(this.width*this.height) ;
/*------------------------------------------------------------------------
* count number of pixels above the compression threshold
*------------------------------------------------------------------------
*/
this.noAboveThreshold = 0 ;
for(var j=0 ; j<this.height ; j++){
for (var i=0 ; i <this.width; i++){
var indx = i + j*this.width ;
this.compThresholdData[indx]
= this.data[indx*4 + this.channel] ;
if (this.compThresholdData[indx]>this.threshold){
this.noAboveThreshold++ ;
}
}
}
/*------------------------------------------------------------------------
* allocating memory to data
*------------------------------------------------------------------------
*/
this.compressedSize =
Math.ceil( Math.sqrt( this.noAboveThreshold )) ;
this.compressedTable =
new Float32Array(this.compressedSize*this.compressedSize*4 ) ;
this.decompressionMapTable =
new Float32Array(this.compressedSize*this.compressedSize*4 ) ;
this.compressionMapTable =
new Float32Array(this.width*this.height * 4 ) ;
/*------------------------------------------------------------------------
* compress data
*------------------------------------------------------------------------
*/
var num = 0 ;
for(var j=0 ; j<this.height ; j++){
for (var i=0 ; i <this.width; i++){
var indx = i + j*this.width ;
if (this.compThresholdData[indx]>this.threshold){
var jj = Math.floor( num/this.compressedSize) ;
var ii = num - jj*this.compressedSize ;
var x = ii/this.compressedSize
+ 0.5/this.compressedSize ;
var y = jj/this.compressedSize
+ 0.5/this.compressedSize ;
var nindx = ii + jj*this.compressedSize ;
this.compressionMapTable[indx*4 ] = x ;
this.compressionMapTable[indx*4 + 1 ] = y ;
this.decompressionMapTable[nindx*4 ] =
i/this.width + 0.5/this.width ;
this.decompressionMapTable[nindx*4+1] =
j/this.height+ 0.5/this.height ;
for (var k = 0 ; k<4 ; k++){
this.compressedTable[nindx*4+k]
= this.data[indx*4+k] ;
}
num++ ;
}else{
this.compressionMapTable[indx*4 ]
= 1.-0.5/this.compressedSize ;
this.compressionMapTable[indx*4 + 1 ]
= 1.-0.5/this.compressedSize ;
}
}
}
var ii = this.compressedSize -1 ;
var jj = this.compressedSize -1 ;
var nindx = ii + jj*this.compressedSize ;
for (var k = 0 ; k<4 ; k++){
this.compressedTable[nindx*4+k] = 0. ;
}
/*------------------------------------------------------------------------
* setting compressedData, compressionMap, decompressionMap textures
*------------------------------------------------------------------------
*/
this.full = new TableTexture(
this.data,
this.width,
this.height,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
this.sparse = new TableTexture(
this.compressedTable,
this.compressedSize ,
this.compressedSize ,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
this.compressionMap = new TableTexture(
this.compressionMapTable,
this.width,
this.height ,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
this.decompressionMap = new TableTexture(
this.decompressionMapTable ,
this.compressedSize ,
this.compressedSize ,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* CONSTRUCTOR ENDS
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
并利用以下 class:
class Float32TextureTableBond{
/*------------------------------------------------------------------------
* constructor
*------------------------------------------------------------------------
*/
constructor( options={}){
if ( options.target == undefined && options.texture == undefined ){
return null ;
} ;
this.texture = readOptions( options.target , null ) ;
this.texture = readOptions( options.texture, this.target ) ;
this.framebuffer = gl.createFramebuffer() ;
gl.bindFramebuffer( gl.READ_FRAMEBUFFER, this.framebuffer) ;
gl.framebufferTexture2D(gl.READ_FRAMEBUFFER, gl.COLOR_ATTACHMENT0,
gl.TEXTURE_2D,
this.target.texture, 0 ) ;
gl.readBuffer( gl.COLOR_ATTACHMENT0 ) ;
this.canRead = (
gl.checkFramebufferStatus(gl.READ_FRAMEBUFFER)
== gl.FRAMEBUFFER_COMPLETE
) ;
gl.bindFramebuffer( gl.READ_FRAMEBUFFER, null) ;
this.width = this.target.width ;
this.height = this.target.height ;
this.table = readOption(options.table,
new Float32Array(this.width*this.height*4 ) ) ;
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* CONSTRUCTOR ENDS
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
没有错误信息,输出正确。当我开始记录数据时,我的模拟速度减慢到一只昏昏欲睡的乌龟的速度。
Is there a way to push vec3 primitives to an array from a fragmentShader?
是的,使用着色器存储缓冲区。大致如下:
layout(std430, binding = 0) buffer Output
{
uvec3 out_vals[];
};
这将需要绑定到一个足够大的缓冲区来存储返回的参数(从我的头脑来看,我认为 std430
允许 vec3
输出类型,但我也有这种奇怪的感觉,输出类型可能需要是 uint
,所以你可能需要一次写 3 个值——遗憾的是记不太清了)
然后您需要确定要写入的输出数组中元素的索引。为此,您可以使用原子计数器缓冲区来确定计数器,例如
layout(binding = 0, offset = 0) uniform atomic_uint out_count;
然后稍后在你的着色器中,从 gl_GlobalInvocatonID
(如果使用计算着色器)或 gl_SamplePosition
为片段着色器生成你的索引,你应该能够写出数据:
uint index = atomicCounterIncrement(out_count);
out_vals[index] = gl_GlobalInvocatonID;
可以直接在着色器存储缓冲区上使用原子操作,但我看到的大多数建议都建议改用 ACB。
我还没有真正考虑清楚,但这里有一些代码可能会给您一些想法。
问题是无法在 WebGL2 AFAIK 中有条件地输出数据。您可以在片段着色器中丢弃,但这在这里似乎没有帮助。
因此,无论如何,首先要考虑的是着色器基于输出进行并行化。如果要绘制 32k 像素,则 GPU 可以并行化 32k 个东西。如果有 1 个像素检查 32k 个东西,GPU 就没有什么可以并行化的了。
所以,这里有一个想法,将 3D 纹理分成 NxNxN 大的单元格,在每个单元格中搜索体素。如果一个单元格是 32x32x32,那么对于 512x512x512 输入,有 4096 个东西要并行化。对于每个单元格,遍历单元格并对匹配项的位置求和
sum = vec4(0)
for each voxel in cell
if voxel === 1
sum += vec4(positionOfVoxel, 1);
outColor = sum;
结果是,如果该单元格中只有 1 个匹配项,则 sum.xyz 将包含该位置,而 sum.w 将为 1。如果有多个匹配项 sum.w将 > 1
下面的代码制作了一个 4096x1 的纹理并为其渲染了一个四边形。它使用 gl_FragCoord.x
来计算每个正在渲染的像素对应于哪个单元格,并对相应单元格的结果求和。
然后它使用 readPixels 读取结果并打印出来。理想情况下,我希望 GPU 本身能够计算出结果,但考虑到你不能有条件地丢弃我没有任何想法。
对于只有一个结果的单元格,打印结果。对于具有多个结果的单元格,另一个着色器扫描单元格。我们知道特定单元格中有多少结果,因此我们可以按 1 个像素渲染 numResults。然后着色器将遍历单元格并只查看它找到的第 N 个结果
int idOfResultWeWant = int(gl_FragCoord.x)
int resultId = 0
for (z...) {
for (y...) {
for (x...) {
if (voxel) {
if (resultId === idOfResultWeWant) {
outColor = position
}
++resultId
}
}
}
下面的代码是惰性的,并且使用一维结果纹理,这意味着它可以处理的最多单元格是 gl.getParameter(gl.MAX_TEXTURE_SIZE)
。对于更大的尺寸,它必须稍微改变一下。
不知道这是不是最快的方法,甚至是最快的方法,但是基于渲染内容的并行概念很重要,而且可以将问题分成更小的部分。
好像使用 16x16x16 单元可能更好,也许我们应该通过将单元本身细分为更小的单元来再次使用第一个着色器而不是第二个着色器。
function main() {
const gl = document.createElement('canvas').getContext('webgl2');
if (!gl) {
return alert('need webgl2');
}
const ext = gl.getExtension('EXT_color_buffer_float');
if (!ext) {
return alert('need EXT_color_buffer_float');
}
const size = 512;
const cellSize = 32;
const cellsPer = size / cellSize;
const numCells = (size * size * size) / (cellSize * cellSize * cellSize);
const dataTexture = twgl.createTexture(gl, {
target: gl.TEXTURE_3D,
width: size,
height: size,
depth: size,
minMag: gl.NEAREST,
internalFormat: gl.R8,
auto: false,
});
function setData(x, y, z) {
log('set voxel:', x, y, z);
gl.texSubImage3D(
gl.TEXTURE_3D, 0, x, y, z, 1, 1, 1,
gl.RED, gl.UNSIGNED_BYTE, new Uint8Array([255]));
}
for (let i = 0; i < 3; ++i) {
const x = randInt(size);
const y = randInt(size);
const z = randInt(size);
setData(x, y, z);
}
setData(128, 267, 234);
setData(128 + 4, 267, 234);
setData(128 + 9, 267, 234);
const cellVS = `#version 300 es
in vec4 position;
void main() {
gl_Position = position;
}
`;
const cellFS = `#version 300 es
precision highp float;
uniform highp sampler3D data;
uniform int cellSize;
out vec4 outColor;
void main() {
// really should use 2D but I'm lazy
int ndx = int(gl_FragCoord.x);
// assumes equal sides
int size = textureSize(data, 0).x;
int cellsPer = size / cellSize;
int cellZ = ndx / cellsPer / cellsPer;
int cellY = ndx / cellsPer % cellsPer;
int cellX = ndx % cellsPer;
ivec3 cell = ivec3(cellX, cellY, cellZ) * cellSize;
vec4 sum = vec4(0);
for (int z = 0; z < cellSize; ++z) {
for (int y = 0; y < cellSize; ++y) {
for (int x = 0; x < cellSize; ++x) {
ivec3 pos = cell + ivec3(x, y, z);
// assumes data is 0 or 1
float occupied = texelFetch(
data,
pos,
0).r;
sum += vec4(pos, 1) * occupied;
}
}
}
outColor = sum;
}
`;
const cellScanFS = `#version 300 es
precision highp float;
uniform highp sampler3D data;
uniform int cellSize;
uniform ivec3 cell; // offset into cell
out vec4 outColor;
void main() {
// really should use 2D but I'm lazy
int idWeWant = int(gl_FragCoord.x);
// assumes equal sides
int size = textureSize(data, 0).x;
int cellsPer = size / cellSize;
vec4 result = vec4(0);
int id = 0;
for (int z = 0; z < cellSize; ++z) {
for (int y = 0; y < cellSize; ++y) {
for (int x = 0; x < cellSize; ++x) {
ivec3 pos = cell + ivec3(x, y, z);
float occupied = texelFetch(
data,
pos,
0).r;
if (occupied > 0.0) {
if (id == idWeWant) {
result = vec4(pos, 1);
}
++id;
}
}
}
}
outColor = result;
}
`;
const cellProgramInfo = twgl.createProgramInfo(gl, [cellVS, cellFS]);
const cellScanProgramInfo = twgl.createProgramInfo(gl, [cellVS, cellScanFS]);
const quadBufferInfo = twgl.primitives.createXYQuadBufferInfo(gl, 2);
// as long as numCells is less than the max
// texture dimensions we can use a
// numCells by 1 result texture.
// If numCells is > max texture dimension
// we'd need to adjust the code to use
// a 2d result texture.
const cellResultWidth = numCells;
const cellResultHeight = 1;
const cellResultFBI = twgl.createFramebufferInfo(gl, [
{ internalFormat: gl.RGBA32F, minMag: gl.NEAREST }
], cellResultWidth, cellResultHeight);
twgl.bindFramebufferInfo(gl, cellResultFBI);
twgl.setBuffersAndAttributes(gl, cellProgramInfo, quadBufferInfo);
gl.useProgram(cellProgramInfo.program);
twgl.setUniforms(cellProgramInfo, {
cellSize,
data: dataTexture,
});
// draw the quad
twgl.drawBufferInfo(gl, quadBufferInfo);
const data = new Float32Array(numCells * 4);
gl.readPixels(0, 0, numCells, 1, gl.RGBA, gl.FLOAT, data);
gl.useProgram(cellScanProgramInfo.program);
{
for (let i = 0; i < numCells; ++i) {
const off = i * 4;
const numResultsInCell = data[off + 3];
if (numResultsInCell) {
if (numResultsInCell === 1) {
log('result at: ', ...data.slice(off, off + 3));
} else {
getResultsForCell(i, numResultsInCell);
}
}
}
}
function getResultsForCell(i, numResultsInCell) {
const cellZ = (i / cellsPer | 0) / cellsPer | 0;
const cellY = (i / cellsPer | 0) % cellsPer;
const cellX = i % cellsPer;
twgl.setUniforms(cellScanProgramInfo, {
cellSize,
data: dataTexture,
cell: [cellX * cellSize, cellY * cellSize, cellZ * cellSize],
});
twgl.drawBufferInfo(gl, quadBufferInfo);
// note: cellResultsFBI is still bound. It's 4096x1
// so we can only get up to 4096 results without switching to
// a 2D texture
gl.viewport(0, 0, numResultsInCell, 1);
const result = new Float32Array(numResultsInCell * 4);
gl.readPixels(0, 0, numResultsInCell, 1, gl.RGBA, gl.FLOAT, result);
for (let j = 0; j < numResultsInCell; ++j) {
const off = j * 4;
log('result at:', ...result.slice(off, off + 3));
}
}
function randInt(min, max) {
return Math.floor(rand(min, max));
}
function rand(min, max) {
if (max === undefined) {
max = min;
min = 0;
}
return Math.random() * (max - min) + min;
}
function log(...args) {
const elem = document.createElement('pre');
elem.textContent = [...args].join(' ');
document.body.appendChild(elem);
}
}
main();
pre { margin: 0; }
<script src="https://twgljs.org/dist/4.x/twgl-full.min.js"></script>
我基本上有一个 512x512x512 WebGLTexture 对象,它是 0。除了大约 3 个体素,它是 1.. 我需要尽快打印出这 3 个体素的 xyz 坐标,以进行科学与我的研究相关的计算应用程序,但我能做的最好的事情是在通过笨重的 WebGL2 方法链传递对象后使用 [parallel] 'for' 循环。有谁知道获取这些坐标的更快方法?有没有办法将 vec3 基元从 fragmentShader 推送到数组?
我一直在寻找有用的扩展,但没有成功。
我通过以下方式在每个时间步将 tbl.compressedTable 推送到一个数组:
var tbl = new Abubu.RgbaCompressedDataFromTexture({
target : env.stipt,
threshold : env.fthrsh,
compressionThresholdChannel : 'r',
});
this.timeSeries.push(time) ;
this.lastRecordedTime = time ;
this.samples.push([tbl.compressedTable]) ;
最后一行是杀手。我正在使用 class 原型:
class RgbaCompressedDataFromTexture extends RgbaCompressedData{
constructor( options={} ){
if ( options.target == undefined &&
options.texture == undefined ) return null ;
var texture ;
texture = readOption(options.target, null ) ;
texture = readOption(options.texture, options.target ) ;
var ttbond = new Float32TextureTableBond({ target : texture } ) ;
ttbond.tex2tab() ;
var table = ttbond.table ;
var width = ttbond.width ;
var height = ttbond.height ;
var op = options ;
op.width = width ;
op.height = height ;
super( table, op ) ;
this.ttbond = ttbond ;
this.texture = texture ;
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* CONSTRUCTOR ENDS
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
扩展 class:
class RgbaCompressedData{
constructor( data, options={}){
if (data == undefined){
log( 'You need to provide data source for compression!') ;
return null ;
}
this.data = new Float32Array(data) ;
this.width = readOption( options.width, data.length/4 ) ;
this.height = readOption( options.height, 1 ) ;
if ( (this.width == (data.length/4)) && height != 1 ){
this.width = (data.length/this.height)/4 ;
}
this.threshold = readOption( options.threshold, 0 ) ;
this.threshold = readOption( options.compressionThreshold,
this.threshold ) ;
this.compressionThresholdChannel
= readOption( options.channel, 'r' ) ;
switch (this.compressionThresholdChannel){
case 'r' :
this.channel = 0 ;
break ;
case 'g' :
this.channel = 1 ;
break ;
case 'b' :
this.channel = 2 ;
break ;
case 'a' :
this.channel = 3 ;
break ;
default :
this.channel = 0 ;
break ;
}
this.compThresholdData = new Float32Array(this.width*this.height) ;
/*------------------------------------------------------------------------
* count number of pixels above the compression threshold
*------------------------------------------------------------------------
*/
this.noAboveThreshold = 0 ;
for(var j=0 ; j<this.height ; j++){
for (var i=0 ; i <this.width; i++){
var indx = i + j*this.width ;
this.compThresholdData[indx]
= this.data[indx*4 + this.channel] ;
if (this.compThresholdData[indx]>this.threshold){
this.noAboveThreshold++ ;
}
}
}
/*------------------------------------------------------------------------
* allocating memory to data
*------------------------------------------------------------------------
*/
this.compressedSize =
Math.ceil( Math.sqrt( this.noAboveThreshold )) ;
this.compressedTable =
new Float32Array(this.compressedSize*this.compressedSize*4 ) ;
this.decompressionMapTable =
new Float32Array(this.compressedSize*this.compressedSize*4 ) ;
this.compressionMapTable =
new Float32Array(this.width*this.height * 4 ) ;
/*------------------------------------------------------------------------
* compress data
*------------------------------------------------------------------------
*/
var num = 0 ;
for(var j=0 ; j<this.height ; j++){
for (var i=0 ; i <this.width; i++){
var indx = i + j*this.width ;
if (this.compThresholdData[indx]>this.threshold){
var jj = Math.floor( num/this.compressedSize) ;
var ii = num - jj*this.compressedSize ;
var x = ii/this.compressedSize
+ 0.5/this.compressedSize ;
var y = jj/this.compressedSize
+ 0.5/this.compressedSize ;
var nindx = ii + jj*this.compressedSize ;
this.compressionMapTable[indx*4 ] = x ;
this.compressionMapTable[indx*4 + 1 ] = y ;
this.decompressionMapTable[nindx*4 ] =
i/this.width + 0.5/this.width ;
this.decompressionMapTable[nindx*4+1] =
j/this.height+ 0.5/this.height ;
for (var k = 0 ; k<4 ; k++){
this.compressedTable[nindx*4+k]
= this.data[indx*4+k] ;
}
num++ ;
}else{
this.compressionMapTable[indx*4 ]
= 1.-0.5/this.compressedSize ;
this.compressionMapTable[indx*4 + 1 ]
= 1.-0.5/this.compressedSize ;
}
}
}
var ii = this.compressedSize -1 ;
var jj = this.compressedSize -1 ;
var nindx = ii + jj*this.compressedSize ;
for (var k = 0 ; k<4 ; k++){
this.compressedTable[nindx*4+k] = 0. ;
}
/*------------------------------------------------------------------------
* setting compressedData, compressionMap, decompressionMap textures
*------------------------------------------------------------------------
*/
this.full = new TableTexture(
this.data,
this.width,
this.height,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
this.sparse = new TableTexture(
this.compressedTable,
this.compressedSize ,
this.compressedSize ,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
this.compressionMap = new TableTexture(
this.compressionMapTable,
this.width,
this.height ,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
this.decompressionMap = new TableTexture(
this.decompressionMapTable ,
this.compressedSize ,
this.compressedSize ,
{
minFilter : 'nearest' ,
magFilter : 'nearest'
}
) ;
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* CONSTRUCTOR ENDS
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
并利用以下 class:
class Float32TextureTableBond{
/*------------------------------------------------------------------------
* constructor
*------------------------------------------------------------------------
*/
constructor( options={}){
if ( options.target == undefined && options.texture == undefined ){
return null ;
} ;
this.texture = readOptions( options.target , null ) ;
this.texture = readOptions( options.texture, this.target ) ;
this.framebuffer = gl.createFramebuffer() ;
gl.bindFramebuffer( gl.READ_FRAMEBUFFER, this.framebuffer) ;
gl.framebufferTexture2D(gl.READ_FRAMEBUFFER, gl.COLOR_ATTACHMENT0,
gl.TEXTURE_2D,
this.target.texture, 0 ) ;
gl.readBuffer( gl.COLOR_ATTACHMENT0 ) ;
this.canRead = (
gl.checkFramebufferStatus(gl.READ_FRAMEBUFFER)
== gl.FRAMEBUFFER_COMPLETE
) ;
gl.bindFramebuffer( gl.READ_FRAMEBUFFER, null) ;
this.width = this.target.width ;
this.height = this.target.height ;
this.table = readOption(options.table,
new Float32Array(this.width*this.height*4 ) ) ;
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* CONSTRUCTOR ENDS
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
没有错误信息,输出正确。当我开始记录数据时,我的模拟速度减慢到一只昏昏欲睡的乌龟的速度。
Is there a way to push vec3 primitives to an array from a fragmentShader?
是的,使用着色器存储缓冲区。大致如下:
layout(std430, binding = 0) buffer Output
{
uvec3 out_vals[];
};
这将需要绑定到一个足够大的缓冲区来存储返回的参数(从我的头脑来看,我认为 std430
允许 vec3
输出类型,但我也有这种奇怪的感觉,输出类型可能需要是 uint
,所以你可能需要一次写 3 个值——遗憾的是记不太清了)
然后您需要确定要写入的输出数组中元素的索引。为此,您可以使用原子计数器缓冲区来确定计数器,例如
layout(binding = 0, offset = 0) uniform atomic_uint out_count;
然后稍后在你的着色器中,从 gl_GlobalInvocatonID
(如果使用计算着色器)或 gl_SamplePosition
为片段着色器生成你的索引,你应该能够写出数据:
uint index = atomicCounterIncrement(out_count);
out_vals[index] = gl_GlobalInvocatonID;
可以直接在着色器存储缓冲区上使用原子操作,但我看到的大多数建议都建议改用 ACB。
我还没有真正考虑清楚,但这里有一些代码可能会给您一些想法。
问题是无法在 WebGL2 AFAIK 中有条件地输出数据。您可以在片段着色器中丢弃,但这在这里似乎没有帮助。
因此,无论如何,首先要考虑的是着色器基于输出进行并行化。如果要绘制 32k 像素,则 GPU 可以并行化 32k 个东西。如果有 1 个像素检查 32k 个东西,GPU 就没有什么可以并行化的了。
所以,这里有一个想法,将 3D 纹理分成 NxNxN 大的单元格,在每个单元格中搜索体素。如果一个单元格是 32x32x32,那么对于 512x512x512 输入,有 4096 个东西要并行化。对于每个单元格,遍历单元格并对匹配项的位置求和
sum = vec4(0)
for each voxel in cell
if voxel === 1
sum += vec4(positionOfVoxel, 1);
outColor = sum;
结果是,如果该单元格中只有 1 个匹配项,则 sum.xyz 将包含该位置,而 sum.w 将为 1。如果有多个匹配项 sum.w将 > 1
下面的代码制作了一个 4096x1 的纹理并为其渲染了一个四边形。它使用 gl_FragCoord.x
来计算每个正在渲染的像素对应于哪个单元格,并对相应单元格的结果求和。
然后它使用 readPixels 读取结果并打印出来。理想情况下,我希望 GPU 本身能够计算出结果,但考虑到你不能有条件地丢弃我没有任何想法。
对于只有一个结果的单元格,打印结果。对于具有多个结果的单元格,另一个着色器扫描单元格。我们知道特定单元格中有多少结果,因此我们可以按 1 个像素渲染 numResults。然后着色器将遍历单元格并只查看它找到的第 N 个结果
int idOfResultWeWant = int(gl_FragCoord.x)
int resultId = 0
for (z...) {
for (y...) {
for (x...) {
if (voxel) {
if (resultId === idOfResultWeWant) {
outColor = position
}
++resultId
}
}
}
下面的代码是惰性的,并且使用一维结果纹理,这意味着它可以处理的最多单元格是 gl.getParameter(gl.MAX_TEXTURE_SIZE)
。对于更大的尺寸,它必须稍微改变一下。
不知道这是不是最快的方法,甚至是最快的方法,但是基于渲染内容的并行概念很重要,而且可以将问题分成更小的部分。
好像使用 16x16x16 单元可能更好,也许我们应该通过将单元本身细分为更小的单元来再次使用第一个着色器而不是第二个着色器。
function main() {
const gl = document.createElement('canvas').getContext('webgl2');
if (!gl) {
return alert('need webgl2');
}
const ext = gl.getExtension('EXT_color_buffer_float');
if (!ext) {
return alert('need EXT_color_buffer_float');
}
const size = 512;
const cellSize = 32;
const cellsPer = size / cellSize;
const numCells = (size * size * size) / (cellSize * cellSize * cellSize);
const dataTexture = twgl.createTexture(gl, {
target: gl.TEXTURE_3D,
width: size,
height: size,
depth: size,
minMag: gl.NEAREST,
internalFormat: gl.R8,
auto: false,
});
function setData(x, y, z) {
log('set voxel:', x, y, z);
gl.texSubImage3D(
gl.TEXTURE_3D, 0, x, y, z, 1, 1, 1,
gl.RED, gl.UNSIGNED_BYTE, new Uint8Array([255]));
}
for (let i = 0; i < 3; ++i) {
const x = randInt(size);
const y = randInt(size);
const z = randInt(size);
setData(x, y, z);
}
setData(128, 267, 234);
setData(128 + 4, 267, 234);
setData(128 + 9, 267, 234);
const cellVS = `#version 300 es
in vec4 position;
void main() {
gl_Position = position;
}
`;
const cellFS = `#version 300 es
precision highp float;
uniform highp sampler3D data;
uniform int cellSize;
out vec4 outColor;
void main() {
// really should use 2D but I'm lazy
int ndx = int(gl_FragCoord.x);
// assumes equal sides
int size = textureSize(data, 0).x;
int cellsPer = size / cellSize;
int cellZ = ndx / cellsPer / cellsPer;
int cellY = ndx / cellsPer % cellsPer;
int cellX = ndx % cellsPer;
ivec3 cell = ivec3(cellX, cellY, cellZ) * cellSize;
vec4 sum = vec4(0);
for (int z = 0; z < cellSize; ++z) {
for (int y = 0; y < cellSize; ++y) {
for (int x = 0; x < cellSize; ++x) {
ivec3 pos = cell + ivec3(x, y, z);
// assumes data is 0 or 1
float occupied = texelFetch(
data,
pos,
0).r;
sum += vec4(pos, 1) * occupied;
}
}
}
outColor = sum;
}
`;
const cellScanFS = `#version 300 es
precision highp float;
uniform highp sampler3D data;
uniform int cellSize;
uniform ivec3 cell; // offset into cell
out vec4 outColor;
void main() {
// really should use 2D but I'm lazy
int idWeWant = int(gl_FragCoord.x);
// assumes equal sides
int size = textureSize(data, 0).x;
int cellsPer = size / cellSize;
vec4 result = vec4(0);
int id = 0;
for (int z = 0; z < cellSize; ++z) {
for (int y = 0; y < cellSize; ++y) {
for (int x = 0; x < cellSize; ++x) {
ivec3 pos = cell + ivec3(x, y, z);
float occupied = texelFetch(
data,
pos,
0).r;
if (occupied > 0.0) {
if (id == idWeWant) {
result = vec4(pos, 1);
}
++id;
}
}
}
}
outColor = result;
}
`;
const cellProgramInfo = twgl.createProgramInfo(gl, [cellVS, cellFS]);
const cellScanProgramInfo = twgl.createProgramInfo(gl, [cellVS, cellScanFS]);
const quadBufferInfo = twgl.primitives.createXYQuadBufferInfo(gl, 2);
// as long as numCells is less than the max
// texture dimensions we can use a
// numCells by 1 result texture.
// If numCells is > max texture dimension
// we'd need to adjust the code to use
// a 2d result texture.
const cellResultWidth = numCells;
const cellResultHeight = 1;
const cellResultFBI = twgl.createFramebufferInfo(gl, [
{ internalFormat: gl.RGBA32F, minMag: gl.NEAREST }
], cellResultWidth, cellResultHeight);
twgl.bindFramebufferInfo(gl, cellResultFBI);
twgl.setBuffersAndAttributes(gl, cellProgramInfo, quadBufferInfo);
gl.useProgram(cellProgramInfo.program);
twgl.setUniforms(cellProgramInfo, {
cellSize,
data: dataTexture,
});
// draw the quad
twgl.drawBufferInfo(gl, quadBufferInfo);
const data = new Float32Array(numCells * 4);
gl.readPixels(0, 0, numCells, 1, gl.RGBA, gl.FLOAT, data);
gl.useProgram(cellScanProgramInfo.program);
{
for (let i = 0; i < numCells; ++i) {
const off = i * 4;
const numResultsInCell = data[off + 3];
if (numResultsInCell) {
if (numResultsInCell === 1) {
log('result at: ', ...data.slice(off, off + 3));
} else {
getResultsForCell(i, numResultsInCell);
}
}
}
}
function getResultsForCell(i, numResultsInCell) {
const cellZ = (i / cellsPer | 0) / cellsPer | 0;
const cellY = (i / cellsPer | 0) % cellsPer;
const cellX = i % cellsPer;
twgl.setUniforms(cellScanProgramInfo, {
cellSize,
data: dataTexture,
cell: [cellX * cellSize, cellY * cellSize, cellZ * cellSize],
});
twgl.drawBufferInfo(gl, quadBufferInfo);
// note: cellResultsFBI is still bound. It's 4096x1
// so we can only get up to 4096 results without switching to
// a 2D texture
gl.viewport(0, 0, numResultsInCell, 1);
const result = new Float32Array(numResultsInCell * 4);
gl.readPixels(0, 0, numResultsInCell, 1, gl.RGBA, gl.FLOAT, result);
for (let j = 0; j < numResultsInCell; ++j) {
const off = j * 4;
log('result at:', ...result.slice(off, off + 3));
}
}
function randInt(min, max) {
return Math.floor(rand(min, max));
}
function rand(min, max) {
if (max === undefined) {
max = min;
min = 0;
}
return Math.random() * (max - min) + min;
}
function log(...args) {
const elem = document.createElement('pre');
elem.textContent = [...args].join(' ');
document.body.appendChild(elem);
}
}
main();
pre { margin: 0; }
<script src="https://twgljs.org/dist/4.x/twgl-full.min.js"></script>