openCL 是否支持向量作为内核参数?
does openCL support vectors as kernel arguments?
我一直在想办法把这段代码改写成 openCL 内核。转换本身并不算特别困难(去掉 glm 类型和位掩码即可),但我卡住的地方是如何把 _triangles
、_uvs
、_indices
, 和 _normals
到内核。 openCL 中是否有向量的内置功能?
如果没有任何矢量支持,我看到的唯一选择是为我需要返回的 3 个变量中的每一个传递 4 个 float3
类型的数组(_triangles
、_uvs
和 _normals
) 和 _indices
的 2 个 float3
数组。然后在 CPU 端把这些数组转换回向量,并收缩到实际使用的大小。我不太确定向内核传递这么多内存缓冲区是否高效,因为这意味着有 14 个数组要传入内核并从内核读回。我的其他方案在并行化之后都无法工作。有没有办法简化这个方案,或者有没有更好的方案?
我遇到问题的函数是_addRectangle
,_createMesh
是它将在内核中组合的函数。
/**
 * Appends one quad (four vertices, two triangles) to the chunk's mesh buffers.
 *
 * Pushes four entries to _triangles, _normals and _uvs, two index triples to
 * _indices, and advances nrOfIndices by 4.
 *
 * @param center  Centre point of the quad.
 * @param height  Full edge vector along the quad's first side.
 * @param width   Full edge vector along the quad's second side.
 * @param tex_num Tile index in the texture atlas; shifts U by tex_num / _tex_atlas_width.
 * @param LOD     Not read by this function; kept so existing callers stay valid.
 */
void Chunk::_addRectangle(glm::vec3 center, glm::vec3 height, glm::vec3 width, unsigned tex_num, cl_uint LOD)
{
    // Half-extents are shared by all four corners.
    const glm::vec3 half_height = height * 0.5f;
    const glm::vec3 half_width = width * 0.5f;

    const glm::vec3 corner1 = center - half_height - half_width;
    const glm::vec3 corner2 = center - half_height + half_width;
    const glm::vec3 corner3 = center + half_height + half_width;
    const glm::vec3 corner4 = center + half_height - half_width;

    // Cross product of the full edge vectors; it is not normalized here.
    // NOTE(review): for edges longer than 1 this is not unit length - confirm
    // whether consumers of _normals normalize it.
    const glm::vec3 normal = glm::cross(height, width);

    // Every face orientation uses the same tile mapping, so no branch on the
    // normal is needed: U spans one atlas tile, V spans 0..1.
    const float tile_u = static_cast<float>(1.0 / _tex_atlas_width);
    const float u_shift = static_cast<float>((1.0 / double(_tex_atlas_width)) * tex_num);

    const glm::vec2 uv1(tile_u + u_shift, 1.0f);
    const glm::vec2 uv2(tile_u + u_shift, 0.0f);
    const glm::vec2 uv3(u_shift, 0.0f);
    const glm::vec2 uv4(u_shift, 1.0f);

    // Vertex order within this quad: 0 = corner3, 1 = corner2, 2 = corner1, 3 = corner4.
    _triangles.push_back(corner3);
    _triangles.push_back(corner2);
    _triangles.push_back(corner1);
    _triangles.push_back(corner4);

    for (int i = 0; i < 4; ++i)
        _normals.push_back(normal);

    _uvs.push_back(uv1);
    _uvs.push_back(uv2);
    _uvs.push_back(uv3);
    _uvs.push_back(uv4);

    // Two triangles sharing the 0-2 diagonal.
    _indices.push_back(glm::ivec3(nrOfIndices + 0, nrOfIndices + 1, nrOfIndices + 2));
    _indices.push_back(glm::ivec3(nrOfIndices + 2, nrOfIndices + 3, nrOfIndices + 0));
    nrOfIndices += 4;
}
/**
 * Emits the exposed faces of every solid cell in the chunk.
 *
 * Walks the chunk interior in steps of LOD. For each cell that is not
 * BLOCK::AIR, one quad is added via _addRectangle for every neighbour
 * (LOD cells away along each axis) that is BLOCK::AIR.
 *
 * @param pos           Chunk coordinate; multiplied by (chunkSize - 1) to place the quads.
 * @param landmap_flags Flattened block grid, indexed as x + y*stride + z*stride*stride
 *                      with stride = chunkSize + 2*LOD. The loops start at LOD and
 *                      read neighbours at +/-LOD, so the grid must include a
 *                      LOD-wide border on every side.
 * @param LOD           Sampling step in cells. Non-positive values produce no geometry.
 */
void Chunk::_createMesh(glm::ivec3 pos, int landmap_flags[96 * 96 * 96], cl_int LOD)
{
    // A non-positive step would never advance the loops below.
    if (LOD <= 0)
        return;

    // Row length of the flattened grid, including the border on both sides.
    const auto stride = chunkSize + (2 * LOD);

    // Reads the block stored at grid cell (bx, by, bz).
    const auto block_at = [&](int bx, int by, int bz) {
        return landmap_flags[bx + by * stride + bz * stride * stride];
    };

    // Position of a cell's centre along one axis.
    const auto cell_center = [&](int cell, int chunk_coord) -> float {
        return cell + ((chunkSize - 1) * chunk_coord) - (chunkSize / 2) - (float(LOD - 1) / 2);
    };

    // Distance from a cell's centre to each of its faces.
    const float half = float(LOD) / 2;

    for (int x = LOD; x < chunkSize + LOD; x += LOD) {
        for (int y = LOD; y < chunkSize + LOD; y += LOD) {
            for (int z = LOD; z < chunkSize + LOD; z += LOD) {
                const int block = block_at(x, y, z);
                if (block == BLOCK::AIR)
                    continue;

                // Bitmask of exposed faces. It is consumed in this same
                // iteration, so a local is enough and no heap buffer is needed.
                // Mapping: -x South, +x North, -y Down, +y Up, -z West, +z East.
                std::byte faces{0};
                if (block_at(x - LOD, y, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::South);
                if (block_at(x + LOD, y, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::North);
                if (block_at(x, y - LOD, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::Down);
                if (block_at(x, y + LOD, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::Up);
                if (block_at(x, y, z - LOD) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::West);
                if (block_at(x, y, z + LOD) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::East);

                // Fully enclosed cell: nothing to draw.
                if (faces == std::byte{0})
                    continue;

                const auto exposed = [faces](std::byte bit) {
                    return (faces & bit) != std::byte{0};
                };

                const float cx = cell_center(x, pos.x);
                const float cy = cell_center(y, pos.y);
                const float cz = cell_center(z, pos.z);

                // Each quad sits half a cell from the centre along its axis.
                // The height/width vectors fix the quad's orientation.
                if (exposed(static_cast<std::byte>(Direction::North)))
                    _addRectangle(glm::vec3(cx + half, cy, cz),
                                  glm::vec3(0, LOD, 0), glm::vec3(0, 0, -LOD), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::East)))
                    _addRectangle(glm::vec3(cx, cy, cz + half),
                                  glm::vec3(0, LOD, 0), glm::vec3(LOD, 0, 0), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::South)))
                    _addRectangle(glm::vec3(cx - half, cy, cz),
                                  glm::vec3(0, LOD, 0), glm::vec3(0, 0, LOD), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::West)))
                    _addRectangle(glm::vec3(cx, cy, cz - half),
                                  glm::vec3(0, LOD, 0), glm::vec3(-LOD, 0, 0), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::Up)))
                    _addRectangle(glm::vec3(cx, cy + half, cz),
                                  glm::vec3(LOD, 0, 0), glm::vec3(0, 0, LOD), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::Down)))
                    _addRectangle(glm::vec3(cx, cy - half, cz),
                                  glm::vec3(LOD, 0, 0), glm::vec3(0, 0, -LOD), block, LOD);
            }
        }
    }
}
谢谢!
编辑:一种可能更有效的存储数据的方法是采用多种 float4 类型。
例如:
// Global ID of this work-item in dimension 0; used as the output slot.
const uint n = get_global_id(0);
float4 triangles{1, 2, 3, 4}; // placeholder for the values calculated for each vertex
// `list` is declared elsewhere as a float4 array (float4 list[size];), "from constructor" per the author.
list[n] = triangles;
在 OpenCL
中有像 float4
等向量类型。更多关于这方面的内容可以阅读 here。
OpenCL 内核语言
中没有类似 C++ std::vector
这样的容器,所以数据必须以 C 风格的数组传递。
查看问题中的代码部分,_triangles
、_uvs
、_indices
和 _normals
将填充结果,因此适当的缓冲区将需要分配并传递给内核,以便存储结果并在内核完成工作后读回它们。
只要内核的计算量足够大,传递 14 个数组应该不成问题;从代码来看很可能如此,因为其中有嵌套循环(代码中是三重循环)。不过这在很大程度上取决于 chunkSize
和 LOD
变量的大小。您需要尝试一下,看看它的性能如何。
将数据复制回 std::vector
应该没有任何问题 - 只需使用 memcpy
.
我一直在想办法把这段代码改写成 openCL 内核。转换本身并不算特别困难(去掉 glm 类型和位掩码即可),但我卡住的地方是如何把 _triangles
、_uvs
、_indices
, 和 _normals
到内核。 openCL 中是否有向量的内置功能?
如果没有任何矢量支持,我看到的唯一选择是为我需要返回的 3 个变量中的每一个传递 4 个 float3
类型的数组(_triangles
、_uvs
和 _normals
) 和 _indices
的 2 个 float3
数组。然后在 CPU 端把这些数组转换回向量,并收缩到实际使用的大小。我不太确定向内核传递这么多内存缓冲区是否高效,因为这意味着有 14 个数组要传入内核并从内核读回。我的其他方案在并行化之后都无法工作。有没有办法简化这个方案,或者有没有更好的方案?
我遇到问题的函数是_addRectangle
,_createMesh
是它将在内核中组合的函数。
/**
 * Appends one quad (four vertices, two triangles) to the chunk's mesh buffers.
 *
 * Pushes four entries to _triangles, _normals and _uvs, two index triples to
 * _indices, and advances nrOfIndices by 4.
 *
 * @param center  Centre point of the quad.
 * @param height  Full edge vector along the quad's first side.
 * @param width   Full edge vector along the quad's second side.
 * @param tex_num Tile index in the texture atlas; shifts U by tex_num / _tex_atlas_width.
 * @param LOD     Not read by this function; kept so existing callers stay valid.
 */
void Chunk::_addRectangle(glm::vec3 center, glm::vec3 height, glm::vec3 width, unsigned tex_num, cl_uint LOD)
{
    // Half-extents are shared by all four corners.
    const glm::vec3 half_height = height * 0.5f;
    const glm::vec3 half_width = width * 0.5f;

    const glm::vec3 corner1 = center - half_height - half_width;
    const glm::vec3 corner2 = center - half_height + half_width;
    const glm::vec3 corner3 = center + half_height + half_width;
    const glm::vec3 corner4 = center + half_height - half_width;

    // Cross product of the full edge vectors; it is not normalized here.
    // NOTE(review): for edges longer than 1 this is not unit length - confirm
    // whether consumers of _normals normalize it.
    const glm::vec3 normal = glm::cross(height, width);

    // Every face orientation uses the same tile mapping, so no branch on the
    // normal is needed: U spans one atlas tile, V spans 0..1.
    const float tile_u = static_cast<float>(1.0 / _tex_atlas_width);
    const float u_shift = static_cast<float>((1.0 / double(_tex_atlas_width)) * tex_num);

    const glm::vec2 uv1(tile_u + u_shift, 1.0f);
    const glm::vec2 uv2(tile_u + u_shift, 0.0f);
    const glm::vec2 uv3(u_shift, 0.0f);
    const glm::vec2 uv4(u_shift, 1.0f);

    // Vertex order within this quad: 0 = corner3, 1 = corner2, 2 = corner1, 3 = corner4.
    _triangles.push_back(corner3);
    _triangles.push_back(corner2);
    _triangles.push_back(corner1);
    _triangles.push_back(corner4);

    for (int i = 0; i < 4; ++i)
        _normals.push_back(normal);

    _uvs.push_back(uv1);
    _uvs.push_back(uv2);
    _uvs.push_back(uv3);
    _uvs.push_back(uv4);

    // Two triangles sharing the 0-2 diagonal.
    _indices.push_back(glm::ivec3(nrOfIndices + 0, nrOfIndices + 1, nrOfIndices + 2));
    _indices.push_back(glm::ivec3(nrOfIndices + 2, nrOfIndices + 3, nrOfIndices + 0));
    nrOfIndices += 4;
}
/**
 * Emits the exposed faces of every solid cell in the chunk.
 *
 * Walks the chunk interior in steps of LOD. For each cell that is not
 * BLOCK::AIR, one quad is added via _addRectangle for every neighbour
 * (LOD cells away along each axis) that is BLOCK::AIR.
 *
 * @param pos           Chunk coordinate; multiplied by (chunkSize - 1) to place the quads.
 * @param landmap_flags Flattened block grid, indexed as x + y*stride + z*stride*stride
 *                      with stride = chunkSize + 2*LOD. The loops start at LOD and
 *                      read neighbours at +/-LOD, so the grid must include a
 *                      LOD-wide border on every side.
 * @param LOD           Sampling step in cells. Non-positive values produce no geometry.
 */
void Chunk::_createMesh(glm::ivec3 pos, int landmap_flags[96 * 96 * 96], cl_int LOD)
{
    // A non-positive step would never advance the loops below.
    if (LOD <= 0)
        return;

    // Row length of the flattened grid, including the border on both sides.
    const auto stride = chunkSize + (2 * LOD);

    // Reads the block stored at grid cell (bx, by, bz).
    const auto block_at = [&](int bx, int by, int bz) {
        return landmap_flags[bx + by * stride + bz * stride * stride];
    };

    // Position of a cell's centre along one axis.
    const auto cell_center = [&](int cell, int chunk_coord) -> float {
        return cell + ((chunkSize - 1) * chunk_coord) - (chunkSize / 2) - (float(LOD - 1) / 2);
    };

    // Distance from a cell's centre to each of its faces.
    const float half = float(LOD) / 2;

    for (int x = LOD; x < chunkSize + LOD; x += LOD) {
        for (int y = LOD; y < chunkSize + LOD; y += LOD) {
            for (int z = LOD; z < chunkSize + LOD; z += LOD) {
                const int block = block_at(x, y, z);
                if (block == BLOCK::AIR)
                    continue;

                // Bitmask of exposed faces. It is consumed in this same
                // iteration, so a local is enough and no heap buffer is needed.
                // Mapping: -x South, +x North, -y Down, +y Up, -z West, +z East.
                std::byte faces{0};
                if (block_at(x - LOD, y, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::South);
                if (block_at(x + LOD, y, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::North);
                if (block_at(x, y - LOD, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::Down);
                if (block_at(x, y + LOD, z) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::Up);
                if (block_at(x, y, z - LOD) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::West);
                if (block_at(x, y, z + LOD) == BLOCK::AIR)
                    faces |= static_cast<std::byte>(Direction::East);

                // Fully enclosed cell: nothing to draw.
                if (faces == std::byte{0})
                    continue;

                const auto exposed = [faces](std::byte bit) {
                    return (faces & bit) != std::byte{0};
                };

                const float cx = cell_center(x, pos.x);
                const float cy = cell_center(y, pos.y);
                const float cz = cell_center(z, pos.z);

                // Each quad sits half a cell from the centre along its axis.
                // The height/width vectors fix the quad's orientation.
                if (exposed(static_cast<std::byte>(Direction::North)))
                    _addRectangle(glm::vec3(cx + half, cy, cz),
                                  glm::vec3(0, LOD, 0), glm::vec3(0, 0, -LOD), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::East)))
                    _addRectangle(glm::vec3(cx, cy, cz + half),
                                  glm::vec3(0, LOD, 0), glm::vec3(LOD, 0, 0), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::South)))
                    _addRectangle(glm::vec3(cx - half, cy, cz),
                                  glm::vec3(0, LOD, 0), glm::vec3(0, 0, LOD), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::West)))
                    _addRectangle(glm::vec3(cx, cy, cz - half),
                                  glm::vec3(0, LOD, 0), glm::vec3(-LOD, 0, 0), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::Up)))
                    _addRectangle(glm::vec3(cx, cy + half, cz),
                                  glm::vec3(LOD, 0, 0), glm::vec3(0, 0, LOD), block, LOD);
                if (exposed(static_cast<std::byte>(Direction::Down)))
                    _addRectangle(glm::vec3(cx, cy - half, cz),
                                  glm::vec3(LOD, 0, 0), glm::vec3(0, 0, -LOD), block, LOD);
            }
        }
    }
}
谢谢!
编辑:一种可能更有效的存储数据的方法是采用多种 float4 类型。 例如:
// Global ID of this work-item in dimension 0; used as the output slot.
const uint n = get_global_id(0);
float4 triangles{1, 2, 3, 4}; // placeholder for the values calculated for each vertex
// `list` is declared elsewhere as a float4 array (float4 list[size];), "from constructor" per the author.
list[n] = triangles;
在 OpenCL
中有像 float4
等向量类型。更多关于这方面的内容可以阅读 here。
OpenCL 内核语言
中没有类似 C++ std::vector
这样的容器,所以数据必须以 C 风格的数组传递。
查看问题中的代码部分,_triangles
、_uvs
、_indices
和 _normals
将填充结果,因此适当的缓冲区将需要分配并传递给内核,以便存储结果并在内核完成工作后读回它们。
只要内核的计算量足够大,传递 14 个数组应该不成问题;从代码来看很可能如此,因为其中有嵌套循环(代码中是三重循环)。不过这在很大程度上取决于 chunkSize
和 LOD
变量的大小。您需要尝试一下,看看它的性能如何。
将数据复制回 std::vector
应该没有任何问题 - 只需使用 memcpy
.