Skip to content

Commit c5feac3

Browse files
committed
Compute Box Blur
1 parent 6e8ac1c commit c5feac3

3 files changed

Lines changed: 98 additions & 15 deletions

File tree

shaders/compute_comp.glsl

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,58 @@
11
#version 460
22

33
layout(set = 0, binding = 0, rgba8) uniform writeonly image2D destinationImage;
4+
layout(set = 0, binding = 1, rgba8) uniform readonly image2D sourceImage;
45

5-
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
6+
#define RADIUS 4
7+
#define GROUP_SIZE 8
8+
#define TILE_DIM (2 * RADIUS + GROUP_SIZE)
9+
10+
// Texel cache shared by all invocations of the work group
11+
shared vec3 tile[TILE_DIM * TILE_DIM];
12+
13+
// Returns the colour cached in the shared tile at tile-space coordinate `pos`.
// The tile is addressed row by row with TILE_DIM entries per row; `pos` is not
// range-checked here.
vec3 tap(ivec2 pos) {
    int index = pos.y * TILE_DIM + pos.x;
    return tile[index];
}
16+
17+
layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in;
618
// Compute entry point: writes a (2 * RADIUS + 1) x (2 * RADIUS + 1) box blur of
// sourceImage into destinationImage, one output texel per invocation.
//
// Each work group first caches the TILE_DIM x TILE_DIM neighbourhood it needs
// in the shared `tile` array; every invocation then averages its window from
// that cache instead of issuing one image load per tap.
void main() {
    const ivec2 iuv = ivec2(gl_GlobalInvocationID.xy);
    const ivec2 lastTexel = imageSize(sourceImage) - ivec2(1);

    // Image coordinate of tile entry (0, 0): RADIUS texels up and to the left of
    // the group's first output texel, so it can be negative at the image border.
    const ivec2 anchor = ivec2(gl_WorkGroupID.xy) * GROUP_SIZE - ivec2(RADIUS);

    // Populate the tile cooperatively. The strided loop covers every tile entry
    // for any RADIUS / GROUP_SIZE combination rather than assuming exactly four
    // texels per invocation.
    const uint tileTexels = uint(TILE_DIM * TILE_DIM);
    const uint groupThreads = uint(GROUP_SIZE * GROUP_SIZE);
    for (uint index = gl_LocalInvocationIndex; index < tileTexels; index += groupThreads) {
        const ivec2 tileCoord = ivec2(int(index % uint(TILE_DIM)), int(index / uint(TILE_DIM)));
        // Clamp to the image so border tiles repeat the edge texel instead of
        // loading from coordinates outside the image.
        const ivec2 texel = clamp(anchor + tileCoord, ivec2(0), lastTexel);
        tile[index] = imageLoad(sourceImage, texel).rgb;
    }

    // Make the cached texels available to all invocations of the group.
    // Every invocation must reach barrier(), so nothing returns before it.
    groupMemoryBarrier();
    barrier();

    // The window is centred on tapBase + RADIUS and spans RADIUS texels in each
    // direction; the largest index touched is GROUP_SIZE - 1 + 2 * RADIUS,
    // which is TILE_DIM - 1.
    const int kernelDim = 2 * RADIUS + 1;
    const ivec2 tapBase = ivec2(gl_LocalInvocationID.xy);
    vec3 sum = vec3(0.0);
    for (int j = 0; j < kernelDim; ++j) {
        for (int i = 0; i < kernelDim; ++i) {
            sum += tap(tapBase + ivec2(i, j));
        }
    }

    // The dispatch is rounded up to whole work groups, so invocations past the
    // right/bottom edge helped fill the tile but must not write a texel.
    if (all(lessThan(iuv, imageSize(destinationImage)))) {
        // Only the colour is blurred; alpha is taken from the source texel.
        const float alpha = imageLoad(sourceImage, clamp(iuv, ivec2(0), lastTexel)).a;
        imageStore(destinationImage, iuv, vec4(sum / float(kernelDim * kernelDim), alpha));
    }
}

shaders/compute_comp.spv

4.05 KB
Binary file not shown.

src/main.cpp

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ void recreateRenderPass() {
123123
for(uint32_t i = 0; i < colorBuffers.size(); ++i) {
124124
destroyImage(context, &colorBuffers[i]);
125125
}
126+
for(uint32_t i = 0; i < multisampleTargetBuffers.size(); ++i) {
127+
destroyImage(context, &multisampleTargetBuffers[i]);
128+
}
129+
for(uint32_t i = 0; i < gaussBuffers.size(); ++i) {
130+
destroyImage(context, &gaussBuffers[i]);
131+
}
126132
destroyRenderpass(context, renderPass);
127133
destroyRenderpass(context, gaussRenderPass);
128134
destroyRenderpass(context, gaussRenderPassFinal);
@@ -149,7 +155,7 @@ void recreateRenderPass() {
149155
for (uint32_t i = 0; i < swapchain.images.size(); ++i) {
150156
createImage(context, &depthBuffers.data()[i], swapchain.width, swapchain.height, VK_FORMAT_D32_SFLOAT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, VK_SAMPLE_COUNT_4_BIT);
151157
createImage(context, &colorBuffers.data()[i], swapchain.width, swapchain.height, swapchain.format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_SAMPLE_COUNT_4_BIT);
152-
createImage(context, &multisampleTargetBuffers.data()[i], swapchain.width, swapchain.height, swapchain.format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
158+
createImage(context, &multisampleTargetBuffers.data()[i], swapchain.width, swapchain.height, swapchain.format, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
153159
createImage(context, &gaussBuffers.data()[i], swapchain.width, swapchain.height, swapchain.format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
154160

155161
VkFramebufferCreateInfo createInfo = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
@@ -317,7 +323,7 @@ void initApplication(SDL_Window* window) {
317323
VkDescriptorPoolSize poolSizes[] = {
318324
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, FRAMES_IN_FLIGHT},
319325
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, FRAMES_IN_FLIGHT},
320-
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, FRAMES_IN_FLIGHT},
326+
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, FRAMES_IN_FLIGHT * 2},
321327
};
322328
VkDescriptorPoolCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
323329
createInfo.maxSets = FRAMES_IN_FLIGHT * 2;
@@ -567,6 +573,7 @@ void initApplication(SDL_Window* window) {
567573
{
568574
VkDescriptorSetLayoutBinding bindings[] = {
569575
{0, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, 0},
576+
{1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, 0},
570577
};
571578
VkDescriptorSetLayoutCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
572579
createInfo.bindingCount = ARRAY_COUNT(bindings);
@@ -616,6 +623,7 @@ void renderApplication() {
616623
static float greenChannel = 0.0f;
617624
static float time = 0.0f;
618625
static double frameGpuAvg = 0.0;
626+
static double computeAvg = 0.0;
619627
time += 0.01f;
620628
greenChannel += 0.01f;
621629
if (greenChannel > 1.0f) greenChannel = 0.0f;
@@ -640,13 +648,16 @@ void renderApplication() {
640648
}
641649

642650
// Query timestamps
643-
uint64_t timestamps[2] = {};
651+
uint64_t timestamps[3] = {};
644652
VkResult timestampsValid = VK(vkGetQueryPoolResults(context->device, timestampQueryPools[frameIndex], 0, ARRAY_COUNT(timestamps), sizeof(timestamps), timestamps, sizeof(timestamps[0]), VK_QUERY_RESULT_64_BIT));
645653
if(timestampsValid == VK_SUCCESS) {
646654
double frameGpuBegin = double(timestamps[0]) * context->physicalDeviceProperties.limits.timestampPeriod * 1e-6;
647655
double frameGpuEnd = double(timestamps[1]) * context->physicalDeviceProperties.limits.timestampPeriod * 1e-6;
656+
double comptueEnd = double(timestamps[2]) * context->physicalDeviceProperties.limits.timestampPeriod * 1e-6;
648657
frameGpuAvg = frameGpuAvg * 0.95 + (frameGpuEnd - frameGpuBegin) * 0.05;
658+
computeAvg = computeAvg * 0.95 + (comptueEnd - frameGpuEnd) * 0.05;
649659
//LOG_INFO("GPU frametime: ", frameGpuAvg, "ms");
660+
LOG_INFO("Compute time: ", computeAvg, "ms");
650661
}
651662

652663
VKA(vkResetCommandPool(context->device, commandPools[frameIndex], 0));
@@ -783,18 +794,40 @@ void renderApplication() {
783794
imageBarrier.subresourceRange = subresourceRange;
784795
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, 0, 0, 0, 1, &imageBarrier);
785796
}
797+
{ // MultisampleTarget Shader Read -> Compute Read
798+
VkImageSubresourceRange subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
799+
VkImageMemoryBarrier imageBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
800+
imageBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
801+
imageBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
802+
imageBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
803+
imageBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
804+
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
805+
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
806+
imageBarrier.image = multisampleTargetBuffers[imageIndex].image;
807+
imageBarrier.subresourceRange = subresourceRange;
808+
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, 0, 0, 0, 1, &imageBarrier);
809+
}
786810

787811
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline);
788812
imageInfo = {0, swapchain.imageViews[imageIndex], VK_IMAGE_LAYOUT_GENERAL};
789-
descriptorWrite = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
790-
descriptorWrite.dstSet = computeDescriptorSets[frameIndex];
791-
descriptorWrite.dstBinding = 0;
792-
descriptorWrite.descriptorCount = 1;
793-
descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
794-
descriptorWrite.pImageInfo = &imageInfo;
795-
vkUpdateDescriptorSets(context->device, 1, &descriptorWrite, 0, 0);
813+
VkDescriptorImageInfo imageInfo2 = {0, multisampleTargetBuffers[imageIndex].view, VK_IMAGE_LAYOUT_GENERAL};
814+
VkWriteDescriptorSet descriptorWrites[2];
815+
descriptorWrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
816+
descriptorWrites[0].dstSet = computeDescriptorSets[frameIndex];
817+
descriptorWrites[0].dstBinding = 0;
818+
descriptorWrites[0].descriptorCount = 1;
819+
descriptorWrites[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
820+
descriptorWrites[0].pImageInfo = &imageInfo;
821+
descriptorWrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
822+
descriptorWrites[1].dstSet = computeDescriptorSets[frameIndex];
823+
descriptorWrites[1].dstBinding = 1;
824+
descriptorWrites[1].descriptorCount = 1;
825+
descriptorWrites[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
826+
descriptorWrites[1].pImageInfo = &imageInfo2;
827+
vkUpdateDescriptorSets(context->device, ARRAY_COUNT(descriptorWrites), descriptorWrites, 0, 0);
796828
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipelineLayout, 0, 1, &computeDescriptorSets[frameIndex], 0, 0);
797-
vkCmdDispatch(commandBuffer, (swapchain.width + 7) / 8, (swapchain.height + 7) / 8, 1);
829+
#define GROUP_SIZE 8
830+
vkCmdDispatch(commandBuffer, (swapchain.width + (GROUP_SIZE-1)) / GROUP_SIZE, (swapchain.height + (GROUP_SIZE-1)) / GROUP_SIZE, 1);
798831

799832
{ // Swapchain Compute Write -> Present
800833
VkImageSubresourceRange subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
@@ -810,6 +843,8 @@ void renderApplication() {
810843
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, 0, 0, 0, 1, &imageBarrier);
811844
}
812845

846+
VK(vkCmdWriteTimestamp(commandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, timestampQueryPools[frameIndex], 2));
847+
813848
VKA(vkEndCommandBuffer(commandBuffer));
814849
}
815850

0 commit comments

Comments
 (0)