@@ -123,6 +123,12 @@ void recreateRenderPass() {
123123 for (uint32_t i = 0 ; i < colorBuffers.size (); ++i) {
124124 destroyImage (context, &colorBuffers[i]);
125125 }
126+ for (uint32_t i = 0 ; i < multisampleTargetBuffers.size (); ++i) {
127+ destroyImage (context, &multisampleTargetBuffers[i]);
128+ }
129+ for (uint32_t i = 0 ; i < gaussBuffers.size (); ++i) {
130+ destroyImage (context, &gaussBuffers[i]);
131+ }
126132 destroyRenderpass (context, renderPass);
127133 destroyRenderpass (context, gaussRenderPass);
128134 destroyRenderpass (context, gaussRenderPassFinal);
@@ -149,7 +155,7 @@ void recreateRenderPass() {
149155 for (uint32_t i = 0 ; i < swapchain.images .size (); ++i) {
150156 createImage (context, &depthBuffers.data ()[i], swapchain.width , swapchain.height , VK_FORMAT_D32_SFLOAT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, VK_SAMPLE_COUNT_4_BIT);
151157 createImage (context, &colorBuffers.data ()[i], swapchain.width , swapchain.height , swapchain.format , VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_SAMPLE_COUNT_4_BIT);
152- createImage (context, &multisampleTargetBuffers.data ()[i], swapchain.width , swapchain.height , swapchain.format , VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
158+ createImage (context, &multisampleTargetBuffers.data ()[i], swapchain.width , swapchain.height , swapchain.format , VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
153159 createImage (context, &gaussBuffers.data ()[i], swapchain.width , swapchain.height , swapchain.format , VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
154160
155161 VkFramebufferCreateInfo createInfo = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
@@ -317,7 +323,7 @@ void initApplication(SDL_Window* window) {
317323 VkDescriptorPoolSize poolSizes[] = {
318324 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, FRAMES_IN_FLIGHT},
319325 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, FRAMES_IN_FLIGHT},
320- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, FRAMES_IN_FLIGHT},
326+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, FRAMES_IN_FLIGHT * 2 },
321327 };
322328 VkDescriptorPoolCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
323329 createInfo.maxSets = FRAMES_IN_FLIGHT * 2 ;
@@ -567,6 +573,7 @@ void initApplication(SDL_Window* window) {
567573 {
568574 VkDescriptorSetLayoutBinding bindings[] = {
569575 {0 , VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 , VK_SHADER_STAGE_COMPUTE_BIT, 0 },
576+ {1 , VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 , VK_SHADER_STAGE_COMPUTE_BIT, 0 },
570577 };
571578 VkDescriptorSetLayoutCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
572579 createInfo.bindingCount = ARRAY_COUNT (bindings);
@@ -616,6 +623,7 @@ void renderApplication() {
616623 static float greenChannel = 0 .0f ;
617624 static float time = 0 .0f ;
618625 static double frameGpuAvg = 0.0 ;
626+ static double computeAvg = 0.0 ;
619627 time += 0 .01f ;
620628 greenChannel += 0 .01f ;
621629 if (greenChannel > 1 .0f ) greenChannel = 0 .0f ;
@@ -640,13 +648,16 @@ void renderApplication() {
640648 }
641649
642650 // Query timestamps
643- uint64_t timestamps[2 ] = {};
651+ uint64_t timestamps[3 ] = {};
644652 VkResult timestampsValid = VK (vkGetQueryPoolResults (context->device , timestampQueryPools[frameIndex], 0 , ARRAY_COUNT (timestamps), sizeof (timestamps), timestamps, sizeof (timestamps[0 ]), VK_QUERY_RESULT_64_BIT));
645653 if (timestampsValid == VK_SUCCESS) {
646654 double frameGpuBegin = double (timestamps[0 ]) * context->physicalDeviceProperties .limits .timestampPeriod * 1e-6 ;
647655 double frameGpuEnd = double (timestamps[1 ]) * context->physicalDeviceProperties .limits .timestampPeriod * 1e-6 ;
656+ double comptueEnd = double (timestamps[2 ]) * context->physicalDeviceProperties .limits .timestampPeriod * 1e-6 ;
648657 frameGpuAvg = frameGpuAvg * 0.95 + (frameGpuEnd - frameGpuBegin) * 0.05 ;
658+ computeAvg = computeAvg * 0.95 + (comptueEnd - frameGpuEnd) * 0.05 ;
649659 // LOG_INFO("GPU frametime: ", frameGpuAvg, "ms");
660+ LOG_INFO (" Compute time: " , computeAvg, " ms" );
650661 }
651662
652663 VKA (vkResetCommandPool (context->device , commandPools[frameIndex], 0 ));
@@ -783,18 +794,40 @@ void renderApplication() {
783794 imageBarrier.subresourceRange = subresourceRange;
784795 vkCmdPipelineBarrier (commandBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0 , 0 , 0 , 0 , 0 , 1 , &imageBarrier);
785796 }
797+ { // MultisampleTarget Shader Read -> Compute Read
798+ VkImageSubresourceRange subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0 , 1 , 0 , 1 };
799+ VkImageMemoryBarrier imageBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
800+ imageBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
801+ imageBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
802+ imageBarrier.oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
803+ imageBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
804+ imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
805+ imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
806+ imageBarrier.image = multisampleTargetBuffers[imageIndex].image ;
807+ imageBarrier.subresourceRange = subresourceRange;
808+ vkCmdPipelineBarrier (commandBuffer, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0 , 0 , 0 , 0 , 0 , 1 , &imageBarrier);
809+ }
786810
787811 vkCmdBindPipeline (commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipeline );
788812 imageInfo = {0 , swapchain.imageViews [imageIndex], VK_IMAGE_LAYOUT_GENERAL};
789- descriptorWrite = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
790- descriptorWrite.dstSet = computeDescriptorSets[frameIndex];
791- descriptorWrite.dstBinding = 0 ;
792- descriptorWrite.descriptorCount = 1 ;
793- descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
794- descriptorWrite.pImageInfo = &imageInfo;
795- vkUpdateDescriptorSets (context->device , 1 , &descriptorWrite, 0 , 0 );
813+ VkDescriptorImageInfo imageInfo2 = {0 , multisampleTargetBuffers[imageIndex].view , VK_IMAGE_LAYOUT_GENERAL};
814+ VkWriteDescriptorSet descriptorWrites[2 ];
815+ descriptorWrites[0 ] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
816+ descriptorWrites[0 ].dstSet = computeDescriptorSets[frameIndex];
817+ descriptorWrites[0 ].dstBinding = 0 ;
818+ descriptorWrites[0 ].descriptorCount = 1 ;
819+ descriptorWrites[0 ].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
820+ descriptorWrites[0 ].pImageInfo = &imageInfo;
821+ descriptorWrites[1 ] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
822+ descriptorWrites[1 ].dstSet = computeDescriptorSets[frameIndex];
823+ descriptorWrites[1 ].dstBinding = 1 ;
824+ descriptorWrites[1 ].descriptorCount = 1 ;
825+ descriptorWrites[1 ].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
826+ descriptorWrites[1 ].pImageInfo = &imageInfo2;
827+ vkUpdateDescriptorSets (context->device , ARRAY_COUNT (descriptorWrites), descriptorWrites, 0 , 0 );
796828 vkCmdBindDescriptorSets (commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.pipelineLayout , 0 , 1 , &computeDescriptorSets[frameIndex], 0 , 0 );
797- vkCmdDispatch (commandBuffer, (swapchain.width + 7 ) / 8 , (swapchain.height + 7 ) / 8 , 1 );
829+ #define GROUP_SIZE 8
830+ vkCmdDispatch (commandBuffer, (swapchain.width + (GROUP_SIZE-1 )) / GROUP_SIZE, (swapchain.height + (GROUP_SIZE-1 )) / GROUP_SIZE, 1 );
798831
799832 { // Swapchain Compute Write -> Present
800833 VkImageSubresourceRange subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0 , 1 , 0 , 1 };
@@ -810,6 +843,8 @@ void renderApplication() {
810843 vkCmdPipelineBarrier (commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0 , 0 , 0 , 0 , 0 , 1 , &imageBarrier);
811844 }
812845
846+ VK (vkCmdWriteTimestamp (commandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, timestampQueryPools[frameIndex], 2 ));
847+
813848 VKA (vkEndCommandBuffer (commandBuffer));
814849 }
815850
0 commit comments