X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_meta_resolve_cs.c;h=7c569aa92027e55def9af08da71ec48ee5dcddb0;hb=2ce11ac11fee594ca01608c4006b38c0c8ea37ff;hp=832ae7b8c99d1f2f89d61dc78549d5821d978ac4;hpb=8286c3a49f03dc219e57d4a9ec27a4d840c5f603;p=mesa.git

diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index 832ae7b8c99..7c569aa9202 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -31,6 +31,45 @@
 #include "sid.h"
 #include "vk_format.h"
 
+static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
+							    nir_ssa_def *input)
+{
+	nir_const_value v;
+	unsigned i;
+	v.u32[0] = 0x3b4d2e1c; // 0.00313080009
+
+	nir_ssa_def *cmp[3];
+	for (i = 0; i < 3; i++)
+		cmp[i] = nir_flt(b, nir_channel(b, input, i),
+				 nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *ltvals[3];
+	v.f32[0] = 12.92;
+	for (i = 0; i < 3; i++)
+		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *gtvals[3];
+
+	for (i = 0; i < 3; i++) {
+		v.f32[0] = 1.0/2.4;
+		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 1.055;
+		gtvals[i] = nir_fmul(b, gtvals[i],
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 0.055;
+		gtvals[i] = nir_fsub(b, gtvals[i],
+				     nir_build_imm(b, 1, 32, v));
+	}
+
+	nir_ssa_def *comp[4];
+	for (i = 0; i < 3; i++)
+		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
+	comp[3] = nir_channels(b, input, 1 << 3);
+	return nir_vec(b, comp, 4);
+}
+
 static nir_shader *
 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
 {
@@ -88,10 +127,13 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
 	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
 	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
 
-	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
-					    input_img, color, img_coord);
+	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
+					    color, img_coord);
 
 	nir_ssa_def *outval = nir_load_var(&b, color);
+	if (is_srgb)
+		outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
+
 	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
 	store->src[0] = nir_src_for_ssa(coord);
@@ -208,7 +250,6 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device)
 {
 	struct radv_meta_state *state = &device->meta_state;
 	VkResult res;
-	memset(&device->meta_state.resolve_compute, 0, sizeof(device->meta_state.resolve_compute));
 
 	res = create_layout(device);
 	if (res != VK_SUCCESS)
@@ -310,10 +351,9 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 		pipeline = device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline;
 	else
 		pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
-	if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
-		radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-				     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-	}
+
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 
 	unsigned push_constants[4] = {
 		src_offset->x,
@@ -337,7 +377,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     uint32_t region_count,
 				     const VkImageResolve *regions)
 {
-	struct radv_meta_saved_compute_state saved_state;
+	struct radv_meta_saved_state saved_state;
 
 	for (uint32_t r = 0; r < region_count; ++r) {
 		const VkImageResolve *region = &regions[r];
@@ -353,7 +393,10 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 		radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
 	}
 
-	radv_meta_save_compute(&saved_state, cmd_buffer, 16);
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_COMPUTE_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
 
 	for (uint32_t r = 0; r < region_count; ++r) {
 		const VkImageResolve *region = &regions[r];
@@ -420,7 +463,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 			     &(VkExtent2D) {extent.width, extent.height });
 		}
 	}
-	radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
+	radv_meta_restore(&saved_state, cmd_buffer);
 }
 
 /**
@@ -431,7 +474,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 {
 	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-	struct radv_meta_saved_compute_state saved_state;
+	struct radv_meta_saved_state saved_state;
 	/* FINISHME(perf): Skip clears for resolve attachments.
 	 *
 	 * From the Vulkan 1.0 spec:
@@ -444,6 +487,14 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 	if (!subpass->has_resolve)
 		return;
 
+	/* Resolves happen before the end-of-subpass barriers get executed,
+	 * so we have to make the attachment shader-readable */
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+					RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+					RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+					RADV_CMD_FLAG_INV_GLOBAL_L2 |
+					RADV_CMD_FLAG_INV_VMEM_L1;
+
 	for (uint32_t i = 0; i < subpass->color_count; ++i) {
 		VkAttachmentReference src_att = subpass->color_attachments[i];
 		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
@@ -455,10 +506,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
 		struct radv_image_view *src_iview = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
 
-		if (dst_img->surface.dcc_size) {
-			radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
-			cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-		}
+		assert(!dst_img->surface.dcc_size);
 
 		VkImageSubresourceRange range;
 		range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
@@ -469,7 +517,10 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 		radv_fast_clear_flush_image_inplace(cmd_buffer, src_iview->image, &range);
 	}
 
-	radv_meta_save_compute(&saved_state, cmd_buffer, 16);
+	radv_meta_save(&saved_state, cmd_buffer,
+		       RADV_META_SAVE_COMPUTE_PIPELINE |
+		       RADV_META_SAVE_CONSTANTS |
+		       RADV_META_SAVE_DESCRIPTORS);
 
 	for (uint32_t i = 0; i < subpass->color_count; ++i) {
 		VkAttachmentReference src_att = subpass->color_attachments[i];
@@ -479,21 +530,6 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
 			continue;
 
-		struct radv_subpass resolve_subpass = {
-			.color_count = 1,
-			.color_attachments = (VkAttachmentReference[]) { dest_att },
-			.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
-		};
-
-		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
-
-		/* Subpass resolves must respect the render area. We can ignore the
-		 * render area here because vkCmdBeginRenderPass set the render area
-		 * with 3DSTATE_DRAWING_RECTANGLE.
-		 *
-		 * XXX(chadv): Does the hardware really respect
-		 * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
-		 */
 		emit_resolve(cmd_buffer,
 			     src_iview,
 			     dst_iview,
@@ -502,7 +538,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 			     &(VkExtent2D) { fb->width, fb->height });
 	}
 
-	radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
+	radv_meta_restore(&saved_state, cmd_buffer);
 
 	for (uint32_t i = 0; i < subpass->color_count; ++i) {
 		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
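
For reference, the new radv_meta_build_resolve_srgb_conversion helper above emits NIR that applies the standard linear-to-sRGB transfer function to the three color channels of the resolved value and passes alpha through unchanged. A minimal plain-C sketch of the same per-channel math (the function and variable names here are illustrative, not part of the patch; the real code builds NIR instructions rather than doing CPU float math):

	#include <math.h>

	/* Same constants as the shader: threshold 0.0031308 (0x3b4d2e1c),
	 * linear scale 12.92, exponent 1/2.4, scale 1.055, bias 0.055. */
	static float linear_to_srgb(float c)
	{
		if (c < 0.0031308f)
			return c * 12.92f;
		return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f;
	}

	/* Convert RGB, leave alpha untouched, mirroring comp[3] = input.w. */
	static void resolve_srgb_convert(float rgba[4])
	{
		for (unsigned i = 0; i < 3; i++)
			rgba[i] = linear_to_srgb(rgba[i]);
	}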