aco: Fix integer overflows when emitting parallel copies during RA
[mesa.git] / src / amd / vulkan / radv_meta_fmask_expand.c
index a9567eaf0fd3fc7f9afa7ad553b5082394f55b41..3a109110c951725d738aed6af3075dc6ddc536d9 100644 (file)
 
 #include "radv_meta.h"
 #include "radv_private.h"
+#include "vk_format.h"
 
 static nir_shader *
 build_fmask_expand_compute_shader(struct radv_device *device, int samples)
 {
        nir_builder b;
        char name[64];
-       const struct glsl_type *img_type =
+       const struct glsl_type *type =
                glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false,
                                  GLSL_TYPE_FLOAT);
+       const struct glsl_type *img_type =
+               glsl_image_type(GLSL_SAMPLER_DIM_MS, false,
+                                 GLSL_TYPE_FLOAT);
 
        snprintf(name, 64, "meta_fmask_expand_cs-%d", samples);
 
@@ -43,7 +47,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
        b.shader->info.cs.local_size[2] = 1;
 
        nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
-                                                     img_type, "s_tex");
+                                                     type, "s_tex");
        input_img->data.descriptor_set = 0;
        input_img->data.binding = 0;
 
@@ -51,10 +55,10 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
                                                       img_type, "out_img");
        output_img->data.descriptor_set = 0;
        output_img->data.binding = 0;
-       output_img->data.image.access = ACCESS_NON_READABLE;
+       output_img->data.access = ACCESS_NON_READABLE;
 
        nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-       nir_ssa_def *wg_id = nir_load_work_group_id(&b);
+       nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
        nir_ssa_def *block_size = nir_imm_ivec4(&b,
                                                b.shader->info.cs.local_size[0],
                                                b.shader->info.cs.local_size[1],
@@ -97,6 +101,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
                store->src[1] = nir_src_for_ssa(global_id);
                store->src[2] = nir_src_for_ssa(nir_imm_int(&b, i));
                store->src[3] = nir_src_for_ssa(outval);
+               store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
                nir_builder_instr_insert(&b, &store->instr);
        }
 
@@ -122,7 +127,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
        radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
                             VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 
-       for (unsigned l = 0; l < subresourceRange->layerCount; l++) {
+       for (unsigned l = 0; l < radv_get_layerCount(image, subresourceRange); l++) {
                struct radv_image_view iview;
 
                radv_image_view_init(&iview, device,
@@ -130,7 +135,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                                             .image = radv_image_to_handle(image),
                                             .viewType = radv_meta_get_view_type(image),
-                                            .format = image->vk_format,
+                                            .format = vk_format_no_srgb(image->vk_format),
                                             .subresourceRange = {
                                                     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                                     .baseMipLevel = 0,
@@ -138,7 +143,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                                     .baseArrayLayer = subresourceRange->baseArrayLayer + l,
                                                     .layerCount = 1,
                                             },
-                                    });
+                                    }, NULL);
 
                radv_meta_push_descriptor_set(cmd_buffer,
                                              VK_PIPELINE_BIND_POINT_COMPUTE,
@@ -168,10 +173,10 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
        radv_meta_restore(&saved_state, cmd_buffer);
 
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                                       RADV_CMD_FLAG_INV_GLOBAL_L2;
+                                       RADV_CMD_FLAG_INV_L2;
 
        /* Re-initialize FMASK in fully expanded mode. */
-       radv_initialize_fmask(cmd_buffer, image);
+       radv_initialize_fmask(cmd_buffer, image, subresourceRange);
 }
 
 void radv_device_finish_meta_fmask_expand_state(struct radv_device *device)