aco: Fix integer overflows when emitting parallel copies during RA

[mesa.git] / src / amd / vulkan / radv_nir_to_llvm.c
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c

index e707ba8c907174b5aec0a48ca5a6c19f4131ea17..db21ad809b784e424da9b5839300c2979ce65210 100644 (file)
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -589,11 +589,12 @@ store_tcs_output(struct ac_shader_abi *abi,
                  LLVMValueRef param_index,
                  unsigned const_index,
                  LLVMValueRef src,
-                unsigned writemask)
+                unsigned writemask,
+                unsigned component,
+                unsigned driver_location)
  {
         struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
         const unsigned location = var->data.location;
-       unsigned component = var->data.location_frac;
         const bool is_patch = var->data.patch;
         const bool is_compact = var->data.compact;
         LLVMValueRef dw_addr;
@@ -1271,7 +1272,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
                  * access are detected. Only GFX6 and GFX10 are affected.
                  */
                 bool unaligned_vertex_fetches = false;
-               if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class == GFX10) &&
+               if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) &&
                     vtx_info->chan_format != data_format &&
                     ((attrib_offset % vtx_info->element_size) ||
                      (attrib_stride % vtx_info->element_size)))
@@ -1282,7 +1283,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
                         LLVMValueRef values[4];
  
                         assert(ctx->ac.chip_class == GFX6 ||
-                              ctx->ac.chip_class == GFX10);
+                              ctx->ac.chip_class >= GFX10);
  
                         for (unsigned chan  = 0; chan < num_channels; chan++) {
                                 unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
@@ -1367,7 +1368,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
  static void
  handle_vs_inputs(struct radv_shader_context *ctx,
                   struct nir_shader *nir) {
-       nir_foreach_variable(variable, &nir->inputs)
+       nir_foreach_shader_in_variable(variable, nir)
                 handle_vs_input_decl(ctx, variable);
  }
  
@@ -1377,7 +1378,7 @@ prepare_interp_optimize(struct radv_shader_context *ctx,
  {
         bool uses_center = false;
         bool uses_centroid = false;
-       nir_foreach_variable(variable, &nir->inputs) {
+       nir_foreach_shader_in_variable(variable, nir) {
                 if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
                     variable->data.sample)
                         continue;
@@ -1553,6 +1554,30 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
                         break;
                 }
  
+               /* Replace NaN with zero (only 32-bit) to fix game bugs if
+                * requested.
+                */
+               if (ctx->args->options->enable_mrt_output_nan_fixup &&
+                   !is_16bit &&
+                   (col_format == V_028714_SPI_SHADER_32_R ||
+                    col_format == V_028714_SPI_SHADER_32_GR ||
+                    col_format == V_028714_SPI_SHADER_32_AR ||
+                    col_format == V_028714_SPI_SHADER_32_ABGR ||
+                    col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+                       for (unsigned i = 0; i < 4; i++) {
+                               LLVMValueRef args[2] = {
+                                       values[i],
+                                       LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false)
+                               };
+                               LLVMValueRef isnan =
+                                       ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+                                                          args, 2, AC_FUNC_ATTR_READNONE);
+                               values[i] = LLVMBuildSelect(ctx->ac.builder, isnan,
+                                                           ctx->ac.f32_0,
+                                                           values[i], "");
+                       }
+               }
+
                 /* Pack f16 or norm_i16/u16. */
                 if (packf) {
                         for (chan = 0; chan < 2; chan++) {
@@ -2292,8 +2317,7 @@ static void build_streamout_vertex(struct radv_shader_context *ctx,
                 for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
                         unsigned output_usage_mask =
                                 ctx->args->shader_info->gs.output_usage_mask[i];
-                       uint8_t output_stream =
-                               output_stream = ctx->args->shader_info->gs.output_streams[i];
+                       uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
  
                         if (!(ctx->output_mask & (1ull << i)) ||
                             output_stream != stream)
@@ -3712,7 +3736,7 @@ ac_setup_rings(struct radv_shader_context *ctx)
  {
         if (ctx->args->options->chip_class <= GFX8 &&
             (ctx->stage == MESA_SHADER_GEOMETRY ||
-            ctx->args->options->key.vs_common_out.as_es || ctx->args->options->key.vs_common_out.as_es)) {
+            ctx->args->options->key.vs_common_out.as_es)) {
                 unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
                                                                    : RING_ESGS_VS;
                 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
@@ -4004,13 +4028,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                 ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
                         else
                                 ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
+                       unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+                       unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
                         ctx.tcs_num_patches =
                                 get_tcs_num_patches(
                                         ctx.args->options->key.tcs.input_vertices,
                                         ctx.shader->info.tess.tcs_vertices_out,
                                         ctx.tcs_num_inputs,
-                                       ctx.args->shader_info->tcs.outputs_written,
-                                       ctx.args->shader_info->tcs.patch_outputs_written,
+                                       tcs_num_outputs,
+                                       tcs_num_patch_outputs,
                                         ctx.args->options->tess_offchip_block_dw_size,
                                         ctx.args->options->chip_class,
                                         ctx.args->options->family);
@@ -4065,7 +4091,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                         ac_emit_barrier(&ctx.ac, ctx.stage);
                 }
  
-               nir_foreach_variable(variable, &shaders[i]->outputs)
+               nir_foreach_shader_out_variable(variable, shaders[i])
                         scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
  
                 ac_setup_rings(&ctx);
@@ -4114,15 +4140,18 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                 }
  
                 if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+                       unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+                       unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
                         args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
-                       args->shader_info->tcs.lds_size =
+                       args->shader_info->tcs.num_lds_blocks =
                                 calculate_tess_lds_size(
+                                       ctx.args->options->chip_class,
                                         ctx.args->options->key.tcs.input_vertices,
                                         ctx.shader->info.tess.tcs_vertices_out,
                                         ctx.tcs_num_inputs,
                                         ctx.tcs_num_patches,
-                                       ctx.args->shader_info->tcs.outputs_written,
-                                       ctx.args->shader_info->tcs.patch_outputs_written);
+                                       tcs_num_outputs,
+                                       tcs_num_patch_outputs);
                 }
         }
  
@@ -4381,7 +4410,7 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
  
         ac_setup_rings(&ctx);
  
-       nir_foreach_variable(variable, &geom_shader->outputs) {
+       nir_foreach_shader_out_variable(variable, geom_shader) {
                 scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
                 ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader,
                                              variable, MESA_SHADER_VERTEX);
@@ -4413,8 +4442,6 @@ llvm_compile_shader(struct radv_device *device,
         tm_options |= AC_TM_SUPPORTS_SPILL;
         if (args->options->check_ir)
                 tm_options |= AC_TM_CHECK_IR;
-       if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
-               tm_options |= AC_TM_NO_LOAD_STORE_OPT;
  
         thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);