ac/nir: export some undef as zero
author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tue, 28 Apr 2020 14:34:49 +0000 (16:34 +0200)
committer: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tue, 5 May 2020 10:26:26 +0000 (12:26 +0200)
NIR already optimizes undef usage.
If undef reaches llvm, it's probably because of a broken shader.

In this situation, rather than letting llvm use the undef values
to do more optimization and probably produce incorrect results,
we replace undef values by 0.

"undef" values that are directly used in exports are kept as undef,
because this allows llvm to optimize them away.

This is only enabled for radeonsi.

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2689
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4607>

src/amd/llvm/ac_nir_to_llvm.c
src/amd/llvm/ac_shader_abi.h
src/gallium/drivers/radeonsi/si_shader_llvm.c

index 42cd952b69f31e1acf922efd1d821b55656c69f4..d3489614d4ce7147850b416de952ac4774d76a36 100644 (file)
@@ -4799,19 +4799,46 @@ static void phi_post_pass(struct ac_nir_context *ctx)
 }
 
 
+/* Returns true when `def` is consumed by a store_deref intrinsic, either
+ * directly or through a chain of vec4 ALU instructions whose result is
+ * itself consumed by a store_deref. Returns false otherwise.
+ *
+ * NOTE(review): only nir_op_vec4 is followed (not vec2/vec3) and any
+ * store_deref counts, whatever variable it writes - confirm this is the
+ * intended definition of "export".
+ */
+static bool is_def_used_in_an_export(const nir_ssa_def* def) {
+       nir_foreach_use(use_src, def) {
+               nir_instr *user = use_src->parent_instr;
+
+               if (user->type == nir_instr_type_intrinsic) {
+                       /* Direct use by a store. */
+                       if (nir_instr_as_intrinsic(user)->intrinsic == nir_intrinsic_store_deref)
+                               return true;
+                       continue;
+               }
+
+               if (user->type != nir_instr_type_alu)
+                       continue;
+
+               /* Indirect use: look through vec4 and check its own users. */
+               nir_alu_instr *alu = nir_instr_as_alu(user);
+               if (alu->op != nir_op_vec4)
+                       continue;
+
+               if (is_def_used_in_an_export(&alu->dest.dest.ssa))
+                       return true;
+       }
+       return false;
+}
+
 static void visit_ssa_undef(struct ac_nir_context *ctx,
                            const nir_ssa_undef_instr *instr)
 {
        unsigned num_components = instr->def.num_components;
        LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
-       LLVMValueRef undef;
 
-       if (num_components == 1)
-               undef = LLVMGetUndef(type);
-       else {
-               undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+       if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
+               LLVMValueRef undef;
+
+               if (num_components == 1)
+                       undef = LLVMGetUndef(type);
+               else {
+                       undef = LLVMGetUndef(LLVMVectorType(type, num_components));
+               }
+               ctx->ssa_defs[instr->def.index] = undef;
+       } else {
+               LLVMValueRef zero = LLVMConstInt(type, 0, false);
+               if (num_components > 1) {
+                       zero = ac_build_gather_values_extended(
+                               &ctx->ac, &zero, 4, 0, false, false);
+               }
+               ctx->ssa_defs[instr->def.index] = zero;
        }
-       ctx->ssa_defs[instr->def.index] = undef;
 }
 
 static void visit_jump(struct ac_llvm_context *ctx,
index ea3717413d2d49415dd9327534aee339417c6358..aa31ff9c52da07cac63ebe132df2b26cf6f69db9 100644 (file)
@@ -189,6 +189,9 @@ struct ac_shader_abi {
 
        /* Check for Inf interpolation coeff */
        bool kill_ps_if_inf_interp;
+
+       /* Whether undef values must be converted to zero */
+       bool convert_undef_to_zero;
 };
 
 #endif /* AC_SHADER_ABI_H */
index d26b80423fd46805a2e1d9bbdd9d833313c76144..5c5833bd36c40306e4a015edc22c5f9c43162ff9 100644 (file)
@@ -459,6 +459,7 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
    ctx->abi.inputs = &ctx->inputs[0];
    ctx->abi.clamp_shadow_reference = true;
    ctx->abi.robust_buffer_access = true;
+   ctx->abi.convert_undef_to_zero = true;
 
    if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
       assert(gl_shader_stage_is_compute(nir->info.stage));