ac/nir: Move VS position exports before param exports.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 3 Jul 2017 22:49:55 +0000 (00:49 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 5 Jul 2017 18:23:00 +0000 (20:23 +0200)
According to Nicolai, the SX can already start work once all
the position exports are done, so emit those exports first.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c

index e72747ab78aac6d55aa6c29fa622e19d48fd8e13..beafd5685f3cbe1ea291439786cc38a81e6d2916 100644 (file)
@@ -5245,66 +5245,30 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
 
        }
 
-       for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
-               LLVMValueRef values[4];
-               if (!(ctx->output_mask & (1ull << i)))
-                       continue;
-
+       LLVMValueRef pos_values[4] = {ctx->f32zero, ctx->f32zero, ctx->f32zero, ctx->f32one};
+       if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
                for (unsigned j = 0; j < 4; j++)
-                       values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
-                                             ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
-
-               if (i == VARYING_SLOT_POS) {
-                       target = V_008DFC_SQ_EXP_POS;
-               } else if (i == VARYING_SLOT_CLIP_DIST0) {
-                       continue;
-               } else if (i == VARYING_SLOT_PSIZ) {
-                       outinfo->writes_pointsize = true;
-                       psize_value = values[0];
-                       continue;
-               } else if (i == VARYING_SLOT_LAYER) {
-                       outinfo->writes_layer = true;
-                       layer_value = values[0];
-                       target = V_008DFC_SQ_EXP_PARAM + param_count;
-                       outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
-                       param_count++;
-               } else if (i == VARYING_SLOT_VIEWPORT) {
-                       outinfo->writes_viewport_index = true;
-                       viewport_index_value = values[0];
-                       continue;
-               } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
-                       target = V_008DFC_SQ_EXP_PARAM + param_count;
-                       outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
-                       param_count++;
-               } else if (i >= VARYING_SLOT_VAR0) {
-                       outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
-                       target = V_008DFC_SQ_EXP_PARAM + param_count;
-                       outinfo->vs_output_param_offset[i] = param_count;
-                       param_count++;
-               }
+                       pos_values[j] = LLVMBuildLoad(ctx->builder,
+                                                ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
+       }
+       si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
 
-               si_llvm_init_export_args(ctx, values, target, &args);
+       if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
+               outinfo->writes_pointsize = true;
+               psize_value = LLVMBuildLoad(ctx->builder,
+                                           ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
+       }
 
-               if (target >= V_008DFC_SQ_EXP_POS &&
-                   target <= (V_008DFC_SQ_EXP_POS + 3)) {
-                       memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
-                              &args, sizeof(args));
-               } else {
-                       ac_build_export(&ctx->ac, &args);
-               }
+       if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
+               outinfo->writes_layer = true;
+               layer_value = LLVMBuildLoad(ctx->builder,
+                                           ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
        }
 
-       /* We need to add the position output manually if it's missing. */
-       if (!pos_args[0].out[0]) {
-               pos_args[0].enabled_channels = 0xf;
-               pos_args[0].valid_mask = 0;
-               pos_args[0].done = 0;
-               pos_args[0].target = V_008DFC_SQ_EXP_POS;
-               pos_args[0].compr = 0;
-               pos_args[0].out[0] = ctx->f32zero; /* X */
-               pos_args[0].out[1] = ctx->f32zero; /* Y */
-               pos_args[0].out[2] = ctx->f32zero; /* Z */
-               pos_args[0].out[3] = ctx->f32one;  /* W */
+       if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
+               outinfo->writes_viewport_index = true;
+               viewport_index_value = LLVMBuildLoad(ctx->builder,
+                                                    ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
        }
 
        uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
@@ -5345,6 +5309,41 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
                ac_build_export(&ctx->ac, &pos_args[i]);
        }
 
+       for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
+               LLVMValueRef values[4];
+               if (!(ctx->output_mask & (1ull << i)))
+                       continue;
+
+               for (unsigned j = 0; j < 4; j++)
+                       values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+                                             ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
+
+               if (i == VARYING_SLOT_LAYER) {
+                       target = V_008DFC_SQ_EXP_PARAM + param_count;
+                       outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
+                       param_count++;
+               } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
+                       target = V_008DFC_SQ_EXP_PARAM + param_count;
+                       outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
+                       param_count++;
+               } else if (i >= VARYING_SLOT_VAR0) {
+                       outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
+                       target = V_008DFC_SQ_EXP_PARAM + param_count;
+                       outinfo->vs_output_param_offset[i] = param_count;
+                       param_count++;
+               } else
+                       continue;
+
+               si_llvm_init_export_args(ctx, values, target, &args);
+
+               if (target >= V_008DFC_SQ_EXP_POS &&
+                   target <= (V_008DFC_SQ_EXP_POS + 3)) {
+                       memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
+                              &args, sizeof(args));
+               } else {
+                       ac_build_export(&ctx->ac, &args);
+               }
+       }
 
        if (export_prim_id) {
                LLVMValueRef values[4];