radv/ac: overhaul vs output/ps input routing
author Dave Airlie <airlied@redhat.com>
Fri, 21 Apr 2017 02:17:23 +0000 (03:17 +0100)
committer Dave Airlie <airlied@redhat.com>
Tue, 25 Apr 2017 22:24:39 +0000 (23:24 +0100)
In order to cleanly eliminate exports, first rewrite the
code to mirror how radeonsi works for now.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_nir_to_llvm.h
src/amd/vulkan/radv_pipeline.c

index 514c9e9ca35b8141c3325f67afd9cf39cb876e11..ab929bc81fe62781fe3705bf8f1f79d6f581ae5e 100644 (file)
@@ -5133,8 +5133,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
        LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
        int i;
 
-       outinfo->prim_id_output = 0xffffffff;
-       outinfo->layer_output = 0xffffffff;
+       memset(outinfo->vs_output_param_offset, EXP_PARAM_UNDEFINED,
+              sizeof(outinfo->vs_output_param_offset));
+
        if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
                LLVMValueRef slots[8];
                unsigned j;
@@ -5184,20 +5185,21 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
                } else if (i == VARYING_SLOT_LAYER) {
                        outinfo->writes_layer = true;
                        layer_value = values[0];
-                       outinfo->layer_output = param_count;
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
+                       outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
                        param_count++;
                } else if (i == VARYING_SLOT_VIEWPORT) {
                        outinfo->writes_viewport_index = true;
                        viewport_index_value = values[0];
                        continue;
                } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
-                       outinfo->prim_id_output = param_count;
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
+                       outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
                        param_count++;
                } else if (i >= VARYING_SLOT_VAR0) {
                        outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
+                       outinfo->vs_output_param_offset[i] = param_count;
                        param_count++;
                }
 
index 401d284a7c4134f0ad33c58d6f5221cb329d7fd8..f77a9b8d2bb9284fae6c6d4e680cc2ed8672ca3a 100644 (file)
@@ -120,14 +120,25 @@ struct ac_userdata_locations {
        struct ac_userdata_info shader_data[AC_UD_MAX_UD];
 };
 
+enum { /* codes stored per varying slot in ac_vs_output_info.vs_output_param_offset[] */
+       /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
+       EXP_PARAM_OFFSET_0 = 0,
+       EXP_PARAM_OFFSET_31 = 31, /* codes 0..31 are handed unchanged to S_028644_OFFSET() by offset_to_ps_input() */
+       /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
+       EXP_PARAM_DEFAULT_VAL_0000 = 64, /* base of the constant range; offset_to_ps_input() subtracts it to get the DEFAULT_VAL field */
+       EXP_PARAM_DEFAULT_VAL_0001, /* implicitly 65 */
+       EXP_PARAM_DEFAULT_VAL_1110, /* implicitly 66 */
+       EXP_PARAM_DEFAULT_VAL_1111, /* implicitly 67 */
+       EXP_PARAM_UNDEFINED = 255, /* "slot not exported" marker; a single byte value, so memset() can fill the uint8_t table with it */
+};
+
 struct ac_vs_output_info {
+       uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
        uint8_t clip_dist_mask;
        uint8_t cull_dist_mask;
        bool writes_pointsize;
        bool writes_layer;
        bool writes_viewport_index;
-       uint32_t prim_id_output;
-       uint32_t layer_output;
        uint32_t export_mask;
        unsigned param_exports;
        unsigned pos_exports;
index aada4d2f30d71f4aad114cdf868f7f8ae1f67f1d..d6989137a55eb376784e1cbef7126a7605ba3187 100644 (file)
@@ -1870,6 +1870,25 @@ static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
                clip_dist_mask;
 
 }
+
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade) /* turn an EXP_PARAM_* code into the value stored in graphics.ps_input_cntl[] */
+{
+       uint32_t ps_input_cntl;
+       if (offset <= EXP_PARAM_OFFSET_31) /* code is a real parameter export index */
+               ps_input_cntl = S_028644_OFFSET(offset);
+       else {
+               /* The input is a DEFAULT_VAL constant. */
+               assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
+                      offset <= EXP_PARAM_DEFAULT_VAL_1111); /* not checked when NDEBUG is defined; visible callers filter EXP_PARAM_UNDEFINED before calling */
+               offset -= EXP_PARAM_DEFAULT_VAL_0000; /* rebase codes 64..67 to 0..3 */
+               ps_input_cntl = S_028644_OFFSET(0x20) | /* same OFFSET(0x20) that calculate_ps_inputs() writes for an input with no VS export */
+                       S_028644_DEFAULT_VAL(offset);
+       }
+       if (flat_shade)
+               ps_input_cntl |= S_028644_FLAT_SHADE(1);
+       return ps_input_cntl;
+}
+
 static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 {
        struct radv_shader_variant *ps, *vs;
@@ -1882,24 +1901,20 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 
        unsigned ps_offset = 0;
 
-       if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
-               unsigned vs_offset, flat_shade;
-               unsigned val;
-               vs_offset = outinfo->prim_id_output;
-               flat_shade = true;
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-               pipeline->graphics.ps_input_cntl[ps_offset] = val;
-               ++ps_offset;
+       if (ps->info.fs.prim_id_input) {
+               unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
+               if (vs_offset != EXP_PARAM_UNDEFINED) {
+                       pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ++ps_offset;
+               }
        }
 
-       if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
-               unsigned vs_offset, flat_shade;
-               unsigned val;
-               vs_offset = outinfo->layer_output;
-               flat_shade = true;
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-               pipeline->graphics.ps_input_cntl[ps_offset] = val;
-               ++ps_offset;
+       if (ps->info.fs.layer_input) {
+               unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
+               if (vs_offset != EXP_PARAM_UNDEFINED) {
+                       pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ++ps_offset;
+               }
        }
 
        if (ps->info.fs.has_pcoord) {
@@ -1910,31 +1925,21 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
        }
 
        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
-               unsigned vs_offset, flat_shade;
-               unsigned val;
-
+               unsigned vs_offset;
+               bool flat_shade;
                if (!(ps->info.fs.input_mask & (1u << i)))
                        continue;
 
-               if (!(outinfo->export_mask & (1u << i))) {
+               vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
+               if (vs_offset == EXP_PARAM_UNDEFINED) {
                        pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
                        ++ps_offset;
                        continue;
                }
 
-               vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
-               if (outinfo->prim_id_output != 0xffffffff) {
-                       if (vs_offset >= outinfo->prim_id_output)
-                               vs_offset++;
-               }
-               if (outinfo->layer_output != 0xffffffff) {
-                       if (vs_offset >= outinfo->layer_output)
-                         vs_offset++;
-               }
                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
 
-               val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-               pipeline->graphics.ps_input_cntl[ps_offset] = val;
+               pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
                ++ps_offset;
        }