radv: rework vertex/export shader output handling
author Dave Airlie <airlied@redhat.com>
Mon, 27 Mar 2017 20:13:09 +0000 (06:13 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 28 Mar 2017 07:39:59 +0000 (17:39 +1000)
In order to facilitate adding tess support, split the vs/es
output info into a separate block, so we make it easier to
have the tess shaders export the same info.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_nir_to_llvm.h
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c

index 6e36c192c3cc3d011a13b1adad1f4a416942ec4f..cfbdeae1a3b08ed3a6e30c59712707d8da04bcff 100644 (file)
@@ -4228,11 +4228,11 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
                        int length = glsl_get_length(variable->type);
                        if (idx == VARYING_SLOT_CLIP_DIST0) {
                                if (ctx->stage == MESA_SHADER_VERTEX)
-                                       ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
+                                       ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1;
                                ctx->num_output_clips = length;
                        } else if (idx == VARYING_SLOT_CULL_DIST0) {
                                if (ctx->stage == MESA_SHADER_VERTEX)
-                                       ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
+                                       ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1;
                                ctx->num_output_culls = length;
                        }
                        if (length > 4)
@@ -4448,7 +4448,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 }
 
 static void
-handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
+handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
+                      struct ac_vs_output_info *outinfo)
 {
        uint32_t param_count = 0;
        unsigned target;
@@ -4461,14 +4462,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                                                       (1ull << VARYING_SLOT_CULL_DIST0) |
                                                       (1ull << VARYING_SLOT_CULL_DIST1));
 
-       ctx->shader_info->vs.prim_id_output = 0xffffffff;
-       ctx->shader_info->vs.layer_output = 0xffffffff;
+       outinfo->prim_id_output = 0xffffffff;
+       outinfo->layer_output = 0xffffffff;
        if (clip_mask) {
                LLVMValueRef slots[8];
                unsigned j;
 
-               if (ctx->shader_info->vs.cull_dist_mask)
-                       ctx->shader_info->vs.cull_dist_mask <<= ctx->num_output_clips;
+               if (outinfo->cull_dist_mask)
+                       outinfo->cull_dist_mask <<= ctx->num_output_clips;
 
                i = VARYING_SLOT_CLIP_DIST0;
                for (j = 0; j < ctx->num_output_clips; j++)
@@ -4513,25 +4514,25 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                           i == VARYING_SLOT_CULL_DIST1) {
                        continue;
                } else if (i == VARYING_SLOT_PSIZ) {
-                       ctx->shader_info->vs.writes_pointsize = true;
+                       outinfo->writes_pointsize = true;
                        psize_value = values[0];
                        continue;
                } else if (i == VARYING_SLOT_LAYER) {
-                       ctx->shader_info->vs.writes_layer = true;
+                       outinfo->writes_layer = true;
                        layer_value = values[0];
-                       ctx->shader_info->vs.layer_output = param_count;
+                       outinfo->layer_output = param_count;
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        param_count++;
                } else if (i == VARYING_SLOT_VIEWPORT) {
-                       ctx->shader_info->vs.writes_viewport_index = true;
+                       outinfo->writes_viewport_index = true;
                        viewport_index_value = values[0];
                        continue;
                } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
-                       ctx->shader_info->vs.prim_id_output = param_count;
+                       outinfo->prim_id_output = param_count;
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        param_count++;
                } else if (i >= VARYING_SLOT_VAR0) {
-                       ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
+                       outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
                        param_count++;
                }
@@ -4560,9 +4561,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                pos_args[0].out[3] = ctx->f32one;  /* W */
        }
 
-       uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) |
-                        (ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
-                        (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0));
+       uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
+                        (outinfo->writes_layer == true ? 4 : 0) |
+                        (outinfo->writes_viewport_index == true ? 8 : 0));
        if (mask) {
                pos_args[1].enabled_channels = mask;
                pos_args[1].valid_mask = 0;
@@ -4574,11 +4575,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                pos_args[1].out[2] = ctx->f32zero; /* Z */
                pos_args[1].out[3] = ctx->f32zero;  /* W */
 
-               if (ctx->shader_info->vs.writes_pointsize == true)
+               if (outinfo->writes_pointsize == true)
                        pos_args[1].out[0] = psize_value;
-               if (ctx->shader_info->vs.writes_layer == true)
+               if (outinfo->writes_layer == true)
                        pos_args[1].out[2] = layer_value;
-               if (ctx->shader_info->vs.writes_viewport_index == true)
+               if (outinfo->writes_viewport_index == true)
                        pos_args[1].out[3] = viewport_index_value;
        }
        for (i = 0; i < 4; i++) {
@@ -4598,12 +4599,13 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
                ac_build_export(&ctx->ac, &pos_args[i]);
        }
 
-       ctx->shader_info->vs.pos_exports = num_pos_exports;
-       ctx->shader_info->vs.param_exports = param_count;
+       outinfo->pos_exports = num_pos_exports;
+       outinfo->param_exports = param_count;
 }
 
 static void
-handle_es_outputs_post(struct nir_to_llvm_context *ctx)
+handle_es_outputs_post(struct nir_to_llvm_context *ctx,
+                      struct ac_es_output_info *outinfo)
 {
        int j;
        uint64_t max_output_written = 0;
@@ -4638,7 +4640,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
                                               1, 1, true, true);
                }
        }
-       ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;
+       outinfo->esgs_itemsize = (max_output_written + 1) * 16;
 }
 
 static void
@@ -4761,9 +4763,9 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
        switch (ctx->stage) {
        case MESA_SHADER_VERTEX:
                if (ctx->options->key.vs.as_es)
-                       handle_es_outputs_post(ctx);
+                       handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
                else
-                       handle_vs_outputs_post(ctx);
+                       handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
                break;
        case MESA_SHADER_FRAGMENT:
                handle_fs_outputs_post(ctx);
@@ -5170,7 +5172,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
                }
                idx += slot_inc;
        }
-       handle_vs_outputs_post(ctx);
+       handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
 }
 
 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
index b4c4a73a1c1e0ecb910213bebe4ef8dfeb2a9cc1..15afbd77456402b88e211dfea24d8638b4f88650 100644 (file)
@@ -91,6 +91,23 @@ struct ac_userdata_locations {
        struct ac_userdata_info shader_data[AC_UD_MAX_UD];
 };
 
+struct ac_vs_output_info {
+       uint8_t clip_dist_mask;
+       uint8_t cull_dist_mask;
+       bool writes_pointsize;
+       bool writes_layer;
+       bool writes_viewport_index;
+       uint32_t prim_id_output;
+       uint32_t layer_output;
+       uint32_t export_mask;
+       unsigned param_exports;
+       unsigned pos_exports;
+};
+
+struct ac_es_output_info {
+       uint32_t esgs_itemsize;
+};
+
 struct ac_shader_variant_info {
        struct ac_userdata_locations user_sgprs_locs;
        unsigned num_user_sgprs;
@@ -98,19 +115,10 @@ struct ac_shader_variant_info {
        unsigned num_input_vgprs;
        union {
                struct {
-                       unsigned param_exports;
-                       unsigned pos_exports;
+                       struct ac_vs_output_info outinfo;
+                       struct ac_es_output_info es_info;
                        unsigned vgpr_comp_cnt;
-                       uint32_t export_mask;
-                       bool writes_pointsize;
-                       bool writes_layer;
-                       bool writes_viewport_index;
                        bool as_es;
-                       uint8_t clip_dist_mask;
-                       uint8_t cull_dist_mask;
-                       uint32_t esgs_itemsize;
-                       uint32_t prim_id_output;
-                       uint32_t layer_output;
                } vs;
                struct {
                        unsigned num_interp;
index eb2a7b0ddea074940cbdb0a79be4a3a5a65135ff..ce34204b8ac6943a5b221c904f10ad95a871f593 100644 (file)
@@ -502,7 +502,8 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
 static void
 radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
                struct radv_pipeline *pipeline,
-               struct radv_shader_variant *shader)
+               struct radv_shader_variant *shader,
+               struct ac_vs_output_info *outinfo)
 {
        struct radeon_winsys *ws = cmd_buffer->device->ws;
        uint64_t va = ws->buffer_get_va(shader->bo);
@@ -510,19 +511,19 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
 
        ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 
-       export_count = MAX2(1, shader->info.vs.param_exports);
+       export_count = MAX2(1, outinfo->param_exports);
        radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
                               S_0286C4_VS_EXPORT_COUNT(export_count - 1));
 
        radeon_set_context_reg(cmd_buffer->cs, R_02870C_SPI_SHADER_POS_FORMAT,
                               S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-                              S_02870C_POS1_EXPORT_FORMAT(shader->info.vs.pos_exports > 1 ?
+                              S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
                                                           V_02870C_SPI_SHADER_4COMP :
                                                           V_02870C_SPI_SHADER_NONE) |
-                              S_02870C_POS2_EXPORT_FORMAT(shader->info.vs.pos_exports > 2 ?
+                              S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
                                                           V_02870C_SPI_SHADER_4COMP :
                                                           V_02870C_SPI_SHADER_NONE) |
-                              S_02870C_POS3_EXPORT_FORMAT(shader->info.vs.pos_exports > 3 ?
+                              S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
                                                           V_02870C_SPI_SHADER_4COMP :
                                                           V_02870C_SPI_SHADER_NONE));
 
@@ -540,17 +541,17 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
                               S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
 
        unsigned clip_dist_mask, cull_dist_mask, total_mask;
-       clip_dist_mask = shader->info.vs.clip_dist_mask;
-       cull_dist_mask = shader->info.vs.cull_dist_mask;
+       clip_dist_mask = outinfo->clip_dist_mask;
+       cull_dist_mask = outinfo->cull_dist_mask;
        total_mask = clip_dist_mask | cull_dist_mask;
 
        radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
-                              S_02881C_USE_VTX_POINT_SIZE(shader->info.vs.writes_pointsize) |
-                              S_02881C_USE_VTX_RENDER_TARGET_INDX(shader->info.vs.writes_layer) |
-                              S_02881C_USE_VTX_VIEWPORT_INDX(shader->info.vs.writes_viewport_index) |
-                              S_02881C_VS_OUT_MISC_VEC_ENA(shader->info.vs.writes_pointsize ||
-                                                           shader->info.vs.writes_layer ||
-                                                           shader->info.vs.writes_viewport_index) |
+                              S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+                              S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+                              S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+                              S_02881C_VS_OUT_MISC_VEC_ENA(outinfo->writes_pointsize ||
+                                                           outinfo->writes_layer ||
+                                                           outinfo->writes_viewport_index) |
                               S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
                               S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
                               pipeline->graphics.raster.pa_cl_vs_out_cntl |
@@ -558,12 +559,13 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
                               clip_dist_mask);
 
        radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
-                              S_028AB4_REUSE_OFF(shader->info.vs.writes_viewport_index));
+                              S_028AB4_REUSE_OFF(outinfo->writes_viewport_index));
 }
 
 static void
 radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
-               struct radv_shader_variant *shader)
+               struct radv_shader_variant *shader,
+               struct ac_es_output_info *outinfo)
 {
        struct radeon_winsys *ws = cmd_buffer->device->ws;
        uint64_t va = ws->buffer_get_va(shader->bo);
@@ -571,7 +573,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
        ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
 
        radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-                              shader->info.vs.esgs_itemsize / 4);
+                              outinfo->esgs_itemsize / 4);
        radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
        radeon_emit(cmd_buffer->cs, va >> 8);
        radeon_emit(cmd_buffer->cs, va >> 40);
@@ -590,9 +592,9 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
        vs = pipeline->shaders[MESA_SHADER_VERTEX];
 
        if (vs->info.vs.as_es)
-               radv_emit_hw_es(cmd_buffer, vs);
+               radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
        else
-               radv_emit_hw_vs(cmd_buffer, pipeline, vs);
+               radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
 
        radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, 0);
 }
@@ -666,7 +668,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
        radeon_emit(cmd_buffer->cs, gs->rsrc1);
        radeon_emit(cmd_buffer->cs, gs->rsrc2);
 
-       radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader);
+       radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
 
        struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
                                                             AC_UD_GS_VS_RING_STRIDE_ENTRIES);
@@ -696,10 +698,14 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
        struct radv_blend_state *blend = &pipeline->graphics.blend;
        unsigned ps_offset = 0;
        unsigned z_order;
+       struct ac_vs_output_info *outinfo;
        assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
 
        ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
        vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX];
+
+       outinfo = &vs->info.vs.outinfo;
+
        va = ws->buffer_get_va(ps->bo);
        ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
 
@@ -757,20 +763,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
                ps_offset++;
        }
 
-       if (ps->info.fs.prim_id_input && (vs->info.vs.prim_id_output != 0xffffffff)) {
+       if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
                unsigned vs_offset, flat_shade;
                unsigned val;
-               vs_offset = vs->info.vs.prim_id_output;
+               vs_offset = outinfo->prim_id_output;
                flat_shade = true;
                val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
                radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
                ++ps_offset;
        }
 
-       if (ps->info.fs.layer_input && (vs->info.vs.layer_output != 0xffffffff)) {
+       if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
                unsigned vs_offset, flat_shade;
                unsigned val;
-               vs_offset = vs->info.vs.layer_output;
+               vs_offset = outinfo->layer_output;
                flat_shade = true;
                val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
                radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val);
@@ -785,20 +791,20 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
                        continue;
 
 
-               if (!(vs->info.vs.export_mask & (1u << i))) {
+               if (!(outinfo->export_mask & (1u << i))) {
                        radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset,
                                               S_028644_OFFSET(0x20));
                        ++ps_offset;
                        continue;
                }
 
-               vs_offset = util_bitcount(vs->info.vs.export_mask & ((1u << i) - 1));
-               if (vs->info.vs.prim_id_output != 0xffffffff) {
-                       if (vs_offset >= vs->info.vs.prim_id_output)
+               vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
+               if (outinfo->prim_id_output != 0xffffffff) {
+                       if (vs_offset >= outinfo->prim_id_output)
                                vs_offset++;
                }
-               if (vs->info.vs.layer_output != 0xffffffff) {
-                       if (vs_offset >= vs->info.vs.layer_output)
+               if (outinfo->layer_output != 0xffffffff) {
+                       if (vs_offset >= outinfo->layer_output)
                          vs_offset++;
                }
                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
index 1becb65055bafa940cfd881cc644543bb11e28e6..2c710f4eb8faf5191eddbb4ac7880ca6ebc8137f 100644 (file)
@@ -1467,15 +1467,15 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
        unsigned alignment = 256 * num_se;
        /* The maximum size is 63.999 MB per SE. */
        unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
-
+       struct ac_es_output_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
        struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
-       struct ac_shader_variant_info *es_info = &pipeline->shaders[MESA_SHADER_VERTEX]->info;
+
        /* Calculate the minimum size. */
-       unsigned min_esgs_ring_size = align(es_info->vs.esgs_itemsize * gs_vertex_reuse *
+       unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
                                            wave_size, alignment);
        /* These are recommended sizes, not minimum sizes. */
        unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
-               es_info->vs.esgs_itemsize * gs_info->gs.vertices_in;
+               es_info->esgs_itemsize * gs_info->gs.vertices_in;
        unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
                gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1);