radeonsi: add support for geometry shader invocations.
author Dave Airlie <airlied@redhat.com>
Thu, 25 Jun 2015 02:55:54 +0000 (03:55 +0100)
committer Dave Airlie <airlied@redhat.com>
Fri, 26 Jun 2015 23:24:30 +0000 (00:24 +0100)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
docs/GL3.txt
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index df913bdd8c97902c6594bfa3979aa7da7276cd2b..81014a5f9b5002c34025339fe72c9db5d32ffeac 100644 (file)
@@ -104,7 +104,7 @@ GL 4.0, GLSL 4.00:
   - Fused multiply-add                                 DONE ()
   - Packing/bitfield/conversion functions              DONE (r600, radeonsi, softpipe)
   - Enhanced textureGather                             DONE (r600, radeonsi, softpipe)
-  - Geometry shader instancing                         DONE (r600, llvmpipe, softpipe)
+  - Geometry shader instancing                         DONE (r600, radeonsi, llvmpipe, softpipe)
   - Geometry shader multiple streams                   DONE ()
   - Enhanced per-sample shading                        DONE (r600, radeonsi)
   - Interpolation functions                            DONE (r600)
index 4ca31728dff3bd17b71b61137582fe7a2608efc8..4d97b58aec80612e0014167b324b858e0a3ecdf6 100644 (file)
@@ -630,6 +630,11 @@ static void declare_system_value(
                                     SI_PARAM_BASE_VERTEX);
                break;
 
+       case TGSI_SEMANTIC_INVOCATIONID:
+               value = LLVMGetParam(radeon_bld->main_fn,
+                                    SI_PARAM_GS_INSTANCE_ID);
+               break;
+
        case TGSI_SEMANTIC_SAMPLEID:
                value = get_sample_id(radeon_bld);
                break;
index 51055afe36adad21b7e5ecdbaa6835ab646adb18..b4339ae2b360ed81b0b32e495eeb170a56e4bff2 100644 (file)
@@ -115,6 +115,7 @@ struct si_shader_selector {
 
        unsigned        gs_output_prim;
        unsigned        gs_max_out_vertices;
+       unsigned        gs_num_invocations;
        uint64_t        gs_used_inputs; /* mask of "get_unique_index" bits */
 };
 
index 752467bcfd77630b4175fd108e0734ee74f1b604..0dd08a248f46b1195805d0545e64e1b3a06f3831 100644 (file)
@@ -3078,7 +3078,6 @@ void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
        si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
        si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
-       si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
 
        si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
        si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
index 48128fa44e1b90388c5655c4affd64543f838f8f..eef3baad1640e5de6af94979f3ce1a989103958d 100644 (file)
@@ -76,6 +76,7 @@ static void si_shader_gs(struct si_shader *shader)
        unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
        unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
        unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+       unsigned gs_num_invocations = shader->selector->gs_num_invocations;
        unsigned cut_mode;
        struct si_pm4_state *pm4;
        unsigned num_sgprs, num_user_sgprs;
@@ -118,6 +119,10 @@ static void si_shader_gs(struct si_shader *shader)
 
        si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
 
+       si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
+                      S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+                      S_028B90_ENABLE(gs_num_invocations > 0));
+
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
        si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
@@ -490,6 +495,8 @@ static void *si_create_shader_state(struct pipe_context *ctx,
                        sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
                sel->gs_max_out_vertices =
                        sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+               sel->gs_num_invocations =
+                       sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
 
                for (i = 0; i < sel->info.num_inputs; i++) {
                        unsigned name = sel->info.input_semantic_name[i];