radeonsi: add support for viewport array (v3)
author Dave Airlie <airlied@redhat.com>
Thu, 25 Jun 2015 02:36:23 +0000 (03:36 +0100)
committer Dave Airlie <airlied@redhat.com>
Fri, 26 Jun 2015 23:24:07 +0000 (00:24 +0100)
This isn't pretty, and I'd suggest that the pm4 interface builder
could be tweaked to do this more efficiently, but I'd need
guidance on how that would look.

This seems to pass the few piglit tests I threw at it.

v2: handle passing layer/viewport index to fragment shader,
fix crash in blit changes,
add support to io_get_unique_index for layer/viewport index,
update docs.
v3: avoid looking up viewport index and layer in es (Marek).

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
docs/GL3.txt
docs/relnotes/10.7.0.html
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 220bcc8742faa8bbb4260c9bf66a11348758fd33..df913bdd8c97902c6594bfa3979aa7da7276cd2b 100644 (file)
@@ -128,7 +128,7 @@ GL 4.1, GLSL 4.10:
   GL_ARB_separate_shader_objects                       DONE (all drivers)
   GL_ARB_shader_precision                              started (Micah)
   GL_ARB_vertex_attrib_64bit                           DONE (nvc0, softpipe)
-  GL_ARB_viewport_array                                DONE (i965, nv50, nvc0, r600, llvmpipe)
+  GL_ARB_viewport_array                                DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
 
 
 GL 4.2, GLSL 4.20:
@@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_copy_image                                    DONE (i965) (gallium - in progress, VMware)
   GL_KHR_debug                                         DONE (all drivers)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
-  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, llvmpipe)
+  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
   GL_ARB_framebuffer_no_attachments                    DONE (i965)
   GL_ARB_internalformat_query2                         not started
   GL_ARB_invalidate_subdata                            DONE (all drivers)
index e089889667dda41fbed77c6412e95ddd8e368fc9..fcc50811b69f966fec916b366a729da6ccfcab40 100644 (file)
@@ -44,8 +44,11 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
 <li>GL_ARB_framebuffer_no_attachments on i965</li>
 <li>GL_ARB_shader_stencil_export on llvmpipe</li>
+<li>GL_ARB_viewport_array on radeonsi</li>
+<li>GL_ARB_fragment_layer_viewport on radeonsi</li>
 </ul>
 
 <h2>Bug fixes</h2>
index 1f2c4082dbcecdc217ad4410b4e9d5e028f4f3e6..6c7b383a4a377d321b3e4f4146aa859cd45f64ca 100644 (file)
@@ -63,11 +63,11 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
                util_blitter_save_sample_mask(sctx->blitter,
                                              sctx->queued.named.sample_mask->sample_mask);
        }
-       if (sctx->queued.named.viewport) {
-               util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
+       if (sctx->queued.named.viewport[0]) {
+               util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
        }
-       if (sctx->queued.named.scissor) {
-               util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
+       if (sctx->queued.named.scissor[0]) {
+               util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
        }
        util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
        util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
index 53ae71a8c926e8f1fae33d80bd6c9a90837757d1..480a3010d31a661780b97f68e8b2de344a019188 100644 (file)
@@ -335,7 +335,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return 8;
 
        case PIPE_CAP_MAX_VIEWPORTS:
-               return 1;
+               return 16;
 
        /* Timer queries, present when the clock frequency is non zero. */
        case PIPE_CAP_QUERY_TIMESTAMP:
index a293ef36fbb2972107505b6f6cbaf3ae9e1e6fc7..4ca31728dff3bd17b71b61137582fe7a2608efc8 100644 (file)
@@ -1132,7 +1132,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
                                &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
        LLVMValueRef args[9];
        LLVMValueRef pos_args[4][9] = { { 0 } };
-       LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
+       LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
        unsigned semantic_name, semantic_index;
        unsigned target;
        unsigned param_count = 0;
@@ -1158,7 +1158,12 @@ handle_semantic:
                        continue;
                case TGSI_SEMANTIC_LAYER:
                        layer_value = outputs[i].values[0];
-                       continue;
+                       semantic_name = TGSI_SEMANTIC_GENERIC;
+                       goto handle_semantic;
+               case TGSI_SEMANTIC_VIEWPORT_INDEX:
+                       viewport_index_value = outputs[i].values[0];
+                       semantic_name = TGSI_SEMANTIC_GENERIC;
+                       goto handle_semantic;
                case TGSI_SEMANTIC_POSITION:
                        target = V_008DFC_SQ_EXP_POS;
                        break;
@@ -1224,11 +1229,13 @@ handle_semantic:
        /* Write the misc vector (point size, edgeflag, layer, viewport). */
        if (shader->selector->info.writes_psize ||
            shader->selector->info.writes_edgeflag ||
+           shader->selector->info.writes_viewport_index ||
            shader->selector->info.writes_layer) {
                pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
                                                      shader->selector->info.writes_psize |
                                                      (shader->selector->info.writes_edgeflag << 1) |
-                                                     (shader->selector->info.writes_layer << 2));
+                                                     (shader->selector->info.writes_layer << 2) |
+                                                     (shader->selector->info.writes_viewport_index << 3));
                pos_args[1][1] = uint->zero; /* EXEC mask */
                pos_args[1][2] = uint->zero; /* last export? */
                pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
@@ -1259,6 +1266,9 @@ handle_semantic:
 
                if (shader->selector->info.writes_layer)
                        pos_args[1][7] = layer_value;
+
+               if (shader->selector->info.writes_viewport_index)
+                       pos_args[1][8] = viewport_index_value;
        }
 
        for (i = 0; i < 4; i++)
@@ -1299,10 +1309,15 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
        for (i = 0; i < info->num_outputs; i++) {
                LLVMValueRef *out_ptr =
                        si_shader_ctx->radeon_bld.soa.outputs[i];
-               int param_index = get_param_index(info->output_semantic_name[i],
-                                                 info->output_semantic_index[i],
-                                                 es->key.vs.gs_used_inputs);
+               int param_index;
+
+               if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
+                   info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
+                       continue;
 
+               param_index = get_param_index(info->output_semantic_name[i],
+                                             info->output_semantic_index[i],
+                                             es->key.vs.gs_used_inputs);
                if (param_index < 0)
                        continue;
 
index 6c18836d1895d3c77f7e5bb05d1e8f537eb00965..752467bcfd77630b4175fd108e0734ee74f1b604 100644 (file)
@@ -489,11 +489,13 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
                S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
                S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
                S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
+               S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
                S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
                S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
                S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
                                            info->writes_edgeflag ||
-                                           info->writes_layer) |
+                                           info->writes_layer ||
+                                            info->writes_viewport_index) |
                (sctx->queued.named.rasterizer->clip_plane_enable &
                 clipdist_mask));
        r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -509,20 +511,26 @@ static void si_set_scissor_states(struct pipe_context *ctx,
                                   const struct pipe_scissor_state *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
-       struct si_pm4_state *pm4 = &scissor->pm4;
-
-       if (scissor == NULL)
-               return;
+       struct si_state_scissor *scissor;
+       struct si_pm4_state *pm4;
+       int i;
 
-       scissor->scissor = *state;
-       si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
-                      S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
-                      S_028250_WINDOW_OFFSET_DISABLE(1));
-       si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
-                      S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
+       for (i = start_slot; i < start_slot + num_scissors; i++) {
+               int idx = i - start_slot;
+               int offset = i * 4 * 2;
 
-       si_pm4_set_state(sctx, scissor, scissor);
+               scissor = CALLOC_STRUCT(si_state_scissor);
+               if (scissor == NULL)
+                       return;
+               pm4 = &scissor->pm4;
+               scissor->scissor = state[idx];
+               si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
+                              S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
+                              S_028250_WINDOW_OFFSET_DISABLE(1));
+               si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
+                              S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
+               si_pm4_set_state(sctx, scissor[i], scissor);
+       }
 }
 
 static void si_set_viewport_states(struct pipe_context *ctx,
@@ -531,21 +539,29 @@ static void si_set_viewport_states(struct pipe_context *ctx,
                                    const struct pipe_viewport_state *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
-       struct si_pm4_state *pm4 = &viewport->pm4;
+       struct si_state_viewport *viewport;
+       struct si_pm4_state *pm4;
+       int i;
 
-       if (viewport == NULL)
-               return;
+       for (i = start_slot; i < start_slot + num_viewports; i++) {
+               int idx = i - start_slot;
+               int offset = i * 4 * 6;
 
-       viewport->viewport = *state;
-       si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
-       si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
-       si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
-       si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
-       si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
-       si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
+               viewport = CALLOC_STRUCT(si_state_viewport);
+               if (!viewport)
+                       return;
+               pm4 = &viewport->pm4;
+
+               viewport->viewport = state[idx];
+               si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0 + offset, fui(state[idx].scale[0]));
+               si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0 + offset, fui(state[idx].translate[0]));
+               si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0 + offset, fui(state[idx].scale[1]));
+               si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0 + offset, fui(state[idx].translate[1]));
+               si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0 + offset, fui(state[idx].scale[2]));
+               si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0 + offset, fui(state[idx].translate[2]));
 
-       si_pm4_set_state(sctx, viewport, viewport);
+               si_pm4_set_state(sctx, viewport[i], viewport);
+       }
 }
 
 /*
index 5e68b16213753f2c6030a1a87cf05abbb1ef87be..d1f2dff2c3f98044bf603423ae70e702349225f9 100644 (file)
@@ -92,8 +92,8 @@ union si_state {
                struct si_pm4_state             *blend_color;
                struct si_pm4_state             *clip;
                struct si_state_sample_mask     *sample_mask;
-               struct si_state_scissor         *scissor;
-               struct si_state_viewport        *viewport;
+               struct si_state_scissor         *scissor[16];
+               struct si_state_viewport        *viewport[16];
                struct si_state_rasterizer      *rasterizer;
                struct si_state_dsa             *dsa;
                struct si_pm4_state             *fb_rs;
index 208c8523ef107807c68364d70b850cdb5520e96f..48128fa44e1b90388c5655c4affd64543f838f8f 100644 (file)
@@ -187,8 +187,6 @@ static void si_shader_vs(struct si_shader *shader)
                case TGSI_SEMANTIC_POSITION:
                case TGSI_SEMANTIC_PSIZE:
                case TGSI_SEMANTIC_EDGEFLAG:
-               case TGSI_SEMANTIC_VIEWPORT_INDEX:
-               case TGSI_SEMANTIC_LAYER:
                        break;
                default:
                        nparams++;