radeonsi: optimize viewport states
author: Marek Olšák <marek.olsak@amd.com>
Fri, 28 Aug 2015 19:48:37 +0000 (21:48 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Tue, 1 Sep 2015 19:51:13 +0000 (21:51 +0200)
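Same approach as for scissors: keep the viewport states in the context
together with a dirty mask and emit them from a single atom, instead of
allocating a separate PM4 state for every viewport.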

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index b2f342f4fa15afeaf1e2dfcb4e82de1a341f09b5..c28b2a80088dbfbe41ce180e6bb7905fc6ad243d 100644 (file)
@@ -65,9 +65,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
                util_blitter_save_sample_mask(sctx->blitter,
                                              sctx->queued.named.sample_mask->sample_mask);
        }
-       if (sctx->queued.named.viewport[0]) {
-               util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
-       }
+       util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
        util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
        util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
        util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
index 873a47279767e6e6c50b0ac9f5709d77218866ed..8284306f30a6c10bdc65aaa7cffbe1b70f497f72 100644 (file)
@@ -195,7 +195,9 @@ void si_begin_new_cs(struct si_context *ctx)
        si_all_descriptors_begin_new_cs(ctx);
 
        ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+       ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
        si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+       si_mark_atom_dirty(ctx, &ctx->viewports.atom);
 
        r600_postflush_resume_features(&ctx->b);
 
index 9060f94897151854171159c7efefca8e59057168..a1845ba893d027a46da94d347c70e1b0e25f20f1 100644 (file)
@@ -135,6 +135,12 @@ struct si_scissors {
        struct pipe_scissor_state       states[SI_MAX_VIEWPORTS];
 };
 
+struct si_viewports {
+       struct r600_atom                atom;
+       unsigned                        dirty_mask;
+       struct pipe_viewport_state      states[SI_MAX_VIEWPORTS];
+};
+
 #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
 
 struct si_context {
@@ -163,6 +169,7 @@ struct si_context {
                        struct r600_atom *clip_regs;
                        struct r600_atom *shader_userdata;
                        struct r600_atom *scissors;
+                       struct r600_atom *viewports;
                } s;
                struct r600_atom *array[0];
        } atoms;
@@ -191,6 +198,7 @@ struct si_context {
        unsigned                        border_color_offset;
 
        struct si_scissors              scissors;
+       struct si_viewports             viewports;
        struct r600_atom                clip_regs;
        struct r600_atom                msaa_sample_locs;
        struct r600_atom                msaa_config;
index 8bd35a8422eaee1605dc0578b8add8a12e47b4d4..940aaa0b247ce04be843077443a9cebdbfcfdef2 100644 (file)
@@ -578,29 +578,52 @@ static void si_set_viewport_states(struct pipe_context *ctx,
                                    const struct pipe_viewport_state *state)
 {
        struct si_context *sctx = (struct si_context *)ctx;
-       struct si_state_viewport *viewport;
-       struct si_pm4_state *pm4;
        int i;
 
-       for (i = start_slot; i < start_slot + num_viewports; i++) {
-               int idx = i - start_slot;
-               int offset = i * 4 * 6;
+       for (i = 0; i < num_viewports; i++)
+               sctx->viewports.states[start_slot + i] = state[i];
 
-               viewport = CALLOC_STRUCT(si_state_viewport);
-               if (!viewport)
-                       return;
-               pm4 = &viewport->pm4;
+       sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
+       si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+}
+
+static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+       struct pipe_viewport_state *states = sctx->viewports.states;
+       unsigned mask = sctx->viewports.dirty_mask;
+
+       /* The simple case: Only 1 viewport is active. */
+       if (mask & 1 &&
+           !si_get_vs_info(sctx)->writes_viewport_index) {
+               r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+               radeon_emit(cs, fui(states[0].scale[0]));
+               radeon_emit(cs, fui(states[0].translate[0]));
+               radeon_emit(cs, fui(states[0].scale[1]));
+               radeon_emit(cs, fui(states[0].translate[1]));
+               radeon_emit(cs, fui(states[0].scale[2]));
+               radeon_emit(cs, fui(states[0].translate[2]));
+               sctx->viewports.dirty_mask &= ~1; /* clear one bit */
+               return;
+       }
 
-               viewport->viewport = state[idx];
-               si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
-               si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
-               si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
-               si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
-               si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
-               si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
+       while (mask) {
+               int start, count, i;
 
-               si_pm4_set_state(sctx, viewport[i], viewport);
+               u_bit_scan_consecutive_range(&mask, &start, &count);
+
+               r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+                                              start * 4 * 6, count * 6);
+               for (i = start; i < start+count; i++) {
+                       radeon_emit(cs, fui(states[i].scale[0]));
+                       radeon_emit(cs, fui(states[i].translate[0]));
+                       radeon_emit(cs, fui(states[i].scale[1]));
+                       radeon_emit(cs, fui(states[i].translate[1]));
+                       radeon_emit(cs, fui(states[i].scale[2]));
+                       radeon_emit(cs, fui(states[i].translate[2]));
+               }
        }
+       sctx->viewports.dirty_mask = 0;
 }
 
 /*
@@ -3011,6 +3034,7 @@ void si_init_state_functions(struct si_context *sctx)
        si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10);
        si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6);
        si_init_atom(&sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors, 16*4);
+       si_init_atom(&sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports, 16*8);
 
        sctx->b.b.create_blend_state = si_create_blend_state;
        sctx->b.b.bind_blend_state = si_bind_blend_state;
index 34dbba480507d7dc87f51b03cba7022474a47817..321478316997cab6121c6824d9a61a1a10bfdf37 100644 (file)
@@ -48,11 +48,6 @@ struct si_state_sample_mask {
        uint16_t                sample_mask;
 };
 
-struct si_state_viewport {
-       struct si_pm4_state             pm4;
-       struct pipe_viewport_state      viewport;
-};
-
 struct si_state_rasterizer {
        struct si_pm4_state     pm4;
        bool                    flatshade;
@@ -91,7 +86,6 @@ union si_state {
                struct si_pm4_state             *blend_color;
                struct si_pm4_state             *clip;
                struct si_state_sample_mask     *sample_mask;
-               struct si_state_viewport        *viewport[16];
                struct si_state_rasterizer      *rasterizer;
                struct si_state_dsa             *dsa;
                struct si_pm4_state             *fb_rs;
index 5a9ef29a549b67feb4341e3a9242f8f102e2b1a4..4ca9aa5081430395d88a12aea22fe25f2edee35c 100644 (file)
@@ -775,6 +775,8 @@ static void si_update_viewports_and_scissors(struct si_context *sctx)
 
        if (sctx->scissors.dirty_mask)
            si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+       if (sctx->viewports.dirty_mask)
+           si_mark_atom_dirty(sctx, &sctx->viewports.atom);
 }
 
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)