From 7b89fcec416ed7e6ddadec2438aab63609d825f8 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 22 May 2018 02:12:38 +0200 Subject: [PATCH] llvmpipe: improve rasterization discard logic This unifies the explicit rasterization discard and the implicit rasterization disabled logic (which we need for another state tracker), which really should do the exact same thing. We'll now toss out the prims early on in setup with (implicit or explicit) discard, rather than do setup and binning with them, which was entirely pointless. (We should eventually get rid of implicit discard, which should also enable us to discard stuff already in draw, hence draw would be able to skip the pointless clip and fallback stages in this case.) We still need separate logic for only null ps - this is not the same as rasterization discard. But simplify the logic there and simply don't count ps invocations when there's an empty fs, regardless of depth/stencil tests, which seems perfectly acceptable by d3d10. While here, also fix statistics for primitives if face culling is enabled. No piglit changes. 
Reviewed-by: Brian Paul Reviewed-by: Jose Fonseca --- src/gallium/drivers/llvmpipe/lp_context.h | 1 - src/gallium/drivers/llvmpipe/lp_jit.c | 1 + src/gallium/drivers/llvmpipe/lp_jit.h | 5 +++ src/gallium/drivers/llvmpipe/lp_rast.c | 12 ++---- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6 --- src/gallium/drivers/llvmpipe/lp_scene.c | 5 +-- src/gallium/drivers/llvmpipe/lp_scene.h | 10 ++--- src/gallium/drivers/llvmpipe/lp_setup.c | 18 ++++---- src/gallium/drivers/llvmpipe/lp_setup_line.c | 28 ++++++++----- src/gallium/drivers/llvmpipe/lp_setup_point.c | 22 ++++++---- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 29 +++++++++---- src/gallium/drivers/llvmpipe/lp_setup_vbuf.c | 2 +- .../drivers/llvmpipe/lp_state_derived.c | 22 ++++++++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 41 +++++++++---------- src/gallium/drivers/llvmpipe/lp_state_fs.h | 5 --- 15 files changed, 118 insertions(+), 89 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 54d98fdbf7d..7a2f2539842 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -136,7 +136,6 @@ struct llvmpipe_context { struct blitter_context *blitter; unsigned tex_timestamp; - boolean no_rast; /** List of all fragment shader variants */ struct lp_fs_variant_list_item fs_variants_list; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index a2762f39a04..e2309f47157 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -212,6 +212,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) elem_types[LP_JIT_THREAD_DATA_CACHE] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0); elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); + elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = LLVMInt32TypeInContext(lc); diff 
--git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 9db26f2cba9..312d1a1281d 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -192,6 +192,7 @@ struct lp_jit_thread_data { struct lp_build_format_cache *cache; uint64_t vis_counter; + uint64_t ps_invocations; /* * Non-interpolated rasterizer state passed through to the fragment shader. @@ -205,6 +206,7 @@ struct lp_jit_thread_data enum { LP_JIT_THREAD_DATA_CACHE = 0, LP_JIT_THREAD_DATA_COUNTER, + LP_JIT_THREAD_DATA_INVOCATIONS, LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, LP_JIT_THREAD_DATA_COUNT }; @@ -216,6 +218,9 @@ enum { #define lp_jit_thread_data_counter(_gallivm, _ptr) \ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter") +#define lp_jit_thread_data_invocations(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_INVOCATIONS, "invocs") + #define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \ lp_build_struct_get(_gallivm, _ptr, \ LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 939944aa791..9d4f9f8d027 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -107,7 +107,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task, task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; task->thread_data.vis_counter = 0; - task->ps_invocations = 0; + task->thread_data.ps_invocations = 0; for (i = 0; i < task->scene->fb.nr_cbufs; i++) { if (task->scene->fb.cbufs[i]) { @@ -446,10 +446,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, * allocated 4x4 blocks hence need to filter them out here. */ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { - /* not very accurate would need a popcount on the mask */ - /* always count this not worth bothering? 
*/ - task->ps_invocations += 1 * variant->ps_inv_multiplier; - /* Propagate non-interpolated raster state. */ task->thread_data.raster_state.viewport_index = inputs->viewport_index; @@ -491,7 +487,7 @@ lp_rast_begin_query(struct lp_rasterizer_task *task, pq->start[task->thread_index] = task->thread_data.vis_counter; break; case PIPE_QUERY_PIPELINE_STATISTICS: - pq->start[task->thread_index] = task->ps_invocations; + pq->start[task->thread_index] = task->thread_data.ps_invocations; break; default: assert(0); @@ -524,7 +520,7 @@ lp_rast_end_query(struct lp_rasterizer_task *task, break; case PIPE_QUERY_PIPELINE_STATISTICS: pq->end[task->thread_index] += - task->ps_invocations - pq->start[task->thread_index]; + task->thread_data.ps_invocations - pq->start[task->thread_index]; pq->start[task->thread_index] = 0; break; default: @@ -679,7 +675,7 @@ rasterize_scene(struct lp_rasterizer_task *task, #endif #endif - if (!task->rast->no_rast && !scene->discard) { + if (!task->rast->no_rast) { /* loop over scene bins, rasterize each */ { struct cmd_bin *bin; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index fe078d5b869..59d3a2d8c88 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -99,8 +99,6 @@ struct lp_rasterizer_task /** Non-interpolated passthru state and occlude counter for visible pixels */ struct lp_jit_thread_data thread_data; - uint64_t ps_invocations; - uint8_t ps_inv_multiplier; pipe_semaphore work_ready; pipe_semaphore work_done; @@ -259,10 +257,6 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, * allocated 4x4 blocks hence need to filter them out here. */ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { - /* not very accurate would need a popcount on the mask */ - /* always count this not worth bothering? */ - task->ps_invocations += 1 * variant->ps_inv_multiplier; - /* Propagate non-interpolated raster state. 
*/ task->thread_data.raster_state.viewport_index = inputs->viewport_index; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index dfad9fabb20..ef0136c4fd2 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -507,15 +507,14 @@ end: } -void lp_scene_begin_binning( struct lp_scene *scene, - struct pipe_framebuffer_state *fb, boolean discard ) +void lp_scene_begin_binning(struct lp_scene *scene, + struct pipe_framebuffer_state *fb) { int i; unsigned max_layer = ~0; assert(lp_scene_is_empty(scene)); - scene->discard = discard; util_copy_framebuffer_state(&scene->fb, fb); scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index da29057f1ef..b4ed8817ea7 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -166,7 +166,6 @@ struct lp_scene { unsigned resource_reference_size; boolean alloc_failed; - boolean discard; /** * Number of active tiles in each dimension. 
* This basically the framebuffer size divided by tile size @@ -389,12 +388,11 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *x, int *y ); /* Begin/end binning of a scene */ void -lp_scene_begin_binning( struct lp_scene *scene, - struct pipe_framebuffer_state *fb, - boolean discard ); +lp_scene_begin_binning(struct lp_scene *scene, + struct pipe_framebuffer_state *fb); void -lp_scene_end_binning( struct lp_scene *scene ); +lp_scene_end_binning(struct lp_scene *scene); /* Begin/end rasterization of a scene @@ -403,7 +401,7 @@ void lp_scene_begin_rasterization(struct lp_scene *scene); void -lp_scene_end_rasterization(struct lp_scene *scene ); +lp_scene_end_rasterization(struct lp_scene *scene); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c1573231335..b0873694732 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -82,7 +82,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup) lp_fence_wait(setup->scene->fence); } - lp_scene_begin_binning(setup->scene, &setup->fb, setup->rasterizer_discard); + lp_scene_begin_binning(setup->scene, &setup->fb); } @@ -724,25 +724,27 @@ lp_setup_set_scissors( struct lp_setup_context *setup, void -lp_setup_set_flatshade_first( struct lp_setup_context *setup, - boolean flatshade_first ) +lp_setup_set_flatshade_first(struct lp_setup_context *setup, + boolean flatshade_first) { setup->flatshade_first = flatshade_first; } void -lp_setup_set_rasterizer_discard( struct lp_setup_context *setup, - boolean rasterizer_discard ) +lp_setup_set_rasterizer_discard(struct lp_setup_context *setup, + boolean rasterizer_discard) { if (setup->rasterizer_discard != rasterizer_discard) { setup->rasterizer_discard = rasterizer_discard; - set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ ); + setup->line = first_line; + setup->point = first_point; + setup->triangle = first_triangle; } } void -lp_setup_set_vertex_info( struct 
lp_setup_context *setup, - struct vertex_info *vertex_info ) +lp_setup_set_vertex_info(struct lp_setup_context *setup, + struct vertex_info *vertex_info) { /* XXX: just silently holding onto the pointer: */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index d0bac5efb99..c1d8237a8ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -616,8 +616,7 @@ try_setup_line( struct lp_setup_context *setup, LP_COUNT(nr_tris); - if (lp_context->active_statistics_queries && - !llvmpipe_rasterization_disabled(lp_context)) { + if (lp_context->active_statistics_queries) { lp_context->pipeline_statistics.c_primitives++; } @@ -759,24 +758,33 @@ try_setup_line( struct lp_setup_context *setup, } -static void lp_setup_line( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4] ) +static void lp_setup_line_discard(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) { - if (!try_setup_line( setup, v0, v1 )) - { +} + +static void lp_setup_line(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + if (!try_setup_line(setup, v0, v1)) { if (!lp_setup_flush_and_restart(setup)) return; - if (!try_setup_line( setup, v0, v1 )) + if (!try_setup_line(setup, v0, v1)) return; } } -void lp_setup_choose_line( struct lp_setup_context *setup ) +void lp_setup_choose_line(struct lp_setup_context *setup) { - setup->line = lp_setup_line; + if (setup->rasterizer_discard) { + setup->line = lp_setup_line_discard; + } else { + setup->line = lp_setup_line; + } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 8cb6b83f916..2192789bd4c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -458,8 +458,7 @@ try_setup_point( struct lp_setup_context *setup, LP_COUNT(nr_tris); - if 
(lp_context->active_statistics_queries && - !llvmpipe_rasterization_disabled(lp_context)) { + if (lp_context->active_statistics_queries) { lp_context->pipeline_statistics.c_primitives++; } @@ -518,24 +517,33 @@ try_setup_point( struct lp_setup_context *setup, static void +lp_setup_point_discard(struct lp_setup_context *setup, + const float (*v0)[4]) +{ +} + +static void lp_setup_point(struct lp_setup_context *setup, const float (*v0)[4]) { - if (!try_setup_point( setup, v0 )) - { + if (!try_setup_point(setup, v0)) { if (!lp_setup_flush_and_restart(setup)) return; - if (!try_setup_point( setup, v0 )) + if (!try_setup_point(setup, v0)) return; } } void -lp_setup_choose_point( struct lp_setup_context *setup ) +lp_setup_choose_point(struct lp_setup_context *setup) { - setup->point = lp_setup_point; + if (setup->rasterizer_discard) { + setup->point = lp_setup_point_discard; + } else { + setup->point = lp_setup_point; + } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 39755d6b581..cec6198ec63 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -1127,6 +1127,11 @@ static void triangle_cw(struct lp_setup_context *setup, const float (*v2)[4]) { PIPE_ALIGN_VAR(16) struct fixed_position position; + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + + if (lp_context->active_statistics_queries) { + lp_context->pipeline_statistics.c_primitives++; + } calc_fixed_position(setup, &position, v0, v1, v2); @@ -1148,6 +1153,11 @@ static void triangle_ccw(struct lp_setup_context *setup, const float (*v2)[4]) { PIPE_ALIGN_VAR(16) struct fixed_position position; + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + + if (lp_context->active_statistics_queries) { + lp_context->pipeline_statistics.c_primitives++; + } calc_fixed_position(setup, &position, v0, v1, v2); @@ -1166,8 +1176,7 @@ static void 
triangle_both(struct lp_setup_context *setup, PIPE_ALIGN_VAR(16) struct fixed_position position; struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; - if (lp_context->active_statistics_queries && - !llvmpipe_rasterization_disabled(lp_context)) { + if (lp_context->active_statistics_queries) { lp_context->pipeline_statistics.c_primitives++; } @@ -1196,17 +1205,21 @@ static void triangle_both(struct lp_setup_context *setup, } -static void triangle_nop( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void triangle_noop(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { } void -lp_setup_choose_triangle( struct lp_setup_context *setup ) +lp_setup_choose_triangle(struct lp_setup_context *setup) { + if (setup->rasterizer_discard) { + setup->triangle = triangle_noop; + return; + } switch (setup->cullmode) { case PIPE_FACE_NONE: setup->triangle = triangle_both; @@ -1218,7 +1231,7 @@ lp_setup_choose_triangle( struct lp_setup_context *setup ) setup->triangle = setup->ccw_is_frontface ? 
triangle_cw : triangle_ccw; break; default: - setup->triangle = triangle_nop; + setup->triangle = triangle_noop; break; } } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 28a48d48820..6675b20168b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -571,7 +571,7 @@ lp_setup_pipeline_statistics( stats->gs_invocations; llvmpipe->pipeline_statistics.gs_primitives += stats->gs_primitives; - if (!llvmpipe_rasterization_disabled(llvmpipe)) { + if (!setup->rasterizer_discard) { llvmpipe->pipeline_statistics.c_invocations += stats->c_invocations; } else { diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 3e75d44dac6..4bcca907244 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -207,13 +207,27 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_SAMPLER | LP_NEW_SAMPLER_VIEW | LP_NEW_OCCLUSION_QUERY)) - llvmpipe_update_fs( llvmpipe ); + llvmpipe_update_fs(llvmpipe); - if (llvmpipe->dirty & (LP_NEW_RASTERIZER)) { + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_FRAMEBUFFER | + LP_NEW_RASTERIZER | + LP_NEW_DEPTH_STENCIL_ALPHA)) { + + /* + * Rasterization is disabled if there is no pixel shader and + * both depth and stencil testing are disabled: + * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125 + * FIXME: set rasterizer_discard in state tracker instead. + */ + boolean null_fs = !llvmpipe->fs || + llvmpipe->fs->info.base.num_instructions <= 1; boolean discard = (llvmpipe->sample_mask & 1) == 0 || - (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE); - + (llvmpipe->rasterizer ? 
llvmpipe->rasterizer->rasterizer_discard : FALSE) || + (null_fs && + !llvmpipe->depth_stencil->depth.enabled && + !llvmpipe->depth_stencil->stencil[0].enabled); lp_setup_set_rasterizer_discard(llvmpipe->setup, discard); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 91b68e7c96e..b7e16f92469 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -2554,6 +2554,25 @@ generate_fragment(struct llvmpipe_context *lp, assert(builder); LLVMPositionBuilderAtEnd(builder, block); + /* + * Must not count ps invocations if there's a null shader. + * (It would be ok to count with null shader if there's d/s tests, + * but only if there's d/s buffers too, which is different + * to implicit rasterization disable which must not depend + * on the d/s buffers.) + * Could use popcount on mask, but pixel accuracy is not required. + * Could disable if there's no stats query, but maybe not worth it. + */ + if (shader->info.base.num_instructions > 1) { + LLVMValueRef invocs, val; + invocs = lp_jit_thread_data_invocations(gallivm, thread_data_ptr); + val = LLVMBuildLoad(builder, invocs, ""); + val = LLVMBuildAdd(builder, val, + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 1, 0), + "invoc_count"); + LLVMBuildStore(builder, val, invocs); + } + /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->state); @@ -2843,14 +2862,6 @@ generate_variant(struct llvmpipe_context *lp, !shader->info.base.writes_samplemask ? 
TRUE : FALSE; - /* if num_instructions == 1, it's a nop shader with only an END instruction */ - if ((shader->info.base.num_instructions <= 1) && - !key->depth.enabled && !key->stencil[0].enabled) { - variant->ps_inv_multiplier = 0; - } else { - variant->ps_inv_multiplier = 1; - } - if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { lp_debug_fs_variant(variant); } @@ -3471,18 +3482,4 @@ llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; } -/* - * Rasterization is disabled if there is no pixel shader and - * both depth and stencil testing are disabled: - * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125 - */ -boolean -llvmpipe_rasterization_disabled(struct llvmpipe_context *lp) -{ - /* if num_instructions == 1, it's a nop shader with only an END instruction */ - boolean null_fs = !lp->fs || lp->fs->info.base.num_instructions <= 1; - return (null_fs && - !lp->depth_stencil->depth.enabled && - !lp->depth_stencil->stencil[0].enabled); -} diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 2ddd8518834..28eccde17f8 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -98,7 +98,6 @@ struct lp_fragment_shader_variant struct lp_fragment_shader_variant_key key; boolean opaque; - uint8_t ps_inv_multiplier; struct gallivm_state *gallivm; @@ -150,8 +149,4 @@ void llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, struct lp_fragment_shader_variant *variant); -boolean -llvmpipe_rasterization_disabled(struct llvmpipe_context *lp); - - #endif /* LP_STATE_FS_H_ */ -- 2.30.2