From 14cec07177f438717cc6fb9252525e16d6b3d8dd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Feb 2013 13:15:20 -0800 Subject: [PATCH] i965: Make perf_debug() output to GL_ARB_debug_output in a debug context. I tried to ensure that performance in the non-debug case doesn't change (we still just check one condition up front), and I think the impact is small enough in the debug context case to warrant including all of it. Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_context.c | 6 +- src/mesa/drivers/dri/i965/brw_draw.c | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +- src/mesa/drivers/dri/i965/brw_program.h | 3 +- src/mesa/drivers/dri/i965/brw_queryobj.c | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 2 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 +- src/mesa/drivers/dri/i965/brw_vs.c | 21 +++---- src/mesa/drivers/dri/i965/brw_wm.c | 58 +++++++++++-------- .../drivers/dri/intel/intel_buffer_objects.c | 2 +- src/mesa/drivers/dri/intel/intel_context.c | 2 + src/mesa/drivers/dri/intel/intel_context.h | 17 +++++- src/mesa/drivers/dri/intel/intel_regions.c | 2 +- src/mesa/drivers/dri/intel/intel_tex_copy.c | 3 +- .../drivers/dri/intel/intel_tex_subimage.c | 2 +- 16 files changed, 83 insertions(+), 48 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a89ebb97aa6..fdbe3275bbb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -397,9 +397,13 @@ brwCreateContext(int api, if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0) ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT; - if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) + if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) { ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT; + /* Turn on some extra GL_ARB_debug_output generation. */ + intel->perf_debug = true; + } + brw_fs_alloc_reg_sets(brw); if (INTEL_DEBUG & DEBUG_SHADER_TIME) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 2c2b826d196..9c96f697a2e 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -550,6 +550,7 @@ void brw_draw_prims( struct gl_context *ctx, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount ) { + struct intel_context *intel = intel_context(ctx); const struct gl_client_array **arrays = ctx->Array._DrawArrays; if (!_mesa_check_conditional_render(ctx)) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7cc49347d83..927cf13a170 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2877,7 +2877,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, bool start_busy = false; float start_time = 0; - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (unlikely(intel->perf_debug)) { start_busy = (intel->batch.last_bo && drm_intel_bo_busy(intel->batch.last_bo)); start_time = get_time(); @@ -2932,7 +2932,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, simd16_instructions, final_assembly_size); - if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) { + if (unlikely(intel->perf_debug) && shader) { if (shader->compiled_once) brw_wm_debug_recompile(brw, prog, &c->key); shader->compiled_once = true; diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index 182177527d2..010a9b87490 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -45,7 +45,8 @@ struct brw_sampler_prog_key_data { void brw_populate_sampler_prog_key_data(struct gl_context *ctx, const struct gl_program *prog, struct brw_sampler_prog_key_data *key); -bool brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key, +bool brw_debug_recompile_sampler_key(struct intel_context *intel, + const struct brw_sampler_prog_key_data *old_key, const struct brw_sampler_prog_key_data *key); void brw_add_texrect_params(struct gl_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 00655137048..de3be83c8d7 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -152,7 +152,7 @@ brw_queryobj_get_results(struct gl_context *ctx, if (drm_intel_bo_references(intel->batch.bo, query->bo)) intel_batchbuffer_flush(intel); - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (unlikely(intel->perf_debug)) { if (drm_intel_bo_busy(query->bo)) { perf_debug("Stalling on the GPU waiting for a query object.\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2da5ed5c6cc..aeac69ac85a 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -183,7 +183,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) lower_variable_index_to_cond_assign(shader->ir, input, output, temp, uniform); - if (unlikely((INTEL_DEBUG & DEBUG_PERF) && lowered_variable_indexing)) { + if (unlikely((intel->perf_debug) && lowered_variable_indexing)) { perf_debug("Unsupported form of variable indexing in FS; falling " "back to very inefficient code generation\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d44b2b7c5b9..505c7e8ad92 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -389,6 +389,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) void brw_state_cache_check_size(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; + /* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of * state cache. */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 8182dccb02f..f319f32c2cc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1350,7 +1350,7 @@ brw_vs_emit(struct brw_context *brw, bool start_busy = false; float start_time = 0; - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (unlikely(intel->perf_debug)) { start_busy = (intel->batch.last_bo && drm_intel_bo_busy(intel->batch.last_bo)); start_time = get_time(); @@ -1383,7 +1383,7 @@ brw_vs_emit(struct brw_context *brw, const unsigned *generated =g.generate_assembly(&v.instructions, final_assembly_size); - if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) { + if (unlikely(intel->perf_debug) && shader) { if (shader->compiled_once) { brw_vs_debug_recompile(brw, prog, &c->key); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 64659c084d8..3daed19546c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -319,7 +319,7 @@ do_vs_prog(struct brw_context *brw, } static bool -key_debug(const char *name, int a, int b) +key_debug(struct intel_context *intel, const char *name, int a, int b) { if (a != b) { perf_debug(" %s %d->%d\n", name, a, b); @@ -333,6 +333,7 @@ brw_vs_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_vs_prog_key *key) { + struct intel_context *intel = &brw->intel; struct brw_cache_item *c = NULL; const struct brw_vs_prog_key *old_key = NULL; bool found = false; @@ -359,31 +360,31 @@ brw_vs_debug_recompile(struct brw_context *brw, } for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) { - found |= key_debug("Vertex attrib w/a flags", + found |= key_debug(intel, "Vertex attrib w/a flags", old_key->gl_attrib_wa_flags[i], key->gl_attrib_wa_flags[i]); } - found |= key_debug("user clip flags", + found |= key_debug(intel, "user clip flags", old_key->userclip_active, key->userclip_active); - found |= key_debug("user clipping planes as push constants", + found |= key_debug(intel, "user clipping planes as push constants", old_key->nr_userclip_plane_consts, key->nr_userclip_plane_consts); - found |= key_debug("clip distance enable", + found |= key_debug(intel, "clip distance enable", old_key->uses_clip_distance, key->uses_clip_distance); - found |= key_debug("clip plane enable bitfield", + found |= key_debug(intel, "clip plane enable bitfield", old_key->userclip_planes_enabled_gen_4_5, key->userclip_planes_enabled_gen_4_5); - found |= key_debug("copy edgeflag", + found |= key_debug(intel, "copy edgeflag", old_key->copy_edgeflag, key->copy_edgeflag); - found |= key_debug("PointCoord replace", + found |= key_debug(intel, "PointCoord replace", old_key->point_coord_replace, key->point_coord_replace); - found |= key_debug("vertex color clamping", + found |= key_debug(intel, "vertex color clamping", old_key->clamp_vertex_color, key->clamp_vertex_color); - found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex); + found |= brw_debug_recompile_sampler_key(intel, &old_key->tex, &key->tex); if (!found) { perf_debug(" Something else\n"); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 4b0446574ac..77bede0cc99 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -203,7 +203,7 @@ bool do_wm_prog(struct brw_context *brw, } static bool -key_debug(const char *name, int a, int b) +key_debug(struct intel_context *intel, const char *name, int a, int b) { if (a != b) { perf_debug(" %s %d->%d\n", name, a, b); @@ -214,24 +214,25 @@ key_debug(const char *name, int a, int b) } bool -brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key, +brw_debug_recompile_sampler_key(struct intel_context *intel, + const struct brw_sampler_prog_key_data *old_key, const struct brw_sampler_prog_key_data *key) { bool found = false; for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { - found |= key_debug("EXT_texture_swizzle or DEPTH_TEXTURE_MODE", + found |= key_debug(intel, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE", old_key->swizzles[i], key->swizzles[i]); } - found |= key_debug("GL_CLAMP enabled on any texture unit's 1st coordinate", + found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 1st coordinate", old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]); - found |= key_debug("GL_CLAMP enabled on any texture unit's 2nd coordinate", + found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 2nd coordinate", old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]); - found |= key_debug("GL_CLAMP enabled on any texture unit's 3rd coordinate", + found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 3rd coordinate", old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]); - found |= key_debug("GL_MESA_ycbcr texturing\n", + found |= key_debug(intel, "GL_MESA_ycbcr texturing\n", old_key->yuvtex_mask, key->yuvtex_mask); - found |= key_debug("GL_MESA_ycbcr UV swapping\n", + found |= key_debug(intel, "GL_MESA_ycbcr UV swapping\n", old_key->yuvtex_swap_mask, key->yuvtex_swap_mask); return found; @@ -242,6 +243,7 @@ brw_wm_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_wm_prog_key *key) { + struct intel_context *intel = &brw->intel; struct brw_cache_item *c = NULL; const struct brw_wm_prog_key *old_key = NULL; bool found = false; @@ -262,25 +264,35 @@ brw_wm_debug_recompile(struct brw_context *brw, } if (!c) { - perf_debug(" Didn't find previous compile in the shader cache for " - "debug\n"); + perf_debug(" Didn't find previous compile in the shader cache for debug\n"); return; } - found |= key_debug("alphatest, computed depth, depth test, or depth write", + found |= key_debug(intel, "alphatest, computed depth, depth test, or " + "depth write", old_key->iz_lookup, key->iz_lookup); - found |= key_debug("depth statistics", old_key->stats_wm, key->stats_wm); - found |= key_debug("flat shading", old_key->flat_shade, key->flat_shade); - found |= key_debug("number of color buffers", old_key->nr_color_regions, key->nr_color_regions); - found |= key_debug("sample alpha to coverage", old_key->sample_alpha_to_coverage, key->sample_alpha_to_coverage); - found |= key_debug("rendering to FBO", old_key->render_to_fbo, key->render_to_fbo); - found |= key_debug("fragment color clamping", old_key->clamp_fragment_color, key->clamp_fragment_color); - found |= key_debug("line smoothing", old_key->line_aa, key->line_aa); - found |= key_debug("proj_attrib_mask", old_key->proj_attrib_mask, key->proj_attrib_mask); - found |= key_debug("renderbuffer height", old_key->drawable_height, key->drawable_height); - found |= key_debug("vertex shader outputs", old_key->vp_outputs_written, key->vp_outputs_written); - - found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex); + found |= key_debug(intel, "depth statistics", + old_key->stats_wm, key->stats_wm); + found |= key_debug(intel, "flat shading", + old_key->flat_shade, key->flat_shade); + found |= key_debug(intel, "number of color buffers", + old_key->nr_color_regions, key->nr_color_regions); + found |= key_debug(intel, "sample alpha to coverage", + old_key->sample_alpha_to_coverage, key->sample_alpha_to_coverage); + found |= key_debug(intel, "rendering to FBO", + old_key->render_to_fbo, key->render_to_fbo); + found |= key_debug(intel, "fragment color clamping", + old_key->clamp_fragment_color, key->clamp_fragment_color); + found |= key_debug(intel, "line smoothing", + old_key->line_aa, key->line_aa); + found |= key_debug(intel, "proj_attrib_mask", + old_key->proj_attrib_mask, key->proj_attrib_mask); + found |= key_debug(intel, "renderbuffer height", + old_key->drawable_height, key->drawable_height); + found |= key_debug(intel, "vertex shader outputs", + old_key->vp_outputs_written, key->vp_outputs_written); + + found |= brw_debug_recompile_sampler_key(intel, &old_key->tex, &key->tex); if (!found) { perf_debug(" Something else\n"); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index f94c6f570ff..34eb7c9ea2c 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -227,7 +227,7 @@ intel_bufferobj_subdata(struct gl_context * ctx, drm_intel_bo_unreference(temp_bo); } } else { - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (unlikely(intel->perf_debug)) { if (drm_intel_bo_busy(intel_obj->buffer)) { perf_debug("Stalling on the GPU in glBufferSubData().\n"); } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 435f0974445..9e508f7bb9b 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -803,6 +803,8 @@ intelInitContext(struct intel_context *intel, "shader_time debugging requires gen7 (Ivybridge) or better.\n"); INTEL_DEBUG &= ~DEBUG_SHADER_TIME; } + if (INTEL_DEBUG & DEBUG_PERF) + intel->perf_debug = true; if (INTEL_DEBUG & DEBUG_AUB) drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index c72176ad416..42db5a7d96d 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -239,6 +239,13 @@ struct intel_context bool no_batch_wrap; bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ + /** + * Set if we're either a debug context or the INTEL_DEBUG=perf environment + * variable is set, this is the flag indicating to do expensive work that + * might lead to a perf_debug() call. + */ + bool perf_debug; + struct { GLuint id; @@ -462,8 +469,14 @@ extern int INTEL_DEBUG; } while(0) #define perf_debug(...) do { \ - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \ - dbg_printf(__VA_ARGS__); \ + static GLuint msg_id = 0; \ + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \ + dbg_printf(__VA_ARGS__); \ + if (intel->perf_debug) \ + _mesa_gl_debug(&intel->ctx, &msg_id, \ + MESA_DEBUG_TYPE_PERFORMANCE, \ + MESA_DEBUG_SEVERITY_MEDIUM, \ + __VA_ARGS__); \ } while(0) #define WARN_ONCE(cond, fmt...) do { \ diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index a5e49d99e9f..7f535d7cdc4 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -123,7 +123,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region, * flush is only needed on first map of the buffer. */ - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (unlikely(intel->perf_debug)) { if (drm_intel_bo_busy(region->bo)) { perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 085b6fdbd35..6043ed26adb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -114,8 +114,7 @@ intel_copy_texsubimage(struct intel_context *intel, } if (!copy_supported && !copy_supported_with_alpha_override) { - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) - fprintf(stderr, "%s mismatched formats %s, %s\n", + perf_debug("%s mismatched formats %s, %s\n", __FUNCTION__, _mesa_get_format_name(intelImage->base.Base.TexFormat), _mesa_get_format_name(intel_rb_format(irb))); diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 7a2f713c88f..b02e5fc549a 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -230,7 +230,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, intel_batchbuffer_flush(intel); } - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (unlikely(intel->perf_debug)) { if (drm_intel_bo_busy(bo)) { perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); } -- 2.30.2