i965: Make perf_debug() output to GL_ARB_debug_output in a debug context.
author Eric Anholt <eric@anholt.net>
Fri, 22 Feb 2013 21:15:20 +0000 (13:15 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 5 Mar 2013 22:25:00 +0000 (14:25 -0800)
I tried to ensure that performance in the non-debug case doesn't change
(we still just check one condition up front), and I think the impact is
small enough in the debug context case to warrant including all of it.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
16 files changed:
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_program.h
src/mesa/drivers/dri/i965/brw_queryobj.c
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_state_cache.c
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/intel/intel_buffer_objects.c
src/mesa/drivers/dri/intel/intel_context.c
src/mesa/drivers/dri/intel/intel_context.h
src/mesa/drivers/dri/intel/intel_regions.c
src/mesa/drivers/dri/intel/intel_tex_copy.c
src/mesa/drivers/dri/intel/intel_tex_subimage.c

index a89ebb97aa6e3a32ff3e604392e4222a49b79b3c..fdbe3275bbbb1b0607f9fa5c271cfb0a628afa40 100644 (file)
@@ -397,9 +397,13 @@ brwCreateContext(int api,
    if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
 
-   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0)
+   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
       ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
 
+      /* Turn on some extra GL_ARB_debug_output generation. */
+      intel->perf_debug = true;
+   }
+
    brw_fs_alloc_reg_sets(brw);
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
index 2c2b826d1966375cdd9033b97f03790a3ec33efc..9c96f697a2e6c7ab5c077199d021290462cb9a25 100644 (file)
@@ -550,6 +550,7 @@ void brw_draw_prims( struct gl_context *ctx,
                     GLuint max_index,
                     struct gl_transform_feedback_object *tfb_vertcount )
 {
+   struct intel_context *intel = intel_context(ctx);
    const struct gl_client_array **arrays = ctx->Array._DrawArrays;
 
    if (!_mesa_check_conditional_render(ctx))
index 7cc49347d8321abb6aff72a2b4a491cc2efa0fc2..927cf13a170ea8ec9c91e95afa6e79b452a7df6c 100644 (file)
@@ -2877,7 +2877,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    bool start_busy = false;
    float start_time = 0;
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+   if (unlikely(intel->perf_debug)) {
       start_busy = (intel->batch.last_bo &&
                     drm_intel_bo_busy(intel->batch.last_bo));
       start_time = get_time();
@@ -2932,7 +2932,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
                                                    simd16_instructions,
                                                    final_assembly_size);
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) {
+   if (unlikely(intel->perf_debug) && shader) {
       if (shader->compiled_once)
          brw_wm_debug_recompile(brw, prog, &c->key);
       shader->compiled_once = true;
index 182177527d2aa2ba806575a7019d9cbceeef2a85..010a9b87490e959f251aa3247c128415daa2d979 100644 (file)
@@ -45,7 +45,8 @@ struct brw_sampler_prog_key_data {
 void brw_populate_sampler_prog_key_data(struct gl_context *ctx,
                                        const struct gl_program *prog,
                                        struct brw_sampler_prog_key_data *key);
-bool brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key,
+bool brw_debug_recompile_sampler_key(struct intel_context *intel,
+                                     const struct brw_sampler_prog_key_data *old_key,
                                      const struct brw_sampler_prog_key_data *key);
 void brw_add_texrect_params(struct gl_program *prog);
 
index 0065513704893e71ca4f9299a02c3cfd72817cb7..de3be83c8d7ee0edb1133c8f0038a9fc309c18e2 100644 (file)
@@ -152,7 +152,7 @@ brw_queryobj_get_results(struct gl_context *ctx,
    if (drm_intel_bo_references(intel->batch.bo, query->bo))
       intel_batchbuffer_flush(intel);
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+   if (unlikely(intel->perf_debug)) {
       if (drm_intel_bo_busy(query->bo)) {
          perf_debug("Stalling on the GPU waiting for a query object.\n");
       }
index 2da5ed5c6cc88514a480d52de88e79c8ef7d8cc4..aeac69ac85af983081f5ff86d74c6a11e7adb46a 100644 (file)
@@ -183,7 +183,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
          lower_variable_index_to_cond_assign(shader->ir,
                                              input, output, temp, uniform);
 
-      if (unlikely((INTEL_DEBUG & DEBUG_PERF) && lowered_variable_indexing)) {
+      if (unlikely((intel->perf_debug) && lowered_variable_indexing)) {
          perf_debug("Unsupported form of variable indexing in FS; falling "
                     "back to very inefficient code generation\n");
       }
index d44b2b7c5b926808d25f5f84b5dd1ebab9317749..505c7e8ad92036192b52c24298442932899f5d8c 100644 (file)
@@ -389,6 +389,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 void
 brw_state_cache_check_size(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
+
    /* un-tuned guess.  Each object is generally a page, so 2000 of them is 8 MB of
     * state cache.
     */
index 8182dccb02fa7df48eed4987a093b32f04dffee1..f319f32c2cc236e183c0853c7696fc80951995ab 100644 (file)
@@ -1350,7 +1350,7 @@ brw_vs_emit(struct brw_context *brw,
    bool start_busy = false;
    float start_time = 0;
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+   if (unlikely(intel->perf_debug)) {
       start_busy = (intel->batch.last_bo &&
                     drm_intel_bo_busy(intel->batch.last_bo));
       start_time = get_time();
@@ -1383,7 +1383,7 @@ brw_vs_emit(struct brw_context *brw,
    const unsigned *generated =g.generate_assembly(&v.instructions,
                                                   final_assembly_size);
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) {
+   if (unlikely(intel->perf_debug) && shader) {
       if (shader->compiled_once) {
          brw_vs_debug_recompile(brw, prog, &c->key);
       }
index 64659c084d8bfd2a5b3c6ac52a80ebc1c3dac0b6..3daed19546ca8b9a9f2b5c74bcd75e1e358ac886 100644 (file)
@@ -319,7 +319,7 @@ do_vs_prog(struct brw_context *brw,
 }
 
 static bool
-key_debug(const char *name, int a, int b)
+key_debug(struct intel_context *intel, const char *name, int a, int b)
 {
    if (a != b) {
       perf_debug("  %s %d->%d\n", name, a, b);
@@ -333,6 +333,7 @@ brw_vs_debug_recompile(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        const struct brw_vs_prog_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_cache_item *c = NULL;
    const struct brw_vs_prog_key *old_key = NULL;
    bool found = false;
@@ -359,31 +360,31 @@ brw_vs_debug_recompile(struct brw_context *brw,
    }
 
    for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
-      found |= key_debug("Vertex attrib w/a flags",
+      found |= key_debug(intel, "Vertex attrib w/a flags",
                          old_key->gl_attrib_wa_flags[i],
                          key->gl_attrib_wa_flags[i]);
    }
 
-   found |= key_debug("user clip flags",
+   found |= key_debug(intel, "user clip flags",
                       old_key->userclip_active, key->userclip_active);
 
-   found |= key_debug("user clipping planes as push constants",
+   found |= key_debug(intel, "user clipping planes as push constants",
                       old_key->nr_userclip_plane_consts,
                       key->nr_userclip_plane_consts);
 
-   found |= key_debug("clip distance enable",
+   found |= key_debug(intel, "clip distance enable",
                       old_key->uses_clip_distance, key->uses_clip_distance);
-   found |= key_debug("clip plane enable bitfield",
+   found |= key_debug(intel, "clip plane enable bitfield",
                       old_key->userclip_planes_enabled_gen_4_5,
                       key->userclip_planes_enabled_gen_4_5);
-   found |= key_debug("copy edgeflag",
+   found |= key_debug(intel, "copy edgeflag",
                       old_key->copy_edgeflag, key->copy_edgeflag);
-   found |= key_debug("PointCoord replace",
+   found |= key_debug(intel, "PointCoord replace",
                       old_key->point_coord_replace, key->point_coord_replace);
-   found |= key_debug("vertex color clamping",
+   found |= key_debug(intel, "vertex color clamping",
                       old_key->clamp_vertex_color, key->clamp_vertex_color);
 
-   found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex);
+   found |= brw_debug_recompile_sampler_key(intel, &old_key->tex, &key->tex);
 
    if (!found) {
       perf_debug("  Something else\n");
index 4b0446574ac637f7e0e7eae711aecce7264aa770..77bede0cc99bded0fb9db505d16b408722efba8d 100644 (file)
@@ -203,7 +203,7 @@ bool do_wm_prog(struct brw_context *brw,
 }
 
 static bool
-key_debug(const char *name, int a, int b)
+key_debug(struct intel_context *intel, const char *name, int a, int b)
 {
    if (a != b) {
       perf_debug("  %s %d->%d\n", name, a, b);
@@ -214,24 +214,25 @@ key_debug(const char *name, int a, int b)
 }
 
 bool
-brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key,
+brw_debug_recompile_sampler_key(struct intel_context *intel,
+                                const struct brw_sampler_prog_key_data *old_key,
                                 const struct brw_sampler_prog_key_data *key)
 {
    bool found = false;
 
    for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
-      found |= key_debug("EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
+      found |= key_debug(intel, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
                          old_key->swizzles[i], key->swizzles[i]);
    }
-   found |= key_debug("GL_CLAMP enabled on any texture unit's 1st coordinate",
+   found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 1st coordinate",
                       old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]);
-   found |= key_debug("GL_CLAMP enabled on any texture unit's 2nd coordinate",
+   found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
                       old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]);
-   found |= key_debug("GL_CLAMP enabled on any texture unit's 3rd coordinate",
+   found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
                       old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]);
-   found |= key_debug("GL_MESA_ycbcr texturing\n",
+   found |= key_debug(intel, "GL_MESA_ycbcr texturing\n",
                       old_key->yuvtex_mask, key->yuvtex_mask);
-   found |= key_debug("GL_MESA_ycbcr UV swapping\n",
+   found |= key_debug(intel, "GL_MESA_ycbcr UV swapping\n",
                       old_key->yuvtex_swap_mask, key->yuvtex_swap_mask);
 
    return found;
@@ -242,6 +243,7 @@ brw_wm_debug_recompile(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        const struct brw_wm_prog_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_cache_item *c = NULL;
    const struct brw_wm_prog_key *old_key = NULL;
    bool found = false;
@@ -262,25 +264,35 @@ brw_wm_debug_recompile(struct brw_context *brw,
    }
 
    if (!c) {
-      perf_debug("  Didn't find previous compile in the shader cache for "
-                 "debug\n");
+      perf_debug("  Didn't find previous compile in the shader cache for debug\n");
       return;
    }
 
-   found |= key_debug("alphatest, computed depth, depth test, or depth write",
+   found |= key_debug(intel, "alphatest, computed depth, depth test, or "
+                      "depth write",
                       old_key->iz_lookup, key->iz_lookup);
-   found |= key_debug("depth statistics", old_key->stats_wm, key->stats_wm);
-   found |= key_debug("flat shading", old_key->flat_shade, key->flat_shade);
-   found |= key_debug("number of color buffers", old_key->nr_color_regions, key->nr_color_regions);
-   found |= key_debug("sample alpha to coverage", old_key->sample_alpha_to_coverage, key->sample_alpha_to_coverage);
-   found |= key_debug("rendering to FBO", old_key->render_to_fbo, key->render_to_fbo);
-   found |= key_debug("fragment color clamping", old_key->clamp_fragment_color, key->clamp_fragment_color);
-   found |= key_debug("line smoothing", old_key->line_aa, key->line_aa);
-   found |= key_debug("proj_attrib_mask", old_key->proj_attrib_mask, key->proj_attrib_mask);
-   found |= key_debug("renderbuffer height", old_key->drawable_height, key->drawable_height);
-   found |= key_debug("vertex shader outputs", old_key->vp_outputs_written, key->vp_outputs_written);
-
-   found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex);
+   found |= key_debug(intel, "depth statistics",
+                      old_key->stats_wm, key->stats_wm);
+   found |= key_debug(intel, "flat shading",
+                      old_key->flat_shade, key->flat_shade);
+   found |= key_debug(intel, "number of color buffers",
+                      old_key->nr_color_regions, key->nr_color_regions);
+   found |= key_debug(intel, "sample alpha to coverage",
+                      old_key->sample_alpha_to_coverage, key->sample_alpha_to_coverage);
+   found |= key_debug(intel, "rendering to FBO",
+                      old_key->render_to_fbo, key->render_to_fbo);
+   found |= key_debug(intel, "fragment color clamping",
+                      old_key->clamp_fragment_color, key->clamp_fragment_color);
+   found |= key_debug(intel, "line smoothing",
+                      old_key->line_aa, key->line_aa);
+   found |= key_debug(intel, "proj_attrib_mask",
+                      old_key->proj_attrib_mask, key->proj_attrib_mask);
+   found |= key_debug(intel, "renderbuffer height",
+                      old_key->drawable_height, key->drawable_height);
+   found |= key_debug(intel, "vertex shader outputs",
+                      old_key->vp_outputs_written, key->vp_outputs_written);
+
+   found |= brw_debug_recompile_sampler_key(intel, &old_key->tex, &key->tex);
 
    if (!found) {
       perf_debug("  Something else\n");
index f94c6f570ffbfdb43395fc8d69185fff31ba18b1..34eb7c9ea2c0ea6b8f8c4cec5a68c1f1a612aece 100644 (file)
@@ -227,7 +227,7 @@ intel_bufferobj_subdata(struct gl_context * ctx,
         drm_intel_bo_unreference(temp_bo);
       }
    } else {
-      if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      if (unlikely(intel->perf_debug)) {
          if (drm_intel_bo_busy(intel_obj->buffer)) {
             perf_debug("Stalling on the GPU in glBufferSubData().\n");
          }
index 435f0974445739c9aefa2db0ac5ab6c96ae610b8..9e508f7bb9b1f983a5b981675ae7cccc95cd68d4 100644 (file)
@@ -803,6 +803,8 @@ intelInitContext(struct intel_context *intel,
               "shader_time debugging requires gen7 (Ivybridge) or better.\n");
       INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
    }
+   if (INTEL_DEBUG & DEBUG_PERF)
+      intel->perf_debug = true;
 
    if (INTEL_DEBUG & DEBUG_AUB)
       drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true);
index c72176ad416f2f0b3fc788ceb280e67a3f8b38c1..42db5a7d96deb29974b05f51135cfba5aca23660 100644 (file)
@@ -239,6 +239,13 @@ struct intel_context
    bool no_batch_wrap;
    bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */
 
+   /**
+    * Set when this is a debug context or the INTEL_DEBUG=perf environment
+    * variable is set.  Indicates that we should do the expensive work that
+    * might lead to a perf_debug() call.
+    */
+   bool perf_debug;
+
    struct
    {
       GLuint id;
@@ -462,8 +469,14 @@ extern int INTEL_DEBUG;
 } while(0)
 
 #define perf_debug(...) do {                                   \
-       if (unlikely(INTEL_DEBUG & DEBUG_PERF))                 \
-               dbg_printf(__VA_ARGS__);                        \
+   static GLuint msg_id = 0;                                    \
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF))                      \
+      dbg_printf(__VA_ARGS__);                                  \
+   if (intel->perf_debug)                                       \
+      _mesa_gl_debug(&intel->ctx, &msg_id,                      \
+                     MESA_DEBUG_TYPE_PERFORMANCE,               \
+                     MESA_DEBUG_SEVERITY_MEDIUM,                \
+                     __VA_ARGS__);                              \
 } while(0)
 
 #define WARN_ONCE(cond, fmt...) do {                            \
index a5e49d99e9f8bfbcf18ed1f433383cd04a4e8bcc..7f535d7cdc41149647f9ff7888a51c77acde7b7e 100644 (file)
@@ -123,7 +123,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region,
     * flush is only needed on first map of the buffer.
     */
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+   if (unlikely(intel->perf_debug)) {
       if (drm_intel_bo_busy(region->bo)) {
          perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
       }
index 085b6fdbd350a8ff2204942622d0c1cc1be5bea2..6043ed26adbf558cee2cae6ccd496e013e5fd959 100644 (file)
@@ -114,8 +114,7 @@ intel_copy_texsubimage(struct intel_context *intel,
    }
 
    if (!copy_supported && !copy_supported_with_alpha_override) {
-      if (unlikely(INTEL_DEBUG & DEBUG_PERF))
-        fprintf(stderr, "%s mismatched formats %s, %s\n",
+      perf_debug("%s mismatched formats %s, %s\n",
                 __FUNCTION__,
                 _mesa_get_format_name(intelImage->base.Base.TexFormat),
                 _mesa_get_format_name(intel_rb_format(irb)));
index 7a2f713c88fb170cb7060d74f6067a3433475e65..b02e5fc549a209ff8a11544dde4edfdf781648c2 100644 (file)
@@ -230,7 +230,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
       intel_batchbuffer_flush(intel);
    }
 
-   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+   if (unlikely(intel->perf_debug)) {
       if (drm_intel_bo_busy(bo)) {
          perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
       }