From 006c1a3c652803e2ff8d5f7ea55c9cb5d8353279 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 7 Aug 2012 10:05:38 -0700
Subject: [PATCH] i965: Add perf debug for stalls during shader compiles.

v2: fix bad comment from before I gave up and decided to just use doubles.

Reviewed-by: Jordan Justen
Reviewed-by: Kenneth Graunke
---
Review notes (not part of the commit message):
 - start_time is now a double in both emit functions.  get_time() returns
   a double holding CLOCK_MONOTONIC seconds; storing that in a float
   discards the sub-millisecond part the message is trying to report.
 - The elapsed time is now multiplied by 1000 instead of divided by 1000,
   so the printed value really is in milliseconds as the message says.
 - NOTE(review): in brw_vs_emit() the stall check sits before the
   vec4_visitor is constructed, so it presumably runs before the compile
   it is meant to time.  Confirm against the full function and move the
   check after the compile if so.
 - NOTE(review): context-line indentation was reconstructed and the index
   blob ids are those of the original patch; apply with
   --ignore-whitespace if the context does not match.

 src/mesa/drivers/dri/i965/brw_fs.cpp        | 13 +++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 20 ++++++++++++++++++--
 src/mesa/drivers/dri/intel/intel_screen.c   | 13 +++++++++++++
 src/mesa/drivers/dri/intel/intel_screen.h   |  1 +
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 642d95a9289..3010ed0b435 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2062,10 +2062,18 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
                struct gl_shader_program *prog)
 {
    struct intel_context *intel = &brw->intel;
+   bool start_busy = false;
+   double start_time = 0;
 
    if (!prog)
       return false;
 
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      start_busy = (intel->batch.last_bo &&
+                    drm_intel_bo_busy(intel->batch.last_bo));
+      start_time = get_time();
+   }
+
    struct brw_shader *shader =
      (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
    if (!shader)
@@ -2108,6 +2116,11 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
       if (shader->compiled_once)
          brw_wm_debug_recompile(brw, prog, &c->key);
       shader->compiled_once = true;
+
+      if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) {
+         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
    }
 
    return true;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index aea7d015a0e..d7ff8de3486 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -1023,9 +1023,19 @@ extern "C" {
 bool
 brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
 {
+   struct intel_context *intel = &c->func.brw->intel;
+   bool start_busy = false;
+   double start_time = 0;
+
    if (!prog)
       return false;
 
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      start_busy = (intel->batch.last_bo &&
+                    drm_intel_bo_busy(intel->batch.last_bo));
+      start_time = get_time();
+   }
+
    struct brw_shader *shader =
      (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
    if (!shader)
@@ -1037,8 +1047,14 @@ brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
       printf("\n\n");
    }
 
-   if (shader->compiled_once) {
-      perf_debug("Recompiling vertex shader for program %d\n", prog->Name);
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      if (shader->compiled_once) {
+         perf_debug("Recompiling vertex shader for program %d\n", prog->Name);
+      }
+      if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) {
+         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
    }
 
    vec4_visitor v(c, prog, shader);
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 3c595bc6e75..e1ec2eb0130 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -109,6 +109,19 @@ const GLuint __driNConfigOptions = 15;
 static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
 #endif /*USE_NEW_INTERFACE */
 
+/**
+ * For debugging purposes, this returns a time in seconds.
+ */
+double
+get_time(void)
+{
+   struct timespec tp;
+
+   clock_gettime(CLOCK_MONOTONIC, &tp);
+
+   return tp.tv_sec + tp.tv_nsec / 1000000000.0;
+}
+
 void
 aub_dump_bmp(struct gl_context *ctx)
 {
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index c0cc2843fcb..f5a374d2293 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -81,6 +81,7 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
                  __DRIdrawable * driDrawPriv,
                  __DRIdrawable * driReadPriv);
 
+double get_time(void);
 void aub_dump_bmp(struct gl_context *ctx);
 
 #endif
-- 
2.30.2