r300g: add a new debug option which disables compiler optimizations
author Marek Olšák <maraeo@gmail.com>
Wed, 1 Sep 2010 06:12:51 +0000 (08:12 +0200)
committer Marek Olšák <maraeo@gmail.com>
Sat, 4 Sep 2010 16:56:22 +0000 (18:56 +0200)
Those are:
- dead-code elimination
- constant folding
- peephole (mainly copy propagation)
- register allocation

There are some bugs which I need to track down.

Also fix up the descriptions of all the debug options.

src/gallium/drivers/r300/r300_debug.c
src/gallium/drivers/r300/r300_fs.c
src/gallium/drivers/r300/r300_screen.h
src/gallium/drivers/r300/r300_vs.c
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
src/mesa/drivers/dri/r300/r300_blit.c
src/mesa/drivers/dri/r300/r300_fragprog_common.c
src/mesa/drivers/dri/r300/r300_vertprog.c

index c3e157e99afc2b3adbb80fdfd89b25743123dd7b..786fdf6ce8db8c4df08b21dfa90371524e38feb4 100644 (file)
 #include <stdio.h>
 
 static const struct debug_named_value debug_options[] = {
-    { "fp", DBG_FP, "Fragment program handling (for debugging)" },
-    { "vp", DBG_VP, "Vertex program handling (for debugging)" },
-    { "draw", DBG_DRAW, "Draw calls (for debugging)" },
-    { "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" },
-    { "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" },
-    { "psc", DBG_PSC, "Vertex stream registers (for debugging)" },
-    { "tex", DBG_TEX, "Textures (for debugging)" },
-    { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" },
-    { "fall", DBG_FALL, "Fallbacks (for debugging)" },
-    { "rs", DBG_RS, "Rasterizer (for debugging)" },
-    { "fb", DBG_FB, "Framebuffer (for debugging)" },
-    { "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" },
-    { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" },
-    { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
-    { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
-    { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
-    { "stats", DBG_STATS, "Gather statistics" },
-    { "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" },
+    { "fp", DBG_FP, "Log fragment program compilation" },
+    { "vp", DBG_VP, "Log vertex program compilation" },
+    { "draw", DBG_DRAW, "Log draw calls" },
+    { "swtcl", DBG_SWTCL, "Log SWTCL-specific info" },
+    { "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" },
+    { "psc", DBG_PSC, "Log vertex stream registers" },
+    { "tex", DBG_TEX, "Log basic info about textures" },
+    { "texalloc", DBG_TEXALLOC, "Log texture mipmap tree info" },
+    { "fall", DBG_FALL, "Log fallbacks" },
+    { "rs", DBG_RS, "Log rasterizer" },
+    { "fb", DBG_FB, "Log framebuffer" },
+    { "cbzb", DBG_CBZB, "Log fast color clear info" },
+    { "stats", DBG_STATS, "Log emission statistics" },
+    { "hyperz", DBG_HYPERZ, "Log HyperZ info" },
+    { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" },
+    { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
+    { "notiling", DBG_NO_TILING, "Disable tiling" },
+    { "noimmd", DBG_NO_IMMD, "Disable immediate mode" },
+    { "noopt", DBG_NO_OPT, "Disable shader optimizations" },
 
     /* must be last */
     DEBUG_NAMED_VALUE_END
index e8ebe90b33b4a45ec8b2cb30e72ef784d839a877..b9f4d77dea7e5207244390f90a4227660ce8d81e 100644 (file)
@@ -385,6 +385,7 @@ static void r300_translate_fragment_shader(
     compiler.code = &shader->code;
     compiler.state = shader->compare_state;
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
+    compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     compiler.Base.has_half_swizzles = TRUE;
     compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
     compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
index 13a3320b99258293d6e527d062671fb728fb11db..f2f63213e0d9986d95be4e354fb087eff744f790 100644 (file)
@@ -97,6 +97,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
 #define DBG_NO_TILING   (1 << 17)
 #define DBG_NO_IMMD     (1 << 18)
 #define DBG_FAKE_OCC    (1 << 19)
+#define DBG_NO_OPT      (1 << 20)
 /* Statistics. */
 #define DBG_STATS       (1 << 24)
 /*@}*/
index 2db7b52a87e4a33d03b254bc7c720bc06ed04964..add3d552c45622704734094723c4ca5c0619376b 100644 (file)
@@ -205,6 +205,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
     compiler.code = &vs->code;
     compiler.UserData = vs;
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
+    compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     compiler.Base.has_half_swizzles = FALSE;
     compiler.Base.max_temp_regs = 32;
     compiler.Base.max_constants = 256;
index 096afe8ad6a209b88f0291db4f596968b4a2f2b0..137267f4a635052cbcc807eb4d62cbc65d59ee3a 100644 (file)
@@ -94,6 +94,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
 {
        int is_r500 = c->Base.is_r500;
        int kill_consts = c->Base.remove_unused_constants;
+       int opt = !c->Base.disable_optimizations;
 
        /* Lists of instruction transformations. */
        struct radeon_program_transformation rewrite_tex[] = {
@@ -128,9 +129,9 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
                {"transform TEX",               1, 1,           rc_local_transform,             rewrite_tex},
                {"native rewrite",              1, is_r500,     rc_local_transform,             native_rewrite_r500},
                {"native rewrite",              1, !is_r500,    rc_local_transform,             native_rewrite_r300},
-               {"deadcode",                    1, 1,           rc_dataflow_deadcode,           dataflow_outputs_mark_use},
+               {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_use},
                {"emulate loops",               1, !is_r500,    rc_emulate_loops,               NULL},
-               {"dataflow optimize",           1, 1,           rc_optimize,                    NULL},
+               {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
                {"dataflow swizzles",           1, 1,           rc_dataflow_swizzles,           NULL},
                {"dead constants",              1, kill_consts, rc_remove_unused_constants,     &c->code->constants_remap_table},
                /* This pass makes it easier for the scheduler to group TEX
@@ -139,7 +140,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
                {"register rename",             1, !is_r500,    rc_rename_regs,                 NULL},
                {"pair translate",              1, 1,           rc_pair_translate,              NULL},
                {"pair scheduling",             1, 1,           rc_pair_schedule,               NULL},
-               {"register allocation",         1, 1,           rc_pair_regalloc,               NULL},
+               {"register allocation",         1, opt,         rc_pair_regalloc,               NULL},
                {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
                {"machine code generation",     0, is_r500,     r500BuildFragmentProgramHwCode, NULL},
                {"machine code generation",     0, !is_r500,    r300BuildFragmentProgramHwCode, NULL},
index e9fb49e7c13777d3f2a5a39ec6650af9e670da35..3e8a8236c05facd0d79452a3a347c076684a8c78 100644 (file)
@@ -991,6 +991,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
 {
        int is_r500 = c->Base.is_r500;
        int kill_consts = c->Base.remove_unused_constants;
+       int opt = !c->Base.disable_optimizations;
 
        /* Lists of instruction transformations. */
        struct radeon_program_transformation alu_rewrite_r500[] = {
@@ -1029,12 +1030,12 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
                {"native rewrite",              1, is_r500,     rc_local_transform,             alu_rewrite_r500},
                {"native rewrite",              1, !is_r500,    rc_local_transform,             alu_rewrite_r300},
                {"emulate modifiers",           1, !is_r500,    rc_local_transform,             emulate_modifiers},
-               {"deadcode",                    1, 1,           rc_dataflow_deadcode,           dataflow_outputs_mark_used},
-               {"dataflow optimize",           1, 1,           rc_optimize,                    NULL},
+               {"deadcode",                    1, opt,         rc_dataflow_deadcode,           dataflow_outputs_mark_used},
+               {"dataflow optimize",           1, opt,         rc_optimize,                    NULL},
                /* This pass must be done after optimizations. */
                {"source conflict resolve",     1, 1,           rc_local_transform,             resolve_src_conflicts},
                {"dataflow swizzles",           1, 1,           rc_dataflow_swizzles,           NULL},
-               {"register allocation",         1, 1,           allocate_temporary_registers,   NULL},
+               {"register allocation",         1, opt,         allocate_temporary_registers,   NULL},
                {"dead constants",              1, kill_consts, rc_remove_unused_constants,     &c->code->constants_remap_table},
                {"final code validation",       0, 1,           rc_validate_final_shader,       NULL},
                {"machine code generation",     0, 1,           translate_vertex_program,       NULL},
index f6a8e8343426e485f7639e6be2f9f213730638d5..85c2e43ad672189ac19bee6d79bfb4ad9bb90170 100644 (file)
@@ -42,6 +42,7 @@ struct radeon_compiler {
        /* Hardware specification. */
        unsigned is_r500:1;
        unsigned has_half_swizzles:1;
+       unsigned disable_optimizations:1;
        unsigned max_temp_regs;
        unsigned max_constants;
        int max_alu_insts;
index e43f0eeb88925cd067bfb50363966dec17be61c4..5f261e07e62584c6cee79ffbe1ddbd9acd29e8cd 100644 (file)
@@ -89,6 +89,7 @@ static void create_vertex_program(struct r300_context *r300)
     compiler.SetHwInputOutput = vp_ins_outs;
     compiler.code = &r300->blit.vp_code;
     compiler.Base.is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
+    compiler.Base.disable_optimizations = 0;
     compiler.Base.has_half_swizzles = 0;
     compiler.Base.max_temp_regs = 32;
     compiler.Base.max_constants = 256;
@@ -124,6 +125,7 @@ static void create_fragment_program(struct r300_context *r300)
     compiler.OutputDepth = FRAG_RESULT_DEPTH;
     compiler.enable_shadow_ambient = GL_TRUE;
     compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
+    compiler.Base.disable_optimizations = 0;
     compiler.Base.has_half_swizzles = 1;
     compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
     compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
index 47cf3e49008b04b38fd1c92b58bac0833270b9fa..d0006d6c3c26088bc3318906529937aee2f37aa2 100644 (file)
@@ -220,6 +220,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
        compiler.state = fp->state;
        compiler.enable_shadow_ambient = GL_TRUE;
        compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+       compiler.Base.disable_optimizations = 0;
        compiler.Base.has_half_swizzles = 1;
        compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
        compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
index 1d24266fbddc709d71b27776eba5b6ebd4e4dea4..4a3bbdb44d16115b5ec21a514da3b7b9bbb45acd 100644 (file)
@@ -245,6 +245,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
        compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
        compiler.SetHwInputOutput = &t_inputs_outputs;
        compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
+       compiler.Base.disable_optimizations = 0;
        compiler.Base.has_half_swizzles = 0;
        compiler.Base.max_temp_regs = 32;
        compiler.Base.max_constants = 256;