i965: Fix INTEL_DEBUG=shader_time for fragment shaders with discards.
authorKenneth Graunke <kenneth@whitecape.org>
Thu, 28 Mar 2013 06:19:39 +0000 (23:19 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Fri, 29 Mar 2013 18:39:32 +0000 (11:39 -0700)
"discard" instructions generate HALT instructions which jump to a final
HALT near the end of the shader.  Previously, fs_generator created this
final jump target when it saw the first FS_OPCODE_FB_WRITE, causing it
to jump right before the FB write epilogue.  This is normally good.

However, INTEL_DEBUG=shader_time also has an epilogue section which
records the final timestamp.  The frontend emits IR for this just before
FS_OPCODE_FB_WRITE.  Unfortunately, this led to the following ordering:

1. Shader Time Epilogue
2. Final HALT (where discards jump)
3. Framebuffer Write Epilogue

This meant that discarded pixels completely skipped the shader time
epilogue, causing no ending timestamp to be written.  This obviously
led to inaccurate results.

This patch adds a new FS_OPCODE_PLACEHOLDER_HALT in the IR stream just
before any epilogue sections.  This is where the final HALT should be
generated, and makes it easy to ensure the correct ordering:

1. Final HALT
2. Shader Time Epilogue
3. Framebuffer Write Epilogue

For shaders that don't discard, this opcode compiles away to nothing.
The scheduler adds barrier dependencies to make sure that it doesn't
get moved above any FS_OPCODE_DISCARD_JUMP instructions.

One 8-wide shader in GLBenchmark 2.7 dropped from 2291.67 Gcycles to
a mere 5.13 Gcycles.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index 192903532c71edc921fdb45c8f66c7b52c668f86..3d07c36b74701656b1a30780a3bb3f404d04314d 100644 (file)
@@ -733,6 +733,7 @@ enum opcode {
    FS_OPCODE_PACK_HALF_2x16_SPLIT,
    FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
    FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
+   FS_OPCODE_PLACEHOLDER_HALT,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_SCRATCH_READ,
index 2bd7b3402ce12faf85ff0710dde98fe28ed7d854..cba116785ea5123421c1e48328fd4a5fb5cd12cd 100644 (file)
@@ -2768,6 +2768,8 @@ fs_visitor::run()
       if (failed)
         return false;
 
+      emit(FS_OPCODE_PLACEHOLDER_HALT);
+
       emit_fb_writes();
 
       split_virtual_grfs();
index ad1ca58f58c255a87fa4a3cd564f1d94b4954a6c..a729569c8404aaa2c7650ce3c261b0e8d9a311bb 100644 (file)
@@ -102,12 +102,6 @@ fs_generator::generate_fb_write(fs_inst *inst)
    struct brw_reg implied_header;
    uint32_t msg_control;
 
-   /* Note that the jumps emitted to this point mean that the g0 ->
-    * base_mrf setup must be inside of this function, so that we jump
-    * to a point containing it.
-    */
-   patch_discard_jumps_to_fb_writes();
-
    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
     * move, here's g1.
     */
@@ -1346,6 +1340,13 @@ fs_generator::generate_code(exec_list *instructions)
          generate_unpack_half_2x16_split(inst, dst, src[0]);
          break;
 
+      case FS_OPCODE_PLACEHOLDER_HALT:
+         /* This is the place where the final HALT needs to be inserted if
+          * we've emitted any discards.  If not, this will emit no code.
+          */
+         patch_discard_jumps_to_fb_writes();
+         break;
+
       default:
         if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
index 90f1a16bcdeb22bb1db6705e77bca32fec9578f8..997341b153cfcd9a92efcd77843a9e509cc71ac6 100644 (file)
@@ -512,6 +512,9 @@ instruction_scheduler::calculate_deps()
       schedule_node *n = (schedule_node *)node;
       fs_inst *inst = n->inst;
 
+      if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT)
+         add_barrier_deps(n);
+
       /* read-after-write deps. */
       for (int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF) {
index e4392bd1b9b2e9b15f00986cc46bc0566ecca67e..066cf4e21f33000eb364f0ebcaf2936a08416a75 100644 (file)
@@ -471,6 +471,9 @@ brw_instruction_name(enum opcode op)
    case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
       return "unpack_half_2x16_split_y";
 
+   case FS_OPCODE_PLACEHOLDER_HALT:
+      return "placeholder_halt";
+
    case VS_OPCODE_URB_WRITE:
       return "urb_write";
    case VS_OPCODE_SCRATCH_READ: