default_state.exec_size = save_exec_size;
}
+/**
+ * Change the register's data type from UD to HF, doubling the strides in order
+ * to compensate for halving the data type width.
+ *
+ * With the strides doubled and the subregister offset unchanged, each HF
+ * element of the returned region lines up with the low word of the
+ * corresponding 32-bit UD channel; callers reach the high word by adding
+ * 2 bytes to subnr (see generate_unpack_half_2x16_split).
+ */
+static struct brw_reg
+ud_reg_to_hf(struct brw_reg r)
+{
+ assert(r.type == BRW_REGISTER_TYPE_UD);
+ r.type = BRW_REGISTER_TYPE_HF;
+
+ /* The BRW_*_STRIDE enums are defined so that incrementing the field
+ * doubles the real stride.
+ */
+ if (r.hstride != 0)
+ ++r.hstride;
+ if (r.vstride != 0)
+ ++r.vstride;
+
+ /* A stride of 0 (scalar/broadcast) needs no adjustment: 2 * 0 == 0. */
+ return r;
+}
+
+/**
+ * Generate code for FS_OPCODE_PACK_HALF_2x16_SPLIT: convert the float
+ * sources x and y to half-float and pack them into the low and high
+ * 16-bit words, respectively, of each 32-bit channel of dst
+ * (packHalf2x16's output form, 0xhhhhllll).
+ *
+ * inst is unused here; presumably kept for signature consistency with the
+ * other generate_* methods.
+ */
+void
+gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg x,
+ struct brw_reg y)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_UD);
+ assert(x.type == BRW_REGISTER_TYPE_F);
+ assert(y.type == BRW_REGISTER_TYPE_F);
+
+ /* HF view of dst: a MOV through it converts F -> HF and writes only the
+ * low word of each 32-bit channel (strides doubled, subnr unchanged).
+ */
+ struct brw_reg dst_hf = ud_reg_to_hf(dst);
+
+ /* Give each 32-bit channel of dst the form below, where "." means
+ * unchanged.
+ * 0x....hhhh
+ */
+ MOV(dst_hf, y);
+
+ /* Shift the full UD channel (hence dst, not dst_hf) so y's half-float
+ * lands in the high word:
+ * 0xhhhh0000
+ */
+ SHL(dst, dst, brw_imm_ud(16u));
+
+ /* And, finally the form of packHalf2x16's output:
+ * 0xhhhhllll
+ */
+ MOV(dst_hf, x);
+}
+
+/**
+ * Generate code for FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X and _Y: convert one
+ * 16-bit half of each 32-bit src channel — the low word for _X, the high
+ * word for _Y — from half-float to float in dst.
+ */
+void
+gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dst.type == BRW_REGISTER_TYPE_F);
+ assert(src.type == BRW_REGISTER_TYPE_UD);
+
+ /* HF view of src: reads convert HF -> F, sampling the low word of each
+ * 32-bit channel (strides doubled, subnr unchanged).
+ */
+ struct brw_reg src_hf = ud_reg_to_hf(src);
+
+ /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
+ * For the Y case, we wish to access only the upper word; therefore
+ * a 16-bit subregister offset is needed.
+ */
+ assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+ inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+ if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+ src_hf.subnr += 2;
+
+ MOV(dst, src_hf);
+}
+
void
gen8_fs_generator::generate_code(exec_list *instructions)
{
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
if (prog) {
- printf("Native code for fragment shader %d (SIMD%d dispatch):\n",
+ fprintf(stderr,
+ "Native code for %s fragment shader %d (SIMD%d dispatch):\n",
+ shader_prog->Label ? shader_prog->Label : "unnamed",
shader_prog->Name, dispatch_width);
} else if (fp) {
- printf("Native code for fragment program %d (SIMD%d dispatch):\n",
- prog->Id, dispatch_width);
+ fprintf(stderr,
+ "Native code for fragment program %d (SIMD%d dispatch):\n",
+ prog->Id, dispatch_width);
} else {
- printf("Native code for blorp program (SIMD%d dispatch):\n",
- dispatch_width);
+ fprintf(stderr, "Native code for blorp program (SIMD%d dispatch):\n",
+ dispatch_width);
}
}
bblock_t *block = link->block;
if (block->start == ir) {
- printf(" START B%d", block->block_num);
+ fprintf(stderr, " START B%d", block->block_num);
foreach_list(predecessor_node, &block->parents) {
bblock_link *predecessor_link =
(bblock_link *)predecessor_node;
bblock_t *predecessor_block = predecessor_link->block;
- printf(" <-B%d", predecessor_block->block_num);
+ fprintf(stderr, " <-B%d", predecessor_block->block_num);
}
- printf("\n");
+ fprintf(stderr, "\n");
}
}
if (last_annotation_ir != ir->ir) {
last_annotation_ir = ir->ir;
if (last_annotation_ir) {
- printf(" ");
+ fprintf(stderr, " ");
if (prog) {
- ((ir_instruction *) ir->ir)->print();
+ ((ir_instruction *) ir->ir)->fprint(stderr);
} else if (prog) {
const prog_instruction *fpi;
fpi = (const prog_instruction *) ir->ir;
- printf("%d: ", (int)(fpi - prog->Instructions));
- _mesa_fprint_instruction_opt(stdout,
+ fprintf(stderr, "%d: ", (int)(fpi - prog->Instructions));
+ _mesa_fprint_instruction_opt(stderr,
fpi,
0, PROG_PRINT_DEBUG, NULL);
}
- printf("\n");
+ fprintf(stderr, "\n");
}
}
if (last_annotation_string != ir->annotation) {
last_annotation_string = ir->annotation;
if (last_annotation_string)
- printf(" %s\n", last_annotation_string);
+ fprintf(stderr, " %s\n", last_annotation_string);
}
}
break;
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
- assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
+ generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
break;
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
- assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
+ generate_unpack_half_2x16_split(ir, dst, src[0]);
break;
case FS_OPCODE_PLACEHOLDER_HALT:
}
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- disassemble(stdout, last_native_inst_offset, next_inst_offset);
+ disassemble(stderr, last_native_inst_offset, next_inst_offset);
foreach_list(node, &cfg->block_list) {
bblock_link *link = (bblock_link *)node;
bblock_t *block = link->block;
if (block->end == ir) {
- printf(" END B%d", block->block_num);
+ fprintf(stderr, " END B%d", block->block_num);
foreach_list(successor_node, &block->children) {
bblock_link *successor_link =
(bblock_link *)successor_node;
bblock_t *successor_block = successor_link->block;
- printf(" ->B%d", successor_block->block_num);
+ fprintf(stderr, " ->B%d", successor_block->block_num);
}
- printf("\n");
+ fprintf(stderr, "\n");
}
}
}
}
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("\n");
+ fprintf(stderr, "\n");
}
patch_jump_targets();
+
+ /* OK, while the INTEL_DEBUG=fs above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0 && unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ disassemble(stderr, 0, next_inst_offset);
+ }
}
const unsigned *