i965/gen8: add debug code to show FS disasm with jump locations
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_fs_generator.cpp
index e5fa3d2d25ffe757ed41301609f262aa39fc967e..ef95eb0275e08a77672d2bb6adb32e4c2b09a329 100644 (file)
@@ -757,6 +757,78 @@ gen8_fs_generator::generate_set_sample_id(fs_inst *ir,
    default_state.exec_size = save_exec_size;
 }
 
+/**
+ * Return UD register r retyped as HF, with each nonzero stride field bumped so
+ * the real stride doubles, compensating for the halved data type width.
+ */
+static struct brw_reg
+ud_reg_to_hf(struct brw_reg r)
+{
+   assert(r.type == BRW_REGISTER_TYPE_UD); /* only UD inputs are handled */
+   r.type = BRW_REGISTER_TYPE_HF;
+
+   /* The BRW_*_STRIDE enums are defined so that incrementing the field
+    * doubles the real stride.  Fields equal to 0 are left untouched.
+    */
+   if (r.hstride != 0)
+      ++r.hstride;
+   if (r.vstride != 0)
+      ++r.vstride;
+
+   return r; /* r was passed by value, so the caller's register is unchanged */
+}
+
+void
+gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg x,
+                                                 struct brw_reg y)
+{
+   assert(dst.type == BRW_REGISTER_TYPE_UD); /* packed result is written as UD */
+   assert(x.type == BRW_REGISTER_TYPE_F);    /* source of the low 16 bits */
+   assert(y.type == BRW_REGISTER_TYPE_F);    /* source of the high 16 bits */
+
+   struct brw_reg dst_hf = ud_reg_to_hf(dst); /* HF view used by both MOVs */
+
+   /* Pack x and y as half floats into each 32-bit channel of dst.  First write
+    * y through the HF view, giving the form below ("." means unchanged):
+    *   0x....hhhh
+    */
+   MOV(dst_hf, y);
+
+   /* Shift that value into the upper word of the UD register.  Now the form:
+    *   0xhhhh0000
+    */
+   SHL(dst, dst, brw_imm_ud(16u));
+
+   /* Finally write x through the same HF view: packHalf2x16's output form:
+    *   0xhhhhllll
+    */
+   MOV(dst_hf, x);
+}
+
+void
+gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+                                                   struct brw_reg dst,
+                                                   struct brw_reg src)
+{
+   assert(dst.type == BRW_REGISTER_TYPE_F);  /* unpacked result is F-typed */
+   assert(src.type == BRW_REGISTER_TYPE_UD); /* packed input is UD-typed */
+
+   struct brw_reg src_hf = ud_reg_to_hf(src); /* HF view of the packed input */
+
+   /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
+    * The X opcode uses src_hf as is (no extra offset).  For the Y case we want
+    * only the upper word; therefore a 16-bit subregister offset is needed.
+    */
+   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+      src_hf.subnr += 2; /* the 16-bit offset described above */
+
+   MOV(dst, src_hf); /* HF-typed source into the F-typed destination */
+}
+
 void
 gen8_fs_generator::generate_code(exec_list *instructions)
 {
@@ -766,14 +838,17 @@ gen8_fs_generator::generate_code(exec_list *instructions)
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
       if (prog) {
-         printf("Native code for fragment shader %d (SIMD%d dispatch):\n",
+         fprintf(stderr,
+                 "Native code for %s fragment shader %d (SIMD%d dispatch):\n",
+                shader_prog->Label ? shader_prog->Label : "unnamed",
                 shader_prog->Name, dispatch_width);
       } else if (fp) {
-         printf("Native code for fragment program %d (SIMD%d dispatch):\n",
-                prog->Id, dispatch_width);
+         fprintf(stderr,
+                 "Native code for fragment program %d (SIMD%d dispatch):\n",
+                 prog->Id, dispatch_width);
       } else {
-         printf("Native code for blorp program (SIMD%d dispatch):\n",
-                dispatch_width);
+         fprintf(stderr, "Native code for blorp program (SIMD%d dispatch):\n",
+                 dispatch_width);
       }
    }
 
@@ -791,38 +866,38 @@ gen8_fs_generator::generate_code(exec_list *instructions)
             bblock_t *block = link->block;
 
             if (block->start == ir) {
-               printf("   START B%d", block->block_num);
+               fprintf(stderr, "   START B%d", block->block_num);
                foreach_list(predecessor_node, &block->parents) {
                   bblock_link *predecessor_link =
                      (bblock_link *)predecessor_node;
                   bblock_t *predecessor_block = predecessor_link->block;
-                  printf(" <-B%d", predecessor_block->block_num);
+                  fprintf(stderr, " <-B%d", predecessor_block->block_num);
                }
-               printf("\n");
+               fprintf(stderr, "\n");
             }
          }
 
          if (last_annotation_ir != ir->ir) {
             last_annotation_ir = ir->ir;
             if (last_annotation_ir) {
-               printf("   ");
+               fprintf(stderr, "   ");
                if (prog) {
-                  ((ir_instruction *) ir->ir)->print();
+                  ((ir_instruction *) ir->ir)->fprint(stderr);
                } else if (prog) {
                   const prog_instruction *fpi;
                   fpi = (const prog_instruction *) ir->ir;
-                  printf("%d: ", (int)(fpi - prog->Instructions));
-                  _mesa_fprint_instruction_opt(stdout,
+                  fprintf(stderr, "%d: ", (int)(fpi - prog->Instructions));
+                  _mesa_fprint_instruction_opt(stderr,
                                                fpi,
                                                0, PROG_PRINT_DEBUG, NULL);
                }
-               printf("\n");
+               fprintf(stderr, "\n");
             }
          }
          if (last_annotation_string != ir->annotation) {
             last_annotation_string = ir->annotation;
             if (last_annotation_string)
-               printf("   %s\n", last_annotation_string);
+               fprintf(stderr, "   %s\n", last_annotation_string);
          }
       }
 
@@ -920,10 +995,10 @@ gen8_fs_generator::generate_code(exec_list *instructions)
          break;
 
       case BRW_OPCODE_F32TO16:
-         F32TO16(dst, src[0]);
+         MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]);
          break;
       case BRW_OPCODE_F16TO32:
-         F16TO32(dst, src[0]);
+         MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF));
          break;
 
       case BRW_OPCODE_CMP:
@@ -1140,12 +1215,12 @@ gen8_fs_generator::generate_code(exec_list *instructions)
          break;
 
       case FS_OPCODE_PACK_HALF_2x16_SPLIT:
-         assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
+         generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
          break;
 
       case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
       case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
-         assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
+         generate_unpack_half_2x16_split(ir, dst, src[0]);
          break;
 
       case FS_OPCODE_PLACEHOLDER_HALT:
@@ -1166,21 +1241,21 @@ gen8_fs_generator::generate_code(exec_list *instructions)
       }
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-         disassemble(stdout, last_native_inst_offset, next_inst_offset);
+         disassemble(stderr, last_native_inst_offset, next_inst_offset);
 
          foreach_list(node, &cfg->block_list) {
             bblock_link *link = (bblock_link *)node;
             bblock_t *block = link->block;
 
             if (block->end == ir) {
-               printf("   END B%d", block->block_num);
+               fprintf(stderr, "   END B%d", block->block_num);
                foreach_list(successor_node, &block->children) {
                   bblock_link *successor_link =
                      (bblock_link *)successor_node;
                   bblock_t *successor_block = successor_link->block;
-                  printf(" ->B%d", successor_block->block_num);
+                  fprintf(stderr, " ->B%d", successor_block->block_num);
                }
-               printf("\n");
+               fprintf(stderr, "\n");
             }
          }
       }
@@ -1189,10 +1264,19 @@ gen8_fs_generator::generate_code(exec_list *instructions)
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-      printf("\n");
+      fprintf(stderr, "\n");
    }
 
    patch_jump_targets();
+
+   /* OK, while the INTEL_DEBUG=fs above is very nice for debugging FS
+    * emit issues, it doesn't get the jump distances into the output,
+    * which is often something we want to debug.  So this is here in
+    * case you're doing that.
+    */
+   if (0 && unlikely(INTEL_DEBUG & DEBUG_WM)) {
+      disassemble(stderr, 0, next_inst_offset);
+   }
 }
 
 const unsigned *