i965: Add support for gen6 DO/WHILE ISA emit.
author Eric Anholt <eric@anholt.net>
Wed, 1 Dec 2010 18:45:52 +0000 (10:45 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 2 Dec 2010 00:14:31 +0000 (16:14 -0800)
Gen6 has no mask stack, so there is no longer a DO instruction, and the
WHILE instruction's fields have been shuffled in the same way IF's were.

src/mesa/drivers/dri/i965/brw_eu_emit.c
src/mesa/drivers/dri/i965/brw_fs.cpp

index 9cb941dacfdcabb53ef93237ed42beaa8a8a6a83..660f5b484536126305bf12ae9278d99615922490 100644 (file)
@@ -1058,10 +1058,26 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
 }
 
 /* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop.  There are a few ways the loop
+ * itself can be emitted.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO.  WHILE
+ * just points back to the first instruction of the loop.
  */
 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 {
-   if (p->single_program_flow) {
+   struct intel_context *intel = &p->brw->intel;
+
+   if (intel->gen >= 6 || p->single_program_flow) {
       return &p->store[p->nr_insn];
    } else {
       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
@@ -1094,34 +1110,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
    if (intel->gen >= 5)
       br = 2;
 
-   if (p->single_program_flow)
-      insn = next_insn(p, BRW_OPCODE_ADD);
-   else
+   if (intel->gen >= 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
 
-   brw_set_dest(insn, brw_ip_reg());
-   brw_set_src0(insn, brw_ip_reg());
-   brw_set_src1(insn, brw_imm_d(0x0));
+      brw_set_dest(insn, brw_imm_w(0));
+      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
 
-   insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.execution_size = do_insn->header.execution_size;
+      assert(insn->header.execution_size == BRW_EXECUTE_8);
+   } else {
+      if (p->single_program_flow) {
+        insn = next_insn(p, BRW_OPCODE_ADD);
 
-   if (p->single_program_flow) {
-      insn->header.execution_size = BRW_EXECUTE_1;
+        brw_set_dest(insn, brw_ip_reg());
+        brw_set_src0(insn, brw_ip_reg());
+        brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
+        insn->header.execution_size = BRW_EXECUTE_1;
+      } else {
+        insn = next_insn(p, BRW_OPCODE_WHILE);
 
-      insn->bits3.d = (do_insn - insn) * 16;
-   } else {
-      insn->header.execution_size = do_insn->header.execution_size;
+        assert(do_insn->header.opcode == BRW_OPCODE_DO);
 
-      assert(do_insn->header.opcode == BRW_OPCODE_DO);
-      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
-      insn->bits3.if_else.pop_count = 0;
-      insn->bits3.if_else.pad0 = 0;
-   }
+        brw_set_dest(insn, brw_ip_reg());
+        brw_set_src0(insn, brw_ip_reg());
+        brw_set_src1(insn, brw_imm_d(0));
 
-/*    insn->header.mask_control = BRW_MASK_ENABLE; */
+        insn->header.execution_size = do_insn->header.execution_size;
+        insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+        insn->bits3.if_else.pop_count = 0;
+        insn->bits3.if_else.pad0 = 0;
+      }
+   }
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
-   /* insn->header.mask_control = BRW_MASK_DISABLE; */
-   p->current->header.predicate_control = BRW_PREDICATE_NONE;   
    return insn;
 }
 
index 7a8e9812257176e97e2adb7f2c157246612c23fb..cf45fcaa06ff84a5cef8379bde703e0ca84eab1b 100644 (file)
@@ -3402,16 +3402,18 @@ fs_visitor::generate_code()
         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
-        /* patch all the BREAK/CONT instructions from last BGNLOOP */
-        while (inst0 > loop_stack[loop_stack_depth]) {
-           inst0--;
-           if (inst0->header.opcode == BRW_OPCODE_BREAK &&
-               inst0->bits3.if_else.jump_count == 0) {
-              inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+        if (intel->gen < 6) {
+           /* patch all the BREAK/CONT instructions from last BGNLOOP */
+           while (inst0 > loop_stack[loop_stack_depth]) {
+              inst0--;
+              if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+                  inst0->bits3.if_else.jump_count == 0) {
+                 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
            }
-           else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
-                    inst0->bits3.if_else.jump_count == 0) {
-              inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+              else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+                       inst0->bits3.if_else.jump_count == 0) {
+                 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+              }
            }
         }
       }
@@ -3488,6 +3490,24 @@ fs_visitor::generate_code()
 
       last_native_inst = p->nr_insn;
    }
+
+   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
+    * emit issues, it doesn't get the jump distances into the output,
+    * which is often something we want to debug.  So this is here in
+    * case you're doing that.
+    */
+   if (0) {
+      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+        for (unsigned int i = 0; i < p->nr_insn; i++) {
+           printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+                  ((uint32_t *)&p->store[i])[3],
+                  ((uint32_t *)&p->store[i])[2],
+                  ((uint32_t *)&p->store[i])[1],
+                  ((uint32_t *)&p->store[i])[0]);
+           brw_disasm(stdout, &p->store[i], intel->gen);
+        }
+      }
+   }
 }
 
 GLboolean