i965/fs: Add an instruction flag for choosing the flag subregister.
author Eric Anholt <eric@anholt.net>
Thu, 6 Dec 2012 18:36:11 +0000 (10:36 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 11 Dec 2012 18:12:58 +0000 (10:12 -0800)
We're going to redo discard handling to track discards in the other flag
subregister, saving instructions in the discard and allowing predicated
jumps out to the end of the shader.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_eu.c
src/mesa/drivers/dri/i965/brw_eu.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp

index 46ada8d995cc743ade7b9f04bc1605656a0bd3e1..360089cf13170179ea23ce8e6d6aa6f7f859a1f7 100644 (file)
@@ -94,6 +94,12 @@ void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
    p->current->header.destreg__conditionalmod = conditional;
 }
 
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg)
+{
+   p->current->bits2.da1.flag_reg_nr = reg;
+   p->current->bits2.da1.flag_subreg_nr = subreg;
+}
+
 void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
 {
    p->current->header.access_mode = access_mode;
index e43b543dd96cde5fc8e298d0df80a4b32be3c1c1..adefcfd9badd39190f1ffe4742b2aa3a3f4f2d6e 100644 (file)
@@ -810,6 +810,7 @@ void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
 void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
 void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
 void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
 void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
 
 void brw_init_compile(struct brw_context *, struct brw_compile *p,
index d77a67e0240acc5d8fd73fcbde5e36e2fc0082b2..dbf48f89d7e2500290e256bfb781949b78184eb6 100644 (file)
@@ -2222,6 +2222,12 @@ fs_visitor::remove_duplicate_mrf_writes()
 void
 fs_visitor::dump_instruction(fs_inst *inst)
 {
+   if (inst->predicate) {
+      printf("(%cf0.%d) ",
+             inst->predicate_inverse ? '-' : '+',
+             inst->flag_subreg);
+   }
+
    if (inst->opcode < ARRAY_SIZE(opcode_descs) &&
        opcode_descs[inst->opcode].name) {
       printf("%s", opcode_descs[inst->opcode].name);
@@ -2230,8 +2236,18 @@ fs_visitor::dump_instruction(fs_inst *inst)
    }
    if (inst->saturate)
       printf(".sat");
+   if (inst->conditional_mod) {
+      printf(".cmod");
+      if (!inst->predicate &&
+          (intel->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
+                              inst->opcode != BRW_OPCODE_IF &&
+                              inst->opcode != BRW_OPCODE_WHILE))) {
+         printf(".f0.%d", inst->flag_subreg); /* operands follow */
+      }
+   }
    printf(" ");
 
+
    switch (inst->dst.file) {
    case GRF:
       printf("vgrf%d", inst->dst.reg);
index 51efc113fcc10cd6d663b2e60e3a49a0f41ece20..b60a37e459264db4267e77b3697f81bfb4d5a093 100644 (file)
@@ -165,6 +165,11 @@ public:
    bool saturate;
    int conditional_mod; /**< BRW_CONDITIONAL_* */
 
+   /* Chooses which flag subregister (f0.0 or f0.1) is used for conditional
+    * mod and predication.
+    */
+   uint8_t flag_subreg;
+
    int mlen; /**< SEND message length */
    int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
    uint32_t texture_offset; /**< Texture offset bitfield */
@@ -511,7 +516,7 @@ private:
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset);
-   void generate_mov_dispatch_to_flags();
+   void generate_mov_dispatch_to_flags(fs_inst *inst);
 
    struct brw_context *brw;
    struct intel_context *intel;
index 0750b86793cd9bd1c8828af77fe2c64450b2bd76..b3d7f81f1a49be7cb477c55ddcd77ae6c1d778fa 100644 (file)
@@ -742,15 +742,15 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
  * Used only on Gen6 and above.
  */
 void
-fs_generator::generate_mov_dispatch_to_flags()
+fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
 {
-   struct brw_reg f0 = brw_flag_reg(0, 0);
+   struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
    struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
 
    assert (intel->gen >= 6);
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, f0, g1);
+   brw_MOV(p, flags, g1);
    brw_pop_insn_state(p);
 }
 
@@ -915,6 +915,7 @@ fs_generator::generate_code(exec_list *instructions)
       brw_set_conditionalmod(p, inst->conditional_mod);
       brw_set_predicate_control(p, inst->predicate);
       brw_set_predicate_inverse(p, inst->predicate_inverse);
+      brw_set_flag_reg(p, 0, inst->flag_subreg);
       brw_set_saturate(p, inst->saturate);
       brw_set_mask_control(p, inst->force_writemask_all);
 
@@ -1121,7 +1122,7 @@ fs_generator::generate_code(exec_list *instructions)
         break;
 
       case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
-         generate_mov_dispatch_to_flags();
+         generate_mov_dispatch_to_flags(inst);
          break;
 
       case SHADER_OPCODE_SHADER_TIME_ADD:
index 6b7c412f8e740e73616b6485eed61ed4ae688760..96d1131f10c0e7e55c25fdc42aa4d6a79c595d3d 100644 (file)
@@ -249,7 +249,7 @@ instruction_scheduler::calculate_deps()
 {
    schedule_node *last_grf_write[virtual_grf_count];
    schedule_node *last_mrf_write[BRW_MAX_MRF];
-   schedule_node *last_conditional_mod = NULL;
+   schedule_node *last_conditional_mod[2] = { NULL, NULL };
    /* Fixed HW registers are assumed to be separate from the virtual
     * GRFs, so they can be tracked separately.  We don't really write
     * to fixed GRFs much, so don't bother tracking them on a more
@@ -299,8 +299,8 @@ instruction_scheduler::calculate_deps()
       }
 
       if (inst->predicate) {
-        assert(last_conditional_mod);
-        add_dep(last_conditional_mod, n);
+        assert(last_conditional_mod[inst->flag_subreg]);
+        add_dep(last_conditional_mod[inst->flag_subreg], n);
       }
 
       /* write-after-write deps. */
@@ -339,15 +339,15 @@ instruction_scheduler::calculate_deps()
        */
       if (inst->conditional_mod ||
           inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
-        add_dep(last_conditional_mod, n, 0);
-        last_conditional_mod = n;
+        add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
+        last_conditional_mod[inst->flag_subreg] = n;
       }
    }
 
    /* bottom-to-top dependencies: WAR */
    memset(last_grf_write, 0, sizeof(last_grf_write));
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
-   last_conditional_mod = NULL;
+   memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
    last_fixed_grf_write = NULL;
 
    exec_node *node;
@@ -383,7 +383,7 @@ instruction_scheduler::calculate_deps()
       }
 
       if (inst->predicate) {
-        add_dep(n, last_conditional_mod);
+        add_dep(n, last_conditional_mod[inst->flag_subreg]);
       }
 
       /* Update the things this instruction wrote, so earlier reads
@@ -422,7 +422,7 @@ instruction_scheduler::calculate_deps()
        */
       if (inst->conditional_mod ||
           inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
-        last_conditional_mod = n;
+        last_conditional_mod[inst->flag_subreg] = n;
       }
    }
 }