i965/fs: Keep track of flag dependencies with byte granularity during scheduling.
author Francisco Jerez <currojerez@riseup.net>
Thu, 19 May 2016 05:13:52 +0000 (22:13 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Sat, 28 May 2016 06:29:04 +0000 (23:29 -0700)
This prevents false dependencies from being created between
instructions that write disjoint 8-bit portions of the flag register.
On the other hand, it should also make sure that the scheduler
considers dependencies between instructions that write or read multiple
flag subregisters at once (e.g. 32-wide predication or conditional mods).

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp

index 6d6a19d02882293c5d98feb86cb35ca00f2a0ea8..8afdc25c2c591f4c5065351364c1317260868813 100644 (file)
@@ -914,7 +914,7 @@ fs_instruction_scheduler::calculate_deps()
     */
    schedule_node *last_grf_write[grf_count * 16];
    schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)];
-   schedule_node *last_conditional_mod[2] = { NULL, NULL };
+   schedule_node *last_conditional_mod[4] = {};
    schedule_node *last_accumulator_write = NULL;
    /* Fixed HW registers are assumed to be separate from the virtual
     * GRFs, so they can be tracked separately.  We don't really write
@@ -968,8 +968,13 @@ fs_instruction_scheduler::calculate_deps()
          }
       }
 
-      if (inst->reads_flag()) {
-         add_dep(last_conditional_mod[inst->flag_subreg], n);
+      if (const unsigned mask = inst->flags_read(v->devinfo)) {
+         assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+         for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+            if (mask & (1 << i))
+               add_dep(last_conditional_mod[i], n);
+         }
       }
 
       if (inst->reads_accumulator_implicitly()) {
@@ -1023,9 +1028,15 @@ fs_instruction_scheduler::calculate_deps()
          }
       }
 
-      if (inst->writes_flag()) {
-         add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
-         last_conditional_mod[inst->flag_subreg] = n;
+      if (const unsigned mask = inst->flags_written()) {
+         assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+         for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+            if (mask & (1 << i)) {
+               add_dep(last_conditional_mod[i], n, 0);
+               last_conditional_mod[i] = n;
+            }
+         }
       }
 
       if (inst->writes_accumulator_implicitly(v->devinfo) &&
@@ -1080,8 +1091,13 @@ fs_instruction_scheduler::calculate_deps()
          }
       }
 
-      if (inst->reads_flag()) {
-         add_dep(n, last_conditional_mod[inst->flag_subreg]);
+      if (const unsigned mask = inst->flags_read(v->devinfo)) {
+         assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+         for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+            if (mask & (1 << i))
+               add_dep(n, last_conditional_mod[i]);
+         }
       }
 
       if (inst->reads_accumulator_implicitly()) {
@@ -1132,8 +1148,13 @@ fs_instruction_scheduler::calculate_deps()
          }
       }
 
-      if (inst->writes_flag()) {
-         last_conditional_mod[inst->flag_subreg] = n;
+      if (const unsigned mask = inst->flags_written()) {
+         assert(mask < (1 << ARRAY_SIZE(last_conditional_mod)));
+
+         for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) {
+            if (mask & (1 << i))
+               last_conditional_mod[i] = n;
+         }
       }
 
       if (inst->writes_accumulator_implicitly(v->devinfo)) {