From ecd7a7255aa1d6c313ead14e1b472c073c7111ac Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 18 May 2016 22:13:52 -0700 Subject: [PATCH] i965/fs: Keep track of flag dependencies with byte granularity during scheduling. This prevents false dependencies from being created between instructions that write disjoint 8-bit portions of the flag register. On the other hand, it should make sure that the scheduler considers dependencies between instructions that write or read multiple flag subregisters at once (e.g. 32-wide predication or conditional mods). Reviewed-by: Jason Ekstrand --- .../dri/i965/brw_schedule_instructions.cpp | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 6d6a19d0288..8afdc25c2c5 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -914,7 +914,7 @@ fs_instruction_scheduler::calculate_deps() */ schedule_node *last_grf_write[grf_count * 16]; schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)]; - schedule_node *last_conditional_mod[2] = { NULL, NULL }; + schedule_node *last_conditional_mod[4] = {}; schedule_node *last_accumulator_write = NULL; /* Fixed HW registers are assumed to be separate from the virtual * GRFs, so they can be tracked separately. 
We don't really write @@ -968,8 +968,13 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->reads_flag()) { - add_dep(last_conditional_mod[inst->flag_subreg], n); + if (const unsigned mask = inst->flags_read(v->devinfo)) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) + add_dep(last_conditional_mod[i], n); + } } if (inst->reads_accumulator_implicitly()) { @@ -1023,9 +1028,15 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->writes_flag()) { - add_dep(last_conditional_mod[inst->flag_subreg], n, 0); - last_conditional_mod[inst->flag_subreg] = n; + if (const unsigned mask = inst->flags_written()) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) { + add_dep(last_conditional_mod[i], n, 0); + last_conditional_mod[i] = n; + } + } } if (inst->writes_accumulator_implicitly(v->devinfo) && @@ -1080,8 +1091,13 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->reads_flag()) { - add_dep(n, last_conditional_mod[inst->flag_subreg]); + if (const unsigned mask = inst->flags_read(v->devinfo)) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) + add_dep(n, last_conditional_mod[i]); + } } if (inst->reads_accumulator_implicitly()) { @@ -1132,8 +1148,13 @@ fs_instruction_scheduler::calculate_deps() } } - if (inst->writes_flag()) { - last_conditional_mod[inst->flag_subreg] = n; + if (const unsigned mask = inst->flags_written()) { + assert(mask < (1 << ARRAY_SIZE(last_conditional_mod))); + + for (unsigned i = 0; i < ARRAY_SIZE(last_conditional_mod); i++) { + if (mask & (1 << i)) + last_conditional_mod[i] = n; + } } if (inst->writes_accumulator_implicitly(v->devinfo)) { -- 2.30.2