From 042962df2d058c4dd4e45b7deaa3b4519141758e Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 2 Dec 2014 16:31:29 -0800
Subject: [PATCH] vc4: Fix inverted priority of instructions for QPU
 scheduling.

We were scheduling TLB operations as early as possible, and texture setup
as late as possible.  When I introduced prioritization, I visually
inspected that an independent operation got moved above texture results
collection, which tricked me into thinking it was working (but it was just
because texture setup was being pushed late).

total instructions in shared programs: 57651 -> 57486 (-0.29%)
instructions in affected programs: 18532 -> 18367 (-0.89%)
---
 src/gallium/drivers/vc4/vc4_qpu_schedule.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 8aa83741ff5..2b0a6326b8c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -439,24 +439,24 @@ get_instruction_priority(uint64_t inst)
         uint32_t baseline_score;
         uint32_t next_score = 0;
 
-        /* Schedule texture read setup early to hide their latency better. */
-        if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul))
+        /* Schedule TLB operations as late as possible, to get more
+         * parallelism between shaders.
+         */
+        if (qpu_inst_is_tlb(inst))
                 return next_score;
         next_score++;
 
-        /* Default score for things that aren't otherwise special. */
-        baseline_score = next_score;
-        next_score++;
-
         /* Schedule texture read results collection late to hide latency. */
         if (sig == QPU_SIG_LOAD_TMU0 || sig == QPU_SIG_LOAD_TMU1)
                 return next_score;
         next_score++;
 
-        /* Schedule TLB operations as late as possible, to get more
-         * parallelism between shaders.
-         */
-        if (qpu_inst_is_tlb(inst))
+        /* Default score for things that aren't otherwise special. */
+        baseline_score = next_score;
+        next_score++;
+
+        /* Schedule texture read setup early to hide their latency better. */
+        if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul))
                 return next_score;
         next_score++;
 
-- 
2.30.2