From 79a30543eea529c22492067ff57bce8bb88ab83e Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Mon, 17 Jun 2019 10:15:54 +0200
Subject: [PATCH] v3d: implement simultaneous peripheral access exceptions for V3D 4.1+
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Shader-db results:

total instructions in shared programs: 9117550 -> 9102719 (-0.16%)
instructions in affected programs: 1752873 -> 1738042 (-0.85%)
helped: 7076
HURT: 478
helped stats (abs) min: 1 max: 22 x̄: 2.19 x̃: 2
helped stats (rel) min: 0.07% max: 13.89% x̄: 1.70% x̃: 1.07%
HURT stats (abs) min: 1 max: 7 x̄: 1.41 x̃: 1
HURT stats (rel) min: 0.09% max: 10.17% x̄: 0.86% x̃: 0.54%
95% mean confidence interval for instructions value: -2.00 -1.92
95% mean confidence interval for instructions %-change: -1.58% -1.50%
Instructions are helped.

total max-temps in shared programs: 1327774 -> 1327728 (<.01%)
max-temps in affected programs: 1025 -> 979 (-4.49%)
helped: 47
HURT: 2
helped stats (abs) min: 1 max: 2 x̄: 1.02 x̃: 1
helped stats (rel) min: 2.63% max: 20.00% x̄: 7.67% x̃: 5.26%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 4.17% max: 4.17% x̄: 4.17% x̃: 4.17%
95% mean confidence interval for max-temps value: -1.06 -0.82
95% mean confidence interval for max-temps %-change: -8.89% -5.49%
Max-temps are helped.

Reviewed-by: Eric Anholt --- src/broadcom/compiler/qpu_schedule.c | 38 +++++++++++++++++++++++----- src/broadcom/qpu/qpu_instr.c | 10 ++++++++ src/broadcom/qpu/qpu_instr.h | 1 + 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 290cfe38780..27a83a23a56 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -604,6 +604,37 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) inst->sig.wrtmuc); } +static bool +qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *a, + const struct v3d_qpu_instr *b) +{ + const bool a_uses_peripheral = qpu_accesses_peripheral(a); + const bool b_uses_peripheral = qpu_accesses_peripheral(b); + + /* We can always do one peripheral access per instruction. */ + if (!a_uses_peripheral || !b_uses_peripheral) + return true; + + if (devinfo->ver < 41) + return false; + + /* V3D 4.1 and later allow TMU read along with a VPM read or write, and + * WRTMUC with a TMU magic register write (other than tmuc). + */ + if ((a->sig.ldtmu && v3d_qpu_uses_vpm(b)) || + (b->sig.ldtmu && v3d_qpu_uses_vpm(a))) { + return true; + } + + if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(b)) || + (b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(a))) { + return true; + } + + return false; +} + static bool qpu_merge_inst(const struct v3d_device_info *devinfo, struct v3d_qpu_instr *result, @@ -615,12 +646,7 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, return false; } - /* Can't do more than one peripheral access in an instruction. - * - * XXX: V3D 4.1 allows TMU read along with a VPM read or write, and - * WRTMUC with a TMU magic register write (other than tmuc). 
- */ - if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b)) + if (!qpu_compatible_peripheral_access(devinfo, a, b)) return false; struct v3d_qpu_instr merge = *a; diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 22f574ce5e4..66e53a6accd 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -682,6 +682,16 @@ v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr)))); } +bool +v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) +{ + return v3d_qpu_writes_tmu(inst) && + (!inst->alu.add.magic_write || + inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) && + (!inst->alu.mul.magic_write || + inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC); +} + bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) { diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index a77430ff882..968d0f6fd65 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -449,6 +449,7 @@ bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; +bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, -- 2.30.2