From 778594ae12be51110b6b1411e72c5fbb95cbadd3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 14 Jun 2018 11:17:11 -0700 Subject: [PATCH] v3d: Limit shader threading according to our maximum TMU fifo usage. Fixes simulator assertion failures in dEQP-GLES3.functional.shaders.texture_functions.texture.samplercubeshadow_bias_fragment and similar complicated cases. --- src/broadcom/compiler/v3d40_tex.c | 34 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c index 0b41f37d1ea..2fd8c3b5a6e 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d40_tex.c @@ -31,9 +31,12 @@ #include "cle/v3d_packet_v41_pack.h" static void -vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val) +vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val, + int *tmu_writes) { vir_MOV_dest(c, vir_reg(QFILE_MAGIC, waddr), val); + + (*tmu_writes)++; } static void @@ -49,6 +52,7 @@ void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) { unsigned unit = instr->texture_index; + int tmu_writes = 0; struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = { }; @@ -82,29 +86,32 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) if (non_array_components > 1) { vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUT, ntq_get_src(c, instr->src[i].src, - 1)); + 1), &tmu_writes); } if (non_array_components > 2) { vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUR, ntq_get_src(c, instr->src[i].src, - 2)); + 2), &tmu_writes); } if (instr->is_array) { vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUI, ntq_get_src(c, instr->src[i].src, - instr->coord_components - 1)); + instr->coord_components - 1), + &tmu_writes); } break; case nir_tex_src_bias: vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB, - ntq_get_src(c, instr->src[i].src, 0)); + ntq_get_src(c, instr->src[i].src, 0), + &tmu_writes); break; case nir_tex_src_lod: vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB, - ntq_get_src(c, instr->src[i].src, 0)); + ntq_get_src(c, instr->src[i].src, 0), + &tmu_writes); if (instr->op != nir_texop_txf && instr->op != nir_texop_tg4) { @@ -114,7 +121,8 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) case nir_tex_src_comparator: vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUDREF, - ntq_get_src(c, instr->src[i].src, 0)); + ntq_get_src(c, instr->src[i].src, 0), + &tmu_writes); break; case nir_tex_src_offset: { @@ -173,15 +181,21 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) if (instr->op == nir_texop_txf) { assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE); - vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s); + vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s, &tmu_writes); } else if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s); + vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s, &tmu_writes); } else { - vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s); + vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s, &tmu_writes); } vir_emit_thrsw(c); + /* The input FIFO has 16 slots across all threads, so make sure we + * don't overfill our allocation. + */ + while (tmu_writes > 16 / c->threads) + c->threads /= 2; + struct qreg return_values[4]; for (int i = 0; i < 4; i++) { /* Swizzling .zw of an RG texture should give undefined -- 2.30.2