v3d: Limit shader threading according to our maximum TMU fifo usage.

author Eric Anholt <eric@anholt.net>

Thu, 14 Jun 2018 18:17:11 +0000 (11:17 -0700)

committer Eric Anholt <eric@anholt.net>

Fri, 15 Jun 2018 23:09:39 +0000 (16:09 -0700)
author Eric Anholt <eric@anholt.net>
Thu, 14 Jun 2018 18:17:11 +0000 (11:17 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 15 Jun 2018 23:09:39 +0000 (16:09 -0700)
diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c

index 0b41f37d1ea46648eea6164411436e315775dc8c..2fd8c3b5a6e7a2c0f184d08c3b68838e751a5e3e 100644 (file)
--- a/src/broadcom/compiler/v3d40_tex.c
+++ b/src/broadcom/compiler/v3d40_tex.c
@@ -31,9 +31,12 @@
  #include "cle/v3d_packet_v41_pack.h"
  
  static void
-vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val)
+vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val,
+              int *tmu_writes)
  {
          vir_MOV_dest(c, vir_reg(QFILE_MAGIC, waddr), val);
+
+        (*tmu_writes)++;
  }
  
  static void
@@ -49,6 +52,7 @@ void
  v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
  {
          unsigned unit = instr->texture_index;
+        int tmu_writes = 0;
  
          struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
          };
@@ -82,29 +86,32 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                          if (non_array_components > 1) {
                                  vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUT,
                                                ntq_get_src(c, instr->src[i].src,
-                                                          1));
+                                                          1), &tmu_writes);
                          }
                          if (non_array_components > 2) {
                                  vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUR,
                                                ntq_get_src(c, instr->src[i].src,
-                                                          2));
+                                                          2), &tmu_writes);
                          }
  
                          if (instr->is_array) {
                                  vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUI,
                                                ntq_get_src(c, instr->src[i].src,
-                                                          instr->coord_components - 1));
+                                                          instr->coord_components - 1),
+                                              &tmu_writes);
                          }
                          break;
  
                  case nir_tex_src_bias:
                          vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB,
-                                      ntq_get_src(c, instr->src[i].src, 0));
+                                      ntq_get_src(c, instr->src[i].src, 0),
+                                      &tmu_writes);
                          break;
  
                  case nir_tex_src_lod:
                          vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB,
-                                      ntq_get_src(c, instr->src[i].src, 0));
+                                      ntq_get_src(c, instr->src[i].src, 0),
+                                      &tmu_writes);
  
                          if (instr->op != nir_texop_txf &&
                              instr->op != nir_texop_tg4) {
@@ -114,7 +121,8 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
  
                  case nir_tex_src_comparator:
                          vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUDREF,
-                                      ntq_get_src(c, instr->src[i].src, 0));
+                                      ntq_get_src(c, instr->src[i].src, 0),
+                                      &tmu_writes);
                          break;
  
                  case nir_tex_src_offset: {
@@ -173,15 +181,21 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
  
          if (instr->op == nir_texop_txf) {
                  assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
-                vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s);
+                vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s, &tmu_writes);
          } else if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
-                vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s);
+                vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s, &tmu_writes);
          } else {
-                vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s);
+                vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s, &tmu_writes);
          }
  
          vir_emit_thrsw(c);
  
+        /* The input FIFO has 16 slots across all threads, so make sure we
+         * don't overfill our allocation.
+         */
+        while (tmu_writes > 16 / c->threads)
+                c->threads /= 2;
+
          struct qreg return_values[4];
          for (int i = 0; i < 4; i++) {
                  /* Swizzling .zw of an RG texture should give undefined
author	Eric Anholt <eric@anholt.net>
	Thu, 14 Jun 2018 18:17:11 +0000 (11:17 -0700)
committer	Eric Anholt <eric@anholt.net>
	Fri, 15 Jun 2018 23:09:39 +0000 (16:09 -0700)