v3d: Use ldunif instructions for uniforms.
authorEric Anholt <eric@anholt.net>
Tue, 26 Feb 2019 18:17:59 +0000 (10:17 -0800)
committerEric Anholt <eric@anholt.net>
Tue, 5 Mar 2019 20:57:39 +0000 (12:57 -0800)
The idea is that for repeated use of the same uniform, we could avoid
loading it on each consumer.  The results look pretty good.

total instructions in shared programs: 6413571 -> 6521464 (1.68%)
total threads in shared programs: 154214 -> 154000 (-0.14%)
total uniforms in shared programs: 2393604 -> 2119629 (-11.45%)
total spills in shared programs: 4960 -> 4984 (0.48%)
total fills in shared programs: 6350 -> 6418 (1.07%)

Once we do scheduling at the NIR level, the register pressure (and thus
also instructions) issues we see here will drop back down.

src/broadcom/Makefile.sources
src/broadcom/compiler/meson.build
src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/compiler/vir_dump.c
src/broadcom/compiler/vir_lower_uniforms.c [deleted file]
src/broadcom/compiler/vir_opt_copy_propagate.c
src/broadcom/compiler/vir_opt_small_immediates.c
src/broadcom/compiler/vir_register_allocate.c
src/broadcom/compiler/vir_to_qpu.c

index 6cec9b81cb11c02dae7649c57f522027800867c1..75f402c23938f0df9101684228f11db9d549367a 100644 (file)
@@ -27,7 +27,6 @@ BROADCOM_FILES = \
        compiler/vir.c \
        compiler/vir_dump.c \
        compiler/vir_live_variables.c \
        compiler/vir.c \
        compiler/vir_dump.c \
        compiler/vir_live_variables.c \
-       compiler/vir_lower_uniforms.c \
        compiler/vir_opt_copy_propagate.c \
        compiler/vir_opt_dead_code.c \
        compiler/vir_opt_small_immediates.c \
        compiler/vir_opt_copy_propagate.c \
        compiler/vir_opt_dead_code.c \
        compiler/vir_opt_small_immediates.c \
index c80918db30fe9a36281a8a5f003fd940c04da0ac..af800b34c28cd12801575ccf886493454093436f 100644 (file)
@@ -23,7 +23,6 @@ libbroadcom_compiler_files = files(
   'vir.c',
   'vir_dump.c',
   'vir_live_variables.c',
   'vir.c',
   'vir_dump.c',
   'vir_live_variables.c',
-  'vir_lower_uniforms.c',
   'vir_opt_copy_propagate.c',
   'vir_opt_dead_code.c',
   'vir_opt_small_immediates.c',
   'vir_opt_copy_propagate.c',
   'vir_opt_dead_code.c',
   'vir_opt_small_immediates.c',
index 1d1bc3d5d423f0e4830776cc64a5beb7a6edd3f4..afc9f1c7ed27e237b7f7b625c87561cf92588725 100644 (file)
@@ -354,8 +354,7 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
         if (!list_empty(&c->cur_block->instructions))
                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
 
         if (!list_empty(&c->cur_block->instructions))
                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
 
-        assert(result.file == QFILE_UNIF ||
-               (result.file == QFILE_TEMP &&
+        assert((result.file == QFILE_TEMP &&
                 last_inst && last_inst == c->defs[result.index]));
 
         if (dest->is_ssa) {
                 last_inst && last_inst == c->defs[result.index]));
 
         if (dest->is_ssa) {
@@ -382,7 +381,8 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
                 /* Insert a MOV if the source wasn't an SSA def in the
                  * previous instruction.
                  */
                 /* Insert a MOV if the source wasn't an SSA def in the
                  * previous instruction.
                  */
-                if (result.file == QFILE_UNIF) {
+                if ((vir_in_nonuniform_control_flow(c) &&
+                     c->defs[last_inst->dst.index]->qpu.sig.ldunif)) {
                         result = vir_MOV(c, result);
                         last_inst = c->defs[result.index];
                 }
                         result = vir_MOV(c, result);
                         last_inst = c->defs[result.index];
                 }
@@ -2519,7 +2519,6 @@ v3d_nir_to_vir(struct v3d_compile *c)
         }
 
         vir_optimize(c);
         }
 
         vir_optimize(c);
-        vir_lower_uniforms(c);
 
         vir_check_payload_w(c);
 
 
         vir_check_payload_w(c);
 
index ebfbe364cab8a6c29b31d2586c21c88c1a389c4f..5bc514ce0a0d7b9ba582c0fc523b0678cc8c6273 100644 (file)
@@ -69,7 +69,6 @@ enum qfile {
          * or physical registers later.
          */
         QFILE_TEMP,
          * or physical registers later.
          */
         QFILE_TEMP,
-        QFILE_UNIF,
         QFILE_TLB,
         QFILE_TLBU,
 
         QFILE_TLB,
         QFILE_TLBU,
 
index ca8cb56d94fc5d54291e94b2c53e9fe503e5a81d..bb04c82d7770822f0adc0d13a694db8069489517 100644 (file)
@@ -1018,9 +1018,12 @@ vir_uniform(struct v3d_compile *c,
             enum quniform_contents contents,
             uint32_t data)
 {
             enum quniform_contents contents,
             uint32_t data)
 {
-        uint32_t uniform = vir_get_uniform_index(c, contents, data);
-
-        return vir_reg(QFILE_UNIF, uniform);
+        struct qinst *inst = vir_NOP(c);
+        inst->qpu.sig.ldunif = true;
+        inst->uniform = vir_get_uniform_index(c, contents, data);
+        inst->dst = vir_get_temp(c);
+        c->defs[inst->dst.index] = inst;
+        return inst->dst;
 }
 
 #define OPTPASS(func)                                                   \
 }
 
 #define OPTPASS(func)                                                   \
index 92ecc06853043c1e849d95184324889c90b409f9..ce037ff3993db963da2d0947d9d460d8dc4a2ba0 100644 (file)
@@ -135,7 +135,6 @@ vir_print_reg(struct v3d_compile *c, const struct qinst *inst,
 {
         static const char *files[] = {
                 [QFILE_TEMP] = "t",
 {
         static const char *files[] = {
                 [QFILE_TEMP] = "t",
-                [QFILE_UNIF] = "u",
                 [QFILE_TLB] = "tlb",
                 [QFILE_TLBU] = "tlbu",
         };
                 [QFILE_TLB] = "tlb",
                 [QFILE_TLBU] = "tlbu",
         };
@@ -183,14 +182,6 @@ vir_print_reg(struct v3d_compile *c, const struct qinst *inst,
                 fprintf(stderr, "%s", files[reg.file]);
                 break;
 
                 fprintf(stderr, "%s", files[reg.file]);
                 break;
 
-        case QFILE_UNIF:
-                fprintf(stderr, "%s%d", files[reg.file], reg.index);
-                fprintf(stderr, " (");
-                vir_dump_uniform(c->uniform_contents[reg.index],
-                                 c->uniform_data[reg.index]);
-                fprintf(stderr, ")");
-                break;
-
         default:
                 fprintf(stderr, "%s%d", files[reg.file], reg.index);
                 break;
         default:
                 fprintf(stderr, "%s%d", files[reg.file], reg.index);
                 break;
diff --git a/src/broadcom/compiler/vir_lower_uniforms.c b/src/broadcom/compiler/vir_lower_uniforms.c
deleted file mode 100644 (file)
index 27d58fd..0000000
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright © 2014 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * @file v3d_vir_lower_uniforms.c
- *
- * This is the pre-code-generation pass for fixing up instructions that try to
- * read from multiple uniform values.
- */
-
-#include "v3d_compiler.h"
-#include "util/hash_table.h"
-#include "util/u_math.h"
-
-static inline uint32_t
-index_hash(const void *key)
-{
-        return (uintptr_t)key;
-}
-
-static inline bool
-index_compare(const void *a, const void *b)
-{
-        return a == b;
-}
-
-static void
-add_uniform(struct hash_table *ht, struct qreg reg)
-{
-        struct hash_entry *entry;
-        void *key = (void *)(uintptr_t)(reg.index + 1);
-
-        entry = _mesa_hash_table_search(ht, key);
-        if (entry) {
-                entry->data++;
-        } else {
-                _mesa_hash_table_insert(ht, key, (void *)(uintptr_t)1);
-        }
-}
-
-static void
-remove_uniform(struct hash_table *ht, struct qreg reg)
-{
-        struct hash_entry *entry;
-        void *key = (void *)(uintptr_t)(reg.index + 1);
-
-        entry = _mesa_hash_table_search(ht, key);
-        assert(entry);
-        entry->data = (void *)(((uintptr_t) entry->data) - 1);
-        if (entry->data == NULL)
-                _mesa_hash_table_remove(ht, entry);
-}
-
-static bool
-is_lowerable_uniform(struct qinst *inst, int i)
-{
-        if (inst->src[i].file != QFILE_UNIF)
-                return false;
-        return true;
-}
-
-/* Returns the number of different uniform values referenced by the
- * instruction.
- */
-static uint32_t
-vir_get_instruction_uniform_count(struct qinst *inst)
-{
-        uint32_t count = vir_has_uniform(inst);
-
-        for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                if (inst->src[i].file != QFILE_UNIF)
-                        continue;
-
-                bool is_duplicate = false;
-                for (int j = 0; j < i; j++) {
-                        if (inst->src[j].file == QFILE_UNIF &&
-                            inst->src[j].index == inst->src[i].index) {
-                                is_duplicate = true;
-                                break;
-                        }
-                }
-                if (!is_duplicate)
-                        count++;
-        }
-
-        return count;
-}
-
-void
-vir_lower_uniforms(struct v3d_compile *c)
-{
-        struct hash_table *ht =
-                _mesa_hash_table_create(c, index_hash, index_compare);
-
-        /* Walk the instruction list, finding which instructions have more
-         * than one uniform referenced, and add those uniform values to the
-         * ht.
-         */
-        vir_for_each_inst_inorder(inst, c) {
-                uint32_t nsrc = vir_get_nsrc(inst);
-
-                if (vir_get_instruction_uniform_count(inst) <= 1)
-                        continue;
-
-                for (int i = 0; i < nsrc; i++) {
-                        add_uniform(ht, inst->src[i]);
-                }
-        }
-
-        while (ht->entries) {
-                /* Find the most commonly used uniform in instructions that
-                 * need a uniform lowered.
-                 */
-                uint32_t max_count = 0;
-                uint32_t max_index = 0;
-                hash_table_foreach(ht, entry) {
-                        uint32_t count = (uintptr_t)entry->data;
-                        uint32_t index = (uintptr_t)entry->key - 1;
-                        if (count > max_count) {
-                                max_count = count;
-                                max_index = index;
-                        }
-                }
-
-                struct qreg unif = vir_reg(QFILE_UNIF, max_index);
-
-                /* Now, find the instructions using this uniform and make them
-                 * reference a temp instead.
-                 */
-                vir_for_each_block(block, c) {
-                        struct qreg temp = c->undef;
-
-                        vir_for_each_inst(inst, block) {
-                                uint32_t nsrc = vir_get_nsrc(inst);
-
-                                uint32_t count = vir_get_instruction_uniform_count(inst);
-
-                                if (count <= 1)
-                                        continue;
-
-                                bool removed = false;
-                                for (int i = 0; i < nsrc; i++) {
-                                        if (is_lowerable_uniform(inst, i) &&
-                                            inst->src[i].index == max_index) {
-                                                /* If the block doesn't have a
-                                                 * load of the uniform yet,
-                                                 * add it now.  We could
-                                                 * potentially do better and
-                                                 * CSE MOVs from multiple
-                                                 * blocks into dominating
-                                                 * blocks, except that may
-                                                 * cause troubles for register
-                                                 * allocation.
-                                                 */
-                                                if (temp.file == QFILE_NULL) {
-                                                        c->cursor =
-                                                                vir_before_inst(inst);
-                                                        temp = vir_MOV(c, unif);
-                                                }
-
-                                                inst->src[i] = temp;
-                                                remove_uniform(ht, unif);
-                                                removed = true;
-                                        }
-                                }
-                                if (removed)
-                                        count--;
-
-                                /* If the instruction doesn't need lowering any more,
-                                 * then drop it from the list.
-                                 */
-                                if (count <= 1) {
-                                        for (int i = 0; i < nsrc; i++) {
-                                                if (is_lowerable_uniform(inst, i))
-                                                        remove_uniform(ht, inst->src[i]);
-                                        }
-                                }
-                        }
-                }
-        }
-
-        _mesa_hash_table_destroy(ht, NULL);
-}
index 70ecfb9463628a9464bc9cca2009522b66daab32..c5bb61121733a813fc334e13e84ebf9838417574 100644 (file)
@@ -49,10 +49,8 @@ is_copy_mov(struct qinst *inst)
         if (inst->dst.file != QFILE_TEMP)
                 return false;
 
         if (inst->dst.file != QFILE_TEMP)
                 return false;
 
-        if (inst->src[0].file != QFILE_TEMP &&
-            inst->src[0].file != QFILE_UNIF) {
+        if (inst->src[0].file != QFILE_TEMP)
                 return false;
                 return false;
-        }
 
         if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
             inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
 
         if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
             inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
index d79445e6c2e25af111bc247005cf7c8b171d40b8..47d7722968d8e04c4910169903af3d3d3ed97c06 100644 (file)
@@ -55,18 +55,22 @@ vir_opt_small_immediates(struct v3d_compile *c)
                         continue;
 
                 for (int i = 0; i < vir_get_nsrc(inst); i++) {
                         continue;
 
                 for (int i = 0; i < vir_get_nsrc(inst); i++) {
-                        struct qreg src = vir_follow_movs(c, inst->src[i]);
+                        if (inst->src[i].file != QFILE_TEMP)
+                                continue;
 
 
-                        if (src.file != QFILE_UNIF ||
-                            c->uniform_contents[src.index] !=
-                            QUNIFORM_CONSTANT) {
+                        /* See if it's a uniform load. */
+                        struct qinst *src_def = c->defs[inst->src[i].index];
+                        if (!src_def || !src_def->qpu.sig.ldunif)
+                                continue;
+                        int uniform = src_def->uniform;
+
+                        if (c->uniform_contents[uniform] != QUNIFORM_CONSTANT)
                                 continue;
                                 continue;
-                        }
 
                         /* Check if the uniform is suitable as a small
                          * immediate.
                          */
 
                         /* Check if the uniform is suitable as a small
                          * immediate.
                          */
-                        uint32_t imm = c->uniform_data[src.index];
+                        uint32_t imm = c->uniform_data[uniform];
                         uint32_t packed;
                         if (!v3d_qpu_small_imm_pack(c->devinfo, imm, &packed))
                                 continue;
                         uint32_t packed;
                         if (!v3d_qpu_small_imm_pack(c->devinfo, imm, &packed))
                                 continue;
index 5cceec6d90f0b400a481a78d1a0fb59740ec9389..a55b23f74ed79e0113d787a330e77aca85de1917 100644 (file)
@@ -52,10 +52,7 @@ vir_is_mov_uniform(struct v3d_compile *c, int temp)
 {
         struct qinst *def = c->defs[temp];
 
 {
         struct qinst *def = c->defs[temp];
 
-        return (def &&
-                (def->qpu.sig.ldunif ||
-                 (vir_is_raw_mov(def) &&
-                  def->src[0].file == QFILE_UNIF)));
+        return def && def->qpu.sig.ldunif;
 }
 
 static int
 }
 
 static int
@@ -222,12 +219,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
         int uniform_index = ~0;
         if (is_uniform) {
                 struct qinst *orig_unif = c->defs[spill_temp];
         int uniform_index = ~0;
         if (is_uniform) {
                 struct qinst *orig_unif = c->defs[spill_temp];
-                if (orig_unif->qpu.sig.ldunif) {
-                        uniform_index = orig_unif->uniform;
-                } else {
-                        assert(orig_unif->src[0].file == QFILE_UNIF);
-                        uniform_index = orig_unif->src[0].index;
-                }
+                uniform_index = orig_unif->uniform;
         }
 
         vir_for_each_inst_inorder_safe(inst, c) {
         }
 
         vir_for_each_inst_inorder_safe(inst, c) {
@@ -240,10 +232,11 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                         c->cursor = vir_before_inst(inst);
 
                         if (is_uniform) {
                         c->cursor = vir_before_inst(inst);
 
                         if (is_uniform) {
-                                inst->src[i] =
-                                        vir_MOV(c, vir_uniform(c,
-                                                               c->uniform_contents[uniform_index],
-                                                               c->uniform_data[uniform_index]));
+                                struct qreg unif =
+                                        vir_uniform(c,
+                                                    c->uniform_contents[uniform_index],
+                                                    c->uniform_data[uniform_index]);
+                                inst->src[i] = unif;
                         } else {
                                 v3d_emit_spill_tmua(c, spill_offset);
                                 vir_emit_thrsw(c);
                         } else {
                                 v3d_emit_spill_tmua(c, spill_offset);
                                 vir_emit_thrsw(c);
index cc499d8ba09fc66bd95b71ee868f6c00e614fdf4..bbc2950432693fc448db953064a7d7e833e6f0d3 100644 (file)
@@ -92,16 +92,6 @@ new_qpu_nop_before(struct qinst *inst)
         return q;
 }
 
         return q;
 }
 
-static void
-new_ldunif_instr(struct qinst *inst, int i)
-{
-        struct qinst *ldunif = new_qpu_nop_before(inst);
-
-        ldunif->qpu.sig.ldunif = true;
-        assert(inst->src[i].file == QFILE_UNIF);
-        ldunif->uniform = inst->src[i].index;
-}
-
 /**
  * Allocates the src register (accumulator or register file) into the RADDR
  * fields of the instruction.
 /**
  * Allocates the src register (accumulator or register file) into the RADDR
  * fields of the instruction.
@@ -219,7 +209,6 @@ v3d_generate_code_block(struct v3d_compile *c,
 
                 int nsrc = vir_get_nsrc(qinst);
                 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
 
                 int nsrc = vir_get_nsrc(qinst);
                 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
-                bool emitted_ldunif = false;
                 for (int i = 0; i < nsrc; i++) {
                         int index = qinst->src[i].index;
                         switch (qinst->src[i].file) {
                 for (int i = 0; i < nsrc; i++) {
                         int index = qinst->src[i].index;
                         switch (qinst->src[i].file) {
@@ -236,19 +225,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                         case QFILE_TEMP:
                                 src[i] = temp_registers[index];
                                 break;
                         case QFILE_TEMP:
                                 src[i] = temp_registers[index];
                                 break;
-                        case QFILE_UNIF:
-                                /* XXX perf: If the last ldunif we emitted was
-                                 * the same uniform value, skip it.  Common
-                                 * for multop/umul24 sequences.
-                                 */
-                                if (!emitted_ldunif) {
-                                        new_ldunif_instr(qinst, i);
-                                        c->num_uniforms++;
-                                        emitted_ldunif = true;
-                                }
-
-                                src[i] = qpu_acc(5);
-                                break;
                         case QFILE_SMALL_IMM:
                                 src[i].smimm = true;
                                 break;
                         case QFILE_SMALL_IMM:
                                 src[i].smimm = true;
                                 break;
@@ -301,7 +277,6 @@ v3d_generate_code_block(struct v3d_compile *c,
                         dst = qpu_magic(V3D_QPU_WADDR_TLBU);
                         break;
 
                         dst = qpu_magic(V3D_QPU_WADDR_TLBU);
                         break;
 
-                case QFILE_UNIF:
                 case QFILE_SMALL_IMM:
                 case QFILE_LOAD_IMM:
                         assert(!"not reached");
                 case QFILE_SMALL_IMM:
                 case QFILE_LOAD_IMM:
                         assert(!"not reached");