pan/bi: Lower and optimize NIR
author: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 5 Mar 2020 15:11:39 +0000 (10:11 -0500)
committer: Marge Bot <eric+marge@anholt.net>
Sat, 7 Mar 2020 00:37:39 +0000 (00:37 +0000)
Pretty much a copypaste from Midgard except where architectural
decisions diverge around vectorization. On that note, we will need our
own ALU scalarization pass at some point (or rather we'll need to extend
nir_lower_alu_to_scalar) to allow partial lowering for 8/16-bit ops. I.e.
we'll approximately need to lower

   vec4 16 ssa_2 = fadd ssa_0, ssa_1

to

   vec2 16 ssa_2 = fadd ssa_0.xy, ssa_1.xy
   vec2 16 ssa_3 = fadd ssa_0.zw, ssa_1.zw
   vec4 16 ssa_4 = vec4 ssa_2.x, ssa_2.y, ssa_3.x, ssa_3.y

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4097>

src/panfrost/bifrost/bifrost_compile.c
src/panfrost/bifrost/cmdline.c
src/panfrost/bifrost/compiler.h

index 754876f89ee6b6797418e41483c51348fa6da519..87a447a9ab3e801f1e8c8f8bf6718be62defb95d 100644 (file)
 #include "compiler.h"
 #include "bi_quirks.h"
 
+static int
+glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+        return glsl_count_attribute_slots(type, false);
+}
+
+static void
+bi_optimize_nir(nir_shader *nir)
+{
+        bool progress;
+        unsigned lower_flrp = 16 | 32 | 64;
+
+        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
+        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);
+
+        nir_lower_tex_options lower_tex_options = {
+                .lower_txs_lod = true,
+                .lower_txp = ~0,
+                .lower_tex_without_implicit_lod = true,
+                .lower_txd = true,
+        };
+
+        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
+
+        do {
+                progress = false;
+
+                NIR_PASS(progress, nir, nir_lower_var_copies);
+                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+
+                NIR_PASS(progress, nir, nir_copy_prop);
+                NIR_PASS(progress, nir, nir_opt_remove_phis);
+                NIR_PASS(progress, nir, nir_opt_dce);
+                NIR_PASS(progress, nir, nir_opt_dead_cf);
+                NIR_PASS(progress, nir, nir_opt_cse);
+                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+                NIR_PASS(progress, nir, nir_opt_algebraic);
+                NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+                if (lower_flrp != 0) {
+                        bool lower_flrp_progress = false;
+                        NIR_PASS(lower_flrp_progress,
+                                 nir,
+                                 nir_lower_flrp,
+                                 lower_flrp,
+                                 false /* always_precise */,
+                                 nir->options->lower_ffma);
+                        if (lower_flrp_progress) {
+                                NIR_PASS(progress, nir,
+                                         nir_opt_constant_folding);
+                                progress = true;
+                        }
+
+                        /* Nothing should rematerialize any flrps, so we only
+                         * need to do this lowering once.
+                         */
+                        lower_flrp = 0;
+                }
+
+                NIR_PASS(progress, nir, nir_opt_undef);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll,
+                         nir_var_shader_in |
+                         nir_var_shader_out |
+                         nir_var_function_temp);
+        } while (progress);
+
+        NIR_PASS(progress, nir, nir_opt_algebraic_late);
+
+        /* Take us out of SSA */
+        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
+        NIR_PASS(progress, nir, nir_convert_from_ssa, true);
+}
+
 void
 bifrost_compile_shader_nir(nir_shader *nir, bifrost_program *program, unsigned product_id)
 {
         bi_context *ctx = rzalloc(NULL, bi_context);
         ctx->nir = nir;
+        ctx->stage = nir->info.stage;
         ctx->quirks = bifrost_get_quirks(product_id);
 
+        /* Lower gl_Position pre-optimisation, but after lowering vars to ssa
+         * (so we don't accidentally duplicate the epilogue since mesa/st has
+         * messed with our I/O quite a bit already) */
+
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+        if (ctx->stage == MESA_SHADER_VERTEX) {
+                NIR_PASS_V(nir, nir_lower_viewport_transform);
+                NIR_PASS_V(nir, nir_lower_point_size, 1.0, 1024.0);
+        }
+
+        NIR_PASS_V(nir, nir_split_var_copies);
+        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+        NIR_PASS_V(nir, nir_lower_var_copies);
+        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);
+        NIR_PASS_V(nir, nir_lower_ssbo);
+
+        /* We have to lower ALU to scalar ourselves since viewport
+         * transformations produce vector ops */
+        NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+
+        bi_optimize_nir(nir);
         nir_print_shader(nir, stdout);
 
+        bi_print_shader(ctx, stdout);
+
         ralloc_free(ctx);
 }
index bf55ded7df818336fd73c7d023970594f581a992..7658c7cc3438dece521ab952e8407028d50b0d78 100644 (file)
@@ -58,8 +58,6 @@ compile_shader(char **argv)
                 NIR_PASS_V(nir[i], nir_split_var_copies);
                 NIR_PASS_V(nir[i], nir_lower_var_copies);
 
-                NIR_PASS_V(nir[i], nir_lower_alu_to_scalar, NULL, NULL);
-
                 /* before buffers and vars_to_ssa */
                 NIR_PASS_V(nir[i], gl_nir_lower_images, true);
 
index 74b9e2205c361546bb7ca57d46fee57ff12b8cda..97147d0a72365b55c8010e629dfc2691f0bbc30d 100644 (file)
@@ -317,6 +317,7 @@ typedef struct bi_block {
 
 typedef struct {
        nir_shader *nir;
+       gl_shader_stage stage;
        struct list_head blocks; /* list of bi_block */
        uint32_t quirks;
 } bi_context;