pan/midgard: Implement derivatives
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Mon, 29 Jul 2019 22:11:12 +0000 (15:11 -0700)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tue, 30 Jul 2019 17:01:19 +0000 (10:01 -0700)
Implement the fddx and fddy opcodes in the Midgard compiler.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/panfrost/midgard/compiler.h
src/panfrost/midgard/meson.build
src/panfrost/midgard/midgard_compile.c
src/panfrost/midgard/midgard_derivatives.c [new file with mode: 0644]
src/panfrost/midgard/midgard_emit.c

index 93c39686f137b0f192f92182488447e307dfb2bc..decffeb2f70428ff42ea94243c19c5a1f3c1f266 100644 (file)
@@ -541,6 +541,13 @@ emit_ubo_read(
         nir_src *indirect_offset,
         unsigned index);
 
+void
+midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr);
+
+void
+midgard_lower_derivatives(compiler_context *ctx, midgard_block *block);
+
+bool mir_op_computes_derivatives(unsigned op);
 
 /* Final emission */
 
index 55d8d6df0cc7c85d7d1de865b445826213ee3ac6..8da5184e871b05fb19eb74befc8ae8c2eb431132 100644 (file)
@@ -24,6 +24,7 @@ libpanfrost_midgard_files = files(
   'mir.c',
   'midgard_print.c',
   'midgard_schedule.c',
+  'midgard_derivatives.c',
   'midgard_emit.c',
   'midgard_ra.c',
   'midgard_ra_pipeline.c',
index aa8fe9a68a5e9fdd39314ad59b5289b0cbb83013..ae526e89effd4495fe586232c0fedcf62e8bb8ea 100644 (file)
@@ -714,6 +714,14 @@ reg_mode_for_nir(nir_alu_instr *instr)
 static void
 emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 {
+        /* Derivatives end up emitted on the texture pipe, not the ALUs. This
+         * is handled elsewhere */
+
+        if (instr->op == nir_op_fddx || instr->op == nir_op_fddy) {
+                midgard_emit_derivatives(ctx, instr);
+                return;
+        }
+
         bool is_ssa = instr->dest.dest.is_ssa;
 
         unsigned dest = nir_dest_index(ctx, &instr->dest.dest);
@@ -2347,6 +2355,7 @@ midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midga
 
         mir_foreach_block(ctx, block) {
                 midgard_lower_invert(ctx, block);
+                midgard_lower_derivatives(ctx, block);
         }
 
         /* Nested control-flow can result in dead branches at the end of the
diff --git a/src/panfrost/midgard/midgard_derivatives.c b/src/panfrost/midgard/midgard_derivatives.c
new file mode 100644 (file)
index 0000000..3a36965
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+ */
+
+#include "compiler.h"
+
+/* Derivatives in Midgard are implemented on the texture pipe, rather than the
+ * ALU pipe as suggested by NIR. The rationale is that normal texture
+ * instructions require (implicit) derivatives to be calculated anyway, so it
+ * makes sense to reuse the derivative logic. Thus, in addition to the usual
+ * texturing ops that calculate derivatives, there are two explicit texture ops
+ * dFdx/dFdy that perform differencing across helper invocations in either
+ * horizontal or vertical directions.
+ *
+ * One major caveat is that derivatives can only be calculated on up to a vec2
+ * at a time. This restriction is presumably there to save silicon, since 99%
+ * of derivatives are vec2 (autocalculating mip levels of 2D texture
+ * coordinates). Admittedly, it's unclear how 3D textures manage to have their
+ * mip levels calculated automatically under this limit, but pressing on.
+ *
+ * This caveat is handled in two steps. During the first pass (code
+ * generation), we generate texture ops 1:1 to the incoming NIR derivatives.
+ * This works for float/vec2 but not for vec3/vec4. A later lowering pass will
+ * scan for vec3/vec4 derivatives and lower (split) them into multiple
+ * instructions. This pass is kept separate because it has to rewrite the
+ * destination into a register (rather than SSA), and we'd rather wait until
+ * the whole IR is in front of us so the rewrite can happen all at once.
+ */
+
+static unsigned
+mir_derivative_op(nir_op op)
+{
+        switch (op) {
+        case nir_op_fddx:
+        case nir_op_fddx_fine:
+        case nir_op_fddx_coarse:
+                return TEXTURE_OP_DFDX;
+
+        case nir_op_fddy:
+        case nir_op_fddy_fine:
+        case nir_op_fddy_coarse:
+                return TEXTURE_OP_DFDY;
+
+        default:
+                unreachable("Invalid derivative op");
+        }
+}
+
+/* Returns true if a texturing op computes derivatives either explicitly or
+ * implicitly */
+
+bool
+mir_op_computes_derivatives(unsigned op)
+{
+        switch (op) {
+        case TEXTURE_OP_NORMAL:
+        case TEXTURE_OP_DFDX:
+        case TEXTURE_OP_DFDY:
+                return true;
+        default:
+                return false;
+        }
+}
+
+void
+midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
+{
+        /* Create texture instructions */
+
+        unsigned nr_components = nir_dest_num_components(instr->dest.dest);
+
+        midgard_instruction ins = {
+                .type = TAG_TEXTURE_4,
+                .mask = mask_of(nr_components),
+                .ssa_args = {
+                        .dest = nir_dest_index(ctx, &instr->dest.dest),
+                        .src0 = nir_alu_src_index(ctx, &instr->src[0]),
+                        .src1 = -1,
+                },
+                .texture = {
+                        .op = mir_derivative_op(instr->op),
+                        .format = MALI_TEX_2D,
+                        .swizzle = SWIZZLE_XYXX,
+                        .in_reg_swizzle = SWIZZLE_XYXX,
+
+                        .in_reg_full = 1,
+                        .out_full = 1,
+                        .sampler_type = MALI_SAMPLER_FLOAT,
+                }
+        };
+
+        if (!instr->dest.dest.is_ssa)
+                ins.mask &= instr->dest.write_mask;
+
+        emit_mir_instruction(ctx, ins);
+
+        /* TODO: Set .cont/.last automatically via dataflow analysis */
+        ctx->texture_op_count++;
+}
+
+void
+midgard_lower_derivatives(compiler_context *ctx, midgard_block *block)
+{
+        mir_foreach_instr_in_block_safe(block, ins) {
+                if (ins->type != TAG_TEXTURE_4) continue;
+                if (!OP_IS_DERIVATIVE(ins->texture.op)) continue;
+
+                /* Check if we need to split */
+
+                bool upper = ins->mask & 0b1100;
+                bool lower = ins->mask & 0b0011;
+
+                if (!(upper && lower)) continue;
+
+                /* Duplicate for dedicated upper instruction */
+
+                midgard_instruction dup;
+                memcpy(&dup, ins, sizeof(dup));
+
+                /* Fixup masks. Make original just lower and dupe just upper */
+
+                ins->mask &= 0b0011;
+                dup.mask &= 0b1100;
+
+                /* Fixup swizzles */
+                assert(ins->texture.swizzle == SWIZZLE_XYXX);
+                assert(ins->texture.in_reg_swizzle == SWIZZLE_XYXX);
+                dup.texture.swizzle = SWIZZLE_XXXY;
+                dup.texture.in_reg_swizzle = SWIZZLE_ZWWW;
+
+                /* Insert the new instruction */
+                mir_insert_instruction_before(mir_next_op(ins), dup);
+
+                /* TODO: Set .cont/.last automatically via dataflow analysis */
+                ctx->texture_op_count++;
+
+                /* We'll need both instructions to write to the same index, so
+                 * rewrite to use a register */
+
+                unsigned new = make_compiler_temp_reg(ctx);
+                mir_rewrite_index(ctx, ins->ssa_args.dest, new);
+        }
+}
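
As a worked illustration of the split described in the file's header comment, here is a minimal standalone sketch of the mask and swizzle bookkeeping for a vec4 derivative. The SWZ macro is a hypothetical stand-in for the driver's real SWIZZLE_* encoding; only the masks and the routing they describe mirror the pass above.

#include <stdio.h>

/* Hypothetical 2-bit-per-lane swizzle encoding, standing in for the real
 * SWIZZLE_* macros purely for illustration. */
#define SWZ(x, y, z, w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6))

int
main(void)
{
        /* Code generation emits one texture op for a vec4 fddx, with a full
         * write mask and the XYXX swizzles used by the float/vec2 case. */
        unsigned mask = 0xF;

        /* The lowering pass keeps the original op for the low half and
         * duplicates it for the high half. */
        unsigned lo_mask = mask & 0x3;      /* 0b0011, writes .xy */
        unsigned hi_mask = mask & 0xC;      /* 0b1100, writes .zw */

        /* The duplicate fetches the source's .zw lanes into the derivative
         * unit's first two lanes (ZWWW in-swizzle); the XXXY out-swizzle
         * combined with the .zw write mask then routes the two computed
         * derivatives into the destination's .zw components. */
        unsigned hi_in  = SWZ(2, 3, 3, 3);  /* ZWWW */
        unsigned hi_out = SWZ(0, 0, 0, 1);  /* XXXY */

        printf("lo mask 0x%x, hi mask 0x%x, hi in 0x%02x, hi out 0x%02x\n",
               lo_mask, hi_mask, hi_in, hi_out);
        return 0;
}

After the split, both halves write the same destination, which is why the pass rewrites the SSA destination to a compiler temporary register via mir_rewrite_index.
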
index ec90d46ba2177f03ba2458dc27c46e607b8e8dec..ce85dfc5fd75791b2564c95f8fe3cb37553db238 100644 (file)
@@ -258,7 +258,7 @@ emit_binary_bundle(compiler_context *ctx,
 
                 ctx->texture_op_count--;
 
-                if (ins->texture.op == TEXTURE_OP_NORMAL) {
+                if (mir_op_computes_derivatives(ins->texture.op)) {
                         bool continues = ctx->texture_op_count > 0;
                         ins->texture.cont = continues;
                         ins->texture.last = !continues;
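
For context on the hunk above: after this change the cont/last pairing applies to any op for which mir_op_computes_derivatives() returns true, not just TEXTURE_OP_NORMAL. The following minimal sketch is a hypothetical harness; only the counter decrement and flag assignment mirror emit_binary_bundle. It shows the flags the hand counting produces for a run of three such ops.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in modelling only the sequencing flags of a
 * texture-pipe instruction. */
struct tex_flags {
        bool cont;
        bool last;
};

int
main(void)
{
        /* Suppose the shader emitted three texture-pipe ops that compute
         * derivatives (TEXTURE_OP_NORMAL, TEXTURE_OP_DFDX or _DFDY), so
         * texture_op_count was bumped to 3 during emission. */
        unsigned texture_op_count = 3;
        struct tex_flags ops[3];

        for (unsigned i = 0; i < 3; ++i) {
                /* Mirrors emit_binary_bundle: decrement first, then mark the
                 * op as continuing if any ops remain, or as the last one. */
                texture_op_count--;
                bool continues = texture_op_count > 0;
                ops[i].cont = continues;
                ops[i].last = !continues;
                printf("op %u: cont=%d, last=%d\n", i, ops[i].cont, ops[i].last);
        }

        /* Prints cont=1/last=0 for the first two ops and cont=0/last=1 for
         * the final one; this is the hand counting that the TODO comments
         * above hope to replace with proper dataflow analysis. */
        return 0;
}
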