nir: Add a new memory_barrier_tcs_patch intrinsic
[mesa.git] / src / panfrost / midgard / midgard_derivatives.c
1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "compiler.h"
28
29 /* Derivatives in Midgard are implemented on the texture pipe, rather than the
30 * ALU pipe as suggested by NIR. The rationale is that normal texture
31 * instructions require (implicit) derivatives to be calculated anyway, so it
32 * makes sense to reuse the derivative logic. Thus, in addition to the usual
33 * texturing ops that calculate derivatives, there are two explicit texture ops
34 * dFdx/dFdy that perform differencing across helper invocations in either
35 * horizontal or vertical directions.
36 *
37 * One major caveat is that derivatives can only be calculated on up to a vec2
38 * at a time. This restriction presumably is to save some silicon, as 99% of
39 * derivatives will be vec2 (autocalculating mip levels of 2D texture
40 * coordinates). Admittedly I'm not sure why 3D textures can have their levels
41 * calculated automatically, umm... Pressing on.
42 *
43 * This caveat is handled in two steps. During the first pass (code
44 * generation), we generate texture ops 1:1 to the incoming NIR derivatives.
45 * This works for float/vec2 but not for vec3/vec4. A later lowering pass will
46 * scan for vec3/vec4 derivatives and lower (split) to multiple instructions.
47  * This pass is separated as we'll have to rewrite the destination into a
48 * register (rather than SSA) and we'd rather do this after we have the whole
49 * IR in front of us to do it at once.
50 */
51
52 static unsigned
53 mir_derivative_op(nir_op op)
54 {
55 switch (op) {
56 case nir_op_fddx:
57 case nir_op_fddx_fine:
58 case nir_op_fddx_coarse:
59 return TEXTURE_OP_DFDX;
60
61 case nir_op_fddy:
62 case nir_op_fddy_fine:
63 case nir_op_fddy_coarse:
64 return TEXTURE_OP_DFDY;
65
66 default:
67 unreachable("Invalid derivative op");
68 }
69 }
70
71 /* Returns true if a texturing op computes derivatives either explicitly or
72 * implicitly */
73
74 bool
75 mir_op_computes_derivatives(gl_shader_stage stage, unsigned op)
76 {
77 /* Only fragment shaders may compute derivatives, but the sense of
78 * "normal" changes in vertex shaders on certain GPUs */
79
80 if (op == TEXTURE_OP_NORMAL && stage != MESA_SHADER_FRAGMENT)
81 return false;
82
83 switch (op) {
84 case TEXTURE_OP_NORMAL:
85 case TEXTURE_OP_DFDX:
86 case TEXTURE_OP_DFDY:
87 assert(stage == MESA_SHADER_FRAGMENT);
88 return true;
89 default:
90 return false;
91 }
92 }
93
94 void
95 midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
96 {
97 /* Create texture instructions */
98
99 unsigned nr_components = nir_dest_num_components(instr->dest.dest);
100
101 midgard_instruction ins = {
102 .type = TAG_TEXTURE_4,
103 .mask = mask_of(nr_components),
104 .dest = nir_dest_index(ctx, &instr->dest.dest),
105 .src = { nir_alu_src_index(ctx, &instr->src[0]), ~0, ~0, ~0 },
106 .texture = {
107 .op = mir_derivative_op(instr->op),
108 .format = MALI_TEX_2D,
109 .in_reg_full = 1,
110 .out_full = 1,
111 .sampler_type = MALI_SAMPLER_FLOAT,
112 }
113 };
114
115 ins.swizzle[0][2] = ins.swizzle[0][3] = COMPONENT_X;
116 ins.swizzle[1][2] = ins.swizzle[1][3] = COMPONENT_X;
117
118 if (!instr->dest.dest.is_ssa)
119 ins.mask &= instr->dest.write_mask;
120
121 emit_mir_instruction(ctx, ins);
122
123 /* TODO: Set .cont/.last automatically via dataflow analysis */
124 ctx->texture_op_count++;
125 }
126
127 void
128 midgard_lower_derivatives(compiler_context *ctx, midgard_block *block)
129 {
130 mir_foreach_instr_in_block_safe(block, ins) {
131 if (ins->type != TAG_TEXTURE_4) continue;
132 if (!OP_IS_DERIVATIVE(ins->texture.op)) continue;
133
134 /* Check if we need to split */
135
136 bool upper = ins->mask & 0b1100;
137 bool lower = ins->mask & 0b0011;
138
139 if (!(upper && lower)) continue;
140
141 /* Duplicate for dedicated upper instruction */
142
143 midgard_instruction dup;
144 memcpy(&dup, ins, sizeof(dup));
145
146 /* Fixup masks. Make original just lower and dupe just upper */
147
148 ins->mask &= 0b0011;
149 dup.mask &= 0b1100;
150
151 /* Fixup swizzles */
152 dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X;
153 dup.swizzle[0][3] = COMPONENT_Y;
154
155 dup.swizzle[1][0] = COMPONENT_Z;
156 dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W;
157
158 /* Insert the new instruction */
159 mir_insert_instruction_before(ctx, mir_next_op(ins), dup);
160
161 /* TODO: Set .cont/.last automatically via dataflow analysis */
162 ctx->texture_op_count++;
163
164 /* We'll need both instructions to write to the same index, so
165 * rewrite to use a register */
166
167 unsigned new = make_compiler_temp_reg(ctx);
168 mir_rewrite_index(ctx, ins->dest, new);
169 }
170 }