nir: Add a new memory_barrier_tcs_patch intrinsic
[mesa.git] / src / panfrost / midgard / midgard_derivatives.c
1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27 #include "compiler.h"
28
29 /* Derivatives in Midgard are implemented on the texture pipe, rather than the
30 * ALU pipe as suggested by NIR. The rationale is that normal texture
31 * instructions require (implicit) derivatives to be calculated anyway, so it
32 * makes sense to reuse the derivative logic. Thus, in addition to the usual
33 * texturing ops that calculate derivatives, there are two explicit texture ops
34 * dFdx/dFdy that perform differencing across helper invocations in either
35 * horizontal or vertical directions.
36 *
37 * One major caveat is that derivatives can only be calculated on up to a vec2
38 * at a time. This restriction presumably is to save some silicon, as 99% of
39 * derivatives will be vec2 (autocalculating mip levels of 2D texture
40 * coordinates). Admittedly I'm not sure why 3D textures can have their levels
41 * calculated automatically, umm... Pressing on.
42 *
43 * This caveat is handled in two steps. During the first pass (code
44 * generation), we generate texture ops 1:1 to the incoming NIR derivatives.
45 * This works for float/vec2 but not for vec3/vec4. A later lowering pass will
46 * scan for vec3/vec4 derivatives and lower (split) to multiple instructions.
47  * This pass is separated as we'll have to rewrite the destination into a
48 * register (rather than SSA) and we'd rather do this after we have the whole
49 * IR in front of us to do it at once.
50 */
51
52 static unsigned
53 mir_derivative_op(nir_op op)
54 {
55 switch (op) {
56 case nir_op_fddx:
57 case nir_op_fddx_fine:
58 case nir_op_fddx_coarse:
59 return TEXTURE_OP_DFDX;
60
61 case nir_op_fddy:
62 case nir_op_fddy_fine:
63 case nir_op_fddy_coarse:
64 return TEXTURE_OP_DFDY;
65
66 default:
67 unreachable("Invalid derivative op");
68 }
69 }
70
71 /* Returns true if a texturing op computes derivatives either explicitly or
72 * implicitly */
73
74 bool
75 mir_op_computes_derivatives(gl_shader_stage stage, unsigned op)
76 {
77 /* Only fragment shaders may compute derivatives, but the sense of
78 * "normal" changes in vertex shaders on certain GPUs */
79
80 if (op == TEXTURE_OP_NORMAL && stage != MESA_SHADER_FRAGMENT)
81 return false;
82
83 switch (op) {
84 case TEXTURE_OP_NORMAL:
85 case TEXTURE_OP_DFDX:
86 case TEXTURE_OP_DFDY:
87 assert(stage == MESA_SHADER_FRAGMENT);
88 return true;
89 default:
90 return false;
91 }
92 }
93
94 void
95 midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
96 {
97 /* Create texture instructions */
98
99 unsigned nr_components = nir_dest_num_components(instr->dest.dest);
100
101 midgard_instruction ins = {
102 .type = TAG_TEXTURE_4,
103 .mask = mask_of(nr_components),
104 .dest = nir_dest_index(ctx, &instr->dest.dest),
105 .src = { nir_alu_src_index(ctx, &instr->src[0]), ~0, ~0, ~0 },
106 .texture = {
107 .op = mir_derivative_op(instr->op),
108 .format = MALI_TEX_2D,
109 .in_reg_full = 1,
110 .out_full = 1,
111 .sampler_type = MALI_SAMPLER_FLOAT,
112 }
113 };
114
115 ins.swizzle[0][2] = ins.swizzle[0][3] = COMPONENT_X;
116 ins.swizzle[1][2] = ins.swizzle[1][3] = COMPONENT_X;
117
118 if (!instr->dest.dest.is_ssa)
119 ins.mask &= instr->dest.write_mask;
120
121 emit_mir_instruction(ctx, ins);
122
123 /* TODO: Set .cont/.last automatically via dataflow analysis */
124 ctx->texture_op_count++;
125 }
126
127 void
128 midgard_lower_derivatives(compiler_context *ctx, midgard_block *block)
129 {
130 mir_foreach_instr_in_block_safe(block, ins) {
131 if (ins->type != TAG_TEXTURE_4) continue;
132 if (!OP_IS_DERIVATIVE(ins->texture.op)) continue;
133
134 /* Check if we need to split */
135
136 bool upper = ins->mask & 0b1100;
137 bool lower = ins->mask & 0b0011;
138
139 if (!(upper && lower)) continue;
140
141 /* Duplicate for dedicated upper instruction */
142
143 midgard_instruction dup;
144 memcpy(&dup, ins, sizeof(dup));
145
146 /* Fixup masks. Make original just lower and dupe just upper */
147
148 ins->mask &= 0b0011;
149 dup.mask &= 0b1100;
150
151 /* Fixup swizzles */
152 dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X;
153 dup.swizzle[0][3] = COMPONENT_Y;
154
155 dup.swizzle[1][0] = COMPONENT_Z;
156 dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W;
157
158 /* Insert the new instruction */
159 mir_insert_instruction_before(ctx, mir_next_op(ins), dup);
160
161 /* TODO: Set .cont/.last automatically via dataflow analysis */
162 ctx->texture_op_count++;
163
164 /* We'll need both instructions to write to the same index, so
165 * rewrite to use a register */
166
167 unsigned new = make_compiler_temp_reg(ctx);
168 mir_rewrite_index(ctx, ins->dest, new);
169 }
170 }