pan/midgard: Add units for more instructions
[mesa.git] / src / panfrost / midgard / midgard_opt_perspective.c
1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
/* Midgard has some accelerated support for perspective projection on the
 * load/store pipes. So the first perspective projection pass looks for
 * lowered/open-coded perspective projection of the form "fmul (A.xyz,
 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrites it with a native
 * perspective division opcode (on the load/store pipe). Caveats apply: the
 * frcp should be used only once to make this optimization worthwhile. And the
 * source of the frcp ought to be a varying to make it worthwhile...
31 *
 * The second pass in this file is a step #2 of sorts: fusing that load/store
 * projection into a varying load instruction (they can be done together
 * implicitly). This depends on the combination pass. Again, a caveat: the
 * varying should only be used once to make this worthwhile.
 */
37
38 #include "compiler.h"
39
40 bool
41 midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
42 {
43 bool progress = false;
44
45 mir_foreach_instr_in_block_safe(block, ins) {
46 /* First search for fmul */
47 if (ins->type != TAG_ALU_4) continue;
48 if (ins->alu.op != midgard_alu_op_fmul) continue;
49
50 /* TODO: Flip */
51
52 /* Check the swizzles */
53
54 midgard_vector_alu_src src1 =
55 vector_alu_from_unsigned(ins->alu.src1);
56
57 midgard_vector_alu_src src2 =
58 vector_alu_from_unsigned(ins->alu.src2);
59
60 if (!mir_is_simple_swizzle(src1.swizzle, ins->mask)) continue;
61 if (src2.swizzle != SWIZZLE_XXXX) continue;
62
63 /* Awesome, we're the right form. Now check where src2 is from */
64 unsigned frcp = ins->ssa_args.src1;
65 unsigned to = ins->ssa_args.dest;
66
67 if (frcp & IS_REG) continue;
68 if (to & IS_REG) continue;
69
70 bool frcp_found = false;
71 unsigned frcp_component = 0;
72 unsigned frcp_from = 0;
73
74 mir_foreach_instr_in_block_safe(block, sub) {
75 if (sub->ssa_args.dest != frcp) continue;
76
77 midgard_vector_alu_src s =
78 vector_alu_from_unsigned(sub->alu.src1);
79
80 frcp_component = s.swizzle & 3;
81 frcp_from = sub->ssa_args.src0;
82
83 frcp_found =
84 (sub->type == TAG_ALU_4) &&
85 (sub->alu.op == midgard_alu_op_frcp);
86 break;
87 }
88
89 if (!frcp_found) continue;
90 if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue;
91 if (!mir_single_use(ctx, frcp)) continue;
92
93 /* Heuristic: check if the frcp is from a single-use varying */
94
95 bool ok = false;
96
97 /* One for frcp and one for fmul */
98 if (mir_use_count(ctx, frcp_from) > 2) continue;
99
100 mir_foreach_instr_in_block_safe(block, v) {
101 if (v->ssa_args.dest != frcp_from) continue;
102 if (v->type != TAG_LOAD_STORE_4) break;
103 if (!OP_IS_LOAD_VARY_F(v->load_store.op)) break;
104
105 ok = true;
106 break;
107 }
108
109 if (!ok)
110 continue;
111
112 /* Nice, we got the form spot on. Let's convert! */
113
114 midgard_instruction accel = {
115 .type = TAG_LOAD_STORE_4,
116 .mask = ins->mask,
117 .ssa_args = {
118 .dest = to,
119 .src0 = frcp_from,
120 .src1 = -1
121 },
122 .load_store = {
123 .op = frcp_component == COMPONENT_W ?
124 midgard_op_ldst_perspective_division_w :
125 midgard_op_ldst_perspective_division_z,
126 .swizzle = SWIZZLE_XYZW,
127 .unknown = 0x24,
128 }
129 };
130
131 mir_insert_instruction_before(ins, accel);
132 mir_remove_instruction(ins);
133
134 progress |= true;
135 }
136
137 return progress;
138 }
139
140 bool
141 midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
142 {
143 bool progress = false;
144
145 mir_foreach_instr_in_block_safe(block, ins) {
146 /* Search for a projection */
147 if (ins->type != TAG_LOAD_STORE_4) continue;
148 if (!OP_IS_PROJECTION(ins->load_store.op)) continue;
149
150 unsigned vary = ins->ssa_args.src0;
151 unsigned to = ins->ssa_args.dest;
152
153 if (vary & IS_REG) continue;
154 if (to & IS_REG) continue;
155 if (!mir_single_use(ctx, vary)) continue;
156
157 /* Check for a varying source. If we find it, we rewrite */
158
159 bool rewritten = false;
160
161 mir_foreach_instr_in_block_safe(block, v) {
162 if (v->ssa_args.dest != vary) continue;
163 if (v->type != TAG_LOAD_STORE_4) break;
164 if (!OP_IS_LOAD_VARY_F(v->load_store.op)) break;
165
166 /* We found it, so rewrite it to project. Grab the
167 * modifier */
168
169 unsigned param = v->load_store.varying_parameters;
170 midgard_varying_parameter p;
171 memcpy(&p, &param, sizeof(p));
172
173 if (p.modifier != midgard_varying_mod_none)
174 break;
175
176 bool projects_w =
177 ins->load_store.op == midgard_op_ldst_perspective_division_w;
178
179 p.modifier = projects_w ?
180 midgard_varying_mod_perspective_w :
181 midgard_varying_mod_perspective_z;
182
183 /* Aliasing rules are annoying */
184 memcpy(&param, &p, sizeof(p));
185 v->load_store.varying_parameters = param;
186
187 /* Use the new destination */
188 v->ssa_args.dest = to;
189
190 rewritten = true;
191 break;
192 }
193
194 if (rewritten)
195 mir_remove_instruction(ins);
196
197 progress |= rewritten;
198 }
199
200 return progress;
201 }