ir3: Plumb through bindless support
[mesa.git] / src / freedreno / ir3 / ir3_depth.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "util/u_math.h"
28
29 #include "ir3.h"
30 #include "ir3_shader.h"
31
/*
 * Instruction Depth:
 *
 * Calculates the weighted instruction depth, i.e. the sum of the number
 * of needed instructions plus delay slots back to the original input
 * (i.e. INPUT or CONST).  That is to say, an instruction's depth is:
 *
 *   depth(instr) {
 *     d = 0;
 *     // for each src register:
 *     foreach (src in instr->regs[1..n])
 *       d = max(d, delayslots(src->instr, n) + depth(src->instr));
 *     return d + 1;
 *   }
 *
 * After an instruction's depth is calculated, it is inserted into the
 * block's depth-sorted list, which is used by the scheduling pass.
 */
50
51 void
52 ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
53 {
54 /* remove from existing spot in list: */
55 list_delinit(&instr->node);
56
57 /* find where to re-insert instruction: */
58 foreach_instr (pos, list) {
59 if (pos->depth > instr->depth) {
60 list_add(&instr->node, &pos->node);
61 return;
62 }
63 }
64 /* if we get here, we didn't find an insertion spot: */
65 list_addtail(&instr->node, list);
66 }
67
68 static void
69 ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep)
70 {
71 struct ir3_instruction *src;
72
73 /* don't mark falsedep's as used, but otherwise process them normally: */
74 if (!falsedep)
75 instr->flags &= ~IR3_INSTR_UNUSED;
76
77 if (ir3_instr_check_mark(instr))
78 return;
79
80 instr->depth = 0;
81
82 foreach_ssa_src_n (src, i, instr) {
83 unsigned sd;
84
85 /* visit child to compute it's depth: */
86 ir3_instr_depth(src, boost, __is_false_dep(instr, i));
87
88 /* for array writes, no need to delay on previous write: */
89 if (i == 0)
90 continue;
91
92 sd = ir3_delayslots(src, instr, i, true) + src->depth;
93 sd += boost;
94
95 instr->depth = MAX2(instr->depth, sd);
96 }
97
98 if (!is_meta(instr))
99 instr->depth++;
100
101 ir3_insert_by_depth(instr, &instr->block->instr_list);
102 }
103
104 static bool
105 remove_unused_by_block(struct ir3_block *block)
106 {
107 bool progress = false;
108 foreach_instr_safe (instr, &block->instr_list) {
109 if (instr->opc == OPC_END || instr->opc == OPC_CHSH || instr->opc == OPC_CHMASK)
110 continue;
111 if (instr->flags & IR3_INSTR_UNUSED) {
112 if (instr->opc == OPC_META_SPLIT) {
113 struct ir3_instruction *src = ssa(instr->regs[1]);
114 /* tex (cat5) instructions have a writemask, so we can
115 * mask off unused components. Other instructions do not.
116 */
117 if (is_tex_or_prefetch(src) && (src->regs[0]->wrmask > 1)) {
118 src->regs[0]->wrmask &= ~(1 << instr->split.off);
119
120 /* prune no-longer needed right-neighbors. We could
121 * probably do the same for left-neighbors (ie. tex
122 * fetch that only need .yw components), but that
123 * makes RA a bit more confusing than it already is
124 */
125 struct ir3_instruction *n = instr;
126 while (n && n->cp.right)
127 n = n->cp.right;
128 while (n->flags & IR3_INSTR_UNUSED) {
129 n = n->cp.left;
130 if (!n)
131 break;
132 n->cp.right = NULL;
133 }
134 }
135 }
136 list_delinit(&instr->node);
137 progress = true;
138 }
139 }
140 return progress;
141 }
142
143 static bool
144 compute_depth_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so)
145 {
146 unsigned i;
147 bool progress = false;
148
149 ir3_clear_mark(ir);
150
151 /* initially mark everything as unused, we'll clear the flag as we
152 * visit the instructions:
153 */
154 foreach_block (block, &ir->block_list) {
155 foreach_instr (instr, &block->instr_list) {
156 /* special case, if pre-fs texture fetch used, we cannot
157 * eliminate the barycentric i/j input
158 */
159 if (so->num_sampler_prefetch &&
160 (instr->opc == OPC_META_INPUT) &&
161 (instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL))
162 continue;
163 instr->flags |= IR3_INSTR_UNUSED;
164 }
165 }
166
167 struct ir3_instruction *out;
168 foreach_output (out, ir)
169 ir3_instr_depth(out, 0, false);
170
171 foreach_block (block, &ir->block_list) {
172 for (i = 0; i < block->keeps_count; i++)
173 ir3_instr_depth(block->keeps[i], 0, false);
174
175 /* We also need to account for if-condition: */
176 if (block->condition)
177 ir3_instr_depth(block->condition, 6, false);
178 }
179
180 /* remove un-used instructions: */
181 foreach_block (block, &ir->block_list) {
182 progress |= remove_unused_by_block(block);
183 }
184
185 /* fixup wrmask of split instructions to account for adjusted tex
186 * wrmask's:
187 */
188 foreach_block (block, &ir->block_list) {
189 foreach_instr (instr, &block->instr_list) {
190 if (instr->opc != OPC_META_SPLIT)
191 continue;
192
193 struct ir3_instruction *src = ssa(instr->regs[1]);
194 if (!is_tex_or_prefetch(src))
195 continue;
196
197 instr->regs[1]->wrmask = src->regs[0]->wrmask;
198 }
199 }
200
201 /* note that we can end up with unused indirects, but we should
202 * not end up with unused predicates.
203 */
204 for (i = 0; i < ir->a0_users_count; i++) {
205 struct ir3_instruction *instr = ir->a0_users[i];
206 if (instr && (instr->flags & IR3_INSTR_UNUSED))
207 ir->a0_users[i] = NULL;
208 }
209
210 for (i = 0; i < ir->a1_users_count; i++) {
211 struct ir3_instruction *instr = ir->a1_users[i];
212 if (instr && (instr->flags & IR3_INSTR_UNUSED))
213 ir->a1_users[i] = NULL;
214 }
215
216 /* cleanup unused inputs: */
217 struct ir3_instruction *in;
218 foreach_input_n (in, n, ir)
219 if (in->flags & IR3_INSTR_UNUSED)
220 ir->inputs[n] = NULL;
221
222 return progress;
223 }
224
/**
 * Entry point of the depth pass: repeat the compute-depth /
 * remove-unused round on @ir until a round removes nothing.
 *
 * @so is handed through unchanged to each round.
 */
void
ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so)
{
	while (compute_depth_and_remove_unused(ir, so)) {
		/* something was removed, so run another round */
	}
}