freedreno/ir3: remove tgsi f/e
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_group.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "freedreno_util.h"
30
31 #include "ir3.h"
32
33 /*
34 * Find/group instruction neighbors:
35 */
36
37 /* stop condition for iteration: */
38 static bool check_stop(struct ir3_instruction *instr)
39 {
40 if (ir3_instr_check_mark(instr))
41 return true;
42
43 /* stay within the block.. don't try to operate across
44 * basic block boundaries or we'll have problems when
45 * dealing with multiple basic blocks:
46 */
47 if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
48 return true;
49
50 return false;
51 }
52
/* The same group_n() logic has to run over two different containers:
 * shader inputs/outputs (a plain instr[] array) and fanin nodes (where
 * there is an extra indirection via reg->instr).  This small vtable
 * hides the difference; 'arr' is whatever opaque container the
 * matching callbacks expect.
 */
struct group_ops {
	/* return the instruction at position 'pos' of 'group' (may be NULL) */
	struct ir3_instruction *(*get)(void *group, int pos);
	/* resolve a conflict for 'instr', currently at position 'pos' of
	 * 'group', by putting a mov (or equivalent) into that position
	 */
	void (*insert_mov)(void *group, int pos, struct ir3_instruction *instr);
};
61
/* group_ops 'get' for a plain array of instruction pointers:
 * 'arr' is a struct ir3_instruction *[] and slot 'idx' is returned as-is
 * (no bounds checking is done here).
 */
static struct ir3_instruction *arr_get(void *arr, int idx)
{
	struct ir3_instruction **slots = arr;

	return slots[idx];
}
66 static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
67 {
68 ((struct ir3_instruction **)arr)[idx] =
69 ir3_MOV(instr->block, instr, TYPE_F32);
70 }
71 static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
72 {
73 /* so, we can't insert a mov in front of a meta:in.. and the downstream
74 * instruction already has a pointer to 'instr'. So we cheat a bit and
75 * morph the meta:in instruction into a mov and insert a new meta:in
76 * in front.
77 */
78 struct ir3_instruction *in;
79
80 debug_assert(instr->regs_count == 1);
81
82 in = ir3_instr_create(instr->block, -1, OPC_META_INPUT);
83 in->inout.block = instr->block;
84 ir3_reg_create(in, instr->regs[0]->num, 0);
85
86 /* create src reg for meta:in and fixup to now be a mov: */
87 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in;
88 instr->category = 1;
89 instr->opc = 0;
90 instr->cat1.src_type = TYPE_F32;
91 instr->cat1.dst_type = TYPE_F32;
92
93 ((struct ir3_instruction **)arr)[idx] = in;
94 }
95 static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out };
96 static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in };
97
98 static struct ir3_instruction *instr_get(void *arr, int idx)
99 {
100 return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
101 }
102 static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
103 {
104 ((struct ir3_instruction *)arr)->regs[idx+1]->instr =
105 ir3_MOV(instr->block, instr, TYPE_F32);
106 }
107 static struct group_ops instr_ops = { instr_get, instr_insert_mov };
108
109
110 static void group_n(struct group_ops *ops, void *arr, unsigned n)
111 {
112 unsigned i, j;
113
114 /* first pass, figure out what has conflicts and needs a mov
115 * inserted. Do this up front, before starting to setup
116 * left/right neighbor pointers. Trying to do it in a single
117 * pass could result in a situation where we can't even setup
118 * the mov's right neighbor ptr if the next instr also needs
119 * a mov.
120 */
121 restart:
122 for (i = 0; i < n; i++) {
123 struct ir3_instruction *instr = ops->get(arr, i);
124 if (instr) {
125 struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
126 struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
127 bool conflict;
128
129 /* check for left/right neighbor conflicts: */
130 conflict = conflicts(instr->cp.left, left) ||
131 conflicts(instr->cp.right, right);
132
133 /* we also can't have an instr twice in the group: */
134 for (j = i + 1; (j < n) && !conflict; j++)
135 if (ops->get(arr, j) == instr)
136 conflict = true;
137
138 if (conflict) {
139 ops->insert_mov(arr, i, instr);
140 /* inserting the mov may have caused a conflict
141 * against the previous:
142 */
143 goto restart;
144 }
145 }
146 }
147
148 /* second pass, now that we've inserted mov's, fixup left/right
149 * neighbors. This is guaranteed to succeed, since by definition
150 * the newly inserted mov's cannot conflict with anything.
151 */
152 for (i = 0; i < n; i++) {
153 struct ir3_instruction *instr = ops->get(arr, i);
154 if (instr) {
155 struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
156 struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
157
158 debug_assert(!conflicts(instr->cp.left, left));
159 if (left) {
160 instr->cp.left_cnt++;
161 instr->cp.left = left;
162 }
163
164 debug_assert(!conflicts(instr->cp.right, right));
165 if (right) {
166 instr->cp.right_cnt++;
167 instr->cp.right = right;
168 }
169 }
170 }
171 }
172
173 static void instr_find_neighbors(struct ir3_instruction *instr)
174 {
175 struct ir3_instruction *src;
176
177 if (check_stop(instr))
178 return;
179
180 if (is_meta(instr) && (instr->opc == OPC_META_FI))
181 group_n(&instr_ops, instr, instr->regs_count - 1);
182
183 foreach_ssa_src(src, instr)
184 instr_find_neighbors(src);
185 }
186
187 /* a bit of sadness.. we can't have "holes" in inputs from PoV of
188 * register assignment, they still need to be grouped together. So
189 * we need to insert dummy/padding instruction for grouping, and
190 * then take it back out again before anyone notices.
191 */
192 static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
193 {
194 int i, mask = 0;
195 struct ir3_block *block = NULL;
196
197 for (i = n - 1; i >= 0; i--) {
198 struct ir3_instruction *instr = input[i];
199 if (instr) {
200 block = instr->block;
201 } else if (block) {
202 instr = ir3_NOP(block);
203 ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */
204 input[i] = instr;
205 mask |= (1 << i);
206 }
207 }
208
209 group_n(&arr_ops_in, input, n);
210
211 for (i = 0; i < n; i++) {
212 if (mask & (1 << i))
213 input[i] = NULL;
214 }
215 }
216
217 static void block_find_neighbors(struct ir3_block *block)
218 {
219 unsigned i;
220
221 /* shader inputs/outputs themselves must be contiguous as well:
222 *
223 * NOTE: group inputs first, since we only insert mov's
224 * *before* the conflicted instr (and that would go badly
225 * for inputs). By doing inputs first, we should never
226 * have a conflict on inputs.. pushing any conflict to
227 * resolve to the outputs, for stuff like:
228 *
229 * MOV OUT[n], IN[m].wzyx
230 *
231 * NOTE: we assume here inputs/outputs are grouped in vec4.
232 * This logic won't quite cut it if we don't align smaller
233 * on vec4 boundaries
234 */
235 for (i = 0; i < block->ninputs; i += 4)
236 pad_and_group_input(&block->inputs[i], 4);
237 for (i = 0; i < block->noutputs; i += 4)
238 group_n(&arr_ops_out, &block->outputs[i], 4);
239
240
241 for (i = 0; i < block->noutputs; i++) {
242 if (block->outputs[i]) {
243 struct ir3_instruction *instr = block->outputs[i];
244 instr_find_neighbors(instr);
245 }
246 }
247 }
248
249 void ir3_block_group(struct ir3_block *block)
250 {
251 ir3_clear_mark(block->shader);
252 block_find_neighbors(block);
253 }