st/nine: Add D3DFMT_DF16 support
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_group.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "freedreno_util.h"
30
31 #include "ir3.h"
32
33 /*
34 * Find/group instruction neighbors:
35 */
36
37 /* stop condition for iteration: */
38 static bool check_stop(struct ir3_instruction *instr)
39 {
40 if (ir3_instr_check_mark(instr))
41 return true;
42
43 /* stay within the block.. don't try to operate across
44 * basic block boundaries or we'll have problems when
45 * dealing with multiple basic blocks:
46 */
47 if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
48 return true;
49
50 return false;
51 }
52
/*
 * group_n() has to do the same work for two different containers:
 * shader inputs/outputs (a plain instr[] array) and fanin nodes (where
 * each entry sits behind an extra indirection via reg->instr).  This
 * small accessor table hides that difference.
 */
struct group_ops {
	/* fetch the instruction stored at position 'idx' of 'arr': */
	struct ir3_instruction *(*get)(void *arr, int idx);
	/* store 'instr' at position 'idx' of 'arr': */
	void (*set)(void *arr, int idx, struct ir3_instruction *instr);
};
61
/* group_ops 'get' for a plain array of instruction pointers:
 * returns the pointer stored in slot 'idx' (may be NULL).
 */
static struct ir3_instruction *arr_get(void *arr, int idx)
{
	struct ir3_instruction **slots = arr;

	return slots[idx];
}
/* group_ops 'set' for the output array: overwrites slot 'idx' of a
 * plain array of instruction pointers with 'instr'.
 */
static void arr_set_out(void *arr, int idx, struct ir3_instruction *instr)
{
	struct ir3_instruction **slots = arr;

	slots[idx] = instr;
}
/* group_ops 'set' for the input array.  Replacing an input is never
 * expected to happen, so this only reports the problem and asserts;
 * the array itself is left untouched.
 *
 * NOTE(review): if debug_assert() compiles to nothing in non-debug
 * builds, this returns without changing anything -- confirm that the
 * caller cannot get stuck retrying in that case.
 */
static void arr_set_in(void *arr, int idx, struct ir3_instruction *instr)
{
	(void)arr;
	(void)idx;
	(void)instr;

	debug_printf("cannot insert mov before input!\n");
	debug_assert(0);
}
75 static struct group_ops arr_ops_out = { arr_get, arr_set_out };
76 static struct group_ops arr_ops_in = { arr_get, arr_set_in };
77
78 static struct ir3_instruction *instr_get(void *arr, int idx)
79 {
80 return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
81 }
82 static void instr_set(void *arr, int idx, struct ir3_instruction *instr)
83 {
84 ((struct ir3_instruction *)arr)->regs[idx+1]->instr = instr;
85 }
86 static struct group_ops instr_ops = { instr_get, instr_set };
87
88
89
/* Two neighbor candidates conflict only when both are set and they
 * are different instructions.  A NULL on either side never conflicts.
 */
static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
{
	if (!a || !b)
		return false;

	return a != b;
}
94
95 static struct ir3_instruction *
96 create_mov(struct ir3_instruction *instr)
97 {
98 struct ir3_instruction *mov;
99
100 mov = ir3_instr_create(instr->block, 1, 0);
101 mov->cat1.src_type = TYPE_F32;
102 mov->cat1.dst_type = TYPE_F32;
103 ir3_reg_create(mov, 0, 0); /* dst */
104 ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr;
105
106 return mov;
107 }
108
109 static void group_n(struct group_ops *ops, void *arr, unsigned n)
110 {
111 unsigned i, j;
112
113 /* first pass, figure out what has conflicts and needs a mov
114 * inserted. Do this up front, before starting to setup
115 * left/right neighbor pointers. Trying to do it in a single
116 * pass could result in a situation where we can't even setup
117 * the mov's right neighbor ptr if the next instr also needs
118 * a mov.
119 */
120 restart:
121 for (i = 0; i < n; i++) {
122 struct ir3_instruction *instr = ops->get(arr, i);
123 if (instr) {
124 struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
125 struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
126 bool conflict;
127
128 /* check for left/right neighbor conflicts: */
129 conflict = conflicts(instr->cp.left, left) ||
130 conflicts(instr->cp.right, right);
131
132 /* we also can't have an instr twice in the group: */
133 for (j = i + 1; (j < n) && !conflict; j++)
134 if (ops->get(arr, j) == instr)
135 conflict = true;
136
137 if (conflict) {
138 instr = create_mov(instr);
139 ops->set(arr, i, instr);
140 /* inserting the mov may have caused a conflict
141 * against the previous:
142 */
143 goto restart;
144 }
145 }
146 }
147
148 /* second pass, now that we've inserted mov's, fixup left/right
149 * neighbors. This is guaranteed to succeed, since by definition
150 * the newly inserted mov's cannot conflict with anything.
151 */
152 for (i = 0; i < n; i++) {
153 struct ir3_instruction *instr = ops->get(arr, i);
154 if (instr) {
155 struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
156 struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
157
158 debug_assert(!conflicts(instr->cp.left, left));
159 if (left) {
160 instr->cp.left_cnt++;
161 instr->cp.left = left;
162 }
163
164 debug_assert(!conflicts(instr->cp.right, right));
165 if (right) {
166 instr->cp.right_cnt++;
167 instr->cp.right = right;
168 }
169 }
170 }
171 }
172
173 static void instr_find_neighbors(struct ir3_instruction *instr)
174 {
175 unsigned i;
176
177 if (check_stop(instr))
178 return;
179
180 if (is_meta(instr) && (instr->opc == OPC_META_FI))
181 group_n(&instr_ops, instr, instr->regs_count - 1);
182
183 for (i = 1; i < instr->regs_count; i++) {
184 struct ir3_instruction *src_instr = ssa(instr->regs[i]);
185 if (src_instr)
186 instr_find_neighbors(src_instr);
187 }
188 }
189
190 /* a bit of sadness.. we can't have "holes" in inputs from PoV of
191 * register assignment, they still need to be grouped together. So
192 * we need to insert dummy/padding instruction for grouping, and
193 * then take it back out again before anyone notices.
194 */
195 static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
196 {
197 int i, mask = 0;
198 struct ir3_block *block = NULL;
199
200 for (i = n - 1; i >= 0; i--) {
201 struct ir3_instruction *instr = input[i];
202 if (instr) {
203 block = instr->block;
204 } else if (block) {
205 instr = ir3_instr_create(block, 0, OPC_NOP);
206 ir3_reg_create(instr, 0, IR3_REG_SSA); /* dst */
207 input[i] = instr;
208 mask |= (1 << i);
209 }
210 }
211
212 group_n(&arr_ops_in, input, n);
213
214 for (i = 0; i < n; i++) {
215 if (mask & (1 << i))
216 input[i] = NULL;
217 }
218 }
219
220 static void block_find_neighbors(struct ir3_block *block)
221 {
222 unsigned i;
223
224 for (i = 0; i < block->noutputs; i++) {
225 if (block->outputs[i]) {
226 struct ir3_instruction *instr = block->outputs[i];
227 instr_find_neighbors(instr);
228 }
229 }
230
231 /* shader inputs/outputs themselves must be contiguous as well:
232 */
233 if (!block->parent) {
234 /* NOTE: group inputs first, since we only insert mov's
235 * *before* the conflicted instr (and that would go badly
236 * for inputs). By doing inputs first, we should never
237 * have a conflict on inputs.. pushing any conflict to
238 * resolve to the outputs, for stuff like:
239 *
240 * MOV OUT[n], IN[m].wzyx
241 *
242 * NOTE: we assume here inputs/outputs are grouped in vec4.
243 * This logic won't quite cut it if we don't align smaller
244 * on vec4 boundaries
245 */
246 for (i = 0; i < block->ninputs; i += 4)
247 pad_and_group_input(&block->inputs[i], 4);
248 for (i = 0; i < block->noutputs; i += 4)
249 group_n(&arr_ops_out, &block->outputs[i], 4);
250
251 }
252 }
253
254 void ir3_block_group(struct ir3_block *block)
255 {
256 ir3_clear_mark(block->shader);
257 block_find_neighbors(block);
258 }