lima/ppir: implement discard and discard_if
[mesa.git] / src / gallium / drivers / lima / ir / pp / node.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/u_math.h"
26 #include "util/ralloc.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30
31 const ppir_op_info ppir_op_infos[] = {
32 [ppir_op_mov] = {
33 .name = "mov",
34 .slots = (int []) {
35 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
36 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
37 PPIR_INSTR_SLOT_END
38 },
39 },
40 [ppir_op_mul] = {
41 .name = "mul",
42 .slots = (int []) {
43 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
44 PPIR_INSTR_SLOT_END
45 },
46 },
47 [ppir_op_add] = {
48 .name = "add",
49 .slots = (int []) {
50 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
51 PPIR_INSTR_SLOT_END
52 },
53 },
54 [ppir_op_dot2] = {
55 .name = "dot2",
56 },
57 [ppir_op_dot3] = {
58 .name = "dot3",
59 },
60 [ppir_op_dot4] = {
61 .name = "dot4",
62 },
63 [ppir_op_sum3] = {
64 .name = "sum3",
65 .slots = (int []) {
66 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
67 },
68 },
69 [ppir_op_sum4] = {
70 .name = "sum4",
71 .slots = (int []) {
72 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
73 },
74 },
75 [ppir_op_rsqrt] = {
76 .name = "rsqrt",
77 .slots = (int []) {
78 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
79 },
80 },
81 [ppir_op_log2] = {
82 .name = "log2",
83 .slots = (int []) {
84 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
85 },
86 },
87 [ppir_op_exp2] = {
88 .name = "exp2",
89 .slots = (int []) {
90 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
91 },
92 },
93 [ppir_op_sqrt] = {
94 .name = "sqrt",
95 .slots = (int []) {
96 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
97 },
98 },
99 [ppir_op_sin] = {
100 .name = "sin",
101 .slots = (int []) {
102 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
103 },
104 },
105 [ppir_op_cos] = {
106 .name = "cos",
107 .slots = (int []) {
108 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
109 },
110 },
111 [ppir_op_max] = {
112 .name = "max",
113 .slots = (int []) {
114 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
115 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
116 PPIR_INSTR_SLOT_END
117 },
118 },
119 [ppir_op_min] = {
120 .name = "min",
121 .slots = (int []) {
122 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
123 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
124 PPIR_INSTR_SLOT_END
125 },
126 },
127 [ppir_op_floor] = {
128 .name = "floor",
129 .slots = (int []) {
130 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
131 PPIR_INSTR_SLOT_END
132 },
133 },
134 [ppir_op_ceil] = {
135 .name = "ceil",
136 .slots = (int []) {
137 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
138 PPIR_INSTR_SLOT_END
139 },
140 },
141 [ppir_op_fract] = {
142 .name = "fract",
143 .slots = (int []) {
144 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
145 PPIR_INSTR_SLOT_END
146 },
147 },
148 [ppir_op_and] = {
149 .name = "and",
150 .slots = (int []) {
151 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
152 PPIR_INSTR_SLOT_END
153 },
154 },
155 [ppir_op_or] = {
156 .name = "or",
157 .slots = (int []) {
158 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
159 PPIR_INSTR_SLOT_END
160 },
161 },
162 [ppir_op_xor] = {
163 .name = "xor",
164 .slots = (int []) {
165 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
166 PPIR_INSTR_SLOT_END
167 },
168 },
169 [ppir_op_not] = {
170 .name = "not",
171 .slots = (int []) {
172 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
173 PPIR_INSTR_SLOT_END
174 },
175 },
176 [ppir_op_lt] = {
177 .name = "lt",
178 },
179 [ppir_op_le] = {
180 .name = "le",
181 },
182 [ppir_op_gt] = {
183 .name = "gt",
184 .slots = (int []) {
185 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
186 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
187 PPIR_INSTR_SLOT_END
188 },
189 },
190 [ppir_op_ge] = {
191 .name = "ge",
192 .slots = (int []) {
193 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
194 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
195 PPIR_INSTR_SLOT_END
196 },
197 },
198 [ppir_op_eq] = {
199 .name = "eq",
200 .slots = (int []) {
201 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
202 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
203 PPIR_INSTR_SLOT_END
204 },
205 },
206 [ppir_op_ne] = {
207 .name = "ne",
208 .slots = (int []) {
209 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
210 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
211 PPIR_INSTR_SLOT_END
212 },
213 },
214 [ppir_op_select] = {
215 .name = "select",
216 .slots = (int []) {
217 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
218 PPIR_INSTR_SLOT_END
219 },
220 },
221 [ppir_op_rcp] = {
222 .name = "rcp",
223 .slots = (int []) {
224 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
225 },
226 },
227 [ppir_op_load_varying] = {
228 .name = "ld_var",
229 .type = ppir_node_type_load,
230 .slots = (int []) {
231 PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
232 },
233 },
234 [ppir_op_load_coords] = {
235 .name = "ld_coords",
236 .type = ppir_node_type_load,
237 .slots = (int []) {
238 PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
239 },
240 },
241 [ppir_op_load_fragcoord] = {
242 .name = "ld_fragcoord",
243 .type = ppir_node_type_load,
244 .slots = (int []) {
245 PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
246 },
247 },
248 [ppir_op_load_uniform] = {
249 .name = "ld_uni",
250 .type = ppir_node_type_load,
251 .slots = (int []) {
252 PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
253 },
254 },
255 [ppir_op_load_texture] = {
256 .name = "ld_tex",
257 .type = ppir_node_type_load_texture,
258 .slots = (int []) {
259 PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END
260 },
261 },
262 [ppir_op_load_temp] = {
263 .name = "ld_temp",
264 .type = ppir_node_type_load,
265 .slots = (int []) {
266 PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
267 },
268 },
269 [ppir_op_const] = {
270 .name = "const",
271 .type = ppir_node_type_const,
272 },
273 [ppir_op_store_color] = {
274 .name = "st_col",
275 .type = ppir_node_type_store,
276 },
277 [ppir_op_store_temp] = {
278 .name = "st_temp",
279 .type = ppir_node_type_store,
280 .slots = (int []) {
281 PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
282 },
283 },
284 [ppir_op_discard] = {
285 .name = "discard",
286 .type = ppir_node_type_discard,
287 .slots = (int []) {
288 PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
289 },
290 },
291 [ppir_op_branch] = {
292 .name = "branch",
293 .type = ppir_node_type_branch,
294 .slots = (int []) {
295 PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
296 },
297 },
298 };
299
300 void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
301 {
302 ppir_compiler *comp = block->comp;
303 static const int node_size[] = {
304 [ppir_node_type_alu] = sizeof(ppir_alu_node),
305 [ppir_node_type_const] = sizeof(ppir_const_node),
306 [ppir_node_type_load] = sizeof(ppir_load_node),
307 [ppir_node_type_store] = sizeof(ppir_store_node),
308 [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
309 [ppir_node_type_discard] = sizeof(ppir_discard_node),
310 [ppir_node_type_branch] = sizeof(ppir_branch_node),
311 };
312
313 ppir_node_type type = ppir_op_infos[op].type;
314 int size = node_size[type];
315 ppir_node *node = rzalloc_size(block, size);
316 if (!node)
317 return NULL;
318
319 list_inithead(&node->succ_list);
320 list_inithead(&node->pred_list);
321
322 if (index >= 0) {
323 if (mask) {
324 /* reg has 4 slots for each componemt write node */
325 while (mask)
326 comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node;
327 snprintf(node->name, sizeof(node->name), "reg%d", index);
328 } else {
329 comp->var_nodes[index] = node;
330 snprintf(node->name, sizeof(node->name), "ssa%d", index);
331 }
332 }
333 else
334 snprintf(node->name, sizeof(node->name), "new");
335
336 node->op = op;
337 node->type = type;
338 node->index = comp->cur_index++;
339 node->block = block;
340
341 return node;
342 }
343
344 void ppir_node_add_dep(ppir_node *succ, ppir_node *pred)
345 {
346 /* don't add dep for two nodes from different block */
347 if (succ->block != pred->block)
348 return;
349
350 /* don't add duplicated dep */
351 ppir_node_foreach_pred(succ, dep) {
352 if (dep->pred == pred)
353 return;
354 }
355
356 ppir_dep *dep = ralloc(succ, ppir_dep);
357 dep->pred = pred;
358 dep->succ = succ;
359 list_addtail(&dep->pred_link, &succ->pred_list);
360 list_addtail(&dep->succ_link, &pred->succ_list);
361 }
362
363 void ppir_node_remove_dep(ppir_dep *dep)
364 {
365 list_del(&dep->succ_link);
366 list_del(&dep->pred_link);
367 ralloc_free(dep);
368 }
369
370 static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child)
371 {
372 ppir_dest *od = ppir_node_get_dest(old_child);
373 if (ppir_node_target_equal(src, od)) {
374 ppir_dest *nd = ppir_node_get_dest(new_child);
375 ppir_node_target_assign(src, nd);
376 }
377 }
378
379 void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child)
380 {
381 if (parent->type == ppir_node_type_alu) {
382 ppir_alu_node *alu = ppir_node_to_alu(parent);
383 for (int i = 0; i < alu->num_src; i++)
384 _ppir_node_replace_child(alu->src + i, old_child, new_child);
385 }
386 else if (parent->type == ppir_node_type_store) {
387 ppir_store_node *store = ppir_node_to_store(parent);
388 _ppir_node_replace_child(&store->src, old_child, new_child);
389 }
390 }
391
392 void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred)
393 {
394 list_del(&dep->succ_link);
395 dep->pred = new_pred;
396 list_addtail(&dep->succ_link, &new_pred->succ_list);
397 }
398
399 void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src)
400 {
401 ppir_node_foreach_succ_safe(src, dep) {
402 ppir_node_replace_pred(dep, dst);
403 ppir_node_replace_child(dep->succ, src, dst);
404 }
405 }
406
407 void ppir_node_delete(ppir_node *node)
408 {
409 ppir_node_foreach_succ_safe(node, dep)
410 ppir_node_remove_dep(dep);
411
412 ppir_node_foreach_pred_safe(node, dep)
413 ppir_node_remove_dep(dep);
414
415 list_del(&node->list);
416 ralloc_free(node);
417 }
418
419 static void ppir_node_print_node(ppir_node *node, int space)
420 {
421 for (int i = 0; i < space; i++)
422 printf(" ");
423 printf("%s%s %d %s\n", node->printed && !ppir_node_is_leaf(node) ? "+" : "",
424 ppir_op_infos[node->op].name, node->index, node->name);
425
426 if (!node->printed) {
427 ppir_node_foreach_pred(node, dep) {
428 ppir_node *pred = dep->pred;
429 ppir_node_print_node(pred, space + 2);
430 }
431
432 node->printed = true;
433 }
434 }
435
436 void ppir_node_print_prog(ppir_compiler *comp)
437 {
438 if (!(lima_debug & LIMA_DEBUG_PP))
439 return;
440
441 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
442 list_for_each_entry(ppir_node, node, &block->node_list, list) {
443 node->printed = false;
444 }
445 }
446
447 printf("========prog========\n");
448 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
449 printf("-------block------\n");
450 list_for_each_entry(ppir_node, node, &block->node_list, list) {
451 if (ppir_node_is_root(node))
452 ppir_node_print_node(node, 0);
453 }
454 }
455 printf("====================\n");
456 }