lima/ppir: Add fneg op
[mesa.git] / src / gallium / drivers / lima / ir / pp / node.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/u_math.h"
26 #include "util/ralloc.h"
27 #include "util/bitscan.h"
28
29 #include "ppir.h"
30
31 const ppir_op_info ppir_op_infos[] = {
32 [ppir_op_mov] = {
33 .name = "mov",
34 .slots = (int []) {
35 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
36 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
37 PPIR_INSTR_SLOT_END
38 },
39 },
40 [ppir_op_abs] = {
41 .name = "abs",
42 },
43 [ppir_op_neg] = {
44 .name = "neg",
45 },
46 [ppir_op_mul] = {
47 .name = "mul",
48 .slots = (int []) {
49 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
50 PPIR_INSTR_SLOT_END
51 },
52 },
53 [ppir_op_add] = {
54 .name = "add",
55 .slots = (int []) {
56 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
57 PPIR_INSTR_SLOT_END
58 },
59 },
60 [ppir_op_dot2] = {
61 .name = "dot2",
62 },
63 [ppir_op_dot3] = {
64 .name = "dot3",
65 },
66 [ppir_op_dot4] = {
67 .name = "dot4",
68 },
69 [ppir_op_sum3] = {
70 .name = "sum3",
71 .slots = (int []) {
72 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
73 },
74 },
75 [ppir_op_sum4] = {
76 .name = "sum4",
77 .slots = (int []) {
78 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
79 },
80 },
81 [ppir_op_rsqrt] = {
82 .name = "rsqrt",
83 .slots = (int []) {
84 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
85 },
86 },
87 [ppir_op_log2] = {
88 .name = "log2",
89 .slots = (int []) {
90 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
91 },
92 },
93 [ppir_op_exp2] = {
94 .name = "exp2",
95 .slots = (int []) {
96 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
97 },
98 },
99 [ppir_op_sqrt] = {
100 .name = "sqrt",
101 .slots = (int []) {
102 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
103 },
104 },
105 [ppir_op_sin] = {
106 .name = "sin",
107 .slots = (int []) {
108 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
109 },
110 },
111 [ppir_op_cos] = {
112 .name = "cos",
113 .slots = (int []) {
114 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
115 },
116 },
117 [ppir_op_max] = {
118 .name = "max",
119 .slots = (int []) {
120 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
121 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
122 PPIR_INSTR_SLOT_END
123 },
124 },
125 [ppir_op_min] = {
126 .name = "min",
127 .slots = (int []) {
128 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
129 PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
130 PPIR_INSTR_SLOT_END
131 },
132 },
133 [ppir_op_floor] = {
134 .name = "floor",
135 .slots = (int []) {
136 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
137 PPIR_INSTR_SLOT_END
138 },
139 },
140 [ppir_op_ceil] = {
141 .name = "ceil",
142 .slots = (int []) {
143 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
144 PPIR_INSTR_SLOT_END
145 },
146 },
147 [ppir_op_fract] = {
148 .name = "fract",
149 .slots = (int []) {
150 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
151 PPIR_INSTR_SLOT_END
152 },
153 },
154 [ppir_op_and] = {
155 .name = "and",
156 .slots = (int []) {
157 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
158 PPIR_INSTR_SLOT_END
159 },
160 },
161 [ppir_op_or] = {
162 .name = "or",
163 .slots = (int []) {
164 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
165 PPIR_INSTR_SLOT_END
166 },
167 },
168 [ppir_op_xor] = {
169 .name = "xor",
170 .slots = (int []) {
171 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
172 PPIR_INSTR_SLOT_END
173 },
174 },
175 [ppir_op_not] = {
176 .name = "not",
177 .slots = (int []) {
178 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
179 PPIR_INSTR_SLOT_END
180 },
181 },
182 [ppir_op_lt] = {
183 .name = "lt",
184 },
185 [ppir_op_le] = {
186 .name = "le",
187 },
188 [ppir_op_gt] = {
189 .name = "gt",
190 .slots = (int []) {
191 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
192 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
193 PPIR_INSTR_SLOT_END
194 },
195 },
196 [ppir_op_ge] = {
197 .name = "ge",
198 .slots = (int []) {
199 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
200 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
201 PPIR_INSTR_SLOT_END
202 },
203 },
204 [ppir_op_eq] = {
205 .name = "eq",
206 .slots = (int []) {
207 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
208 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
209 PPIR_INSTR_SLOT_END
210 },
211 },
212 [ppir_op_ne] = {
213 .name = "ne",
214 .slots = (int []) {
215 PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
216 PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
217 PPIR_INSTR_SLOT_END
218 },
219 },
220 [ppir_op_select] = {
221 .name = "select",
222 .slots = (int []) {
223 PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
224 PPIR_INSTR_SLOT_END
225 },
226 },
227 [ppir_op_rcp] = {
228 .name = "rcp",
229 .slots = (int []) {
230 PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
231 },
232 },
233 [ppir_op_load_varying] = {
234 .name = "ld_var",
235 .type = ppir_node_type_load,
236 .slots = (int []) {
237 PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
238 },
239 },
240 [ppir_op_load_coords] = {
241 .name = "ld_coords",
242 .type = ppir_node_type_load,
243 .slots = (int []) {
244 PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
245 },
246 },
247 [ppir_op_load_fragcoord] = {
248 .name = "ld_fragcoord",
249 .type = ppir_node_type_load,
250 .slots = (int []) {
251 PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
252 },
253 },
254 [ppir_op_load_uniform] = {
255 .name = "ld_uni",
256 .type = ppir_node_type_load,
257 .slots = (int []) {
258 PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
259 },
260 },
261 [ppir_op_load_texture] = {
262 .name = "ld_tex",
263 .type = ppir_node_type_load_texture,
264 .slots = (int []) {
265 PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END
266 },
267 },
268 [ppir_op_load_temp] = {
269 .name = "ld_temp",
270 .type = ppir_node_type_load,
271 .slots = (int []) {
272 PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
273 },
274 },
275 [ppir_op_const] = {
276 .name = "const",
277 .type = ppir_node_type_const,
278 },
279 [ppir_op_store_color] = {
280 .name = "st_col",
281 .type = ppir_node_type_store,
282 },
283 [ppir_op_store_temp] = {
284 .name = "st_temp",
285 .type = ppir_node_type_store,
286 .slots = (int []) {
287 PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
288 },
289 },
290 [ppir_op_discard] = {
291 .name = "discard",
292 .type = ppir_node_type_discard,
293 .slots = (int []) {
294 PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
295 },
296 },
297 [ppir_op_branch] = {
298 .name = "branch",
299 .type = ppir_node_type_branch,
300 .slots = (int []) {
301 PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
302 },
303 },
304 };
305
306 void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
307 {
308 ppir_compiler *comp = block->comp;
309 static const int node_size[] = {
310 [ppir_node_type_alu] = sizeof(ppir_alu_node),
311 [ppir_node_type_const] = sizeof(ppir_const_node),
312 [ppir_node_type_load] = sizeof(ppir_load_node),
313 [ppir_node_type_store] = sizeof(ppir_store_node),
314 [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
315 [ppir_node_type_discard] = sizeof(ppir_discard_node),
316 [ppir_node_type_branch] = sizeof(ppir_branch_node),
317 };
318
319 ppir_node_type type = ppir_op_infos[op].type;
320 int size = node_size[type];
321 ppir_node *node = rzalloc_size(block, size);
322 if (!node)
323 return NULL;
324
325 list_inithead(&node->succ_list);
326 list_inithead(&node->pred_list);
327
328 if (index >= 0) {
329 if (mask) {
330 /* reg has 4 slots for each componemt write node */
331 while (mask)
332 comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node;
333 snprintf(node->name, sizeof(node->name), "reg%d", index);
334 } else {
335 comp->var_nodes[index] = node;
336 snprintf(node->name, sizeof(node->name), "ssa%d", index);
337 }
338 }
339 else
340 snprintf(node->name, sizeof(node->name), "new");
341
342 node->op = op;
343 node->type = type;
344 node->index = comp->cur_index++;
345 node->block = block;
346
347 return node;
348 }
349
350 void ppir_node_add_dep(ppir_node *succ, ppir_node *pred)
351 {
352 /* don't add dep for two nodes from different block */
353 if (succ->block != pred->block)
354 return;
355
356 /* don't add duplicated dep */
357 ppir_node_foreach_pred(succ, dep) {
358 if (dep->pred == pred)
359 return;
360 }
361
362 ppir_dep *dep = ralloc(succ, ppir_dep);
363 dep->pred = pred;
364 dep->succ = succ;
365 list_addtail(&dep->pred_link, &succ->pred_list);
366 list_addtail(&dep->succ_link, &pred->succ_list);
367 }
368
369 void ppir_node_remove_dep(ppir_dep *dep)
370 {
371 list_del(&dep->succ_link);
372 list_del(&dep->pred_link);
373 ralloc_free(dep);
374 }
375
376 static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child)
377 {
378 ppir_dest *od = ppir_node_get_dest(old_child);
379 if (ppir_node_target_equal(src, od)) {
380 ppir_dest *nd = ppir_node_get_dest(new_child);
381 ppir_node_target_assign(src, nd);
382 }
383 }
384
385 void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child)
386 {
387 if (parent->type == ppir_node_type_alu) {
388 ppir_alu_node *alu = ppir_node_to_alu(parent);
389 for (int i = 0; i < alu->num_src; i++)
390 _ppir_node_replace_child(alu->src + i, old_child, new_child);
391 }
392 else if (parent->type == ppir_node_type_store) {
393 ppir_store_node *store = ppir_node_to_store(parent);
394 _ppir_node_replace_child(&store->src, old_child, new_child);
395 }
396 }
397
398 void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred)
399 {
400 list_del(&dep->succ_link);
401 dep->pred = new_pred;
402 list_addtail(&dep->succ_link, &new_pred->succ_list);
403 }
404
405 void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src)
406 {
407 ppir_node_foreach_succ_safe(src, dep) {
408 ppir_node_replace_pred(dep, dst);
409 ppir_node_replace_child(dep->succ, src, dst);
410 }
411 }
412
413 void ppir_node_delete(ppir_node *node)
414 {
415 ppir_node_foreach_succ_safe(node, dep)
416 ppir_node_remove_dep(dep);
417
418 ppir_node_foreach_pred_safe(node, dep)
419 ppir_node_remove_dep(dep);
420
421 list_del(&node->list);
422 ralloc_free(node);
423 }
424
425 static void ppir_node_print_node(ppir_node *node, int space)
426 {
427 for (int i = 0; i < space; i++)
428 printf(" ");
429 printf("%s%s %d %s\n", node->printed && !ppir_node_is_leaf(node) ? "+" : "",
430 ppir_op_infos[node->op].name, node->index, node->name);
431
432 if (!node->printed) {
433 ppir_node_foreach_pred(node, dep) {
434 ppir_node *pred = dep->pred;
435 ppir_node_print_node(pred, space + 2);
436 }
437
438 node->printed = true;
439 }
440 }
441
442 void ppir_node_print_prog(ppir_compiler *comp)
443 {
444 if (!(lima_debug & LIMA_DEBUG_PP))
445 return;
446
447 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
448 list_for_each_entry(ppir_node, node, &block->node_list, list) {
449 node->printed = false;
450 }
451 }
452
453 printf("========prog========\n");
454 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
455 printf("-------block------\n");
456 list_for_each_entry(ppir_node, node, &block->node_list, list) {
457 if (ppir_node_is_root(node))
458 ppir_node_print_node(node, 0);
459 }
460 }
461 printf("====================\n");
462 }