/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "util/ralloc.h"

#include "gpir.h"
#include "lima_context.h"
30 static bool gpir_lower_const(gpir_compiler
*comp
)
33 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
34 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
35 if (node
->op
== gpir_op_const
) {
36 if (gpir_node_is_root(node
))
37 gpir_node_delete(node
);
45 union fi
*constant
= ralloc_array(comp
->prog
, union fi
, num_constant
);
49 comp
->prog
->constant
= constant
;
50 comp
->prog
->constant_size
= num_constant
* sizeof(union fi
);
53 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
54 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
55 if (node
->op
== gpir_op_const
) {
56 gpir_const_node
*c
= gpir_node_to_const(node
);
58 if (!gpir_node_is_root(node
)) {
59 gpir_load_node
*load
= gpir_node_create(block
, gpir_op_load_uniform
);
63 load
->index
= comp
->constant_base
+ (index
>> 2);
64 load
->component
= index
% 4;
65 constant
[index
++] = c
->value
;
67 gpir_node_replace_succ(&load
->node
, node
);
69 list_addtail(&load
->node
.list
, &node
->list
);
71 gpir_debug("lower const create uniform %d for const %d\n",
72 load
->node
.index
, node
->index
);
75 gpir_node_delete(node
);
84 /* duplicate load to all its successors */
85 static bool gpir_lower_load(gpir_compiler
*comp
)
87 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
88 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
89 if (node
->type
== gpir_node_type_load
) {
90 gpir_load_node
*load
= gpir_node_to_load(node
);
93 gpir_node_foreach_succ_safe(node
, dep
) {
94 gpir_node
*succ
= dep
->succ
;
101 gpir_node
*new = gpir_node_create(succ
->block
, node
->op
);
104 list_addtail(&new->list
, &succ
->list
);
106 gpir_debug("lower load create %d from %d for succ %d\n",
107 new->index
, node
->index
, succ
->index
);
109 gpir_load_node
*nload
= gpir_node_to_load(new);
110 nload
->index
= load
->index
;
111 nload
->component
= load
->component
;
113 nload
->reg
= load
->reg
;
114 list_addtail(&nload
->reg_link
, &load
->reg
->uses_list
);
117 gpir_node_replace_pred(dep
, new);
118 gpir_node_replace_child(succ
, node
, new);
127 static bool gpir_lower_neg(gpir_block
*block
, gpir_node
*node
)
129 gpir_alu_node
*neg
= gpir_node_to_alu(node
);
130 gpir_node
*child
= neg
->children
[0];
132 /* check if child can dest negate */
133 if (child
->type
== gpir_node_type_alu
) {
134 /* negate must be its only successor */
135 if (list_is_singular(&child
->succ_list
) &&
136 gpir_op_infos
[child
->op
].dest_neg
) {
137 gpir_alu_node
*alu
= gpir_node_to_alu(child
);
138 alu
->dest_negate
= !alu
->dest_negate
;
140 gpir_node_replace_succ(child
, node
);
141 gpir_node_delete(node
);
146 /* check if child can src negate */
147 gpir_node_foreach_succ_safe(node
, dep
) {
148 gpir_node
*succ
= dep
->succ
;
149 if (succ
->type
!= gpir_node_type_alu
)
153 gpir_alu_node
*alu
= gpir_node_to_alu(dep
->succ
);
154 for (int i
= 0; i
< alu
->num_child
; i
++) {
155 if (alu
->children
[i
] == node
) {
156 if (gpir_op_infos
[succ
->op
].src_neg
[i
]) {
157 alu
->children_negate
[i
] = !alu
->children_negate
[i
];
158 alu
->children
[i
] = child
;
166 gpir_node_replace_pred(dep
, child
);
169 if (gpir_node_is_root(node
))
170 gpir_node_delete(node
);
175 static bool gpir_lower_complex(gpir_block
*block
, gpir_node
*node
)
177 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
178 gpir_node
*child
= alu
->children
[0];
180 if (node
->op
== gpir_op_exp2
) {
181 gpir_alu_node
*preexp2
= gpir_node_create(block
, gpir_op_preexp2
);
182 if (unlikely(!preexp2
))
185 preexp2
->children
[0] = child
;
186 preexp2
->num_child
= 1;
187 gpir_node_add_dep(&preexp2
->node
, child
, GPIR_DEP_INPUT
);
188 list_addtail(&preexp2
->node
.list
, &node
->list
);
190 child
= &preexp2
->node
;
193 gpir_alu_node
*complex2
= gpir_node_create(block
, gpir_op_complex2
);
194 if (unlikely(!complex2
))
197 complex2
->children
[0] = child
;
198 complex2
->num_child
= 1;
199 gpir_node_add_dep(&complex2
->node
, child
, GPIR_DEP_INPUT
);
200 list_addtail(&complex2
->node
.list
, &node
->list
);
205 impl_op
= gpir_op_rcp_impl
;
208 impl_op
= gpir_op_rsqrt_impl
;
211 impl_op
= gpir_op_exp2_impl
;
214 impl_op
= gpir_op_log2_impl
;
220 gpir_alu_node
*impl
= gpir_node_create(block
, impl_op
);
224 impl
->children
[0] = child
;
226 gpir_node_add_dep(&impl
->node
, child
, GPIR_DEP_INPUT
);
227 list_addtail(&impl
->node
.list
, &node
->list
);
229 gpir_alu_node
*complex1
= gpir_node_create(block
, gpir_op_complex1
);
230 complex1
->children
[0] = &impl
->node
;
231 complex1
->children
[1] = &complex2
->node
;
232 complex1
->children
[2] = child
;
233 complex1
->num_child
= 3;
234 gpir_node_add_dep(&complex1
->node
, child
, GPIR_DEP_INPUT
);
235 gpir_node_add_dep(&complex1
->node
, &impl
->node
, GPIR_DEP_INPUT
);
236 gpir_node_add_dep(&complex1
->node
, &complex2
->node
, GPIR_DEP_INPUT
);
237 list_addtail(&complex1
->node
.list
, &node
->list
);
239 gpir_node
*result
= &complex1
->node
;
241 if (node
->op
== gpir_op_log2
) {
242 gpir_alu_node
*postlog2
= gpir_node_create(block
, gpir_op_postlog2
);
243 if (unlikely(!postlog2
))
246 postlog2
->children
[0] = result
;
247 postlog2
->num_child
= 1;
248 gpir_node_add_dep(&postlog2
->node
, result
, GPIR_DEP_INPUT
);
249 list_addtail(&postlog2
->node
.list
, &node
->list
);
251 result
= &postlog2
->node
;
254 gpir_node_replace_succ(result
, node
);
255 gpir_node_delete(node
);
260 static bool gpir_lower_node_may_consume_two_slots(gpir_compiler
*comp
)
262 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
263 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
264 if (gpir_op_infos
[node
->op
].may_consume_two_slots
) {
265 /* dummy_f/m are auxiliary nodes for value reg alloc:
266 * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
267 * so the tree become: (dummy_m (node dummy_f))
268 * dummy_m can be spilled, but other nodes in the tree can't
270 * 2. After reg allocation and fake dep add, merge all deps of
271 * dummy_m and dummy_f to node and remove dummy_m & dummy_f
273 * We may also not use dummy_f/m, but alloc two value reg for
274 * node. But that means we need to make sure there're 2 free
275 * slot after the node successors, but we just need one slot
276 * after to be able to schedule it because we can use one move for
277 * the two slot node. It's also not easy to handle the spill case
278 * for the alloc 2 value method.
280 * With the dummy_f/m method, there's no such requirement, the
281 * node can be scheduled only when there's two slots for it,
282 * otherwise a move. And the node can be spilled with one reg.
284 gpir_node
*dummy_m
= gpir_node_create(block
, gpir_op_dummy_m
);
285 if (unlikely(!dummy_m
))
287 list_add(&dummy_m
->list
, &node
->list
);
289 gpir_node
*dummy_f
= gpir_node_create(block
, gpir_op_dummy_f
);
290 if (unlikely(!dummy_f
))
292 list_add(&dummy_f
->list
, &node
->list
);
294 gpir_alu_node
*alu
= gpir_node_to_alu(dummy_m
);
295 alu
->children
[0] = node
;
296 alu
->children
[1] = dummy_f
;
299 gpir_node_replace_succ(dummy_m
, node
);
300 gpir_node_add_dep(dummy_m
, node
, GPIR_DEP_INPUT
);
301 gpir_node_add_dep(dummy_m
, dummy_f
, GPIR_DEP_INPUT
);
311 * There are no 'equal' or 'not-equal' opcodes.
312 * eq (a == b) is lowered to and(a >= b, b >= a)
313 * ne (a != b) is lowered to or(a < b, b < a)
315 static bool gpir_lower_eq_ne(gpir_block
*block
, gpir_node
*node
)
321 cmp_node_op
= gpir_op_ge
;
322 node_new_op
= gpir_op_min
; /* and */
325 cmp_node_op
= gpir_op_lt
;
326 node_new_op
= gpir_op_max
; /* or */
332 gpir_alu_node
*e
= gpir_node_to_alu(node
);
334 gpir_alu_node
*cmp1
= gpir_node_create(block
, cmp_node_op
);
335 list_addtail(&cmp1
->node
.list
, &node
->list
);
336 gpir_alu_node
*cmp2
= gpir_node_create(block
, cmp_node_op
);
337 list_addtail(&cmp2
->node
.list
, &node
->list
);
339 cmp1
->children
[0] = e
->children
[0];
340 cmp1
->children
[1] = e
->children
[1];
343 cmp2
->children
[0] = e
->children
[1];
344 cmp2
->children
[1] = e
->children
[0];
347 gpir_node_add_dep(&cmp1
->node
, e
->children
[0], GPIR_DEP_INPUT
);
348 gpir_node_add_dep(&cmp1
->node
, e
->children
[1], GPIR_DEP_INPUT
);
350 gpir_node_add_dep(&cmp2
->node
, e
->children
[0], GPIR_DEP_INPUT
);
351 gpir_node_add_dep(&cmp2
->node
, e
->children
[1], GPIR_DEP_INPUT
);
353 gpir_node_foreach_pred_safe(node
, dep
) {
354 gpir_node_remove_dep(node
, dep
->pred
);
357 gpir_node_add_dep(node
, &cmp1
->node
, GPIR_DEP_INPUT
);
358 gpir_node_add_dep(node
, &cmp2
->node
, GPIR_DEP_INPUT
);
360 node
->op
= node_new_op
;
361 e
->children
[0] = &cmp1
->node
;
362 e
->children
[1] = &cmp2
->node
;
369 * There is no 'abs' opcode.
370 * abs(a) is lowered to max(a, -a)
372 static bool gpir_lower_abs(gpir_block
*block
, gpir_node
*node
)
374 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
376 assert(node
->op
== gpir_op_abs
);
378 node
->op
= gpir_op_max
;
380 alu
->children
[1] = alu
->children
[0];
381 alu
->children_negate
[1] = true;
388 * There is no 'not' opcode.
389 * not(a) is lowered to add(1, -a)
391 static bool gpir_lower_not(gpir_block
*block
, gpir_node
*node
)
393 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
395 assert(alu
->node
.op
== gpir_op_not
);
397 node
->op
= gpir_op_add
;
399 gpir_node
*node_const
= gpir_node_create(block
, gpir_op_const
);
400 gpir_const_node
*c
= gpir_node_to_const(node_const
);
402 assert(c
->node
.op
== gpir_op_const
);
404 list_addtail(&c
->node
.list
, &node
->list
);
406 gpir_node_add_dep(&alu
->node
, &c
->node
, GPIR_DEP_INPUT
);
408 alu
->children_negate
[1] = !alu
->children_negate
[0];
409 alu
->children
[1] = alu
->children
[0];
410 alu
->children
[0] = &c
->node
;
417 static bool (*gpir_pre_rsched_lower_funcs
[gpir_op_num
])(gpir_block
*, gpir_node
*) = {
418 [gpir_op_not
] = gpir_lower_not
,
421 static bool (*gpir_post_rsched_lower_funcs
[gpir_op_num
])(gpir_block
*, gpir_node
*) = {
422 [gpir_op_neg
] = gpir_lower_neg
,
423 [gpir_op_rcp
] = gpir_lower_complex
,
424 [gpir_op_rsqrt
] = gpir_lower_complex
,
425 [gpir_op_exp2
] = gpir_lower_complex
,
426 [gpir_op_log2
] = gpir_lower_complex
,
427 [gpir_op_eq
] = gpir_lower_eq_ne
,
428 [gpir_op_ne
] = gpir_lower_eq_ne
,
429 [gpir_op_abs
] = gpir_lower_abs
,
432 bool gpir_pre_rsched_lower_prog(gpir_compiler
*comp
)
434 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
435 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
436 if (gpir_pre_rsched_lower_funcs
[node
->op
] &&
437 !gpir_pre_rsched_lower_funcs
[node
->op
](block
, node
))
442 if (!gpir_lower_const(comp
))
445 if (!gpir_lower_load(comp
))
448 gpir_debug("pre rsched lower prog\n");
449 gpir_node_print_prog_seq(comp
);
453 bool gpir_post_rsched_lower_prog(gpir_compiler
*comp
)
455 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
456 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
457 if (gpir_post_rsched_lower_funcs
[node
->op
] &&
458 !gpir_post_rsched_lower_funcs
[node
->op
](block
, node
))
463 if (!gpir_lower_node_may_consume_two_slots(comp
))
466 gpir_debug("post rsched lower prog\n");
467 gpir_node_print_prog_seq(comp
);