/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
25 #include "util/ralloc.h"
28 #include "lima_context.h"
/* Lower gpir_op_const nodes.
 *
 * First walk deletes const nodes with no successors; the second walk
 * stores each remaining constant's value into the program's constant
 * buffer and replaces every use with a gpir_op_load_uniform that reads
 * it back.
 *
 * NOTE(review): recovered from a mangled paste — the opening brace, the
 * declarations of num_constant and index, the else-branch that counts
 * live constants, allocation-failure checks, closing braces and the
 * return statements are missing from this view and have intentionally
 * NOT been reconstructed.
 */
static bool gpir_lower_const(gpir_compiler *comp)
   /* Pass 1: delete dead (root, i.e. successor-less) const nodes. */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            if (gpir_node_is_root(node))
               gpir_node_delete(node);

   /* Backing storage for the live constants, owned by comp->prog via
    * ralloc so it is freed together with the program. */
   union fi *constant = ralloc_array(comp->prog, union fi, num_constant);

   comp->prog->constant = constant;
   comp->prog->constant_size = num_constant * sizeof(union fi);

   /* Pass 2: replace each remaining const node with a uniform load. */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            gpir_const_node *c = gpir_node_to_const(node);

            if (!gpir_node_is_root(node)) {
               gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);

               /* Constants are packed four per uniform slot: index >> 2
                * selects the slot, index % 4 the component. */
               load->index = comp->constant_base + (index >> 2);
               load->component = index % 4;
               constant[index++] = c->value;

               /* Re-point all of the const node's successors at the load. */
               gpir_node_replace_succ(&load->node, node);

               /* Keep IR list order: insert the load right before node. */
               list_addtail(&load->node.list, &node->list);

               gpir_debug("lower const create uniform %d for const %d\n",
                          load->node.index, node->index);

            gpir_node_delete(node);
/* duplicate load to all its successors */
/* Each successor of a load node gets its own private clone of the load,
 * created in the successor's block and copying the load's source
 * description (index/component/reg).
 *
 * NOTE(review): mangled paste — the opening brace, closing braces and
 * the function's return statement(s) are missing from this view.
 */
static bool gpir_lower_load(gpir_compiler *comp)
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->type == gpir_node_type_load) {
            gpir_load_node *load = gpir_node_to_load(node);

            gpir_node_foreach_succ_safe(node, dep) {
               gpir_node *succ = dep->succ;

               /* Clone the load into the successor's block, inserted
                * right before the successor in list order. */
               gpir_node *new = gpir_node_create(succ->block, node->op);
               list_addtail(&new->list, &succ->list);

               gpir_debug("lower load create %d from %d for succ %d\n",
                          new->index, node->index, succ->index);

               /* Copy the source description to the clone. */
               gpir_load_node *nload = gpir_node_to_load(new);
               nload->index = load->index;
               nload->component = load->component;
               nload->reg = load->reg;

               /* Rewire this dependence edge and the successor's child
                * pointer to the clone. */
               gpir_node_replace_pred(dep, new);
               gpir_node_replace_child(succ, node, new);
/* Lower gpir_op_neg by folding the negation into a neighbouring ALU
 * node: either as a destination negate on the (single-successor) child,
 * or as a source negate on each successor that supports it.
 *
 * NOTE(review): mangled paste — the opening brace, several closing
 * braces, the 'return true;' statements and any continue/break after
 * the succ-type check are missing from this view.
 */
static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
   gpir_alu_node *neg = gpir_node_to_alu(node);
   gpir_node *child = neg->children[0];

   /* check if child can dest negate */
   if (child->type == gpir_node_type_alu) {
      /* negate must be its only successor */
      if (list_is_singular(&child->succ_list) &&
          gpir_op_infos[child->op].dest_neg) {
         gpir_alu_node *alu = gpir_node_to_alu(child);
         /* Toggle rather than set — the child may already negate. */
         alu->dest_negate = !alu->dest_negate;

         gpir_node_replace_succ(child, node);
         gpir_node_delete(node);

   /* check if child can src negate */
   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->type != gpir_node_type_alu)

      gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
      for (int i = 0; i < alu->num_child; i++) {
         if (alu->children[i] == node) {
            if (gpir_op_infos[succ->op].src_neg[i]) {
               /* Toggle the per-source negate and bypass the neg node. */
               alu->children_negate[i] = !alu->children_negate[i];
               alu->children[i] = child;

      gpir_node_replace_pred(dep, child);

   /* If every successor absorbed the negate, the node is now dead. */
   if (gpir_node_is_root(node))
      gpir_node_delete(node);
/* Lower rcp/rsqrt/exp2/log2 to the hardware 'complex' sequence: an
 * op-specific *_impl seed node, a complex2 step and a complex1 combine,
 * with preexp2/postlog2 fixup nodes wrapped around exp2/log2.
 *
 * NOTE(review): mangled paste — the opening brace, the 'return false;'
 * bodies of the unlikely() checks, the switch on node->op that selects
 * impl_op (and impl_op's declaration), closing braces and the final
 * 'return true;' are missing from this view and have NOT been
 * reconstructed.
 */
static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
   gpir_alu_node *alu = gpir_node_to_alu(node);
   gpir_node *child = alu->children[0];

   if (node->op == gpir_op_exp2) {
      /* exp2 needs its input pre-scaled by a preexp2 node. */
      gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
      if (unlikely(!preexp2))

      preexp2->children[0] = child;
      preexp2->num_child = 1;
      gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
      list_addtail(&preexp2->node.list, &node->list);

      /* All subsequent nodes consume the pre-scaled value. */
      child = &preexp2->node;

   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
   if (unlikely(!complex2))

   complex2->children[0] = child;
   complex2->num_child = 1;
   gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
   list_addtail(&complex2->node.list, &node->list);

   /* Op-specific seed selection — the enclosing switch on node->op is
    * missing from this view; only its case bodies survive. */
         impl_op = gpir_op_rcp_impl;
         impl_op = gpir_op_rsqrt_impl;
         impl_op = gpir_op_exp2_impl;
         impl_op = gpir_op_log2_impl;

   gpir_alu_node *impl = gpir_node_create(block, impl_op);
   impl->children[0] = child;
   gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
   list_addtail(&impl->node.list, &node->list);

   /* complex1 combines the seed, the complex2 step and the input. */
   gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
   complex1->children[0] = &impl->node;
   complex1->children[1] = &complex2->node;
   complex1->children[2] = child;
   complex1->num_child = 3;
   gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
   gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
   list_addtail(&complex1->node.list, &node->list);

   gpir_node *result = &complex1->node;

   if (node->op == gpir_op_log2) {
      /* log2 needs its result post-scaled by a postlog2 node. */
      gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
      if (unlikely(!postlog2))

      postlog2->children[0] = result;
      postlog2->num_child = 1;
      gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
      list_addtail(&postlog2->node.list, &node->list);

      result = &postlog2->node;

   /* Replace every use of the original node with the lowered result. */
   gpir_node_replace_succ(result, node);
   gpir_node_delete(node);
/* Wrap every node whose op may occupy two instruction slots in
 * auxiliary dummy_m/dummy_f nodes so value-register allocation can
 * treat it uniformly (see the comment inside).
 *
 * NOTE(review): mangled paste — the opening brace, the 'return false;'
 * bodies of the unlikely() checks, closing braces and the final
 * 'return true;' are missing from this view.
 */
static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_op_infos[node->op].may_consume_two_slots) {
            /* dummy_f/m are auxiliary nodes for value reg alloc:
             * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
             *    so the tree become: (dummy_m (node dummy_f))
             *    dummy_m can be spilled, but other nodes in the tree can't
             * 2. After reg allocation and fake dep add, merge all deps of
             *    dummy_m and dummy_f to node and remove dummy_m & dummy_f
             *
             * We may also not use dummy_f/m, but alloc two value reg for
             * node. But that means we need to make sure there're 2 free
             * slot after the node successors, but we just need one slot
             * after to be able to schedule it because we can use one move for
             * the two slot node. It's also not easy to handle the spill case
             * for the alloc 2 value method.
             *
             * With the dummy_f/m method, there's no such requirement, the
             * node can be scheduled only when there's two slots for it,
             * otherwise a move. And the node can be spilled with one reg.
             */
            gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
            if (unlikely(!dummy_m))
            list_add(&dummy_m->list, &node->list);

            gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
            if (unlikely(!dummy_f))
            list_add(&dummy_f->list, &node->list);

            /* dummy_m consumes both the real node and dummy_f. */
            gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
            alu->children[0] = node;
            alu->children[1] = dummy_f;

            /* node's former successors now consume dummy_m instead. */
            gpir_node_replace_succ(dummy_m, node);
            gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
            gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);
/*
 * There are no 'equal' or 'not-equal' opcodes.
 * eq (a == b) is lowered to and(a >= b, b >= a)
 * ne (a != b) is lowered to or(a < b, b < a)
 *
 * NOTE(review): mangled paste — the opening brace, the declarations of
 * cmp_node_op/node_new_op and the switch on node->op that assigns them
 * (with its break statements), closing braces and 'return true;' are
 * missing from this view.
 */
static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
   /* Select the comparison op and the combining op; per the original
    * annotations, min acts as logical AND and max as logical OR. */
      cmp_node_op = gpir_op_ge;
      node_new_op = gpir_op_min; /* and */
      cmp_node_op = gpir_op_lt;
      node_new_op = gpir_op_max; /* or */
      unreachable("bad node op");

   gpir_alu_node *e = gpir_node_to_alu(node);

   /* Two comparisons, operands swapped in the second. */
   gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
   list_addtail(&cmp1->node.list, &node->list);
   gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
   list_addtail(&cmp2->node.list, &node->list);

   cmp1->children[0] = e->children[0];
   cmp1->children[1] = e->children[1];

   cmp2->children[0] = e->children[1];
   cmp2->children[1] = e->children[0];

   gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
   gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);

   gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
   gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);

   /* Drop the node's old input deps; it now depends only on cmp1/cmp2. */
   gpir_node_foreach_pred_safe(node, dep) {
      gpir_node_remove_dep(node, dep->pred);

   gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);

   /* Reuse the original node as the combining min/max. */
   node->op = node_new_op;
   e->children[0] = &cmp1->node;
   e->children[1] = &cmp2->node;
/*
 * There is no 'abs' opcode.
 * abs(a) is lowered to max(a, -a)
 *
 * NOTE(review): mangled paste — the function body's braces and its
 * return statement are missing from this view.
 */
static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
   gpir_alu_node *alu = gpir_node_to_alu(node);

   assert(node->op == gpir_op_abs);

   /* Rewrite in place as max(a, -a). */
   node->op = gpir_op_max;

   /* Duplicate the operand into the second slot and negate it there. */
   alu->children[1] = alu->children[0];
   alu->children_negate[1] = true;
/*
 * There is no 'not' opcode.
 * not(a) is lowered to add(1, -a)
 *
 * NOTE(review): mangled paste — the function body's braces and its
 * return statement are missing from this view.
 */
static bool gpir_lower_not(gpir_block *block, gpir_node *node)
   gpir_alu_node *alu = gpir_node_to_alu(node);

   assert(alu->node.op == gpir_op_not);

   node->op = gpir_op_add;

   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
   gpir_const_node *c = gpir_node_to_const(node_const);

   assert(c->node.op == gpir_op_const);

   /* NOTE(review): no assignment of c->value is visible here — per the
    * lowering add(1, -a) it should be 1.0; confirm against the full
    * source. */

   list_addtail(&c->node.list, &node->list);

   gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);

   /* Move the operand to slot 1 with its negate flag flipped; slot 0
    * becomes the constant. */
   alu->children_negate[1] = !alu->children_negate[0];
   alu->children[1] = alu->children[0];
   alu->children[0] = &c->node;
/* There is no unconditional branch instruction, so we have to lower it to a
 * conditional branch with a condition of 1.0.
 *
 * NOTE(review): mangled paste — the function body's braces and its
 * return statement are missing from this view, as is any assignment of
 * the constant's value (the comment above says 1.0; confirm against the
 * full source).
 */
static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node)
   gpir_branch_node *branch = gpir_node_to_branch(node);

   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
   gpir_const_node *c = gpir_node_to_const(node_const);

   /* Keep list order: the condition constant precedes the branch. */
   list_addtail(&c->node.list, &node->list);

   gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT);

   /* Turn the node into a conditional branch on the constant. */
   branch->node.op = gpir_op_branch_cond;
   branch->cond = node_const;
/* Per-opcode lowering dispatch table, indexed by gpir op and applied by
 * gpir_pre_rsched_lower_prog below; ops with no entry need no per-node
 * lowering.
 *
 * NOTE(review): mangled paste — the initializer's closing '};' is
 * missing from this view.
 */
static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_not] = gpir_lower_not,
   [gpir_op_neg] = gpir_lower_neg,
   [gpir_op_rcp] = gpir_lower_complex,
   [gpir_op_rsqrt] = gpir_lower_complex,
   [gpir_op_exp2] = gpir_lower_complex,
   [gpir_op_log2] = gpir_lower_complex,
   [gpir_op_eq] = gpir_lower_eq_ne,
   [gpir_op_ne] = gpir_lower_eq_ne,
   [gpir_op_abs] = gpir_lower_abs,
   [gpir_op_branch_uncond] = gpir_lower_branch_uncond,
/* Entry point: run the per-op lowering table over every node, then the
 * whole-program lowerings (const, load, two-slot nodes), then dump the
 * resulting IR for debugging.
 *
 * NOTE(review): mangled paste — braces and the return statements
 * (including the 'return false;' failure paths of each if) are missing
 * from this view; the function may also continue past the visible end.
 */
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         /* A lowering callback returning false aborts the whole pass. */
         if (gpir_pre_rsched_lower_funcs[node->op] &&
             !gpir_pre_rsched_lower_funcs[node->op](block, node))

   if (!gpir_lower_const(comp))

   if (!gpir_lower_load(comp))

   if (!gpir_lower_node_may_consume_two_slots(comp))

   gpir_debug("pre rsched lower prog\n");
   gpir_node_print_prog_seq(comp);