/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
25 #include "util/ralloc.h"
28 #include "lima_context.h"
30 static bool gpir_lower_const(gpir_compiler
*comp
)
33 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
34 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
35 if (node
->op
== gpir_op_const
) {
36 if (gpir_node_is_root(node
))
37 gpir_node_delete(node
);
45 union fi
*constant
= ralloc_array(comp
->prog
, union fi
, num_constant
);
49 comp
->prog
->constant
= constant
;
50 comp
->prog
->constant_size
= num_constant
* sizeof(union fi
);
53 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
54 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
55 if (node
->op
== gpir_op_const
) {
56 gpir_const_node
*c
= gpir_node_to_const(node
);
58 if (!gpir_node_is_root(node
)) {
59 gpir_load_node
*load
= gpir_node_create(block
, gpir_op_load_uniform
);
63 load
->index
= comp
->constant_base
+ (index
>> 2);
64 load
->component
= index
% 4;
65 constant
[index
++] = c
->value
;
67 gpir_node_replace_succ(&load
->node
, node
);
69 list_addtail(&load
->node
.list
, &node
->list
);
71 gpir_debug("lower const create uniform %d for const %d\n",
72 load
->node
.index
, node
->index
);
75 gpir_node_delete(node
);
84 /* duplicate load to all its successors */
85 static bool gpir_lower_load(gpir_compiler
*comp
)
87 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
88 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
89 if (node
->type
== gpir_node_type_load
) {
90 gpir_load_node
*load
= gpir_node_to_load(node
);
93 gpir_node_foreach_succ_safe(node
, dep
) {
94 gpir_node
*succ
= dep
->succ
;
101 gpir_node
*new = gpir_node_create(succ
->block
, node
->op
);
104 list_addtail(&new->list
, &succ
->list
);
106 gpir_debug("lower load create %d from %d for succ %d\n",
107 new->index
, node
->index
, succ
->index
);
109 gpir_load_node
*nload
= gpir_node_to_load(new);
110 nload
->index
= load
->index
;
111 nload
->component
= load
->component
;
113 nload
->reg
= load
->reg
;
114 list_addtail(&nload
->reg_link
, &load
->reg
->uses_list
);
117 gpir_node_replace_pred(dep
, new);
118 gpir_node_replace_child(succ
, node
, new);
127 static bool gpir_lower_neg(gpir_block
*block
, gpir_node
*node
)
129 gpir_alu_node
*neg
= gpir_node_to_alu(node
);
130 gpir_node
*child
= neg
->children
[0];
132 /* check if child can dest negate */
133 if (child
->type
== gpir_node_type_alu
) {
134 /* negate must be its only successor */
135 if (list_is_singular(&child
->succ_list
) &&
136 gpir_op_infos
[child
->op
].dest_neg
) {
137 gpir_alu_node
*alu
= gpir_node_to_alu(child
);
138 alu
->dest_negate
= !alu
->dest_negate
;
140 gpir_node_replace_succ(child
, node
);
141 gpir_node_delete(node
);
146 /* check if child can src negate */
147 gpir_node_foreach_succ_safe(node
, dep
) {
148 gpir_node
*succ
= dep
->succ
;
149 if (succ
->type
!= gpir_node_type_alu
)
153 gpir_alu_node
*alu
= gpir_node_to_alu(dep
->succ
);
154 for (int i
= 0; i
< alu
->num_child
; i
++) {
155 if (alu
->children
[i
] == node
) {
156 if (gpir_op_infos
[succ
->op
].src_neg
[i
]) {
157 alu
->children_negate
[i
] = !alu
->children_negate
[i
];
158 alu
->children
[i
] = child
;
166 gpir_node_replace_pred(dep
, child
);
169 if (gpir_node_is_root(node
))
170 gpir_node_delete(node
);
175 static bool gpir_lower_complex(gpir_block
*block
, gpir_node
*node
)
177 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
178 gpir_node
*child
= alu
->children
[0];
180 gpir_alu_node
*complex2
= gpir_node_create(block
, gpir_op_complex2
);
181 if (unlikely(!complex2
))
184 complex2
->children
[0] = child
;
185 complex2
->num_child
= 1;
186 gpir_node_add_dep(&complex2
->node
, child
, GPIR_DEP_INPUT
);
187 list_addtail(&complex2
->node
.list
, &node
->list
);
192 impl_op
= gpir_op_rcp_impl
;
195 impl_op
= gpir_op_rsqrt_impl
;
201 gpir_alu_node
*impl
= gpir_node_create(block
, impl_op
);
205 impl
->children
[0] = child
;
207 gpir_node_add_dep(&impl
->node
, child
, GPIR_DEP_INPUT
);
208 list_addtail(&impl
->node
.list
, &node
->list
);
210 /* change node to complex1 node */
211 node
->op
= gpir_op_complex1
;
212 alu
->children
[0] = &impl
->node
;
213 alu
->children
[1] = &complex2
->node
;
214 alu
->children
[2] = child
;
216 gpir_node_add_dep(node
, &impl
->node
, GPIR_DEP_INPUT
);
217 gpir_node_add_dep(node
, &complex2
->node
, GPIR_DEP_INPUT
);
222 static bool gpir_lower_node_may_consume_two_slots(gpir_compiler
*comp
)
224 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
225 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
226 if (gpir_op_infos
[node
->op
].may_consume_two_slots
) {
227 /* dummy_f/m are auxiliary nodes for value reg alloc:
228 * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
229 * so the tree become: (dummy_m (node dummy_f))
230 * dummy_m can be spilled, but other nodes in the tree can't
232 * 2. After reg allocation and fake dep add, merge all deps of
233 * dummy_m and dummy_f to node and remove dummy_m & dummy_f
235 * We may also not use dummy_f/m, but alloc two value reg for
236 * node. But that means we need to make sure there're 2 free
237 * slot after the node successors, but we just need one slot
238 * after to be able to schedule it because we can use one move for
239 * the two slot node. It's also not easy to handle the spill case
240 * for the alloc 2 value method.
242 * With the dummy_f/m method, there's no such requirement, the
243 * node can be scheduled only when there's two slots for it,
244 * otherwise a move. And the node can be spilled with one reg.
246 gpir_node
*dummy_m
= gpir_node_create(block
, gpir_op_dummy_m
);
247 if (unlikely(!dummy_m
))
249 list_add(&dummy_m
->list
, &node
->list
);
251 gpir_node
*dummy_f
= gpir_node_create(block
, gpir_op_dummy_f
);
252 if (unlikely(!dummy_f
))
254 list_add(&dummy_f
->list
, &node
->list
);
256 gpir_alu_node
*alu
= gpir_node_to_alu(dummy_m
);
257 alu
->children
[0] = node
;
258 alu
->children
[1] = dummy_f
;
261 gpir_node_replace_succ(dummy_m
, node
);
262 gpir_node_add_dep(dummy_m
, node
, GPIR_DEP_INPUT
);
263 gpir_node_add_dep(dummy_m
, dummy_f
, GPIR_DEP_INPUT
);
273 * There are no 'equal' or 'not-equal' opcodes.
274 * eq (a == b) is lowered to and(a >= b, b >= a)
275 * ne (a != b) is lowered to or(a < b, b < a)
277 static bool gpir_lower_eq_ne(gpir_block
*block
, gpir_node
*node
)
283 cmp_node_op
= gpir_op_ge
;
284 node_new_op
= gpir_op_min
; /* and */
287 cmp_node_op
= gpir_op_lt
;
288 node_new_op
= gpir_op_max
; /* or */
294 gpir_alu_node
*e
= gpir_node_to_alu(node
);
296 gpir_alu_node
*cmp1
= gpir_node_create(block
, cmp_node_op
);
297 list_addtail(&cmp1
->node
.list
, &node
->list
);
298 gpir_alu_node
*cmp2
= gpir_node_create(block
, cmp_node_op
);
299 list_addtail(&cmp2
->node
.list
, &node
->list
);
301 cmp1
->children
[0] = e
->children
[0];
302 cmp1
->children
[1] = e
->children
[1];
305 cmp2
->children
[0] = e
->children
[1];
306 cmp2
->children
[1] = e
->children
[0];
309 gpir_node_add_dep(&cmp1
->node
, e
->children
[0], GPIR_DEP_INPUT
);
310 gpir_node_add_dep(&cmp1
->node
, e
->children
[1], GPIR_DEP_INPUT
);
312 gpir_node_add_dep(&cmp2
->node
, e
->children
[0], GPIR_DEP_INPUT
);
313 gpir_node_add_dep(&cmp2
->node
, e
->children
[1], GPIR_DEP_INPUT
);
315 gpir_node_foreach_pred_safe(node
, dep
) {
316 gpir_node_remove_dep(node
, dep
->pred
);
319 gpir_node_add_dep(node
, &cmp1
->node
, GPIR_DEP_INPUT
);
320 gpir_node_add_dep(node
, &cmp2
->node
, GPIR_DEP_INPUT
);
322 node
->op
= node_new_op
;
323 e
->children
[0] = &cmp1
->node
;
324 e
->children
[1] = &cmp2
->node
;
331 * There is no 'abs' opcode.
332 * abs(a) is lowered to max(a, -a)
334 static bool gpir_lower_abs(gpir_block
*block
, gpir_node
*node
)
336 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
338 assert(node
->op
== gpir_op_abs
);
340 node
->op
= gpir_op_max
;
342 alu
->children
[1] = alu
->children
[0];
343 alu
->children_negate
[1] = true;
350 * There is no 'not' opcode.
351 * not(a) is lowered to add(1, -a)
353 static bool gpir_lower_not(gpir_block
*block
, gpir_node
*node
)
355 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
357 assert(alu
->node
.op
== gpir_op_not
);
359 node
->op
= gpir_op_add
;
361 gpir_node
*node_const
= gpir_node_create(block
, gpir_op_const
);
362 gpir_const_node
*c
= gpir_node_to_const(node_const
);
364 assert(c
->node
.op
== gpir_op_const
);
366 list_addtail(&c
->node
.list
, &node
->list
);
368 gpir_node_add_dep(&alu
->node
, &c
->node
, GPIR_DEP_INPUT
);
370 alu
->children_negate
[1] = !alu
->children_negate
[0];
371 alu
->children
[1] = alu
->children
[0];
372 alu
->children
[0] = &c
->node
;
379 static bool (*gpir_pre_rsched_lower_funcs
[gpir_op_num
])(gpir_block
*, gpir_node
*) = {
380 [gpir_op_not
] = gpir_lower_not
,
383 static bool (*gpir_post_rsched_lower_funcs
[gpir_op_num
])(gpir_block
*, gpir_node
*) = {
384 [gpir_op_neg
] = gpir_lower_neg
,
385 [gpir_op_rcp
] = gpir_lower_complex
,
386 [gpir_op_rsqrt
] = gpir_lower_complex
,
387 [gpir_op_eq
] = gpir_lower_eq_ne
,
388 [gpir_op_ne
] = gpir_lower_eq_ne
,
389 [gpir_op_abs
] = gpir_lower_abs
,
392 bool gpir_pre_rsched_lower_prog(gpir_compiler
*comp
)
394 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
395 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
396 if (gpir_pre_rsched_lower_funcs
[node
->op
] &&
397 !gpir_pre_rsched_lower_funcs
[node
->op
](block
, node
))
402 if (!gpir_lower_const(comp
))
405 if (!gpir_lower_load(comp
))
408 gpir_debug("pre rsched lower prog\n");
409 gpir_node_print_prog_seq(comp
);
413 bool gpir_post_rsched_lower_prog(gpir_compiler
*comp
)
415 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
416 list_for_each_entry_safe(gpir_node
, node
, &block
->node_list
, list
) {
417 if (gpir_post_rsched_lower_funcs
[node
->op
] &&
418 !gpir_post_rsched_lower_funcs
[node
->op
](block
, node
))
423 if (!gpir_lower_node_may_consume_two_slots(comp
))
426 gpir_debug("post rsched lower prog\n");
427 gpir_node_print_prog_seq(comp
);