/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
30 static bool ppir_lower_const(ppir_block
*block
, ppir_node
*node
)
32 if (ppir_node_is_root(node
)) {
33 ppir_node_delete(node
);
37 ppir_node
*move
= NULL
;
38 ppir_dest
*dest
= ppir_node_get_dest(node
);
40 /* const (register) can only be used in alu node, create a move
41 * node for other types of node */
42 ppir_node_foreach_succ_safe(node
, dep
) {
43 ppir_node
*succ
= dep
->succ
;
45 if (succ
->type
!= ppir_node_type_alu
) {
47 move
= ppir_node_create(block
, ppir_op_mov
, -1, 0);
51 ppir_debug("lower const create move %d for %d\n",
52 move
->index
, node
->index
);
54 ppir_alu_node
*alu
= ppir_node_to_alu(move
);
57 ppir_node_target_assign(alu
->src
, dest
);
58 for (int i
= 0; i
< 4; i
++)
59 alu
->src
->swizzle
[i
] = i
;
62 ppir_node_replace_pred(dep
, move
);
63 ppir_node_replace_child(succ
, node
, move
);
68 ppir_node_add_dep(move
, node
);
69 list_addtail(&move
->list
, &node
->list
);
75 /* lower dot to mul+sum */
76 static bool ppir_lower_dot(ppir_block
*block
, ppir_node
*node
)
78 ppir_alu_node
*mul
= ppir_node_create(block
, ppir_op_mul
, -1, 0);
81 list_addtail(&mul
->node
.list
, &node
->list
);
83 ppir_alu_node
*dot
= ppir_node_to_alu(node
);
84 mul
->src
[0] = dot
->src
[0];
85 mul
->src
[1] = dot
->src
[1];
88 int num_components
= node
->op
- ppir_op_dot2
+ 2;
89 ppir_dest
*dest
= &mul
->dest
;
90 dest
->type
= ppir_target_ssa
;
91 dest
->ssa
.num_components
= num_components
;
92 dest
->ssa
.live_in
= INT_MAX
;
93 dest
->ssa
.live_out
= 0;
94 dest
->write_mask
= u_bit_consecutive(0, num_components
);
96 ppir_node_foreach_pred_safe(node
, dep
) {
97 ppir_node
*pred
= dep
->pred
;
98 ppir_node_remove_dep(dep
);
99 ppir_node_add_dep(&mul
->node
, pred
);
101 ppir_node_add_dep(node
, &mul
->node
);
103 if (node
->op
== ppir_op_dot2
) {
104 node
->op
= ppir_op_add
;
106 ppir_node_target_assign(dot
->src
, dest
);
107 dot
->src
[0].swizzle
[0] = 0;
108 dot
->src
[0].absolute
= false;
109 dot
->src
[0].negate
= false;
111 ppir_node_target_assign(dot
->src
+ 1, dest
);
112 dot
->src
[1].swizzle
[0] = 1;
113 dot
->src
[1].absolute
= false;
114 dot
->src
[1].negate
= false;
117 node
->op
= node
->op
== ppir_op_dot3
? ppir_op_sum3
: ppir_op_sum4
;
119 ppir_node_target_assign(dot
->src
, dest
);
120 for (int i
= 0; i
< 4; i
++)
121 dot
->src
[0].swizzle
[i
] = i
;
122 dot
->src
[0].absolute
= false;
123 dot
->src
[0].negate
= false;
131 static ppir_reg
*create_reg(ppir_compiler
*comp
, int num_components
)
133 ppir_reg
*r
= rzalloc(comp
, ppir_reg
);
137 r
->num_components
= num_components
;
138 r
->live_in
= INT_MAX
;
141 list_addtail(&r
->list
, &comp
->reg_list
);
146 /* lower vector alu node to multi scalar nodes */
147 static bool ppir_lower_vec_to_scalar(ppir_block
*block
, ppir_node
*node
)
149 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
150 ppir_dest
*dest
= &alu
->dest
;
155 unsigned mask
= dest
->write_mask
;
157 index
[n
++] = u_bit_scan(&mask
);
163 /* we need a reg for scalar nodes to store output */
164 if (dest
->type
== ppir_target_register
)
167 r
= create_reg(block
->comp
, n
);
171 /* change all successors to use reg r */
172 ppir_node_foreach_succ(node
, dep
) {
173 ppir_node
*succ
= dep
->succ
;
174 if (succ
->type
== ppir_node_type_alu
) {
175 ppir_alu_node
*sa
= ppir_node_to_alu(succ
);
176 for (int i
= 0; i
< sa
->num_src
; i
++) {
177 ppir_src
*src
= sa
->src
+ i
;
178 if (ppir_node_target_equal(src
, dest
)) {
179 src
->type
= ppir_target_register
;
185 assert(succ
->type
== ppir_node_type_store
);
186 ppir_store_node
*ss
= ppir_node_to_store(succ
);
187 ppir_src
*src
= &ss
->src
;
188 src
->type
= ppir_target_register
;
194 /* create each component's scalar node */
195 for (int i
= 0; i
< n
; i
++) {
196 ppir_node
*s
= ppir_node_create(block
, node
->op
, -1, 0);
199 list_addtail(&s
->list
, &node
->list
);
201 ppir_alu_node
*sa
= ppir_node_to_alu(s
);
202 ppir_dest
*sd
= &sa
->dest
;
203 sd
->type
= ppir_target_register
;
205 sd
->modifier
= dest
->modifier
;
206 sd
->write_mask
= 1 << index
[i
];
208 for (int j
= 0; j
< alu
->num_src
; j
++)
209 sa
->src
[j
] = alu
->src
[j
];
210 sa
->num_src
= alu
->num_src
;
212 /* TODO: need per reg component dependancy */
213 ppir_node_foreach_succ(node
, dep
) {
214 ppir_node_add_dep(dep
->succ
, s
);
217 ppir_node_foreach_pred(node
, dep
) {
218 ppir_node_add_dep(s
, dep
->pred
);
222 ppir_node_delete(node
);
226 static bool ppir_lower_swap_args(ppir_block
*block
, ppir_node
*node
)
228 /* swapped op must be the next op */
231 assert(node
->type
== ppir_node_type_alu
);
232 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
233 assert(alu
->num_src
== 2);
235 ppir_src tmp
= alu
->src
[0];
236 alu
->src
[0] = alu
->src
[1];
241 static bool ppir_lower_texture(ppir_block
*block
, ppir_node
*node
)
243 ppir_load_texture_node
*load_tex
= ppir_node_to_load_texture(node
);
245 if (ppir_node_has_single_pred(node
)) {
246 ppir_node
*pred
= ppir_node_first_pred(node
);
247 if (pred
->op
== ppir_op_load_varying
) {
248 /* If ldtex is the only successor of load_varying node
249 * we're good. Just change load_varying op type to load_coords.
251 if (ppir_node_has_single_succ(pred
)) {
252 pred
->op
= ppir_op_load_coords
;
258 /* Otherwise we need to create load_coords node */
259 ppir_load_node
*load
= ppir_node_create(block
, ppir_op_load_coords
, -1, 0);
262 list_addtail(&load
->node
.list
, &node
->list
);
264 ppir_debug("%s create load_coords node %d for %d\n",
265 __FUNCTION__
, load
->node
.index
, node
->index
);
267 ppir_dest
*dest
= &load
->dest
;
268 dest
->type
= ppir_target_ssa
;
269 dest
->ssa
.num_components
= load_tex
->src_coords
.ssa
->num_components
;
270 dest
->ssa
.live_in
= INT_MAX
;
271 dest
->ssa
.live_out
= 0;
272 dest
->write_mask
= u_bit_consecutive(0, dest
->ssa
.num_components
);
274 load
->src
= load_tex
->src_coords
;
276 ppir_src
*src
= &load_tex
->src_coords
;
277 src
->type
= ppir_target_ssa
;
278 src
->ssa
= &dest
->ssa
;
280 ppir_node_foreach_pred_safe(node
, dep
) {
281 ppir_node
*pred
= dep
->pred
;
282 ppir_node_remove_dep(dep
);
283 ppir_node_add_dep(&load
->node
, pred
);
286 ppir_node_add_dep(node
, &load
->node
);
290 /* Prepare for sin and cos and then lower vector alu node to multi
292 static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block
*block
, ppir_node
*node
)
294 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
296 ppir_node
*inv_2pi_node
= ppir_node_create(block
, ppir_op_const
, -1, 0);
299 list_addtail(&inv_2pi_node
->list
, &node
->list
);
301 /* For sin and cos, the input has to multiplied by the constant
302 * 1/(2*pi), presumably to simplify the hardware. */
303 ppir_const_node
*inv_2pi_const
= ppir_node_to_const(inv_2pi_node
);
304 inv_2pi_const
->constant
.num
= 1;
305 inv_2pi_const
->constant
.value
[0].f
= (1.0f
/(2.0f
* M_PI
));
307 inv_2pi_const
->dest
.type
= ppir_target_ssa
;
308 inv_2pi_const
->dest
.ssa
.num_components
= 1;
309 inv_2pi_const
->dest
.ssa
.live_in
= INT_MAX
;
310 inv_2pi_const
->dest
.ssa
.live_out
= 0;
311 inv_2pi_const
->dest
.write_mask
= 0x01;
313 ppir_node
*mul_node
= ppir_node_create(block
, ppir_op_mul
, -1, 0);
316 list_addtail(&mul_node
->list
, &node
->list
);
318 ppir_alu_node
*mul_alu
= ppir_node_to_alu(mul_node
);
319 mul_alu
->num_src
= 2;
320 mul_alu
->src
[0] = alu
->src
[0];
321 mul_alu
->src
[1].type
= ppir_target_ssa
;
322 mul_alu
->src
[1].ssa
= &inv_2pi_const
->dest
.ssa
;
324 int num_components
= alu
->src
[0].ssa
->num_components
;
325 mul_alu
->dest
.type
= ppir_target_ssa
;
326 mul_alu
->dest
.ssa
.num_components
= num_components
;
327 mul_alu
->dest
.ssa
.live_in
= INT_MAX
;
328 mul_alu
->dest
.ssa
.live_out
= 0;
329 mul_alu
->dest
.write_mask
= u_bit_consecutive(0, num_components
);
331 alu
->src
[0].type
= ppir_target_ssa
;
332 alu
->src
[0].ssa
= &mul_alu
->dest
.ssa
;
333 for (int i
= 0; i
< 4; i
++)
334 alu
->src
->swizzle
[i
] = i
;
336 ppir_node_foreach_pred_safe(node
, dep
) {
337 ppir_node
*pred
= dep
->pred
;
338 ppir_node_remove_dep(dep
);
339 ppir_node_add_dep(mul_node
, pred
);
341 ppir_node_add_dep(node
, mul_node
);
342 ppir_node_add_dep(mul_node
, inv_2pi_node
);
344 return ppir_lower_vec_to_scalar(block
, node
);
347 /* insert a move as the select condition to make sure it can
348 * be inserted to select instr float mul slot
350 static bool ppir_lower_select(ppir_block
*block
, ppir_node
*node
)
352 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
354 ppir_node
*move
= ppir_node_create(block
, ppir_op_mov
, -1, 0);
357 list_addtail(&move
->list
, &node
->list
);
359 ppir_alu_node
*move_alu
= ppir_node_to_alu(move
);
360 ppir_src
*move_src
= move_alu
->src
, *src
= alu
->src
;
361 move_src
->type
= src
->type
;
362 move_src
->ssa
= src
->ssa
;
363 move_src
->swizzle
[0] = src
->swizzle
[0];
364 move_alu
->num_src
= 1;
366 ppir_dest
*move_dest
= &move_alu
->dest
;
367 move_dest
->type
= ppir_target_ssa
;
368 move_dest
->ssa
.num_components
= 1;
369 move_dest
->ssa
.live_in
= INT_MAX
;
370 move_dest
->ssa
.live_out
= 0;
371 move_dest
->write_mask
= 1;
373 ppir_node_foreach_pred(node
, dep
) {
374 ppir_node
*pred
= dep
->pred
;
375 ppir_dest
*dest
= ppir_node_get_dest(pred
);
376 if (ppir_node_target_equal(alu
->src
, dest
)) {
377 ppir_node_replace_pred(dep
, move
);
378 ppir_node_add_dep(move
, pred
);
382 /* move must be the first pred of select node which make sure
383 * the float mul slot is free when node to instr
385 assert(ppir_node_first_pred(node
) == move
);
388 ppir_node_target_assign(alu
->src
, move_dest
);
392 static bool ppir_lower_trunc(ppir_block
*block
, ppir_node
*node
)
394 /* Turn it into a mov with a round to integer output modifier */
395 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
396 ppir_dest
*move_dest
= &alu
->dest
;
397 move_dest
->modifier
= ppir_outmod_round
;
398 node
->op
= ppir_op_mov
;
403 static bool ppir_lower_abs(ppir_block
*block
, ppir_node
*node
)
405 /* Turn it into a mov and set the absolute modifier */
406 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
408 assert(alu
->num_src
== 1);
410 alu
->src
[0].absolute
= true;
411 alu
->src
[0].negate
= false;
412 node
->op
= ppir_op_mov
;
417 static bool ppir_lower_neg(ppir_block
*block
, ppir_node
*node
)
419 /* Turn it into a mov and set the negate modifier */
420 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
422 assert(alu
->num_src
== 1);
424 alu
->src
[0].negate
= !alu
->src
[0].negate
;
425 node
->op
= ppir_op_mov
;
430 static bool ppir_lower_sat(ppir_block
*block
, ppir_node
*node
)
432 /* Turn it into a mov with the saturate output modifier */
433 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
435 assert(alu
->num_src
== 1);
437 ppir_dest
*move_dest
= &alu
->dest
;
438 move_dest
->modifier
= ppir_outmod_clamp_fraction
;
439 node
->op
= ppir_op_mov
;
444 static bool ppir_lower_branch(ppir_block
*block
, ppir_node
*node
)
446 ppir_branch_node
*branch
= ppir_node_to_branch(node
);
447 ppir_const_node
*zero
= ppir_node_create(block
, ppir_op_const
, -1, 0);
452 list_addtail(&zero
->node
.list
, &node
->list
);
454 zero
->constant
.value
[0].f
= 0;
455 zero
->constant
.num
= 1;
456 zero
->dest
.type
= ppir_target_ssa
;
457 zero
->dest
.ssa
.num_components
= 1;
458 zero
->dest
.ssa
.live_in
= INT_MAX
;
459 zero
->dest
.ssa
.live_out
= 0;
460 zero
->dest
.write_mask
= 0x01;
462 /* For now we're just comparing branch condition with 0,
463 * in future we should look whether it's possible to move
464 * comparision node into branch itself and use current
465 * way as a fallback for complex conditions.
467 branch
->src
[1].type
= ppir_target_ssa
;
468 branch
->src
[1].ssa
= &zero
->dest
.ssa
;
470 branch
->cond_gt
= true;
471 branch
->cond_lt
= true;
473 ppir_node_add_dep(&branch
->node
, &zero
->node
);
478 static bool (*ppir_lower_funcs
[ppir_op_num
])(ppir_block
*, ppir_node
*) = {
479 [ppir_op_abs
] = ppir_lower_abs
,
480 [ppir_op_neg
] = ppir_lower_neg
,
481 [ppir_op_const
] = ppir_lower_const
,
482 [ppir_op_dot2
] = ppir_lower_dot
,
483 [ppir_op_dot3
] = ppir_lower_dot
,
484 [ppir_op_dot4
] = ppir_lower_dot
,
485 [ppir_op_rcp
] = ppir_lower_vec_to_scalar
,
486 [ppir_op_rsqrt
] = ppir_lower_vec_to_scalar
,
487 [ppir_op_log2
] = ppir_lower_vec_to_scalar
,
488 [ppir_op_exp2
] = ppir_lower_vec_to_scalar
,
489 [ppir_op_sqrt
] = ppir_lower_vec_to_scalar
,
490 [ppir_op_sin
] = ppir_lower_sin_cos_vec_to_scalar
,
491 [ppir_op_cos
] = ppir_lower_sin_cos_vec_to_scalar
,
492 [ppir_op_lt
] = ppir_lower_swap_args
,
493 [ppir_op_le
] = ppir_lower_swap_args
,
494 [ppir_op_load_texture
] = ppir_lower_texture
,
495 [ppir_op_select
] = ppir_lower_select
,
496 [ppir_op_trunc
] = ppir_lower_trunc
,
497 [ppir_op_sat
] = ppir_lower_sat
,
498 [ppir_op_branch
] = ppir_lower_branch
,
501 bool ppir_lower_prog(ppir_compiler
*comp
)
503 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
504 list_for_each_entry_safe(ppir_node
, node
, &block
->node_list
, list
) {
505 if (ppir_lower_funcs
[node
->op
] &&
506 !ppir_lower_funcs
[node
->op
](block
, node
))
511 ppir_node_print_prog(comp
);