/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
25 #include "util/ralloc.h"
29 #include "lima_context.h"
/*
 * Translate an operand into the gpir_codegen_src selector that its consumer
 * has to encode.
 *
 * parent: the node consuming the value.
 * child:  the node producing the value.
 *
 * The selector is looked up by the slot the child was scheduled into
 * (child->sched.pos) and by diff, the child's instruction index minus the
 * parent's instruction index.
 *
 * NOTE(review): this listing carries stray numeric prefixes and is missing
 * lines (braces, any range checks on diff, the return statement). That looks
 * like extraction residue - confirm against the upstream file.
 */
31 static gpir_codegen_src
gpir_get_alu_input(gpir_node
*parent
, gpir_node
*child
)
/* Rows: producer slot. Columns: diff (0, 1, 2). gpir_codegen_src_unused
 * marks a slot/diff combination for which no selector exists. */
33 static const int slot_to_src
[GPIR_INSTR_SLOT_NUM
][3] = {
/* MUL, ADD, COMPLEX and PASS results have no diff == 0 selector; they are
 * only reachable through the p1 (diff == 1) and, except for COMPLEX,
 * p2 (diff == 2) selectors. */
34 [GPIR_INSTR_SLOT_MUL0
] = {
35 gpir_codegen_src_unused
, gpir_codegen_src_p1_mul_0
, gpir_codegen_src_p2_mul_0
},
36 [GPIR_INSTR_SLOT_MUL1
] = {
37 gpir_codegen_src_unused
, gpir_codegen_src_p1_mul_1
, gpir_codegen_src_p2_mul_1
},
39 [GPIR_INSTR_SLOT_ADD0
] = {
40 gpir_codegen_src_unused
, gpir_codegen_src_p1_acc_0
, gpir_codegen_src_p2_acc_0
},
41 [GPIR_INSTR_SLOT_ADD1
] = {
42 gpir_codegen_src_unused
, gpir_codegen_src_p1_acc_1
, gpir_codegen_src_p2_acc_1
},
44 [GPIR_INSTR_SLOT_COMPLEX
] = {
45 gpir_codegen_src_unused
, gpir_codegen_src_p1_complex
, gpir_codegen_src_unused
},
46 [GPIR_INSTR_SLOT_PASS
] = {
47 gpir_codegen_src_unused
, gpir_codegen_src_p1_pass
, gpir_codegen_src_p2_pass
},
/* REG0 loads: selectable at diff == 0 (attrib_*) and diff == 1
 * (p1_attrib_*). */
49 [GPIR_INSTR_SLOT_REG0_LOAD0
] = {
50 gpir_codegen_src_attrib_x
, gpir_codegen_src_p1_attrib_x
, gpir_codegen_src_unused
},
51 [GPIR_INSTR_SLOT_REG0_LOAD1
] = {
52 gpir_codegen_src_attrib_y
, gpir_codegen_src_p1_attrib_y
, gpir_codegen_src_unused
},
53 [GPIR_INSTR_SLOT_REG0_LOAD2
] = {
54 gpir_codegen_src_attrib_z
, gpir_codegen_src_p1_attrib_z
, gpir_codegen_src_unused
},
55 [GPIR_INSTR_SLOT_REG0_LOAD3
] = {
56 gpir_codegen_src_attrib_w
, gpir_codegen_src_p1_attrib_w
, gpir_codegen_src_unused
},
/* REG1 and MEM loads: selectable at diff == 0 only. */
58 [GPIR_INSTR_SLOT_REG1_LOAD0
] = {
59 gpir_codegen_src_register_x
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
60 [GPIR_INSTR_SLOT_REG1_LOAD1
] = {
61 gpir_codegen_src_register_y
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
62 [GPIR_INSTR_SLOT_REG1_LOAD2
] = {
63 gpir_codegen_src_register_z
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
64 [GPIR_INSTR_SLOT_REG1_LOAD3
] = {
65 gpir_codegen_src_register_w
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
67 [GPIR_INSTR_SLOT_MEM_LOAD0
] = {
68 gpir_codegen_src_load_x
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
69 [GPIR_INSTR_SLOT_MEM_LOAD1
] = {
70 gpir_codegen_src_load_y
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
71 [GPIR_INSTR_SLOT_MEM_LOAD2
] = {
72 gpir_codegen_src_load_z
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
73 [GPIR_INSTR_SLOT_MEM_LOAD3
] = {
74 gpir_codegen_src_load_w
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
/* Distance between producer and consumer, in instruction-index units.
 * It is used as the column index below, so it must stay within 0..2. */
77 int diff
= child
->sched
.instr
->index
- parent
->sched
.instr
->index
;
81 int src
= slot_to_src
[child
->sched
.pos
][diff
];
/* Landing on "unused" means this producer slot cannot be read at this
 * distance. */
82 assert(src
!= gpir_codegen_src_unused
);
/*
 * Fill the MUL0 part of an encoded instruction from the node scheduled in
 * instr->slots[GPIR_INSTR_SLOT_MUL0].
 *
 * code:  instruction being encoded; this function writes mul0_src0,
 *        mul0_src1, mul0_neg and mul_op.
 * instr: scheduled instruction the node is read from.
 *
 * NOTE(review): the control-flow lines (braces, the empty-slot guard, the
 * switch header, most case labels and the breaks) are missing from this
 * listing, and each statement carries a stray numeric prefix. The grouping
 * described in the comments below is inferred from the visible statements -
 * confirm against the upstream file.
 */
86 static void gpir_codegen_mul0_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
88 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_MUL0
];
/* Both operands marked unused - presumably the empty-slot path (the
 * guarding condition is not visible here). */
91 code
->mul0_src0
= gpir_codegen_src_unused
;
92 code
->mul0_src1
= gpir_codegen_src_unused
;
96 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: operands come from children[0] and children[1]. */
100 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
101 code
->mul0_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
102 if (code
->mul0_src1
== gpir_codegen_src_p1_complex
) {
103 /* Will get confused with gpir_codegen_src_ident, so need to swap inputs */
104 code
->mul0_src1
= code
->mul0_src0
;
105 code
->mul0_src0
= gpir_codegen_src_p1_complex
;
/* All negations are folded into one flag: start from dest_negate and flip
 * it once per negated child, so two negated children cancel out. */
108 code
->mul0_neg
= alu
->dest_negate
;
109 if (alu
->children_negate
[0])
110 code
->mul0_neg
= !code
->mul0_neg
;
111 if (alu
->children_negate
[1])
112 code
->mul0_neg
= !code
->mul0_neg
;
/* Negate flag forced on. NOTE(review): the case label, and whether this
 * falls through into the single-operand form below, are not visible. */
116 code
->mul0_neg
= true;
/* Single-operand form: children[0] paired with gpir_codegen_src_ident. */
119 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
120 code
->mul0_src1
= gpir_codegen_src_ident
;
/* complex1: children[0] and children[1] as operands. */
123 case gpir_op_complex1
:
124 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
125 code
->mul0_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
126 code
->mul_op
= gpir_codegen_mul_op_complex1
;
/* complex2: the same operand (children[0]) is fed to both inputs. */
129 case gpir_op_complex2
:
130 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
131 code
->mul0_src1
= code
->mul0_src0
;
132 code
->mul_op
= gpir_codegen_mul_op_complex2
;
/* mul_op select: src0 comes from children[2] and src1 from children[0]
 * (case label not visible). */
136 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[2]);
137 code
->mul0_src1
= gpir_get_alu_input(node
, alu
->children
[0]);
138 code
->mul_op
= gpir_codegen_mul_op_select
;
/*
 * Fill the MUL1 part of an encoded instruction from the node scheduled in
 * instr->slots[GPIR_INSTR_SLOT_MUL1].
 *
 * code:  instruction being encoded; this function writes mul1_src0,
 *        mul1_src1 and mul1_neg. No write to mul_op is visible in this
 *        function.
 * instr: scheduled instruction the node is read from.
 *
 * NOTE(review): the control-flow lines (braces, the empty-slot guard, the
 * switch header, most case labels and the breaks) are missing from this
 * listing, and each statement carries a stray numeric prefix. The grouping
 * described in the comments below is inferred from the visible statements -
 * confirm against the upstream file.
 */
146 static void gpir_codegen_mul1_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
148 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_MUL1
];
/* Both operands marked unused - presumably the empty-slot path (the
 * guarding condition is not visible here). */
151 code
->mul1_src0
= gpir_codegen_src_unused
;
152 code
->mul1_src1
= gpir_codegen_src_unused
;
156 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: operands come from children[0] and children[1]. */
160 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
161 code
->mul1_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
162 if (code
->mul1_src1
== gpir_codegen_src_p1_complex
) {
163 /* Will get confused with gpir_codegen_src_ident, so need to swap inputs */
164 code
->mul1_src1
= code
->mul1_src0
;
165 code
->mul1_src0
= gpir_codegen_src_p1_complex
;
/* All negations are folded into one flag: start from dest_negate and flip
 * it once per negated child, so two negated children cancel out. */
168 code
->mul1_neg
= alu
->dest_negate
;
169 if (alu
->children_negate
[0])
170 code
->mul1_neg
= !code
->mul1_neg
;
171 if (alu
->children_negate
[1])
172 code
->mul1_neg
= !code
->mul1_neg
;
/* Negate flag forced on. NOTE(review): the case label, and whether this
 * falls through into the single-operand form below, are not visible. */
176 code
->mul1_neg
= true;
/* Single-operand form: children[0] paired with gpir_codegen_src_ident. */
179 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
180 code
->mul1_src1
= gpir_codegen_src_ident
;
/* complex1: this slot takes children[0] and children[2]. */
183 case gpir_op_complex1
:
184 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
185 code
->mul1_src1
= gpir_get_alu_input(node
, alu
->children
[2]);
/* Only src0 is used here (children[1]); src1 is explicitly unused.
 * NOTE(review): the case label is not visible - confirm which op this
 * belongs to. */
189 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[1]);
190 code
->mul1_src1
= gpir_codegen_src_unused
;
/*
 * Fill the ADD0 part of an encoded instruction from the node scheduled in
 * instr->slots[GPIR_INSTR_SLOT_ADD0].
 *
 * code:  instruction being encoded; this function writes acc0_src0,
 *        acc0_src1, acc0_src0_neg, acc0_src1_neg and acc_op. Note that the
 *        opcode field (acc_op) has no 0/1 suffix, unlike the source fields.
 * instr: scheduled instruction the node is read from.
 *
 * NOTE(review): the control-flow lines (braces, the empty-slot guard, the
 * switch headers, every case label and the breaks) are missing from this
 * listing, and each statement carries a stray numeric prefix. The grouping
 * described in the comments below is inferred from the visible statements -
 * confirm against the upstream file.
 */
198 static void gpir_codegen_add0_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
200 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_ADD0
];
/* Both operands marked unused - presumably the empty-slot path (the
 * guarding condition is not visible here). */
203 code
->acc0_src0
= gpir_codegen_src_unused
;
204 code
->acc0_src1
= gpir_codegen_src_unused
;
208 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: both operands and their per-source negate flags are
 * copied from the ALU node. */
216 code
->acc0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
217 code
->acc0_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
219 code
->acc0_src0_neg
= alu
->children_negate
[0];
220 code
->acc0_src1_neg
= alu
->children_negate
[1];
/* For acc_op add, a p1_complex second operand is moved to the first
 * position; the two negate flags are swapped along with the operands so
 * each flag stays with its operand. NOTE(review): the reason for the swap
 * is not stated in this function - confirm. */
224 code
->acc_op
= gpir_codegen_acc_op_add
;
225 if (code
->acc0_src1
== gpir_codegen_src_p1_complex
) {
226 code
->acc0_src1
= code
->acc0_src0
;
227 code
->acc0_src0
= gpir_codegen_src_p1_complex
;
229 bool tmp
= code
->acc0_src0_neg
;
230 code
->acc0_src0_neg
= code
->acc0_src1_neg
;
231 code
->acc0_src1_neg
= tmp
;
/* Remaining opcode assignments (case labels not visible): min, max, lt,
 * ge. */
235 code
->acc_op
= gpir_codegen_acc_op_min
;
238 code
->acc_op
= gpir_codegen_acc_op_max
;
241 code
->acc_op
= gpir_codegen_acc_op_lt
;
244 code
->acc_op
= gpir_codegen_acc_op_ge
;
/* Single-operand form: only src0 and its negate flag are filled in. */
254 code
->acc0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
255 code
->acc0_src0_neg
= alu
->children_negate
[0];
/* Opcode assignments for this form (case labels not visible): floor,
 * sign. */
258 code
->acc_op
= gpir_codegen_acc_op_floor
;
261 code
->acc_op
= gpir_codegen_acc_op_sign
;
/* src0 negate forced on. NOTE(review): the case label, and whether this
 * falls through into the statements below, are not visible. */
269 code
->acc0_src0_neg
= true;
/* acc_op add with gpir_codegen_src_ident as the second operand, whose
 * negate flag is set. */
272 code
->acc_op
= gpir_codegen_acc_op_add
;
273 code
->acc0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
274 code
->acc0_src1
= gpir_codegen_src_ident
;
275 code
->acc0_src1_neg
= true;
/*
 * Fill the ADD1 part of an encoded instruction from the node scheduled in
 * instr->slots[GPIR_INSTR_SLOT_ADD1].
 *
 * code:  instruction being encoded; this function writes acc1_src0,
 *        acc1_src1, acc1_src0_neg, acc1_src1_neg and acc_op. Note that the
 *        opcode field (acc_op) has no 0/1 suffix, unlike the source fields.
 * instr: scheduled instruction the node is read from.
 *
 * NOTE(review): the control-flow lines (braces, the empty-slot guard, the
 * switch headers, every case label and the breaks) are missing from this
 * listing, and each statement carries a stray numeric prefix. The grouping
 * described in the comments below is inferred from the visible statements -
 * confirm against the upstream file.
 */
283 static void gpir_codegen_add1_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
285 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_ADD1
];
/* Both operands marked unused - presumably the empty-slot path (the
 * guarding condition is not visible here). */
288 code
->acc1_src0
= gpir_codegen_src_unused
;
289 code
->acc1_src1
= gpir_codegen_src_unused
;
293 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: both operands and their per-source negate flags are
 * copied from the ALU node. */
301 code
->acc1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
302 code
->acc1_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
304 code
->acc1_src0_neg
= alu
->children_negate
[0];
305 code
->acc1_src1_neg
= alu
->children_negate
[1];
/* For acc_op add, a p1_complex second operand is moved to the first
 * position; the two negate flags are swapped along with the operands so
 * each flag stays with its operand. NOTE(review): the reason for the swap
 * is not stated in this function - confirm. */
309 code
->acc_op
= gpir_codegen_acc_op_add
;
310 if (code
->acc1_src1
== gpir_codegen_src_p1_complex
) {
311 code
->acc1_src1
= code
->acc1_src0
;
312 code
->acc1_src0
= gpir_codegen_src_p1_complex
;
314 bool tmp
= code
->acc1_src0_neg
;
315 code
->acc1_src0_neg
= code
->acc1_src1_neg
;
316 code
->acc1_src1_neg
= tmp
;
/* Remaining opcode assignments (case labels not visible): min, max, lt,
 * ge. */
320 code
->acc_op
= gpir_codegen_acc_op_min
;
323 code
->acc_op
= gpir_codegen_acc_op_max
;
326 code
->acc_op
= gpir_codegen_acc_op_lt
;
329 code
->acc_op
= gpir_codegen_acc_op_ge
;
/* Single-operand form: only src0 and its negate flag are filled in. */
339 code
->acc1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
340 code
->acc1_src0_neg
= alu
->children_negate
[0];
/* Opcode assignments for this form (case labels not visible): floor,
 * sign. */
343 code
->acc_op
= gpir_codegen_acc_op_floor
;
346 code
->acc_op
= gpir_codegen_acc_op_sign
;
/* src0 negate forced on. NOTE(review): the case label, and whether this
 * falls through into the statements below, are not visible. */
354 code
->acc1_src0_neg
= true;
/* acc_op add with gpir_codegen_src_ident as the second operand, whose
 * negate flag is set. */
357 code
->acc_op
= gpir_codegen_acc_op_add
;
358 code
->acc1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
359 code
->acc1_src1
= gpir_codegen_src_ident
;
360 code
->acc1_src1_neg
= true;
/*
 * Fill the COMPLEX part of an encoded instruction from the node scheduled in
 * instr->slots[GPIR_INSTR_SLOT_COMPLEX].
 *
 * code:  instruction being encoded; this function writes complex_src and
 *        complex_op.
 * instr: scheduled instruction the node is read from.
 *
 * The visible code has two groups of case labels: the first picks the
 * operand, the second picks the opcode.
 *
 * NOTE(review): braces, the empty-slot guard, both switch headers, some case
 * labels and the breaks are missing from this listing, and each statement
 * carries a stray numeric prefix - confirm against the upstream file.
 */
368 static void gpir_codegen_complex_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
370 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_COMPLEX
];
/* Operand marked unused - presumably the empty-slot path (the guarding
 * condition is not visible here). */
373 code
->complex_src
= gpir_codegen_src_unused
;
/* Operand selection: these ops all take children[0] as the single input. */
379 case gpir_op_rcp_impl
:
380 case gpir_op_rsqrt_impl
:
381 case gpir_op_exp2_impl
:
382 case gpir_op_log2_impl
:
384 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
385 code
->complex_src
= gpir_get_alu_input(node
, alu
->children
[0]);
/* Opcode selection. The label for the pass opcode is not visible; the
 * *_impl ops map one-to-one onto rcp, rsqrt, exp2 and log2. */
394 code
->complex_op
= gpir_codegen_complex_op_pass
;
396 case gpir_op_rcp_impl
:
397 code
->complex_op
= gpir_codegen_complex_op_rcp
;
399 case gpir_op_rsqrt_impl
:
400 code
->complex_op
= gpir_codegen_complex_op_rsqrt
;
402 case gpir_op_exp2_impl
:
403 code
->complex_op
= gpir_codegen_complex_op_exp2
;
405 case gpir_op_log2_impl
:
406 code
->complex_op
= gpir_codegen_complex_op_log2
;
/*
 * Fill the PASS part of an encoded instruction from the node scheduled in
 * instr->slots[GPIR_INSTR_SLOT_PASS].
 *
 * code:  instruction being encoded; this function writes pass_op and
 *        pass_src, and for conditional branches also branch_target,
 *        branch_target_lo and unknown_1.
 * instr: scheduled instruction the node is read from.
 *
 * NOTE(review): braces, the empty-slot guard, early returns, the switch
 * header, some case labels and the breaks are missing from this listing,
 * and each statement carries a stray numeric prefix - confirm against the
 * upstream file.
 */
413 static void gpir_codegen_pass_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
415 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_PASS
];
/* pass opcode with an unused source - presumably the empty-slot path (the
 * guarding condition is not visible here). */
418 code
->pass_op
= gpir_codegen_pass_op_pass
;
419 code
->pass_src
= gpir_codegen_src_unused
;
/* Conditional branch: the condition value is routed through the pass
 * source. */
423 if (node
->op
== gpir_op_branch_cond
) {
424 gpir_branch_node
*branch
= gpir_node_to_branch(node
);
426 code
->pass_op
= gpir_codegen_pass_op_pass
;
427 code
->pass_src
= gpir_get_alu_input(node
, branch
->cond
);
429 /* Fill out branch information */
430 unsigned offset
= branch
->dest
->instr_offset
;
431 assert(offset
< 0x200);
/* The target is split in two: the low 8 bits go into branch_target, and
 * branch_target_lo is 1 when offset < 0x100 and 0 otherwise (given the
 * offset < 0x200 assertion above). */
433 code
->branch_target
= offset
& 0xff;
434 code
->branch_target_lo
= !(offset
>> 8);
/* NOTE(review): the meaning of the constant 13 is not documented here. */
435 code
->unknown_1
= 13;
/* ALU path: children[0] is the value passed through. */
439 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
440 code
->pass_src
= gpir_get_alu_input(node
, alu
->children
[0]);
/* Opcode selection. The label for the plain pass opcode is not visible;
 * preexp2 and postlog2 map onto their own pass opcodes. */
444 code
->pass_op
= gpir_codegen_pass_op_pass
;
446 case gpir_op_preexp2
:
447 code
->pass_op
= gpir_codegen_pass_op_preexp2
;
449 case gpir_op_postlog2
:
450 code
->pass_op
= gpir_codegen_pass_op_postlog2
;
/*
 * Encode the register-0 load fields: register0_attribute is taken from
 * instr->reg0_is_attr and register0_addr from instr->reg0_index.
 *
 * NOTE(review): the statement controlled by the reg0_use_count test is
 * missing from this listing (presumably an early return when the count is
 * zero), as are the braces - confirm against the upstream file.
 */
458 static void gpir_codegen_reg0_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
460 if (!instr
->reg0_use_count
)
463 code
->register0_attribute
= instr
->reg0_is_attr
;
464 code
->register0_addr
= instr
->reg0_index
;
/*
 * Encode the register-1 load field: register1_addr is taken from
 * instr->reg1_index.
 *
 * NOTE(review): the statement controlled by the reg1_use_count test is
 * missing from this listing (presumably an early return when the count is
 * zero), as are the braces - confirm against the upstream file.
 */
467 static void gpir_codegen_reg1_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
469 if (!instr
->reg1_use_count
)
472 code
->register1_addr
= instr
->reg1_index
;
/*
 * Encode the memory load fields: load_addr is taken from instr->mem_index,
 * and load_offset is set to gpir_codegen_load_off_none on both visible
 * paths.
 *
 * NOTE(review): the end of the mem_use_count branch (presumably an early
 * return and a closing brace) and the function braces are missing from this
 * listing - confirm against the upstream file.
 */
475 static void gpir_codegen_mem_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
/* No memory load is used by this instruction. */
477 if (!instr
->mem_use_count
) {
478 code
->load_offset
= gpir_codegen_load_off_none
;
482 code
->load_addr
= instr
->mem_index
;
483 code
->load_offset
= gpir_codegen_load_off_none
;
/*
 * Return the gpir_codegen_store_src selector for a store node, chosen by the
 * slot its child was scheduled into (store->child->sched.pos).
 *
 * node: a node that gpir_node_to_store() can convert to a store node.
 *
 * NOTE(review): braces and the end of the table initializer are missing from
 * this listing, and each statement carries a stray numeric prefix - confirm
 * against the upstream file.
 */
486 static gpir_codegen_store_src
gpir_get_store_input(gpir_node
*node
)
/* Lookup table from producer slot to store source. Only the MUL, ADD,
 * COMPLEX and PASS slots have a real store source.
 * NOTE(review): the table is never written in the visible code, so it could
 * likely be declared const. */
488 static int slot_to_src
[GPIR_INSTR_SLOT_NUM
] = {
489 [GPIR_INSTR_SLOT_MUL0
] = gpir_codegen_store_src_mul_0
,
490 [GPIR_INSTR_SLOT_MUL1
] = gpir_codegen_store_src_mul_1
,
491 [GPIR_INSTR_SLOT_ADD0
] = gpir_codegen_store_src_acc_0
,
492 [GPIR_INSTR_SLOT_ADD1
] = gpir_codegen_store_src_acc_1
,
493 [GPIR_INSTR_SLOT_COMPLEX
] = gpir_codegen_store_src_complex
,
494 [GPIR_INSTR_SLOT_PASS
] = gpir_codegen_store_src_pass
,
/* Range designator (a GNU C extension): every slot from REG0_LOAD0 through
 * STORE3 maps to gpir_codegen_store_src_none. */
495 [GPIR_INSTR_SLOT_REG0_LOAD0
...GPIR_INSTR_SLOT_STORE3
] = gpir_codegen_store_src_none
,
498 gpir_store_node
*store
= gpir_node_to_store(node
);
499 return slot_to_src
[store
->child
->sched
.pos
];
/*
 * Encode the four store slots of an instruction.
 *
 * code:  instruction being encoded; this function writes store0_src_x,
 *        store0_src_y, store1_src_z, store1_src_w, the store0_ and store1_
 *        temporary/varying/addr fields, and unknown_1.
 * instr: scheduled instruction; slots STORE0..STORE3, store_content[] and
 *        store_index[] are read.
 *
 * Slots STORE0 and STORE1 feed the store0 x/y sources; slots STORE2 and
 * STORE3 feed the store1 z/w sources.
 *
 * NOTE(review): braces and the if/else lines that choose between the paired
 * assignments below are missing from this listing, and each statement
 * carries a stray numeric prefix - confirm against the upstream file.
 */
502 static void gpir_codegen_store_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
/* For each slot there are two assignments to the same field: the selector
 * from gpir_get_store_input(), or gpir_codegen_store_src_none. Presumably
 * the first applies when the slot holds a node (guards not visible). */
505 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_STORE0
];
507 code
->store0_src_x
= gpir_get_store_input(node
);
509 code
->store0_src_x
= gpir_codegen_store_src_none
;
511 node
= instr
->slots
[GPIR_INSTR_SLOT_STORE1
];
513 code
->store0_src_y
= gpir_get_store_input(node
);
515 code
->store0_src_y
= gpir_codegen_store_src_none
;
517 node
= instr
->slots
[GPIR_INSTR_SLOT_STORE2
];
519 code
->store1_src_z
= gpir_get_store_input(node
);
521 code
->store1_src_z
= gpir_codegen_store_src_none
;
523 node
= instr
->slots
[GPIR_INSTR_SLOT_STORE3
];
525 code
->store1_src_w
= gpir_get_store_input(node
);
527 code
->store1_src_w
= gpir_codegen_store_src_none
;
/* Destination of the first store pair, described by store_content[0].
 * A temporary store sets the temporary flag and unknown_1 = 12.
 * NOTE(review): the meaning of the constant 12 is not documented here. */
529 if (instr
->store_content
[0] == GPIR_INSTR_STORE_TEMP
) {
530 code
->store0_temporary
= true;
531 code
->unknown_1
= 12;
/* Presumably the non-temporary branch (the else line is not visible):
 * record whether the target is a varying, plus its index. */
534 code
->store0_varying
= instr
->store_content
[0] == GPIR_INSTR_STORE_VARYING
;
535 code
->store0_addr
= instr
->store_index
[0];
/* Same for the second store pair, described by store_content[1]. */
538 if (instr
->store_content
[1] == GPIR_INSTR_STORE_TEMP
) {
539 code
->store1_temporary
= true;
540 code
->unknown_1
= 12;
543 code
->store1_varying
= instr
->store_content
[1] == GPIR_INSTR_STORE_VARYING
;
544 code
->store1_addr
= instr
->store_index
[1];
/*
 * Encode one scheduled instruction into *code by running every per-slot
 * encoder on it in turn.
 *
 * code:  destination for the encoded instruction.
 * instr: scheduled instruction to encode.
 *
 * NOTE(review): the function braces are missing from this listing and each
 * statement carries a stray numeric prefix - confirm against the upstream
 * file.
 */
548 static void gpir_codegen(gpir_codegen_instr
*code
, gpir_instr
*instr
)
/* Multiply units. */
550 gpir_codegen_mul0_slot(code
, instr
);
551 gpir_codegen_mul1_slot(code
, instr
);
/* Add units. */
553 gpir_codegen_add0_slot(code
, instr
);
554 gpir_codegen_add1_slot(code
, instr
);
/* Complex and pass units. */
556 gpir_codegen_complex_slot(code
, instr
);
557 gpir_codegen_pass_slot(code
, instr
);
/* Register and memory loads. */
559 gpir_codegen_reg0_slot(code
, instr
);
560 gpir_codegen_reg1_slot(code
, instr
);
561 gpir_codegen_mem_slot(code
, instr
);
/* Stores. */
563 gpir_codegen_store_slot(code
, instr
);
/*
 * Dump the encoded program in comp->prog->shader to stdout as 32-bit hex
 * words, walking comp->num_instr instructions.
 *
 * NOTE(review): braces and some lines inside the outer loop are missing from
 * this listing (the numbering skips lines there), so any per-instruction
 * prefix or line break is not visible - confirm against the upstream file.
 */
566 static void gpir_codegen_print_prog(gpir_compiler
*comp
)
568 uint32_t *data
= comp
->prog
->shader
;
/* Size of one encoded instruction, in 32-bit words. */
569 int num_dword_per_instr
= sizeof(gpir_codegen_instr
) / sizeof(uint32_t);
571 for (int i
= 0; i
< comp
->num_instr
; i
++) {
/* Print every word of instruction i. */
573 for (int j
= 0; j
< num_dword_per_instr
; j
++)
574 printf("%08x ", data
[i
* num_dword_per_instr
+ j
]);
/*
 * Encode the whole scheduled program of a compiler instance.
 *
 * comp: compiler state; its block list is walked twice, and the result is
 *       stored in comp->prog (shader, shader_size, prefetch).
 *
 * Returns bool. NOTE(review): the return statements are not visible in this
 * listing, nor are the declarations of num_instr and instr_index, the
 * increment of instr_index, or the braces - confirm against the upstream
 * file.
 */
579 bool gpir_codegen_prog(gpir_compiler
*comp
)
/* First pass: give each block the running instruction count as its
 * instr_offset and add up the total number of instructions. */
582 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
583 block
->instr_offset
= num_instr
;
584 num_instr
+= list_length(&block
->instr_list
);
/* Upper bound on program length accepted here. */
587 assert(num_instr
<= 512);
/* One gpir_codegen_instr per instruction, allocated with comp->prog passed
 * as the first argument of rzalloc_array() (presumably the ralloc parent
 * context - confirm). */
589 gpir_codegen_instr
*code
= rzalloc_array(comp
->prog
, gpir_codegen_instr
, num_instr
);
/* Second pass: encode every instruction of every block into its slot of
 * the code array. */
594 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
595 list_for_each_entry(gpir_instr
, instr
, &block
->instr_list
, list
) {
596 gpir_codegen(code
+ instr_index
, instr
);
/* Record in prog->prefetch the index of an instruction whose
 * register0_attribute flag is set. NOTE(review): no break is visible, so as
 * shown the last matching index wins - confirm. */
601 for (int i
= 0; i
< num_instr
; i
++) {
602 if (code
[i
].register0_attribute
)
603 comp
->prog
->prefetch
= i
;
/* Publish the encoded program and its size in bytes. */
606 comp
->prog
->shader
= code
;
607 comp
->prog
->shader_size
= num_instr
* sizeof(gpir_codegen_instr
);
/* Optional debug output: raw hex dump followed by a disassembly. */
609 if (lima_debug
& LIMA_DEBUG_GP
) {
610 gpir_codegen_print_prog(comp
);
611 gpir_disassemble_program(code
, num_instr
);
/*
 * Map a gpir_op to a gpir_codegen_acc_op.
 *
 * op: the operation to translate.
 *
 * The visible return values are add, min, max, lt, ge, floor and sign.
 *
 * NOTE(review): the switch header, every case label, any default handling
 * and the braces are missing from this listing, so which op yields which
 * result cannot be read from here - confirm against the upstream file.
 */
617 static gpir_codegen_acc_op
gpir_codegen_get_acc_op(gpir_op op
)
623 return gpir_codegen_acc_op_add
;
625 return gpir_codegen_acc_op_min
;
627 return gpir_codegen_acc_op_max
;
629 return gpir_codegen_acc_op_lt
;
631 return gpir_codegen_acc_op_ge
;
633 return gpir_codegen_acc_op_floor
;
635 return gpir_codegen_acc_op_sign
;
/*
 * Report whether two ops translate to the same gpir_codegen_acc_op.
 *
 * op1, op2: operations to compare.
 *
 * Returns true when gpir_codegen_get_acc_op() yields the same value for both.
 *
 * NOTE(review): the function braces are missing from this listing and each
 * statement carries a stray numeric prefix - confirm against the upstream
 * file.
 */
642 bool gpir_codegen_acc_same_op(gpir_op op1
, gpir_op op2
)
644 return gpir_codegen_get_acc_op(op1
) == gpir_codegen_get_acc_op(op2
);