2 * Copyright (c) 2017 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "util/ralloc.h"
29 #include "lima_context.h"
/*
 * Look up the hardware source selector an ALU node (parent) must use to read
 * one of its operands (child).
 *
 * The selector comes from slot_to_src, indexed by the slot the child was
 * scheduled in (child->sched.pos) and by the difference between the child's
 * and the parent's instruction index. Column 0 holds the selector for a
 * difference of 0, columns 1 and 2 hold the p1_ and p2_ selectors. Entries
 * equal to gpir_codegen_src_unused mark combinations that cannot be read and
 * trip the assert at the end.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably src is returned - confirm against the full file.
 */
31 static gpir_codegen_src
gpir_get_alu_input(gpir_node
*parent
, gpir_node
*child
)
/* Rows: producing slot. Columns: instruction index difference (0, 1, 2). */
33 static const int slot_to_src
[GPIR_INSTR_SLOT_NUM
][3] = {
34 [GPIR_INSTR_SLOT_MUL0
] = {
35 gpir_codegen_src_unused
, gpir_codegen_src_p1_mul_0
, gpir_codegen_src_p2_mul_0
},
36 [GPIR_INSTR_SLOT_MUL1
] = {
37 gpir_codegen_src_unused
, gpir_codegen_src_p1_mul_1
, gpir_codegen_src_p2_mul_1
},
39 [GPIR_INSTR_SLOT_ADD0
] = {
40 gpir_codegen_src_unused
, gpir_codegen_src_p1_acc_0
, gpir_codegen_src_p2_acc_0
},
41 [GPIR_INSTR_SLOT_ADD1
] = {
42 gpir_codegen_src_unused
, gpir_codegen_src_p1_acc_1
, gpir_codegen_src_p2_acc_1
},
44 [GPIR_INSTR_SLOT_COMPLEX
] = {
45 gpir_codegen_src_unused
, gpir_codegen_src_p1_complex
, gpir_codegen_src_unused
},
46 [GPIR_INSTR_SLOT_PASS
] = {
47 gpir_codegen_src_unused
, gpir_codegen_src_p1_pass
, gpir_codegen_src_p2_pass
},
48 [GPIR_INSTR_SLOT_BRANCH
] = {
49 gpir_codegen_src_unused
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
/* Register-0 loads have a selector at difference 0 and a p1_ selector at 1. */
51 [GPIR_INSTR_SLOT_REG0_LOAD0
] = {
52 gpir_codegen_src_attrib_x
, gpir_codegen_src_p1_attrib_x
, gpir_codegen_src_unused
},
53 [GPIR_INSTR_SLOT_REG0_LOAD1
] = {
54 gpir_codegen_src_attrib_y
, gpir_codegen_src_p1_attrib_y
, gpir_codegen_src_unused
},
55 [GPIR_INSTR_SLOT_REG0_LOAD2
] = {
56 gpir_codegen_src_attrib_z
, gpir_codegen_src_p1_attrib_z
, gpir_codegen_src_unused
},
57 [GPIR_INSTR_SLOT_REG0_LOAD3
] = {
58 gpir_codegen_src_attrib_w
, gpir_codegen_src_p1_attrib_w
, gpir_codegen_src_unused
},
/* Register-1 and memory loads only have a selector at difference 0. */
60 [GPIR_INSTR_SLOT_REG1_LOAD0
] = {
61 gpir_codegen_src_register_x
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
62 [GPIR_INSTR_SLOT_REG1_LOAD1
] = {
63 gpir_codegen_src_register_y
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
64 [GPIR_INSTR_SLOT_REG1_LOAD2
] = {
65 gpir_codegen_src_register_z
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
66 [GPIR_INSTR_SLOT_REG1_LOAD3
] = {
67 gpir_codegen_src_register_w
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
69 [GPIR_INSTR_SLOT_MEM_LOAD0
] = {
70 gpir_codegen_src_load_x
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
71 [GPIR_INSTR_SLOT_MEM_LOAD1
] = {
72 gpir_codegen_src_load_y
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
73 [GPIR_INSTR_SLOT_MEM_LOAD2
] = {
74 gpir_codegen_src_load_z
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
75 [GPIR_INSTR_SLOT_MEM_LOAD3
] = {
76 gpir_codegen_src_load_w
, gpir_codegen_src_unused
, gpir_codegen_src_unused
},
/*
 * Instruction index of the child minus that of the parent; used as the
 * column index, so it must be within 0..2.
 * NOTE(review): no range check on diff is visible in this listing - confirm.
 */
79 int diff
= child
->sched
.instr
->index
- parent
->sched
.instr
->index
;
83 int src
= slot_to_src
[child
->sched
.pos
][diff
];
/* An unused entry means the child cannot be read from this position. */
84 assert(src
!= gpir_codegen_src_unused
);
/*
 * Fill in the MUL0 fields of code (mul0_src0, mul0_src1, mul0_neg, mul_op)
 * from the node stored in GPIR_INSTR_SLOT_MUL0 of instr.
 *
 * NOTE(review): the conditions and most case labels that select between the
 * groups of assignments below are not visible in this listing; only the
 * complex1 and complex2 labels are. Comments describe the visible
 * assignments only.
 */
88 static void gpir_codegen_mul0_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
90 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_MUL0
];
/* Both sources marked unused - presumably the empty-slot path (guard not visible). */
93 code
->mul0_src0
= gpir_codegen_src_unused
;
94 code
->mul0_src1
= gpir_codegen_src_unused
;
98 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: sources come from children[0] and children[1]. */
102 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
103 code
->mul0_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
104 if (code
->mul0_src1
== gpir_codegen_src_p1_complex
) {
105 /* Will get confused with gpir_codegen_src_ident, so need to swap inputs */
106 code
->mul0_src1
= code
->mul0_src0
;
107 code
->mul0_src0
= gpir_codegen_src_p1_complex
;
/* The negate flag starts from dest_negate and is toggled once per negated operand. */
110 code
->mul0_neg
= alu
->dest_negate
;
111 if (alu
->children_negate
[0])
112 code
->mul0_neg
= !code
->mul0_neg
;
113 if (alu
->children_negate
[1])
114 code
->mul0_neg
= !code
->mul0_neg
;
/* Single-operand form: children[0] paired with gpir_codegen_src_ident. */
118 code
->mul0_neg
= true;
120 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
121 code
->mul0_src1
= gpir_codegen_src_ident
;
124 case gpir_op_complex1
:
125 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
126 code
->mul0_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
127 code
->mul_op
= gpir_codegen_mul_op_complex1
;
130 case gpir_op_complex2
:
/* Both sources read the same operand, children[0]. */
131 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
132 code
->mul0_src1
= code
->mul0_src0
;
133 code
->mul_op
= gpir_codegen_mul_op_complex2
;
/* Select form: source 0 is children[2], source 1 is children[0]. */
137 code
->mul0_src0
= gpir_get_alu_input(node
, alu
->children
[2]);
138 code
->mul0_src1
= gpir_get_alu_input(node
, alu
->children
[0]);
139 code
->mul_op
= gpir_codegen_mul_op_select
;
/*
 * Fill in the MUL1 fields of code (mul1_src0, mul1_src1, mul1_neg) from the
 * node stored in GPIR_INSTR_SLOT_MUL1 of instr.
 *
 * NOTE(review): the conditions and most case labels that select between the
 * groups of assignments below are not visible in this listing; only the
 * complex1 label is. Comments describe the visible assignments only.
 */
147 static void gpir_codegen_mul1_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
149 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_MUL1
];
/* Both sources marked unused - presumably the empty-slot path (guard not visible). */
152 code
->mul1_src0
= gpir_codegen_src_unused
;
153 code
->mul1_src1
= gpir_codegen_src_unused
;
157 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: sources come from children[0] and children[1]. */
161 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
162 code
->mul1_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
163 if (code
->mul1_src1
== gpir_codegen_src_p1_complex
) {
164 /* Will get confused with gpir_codegen_src_ident, so need to swap inputs */
165 code
->mul1_src1
= code
->mul1_src0
;
166 code
->mul1_src0
= gpir_codegen_src_p1_complex
;
/* The negate flag starts from dest_negate and is toggled once per negated operand. */
169 code
->mul1_neg
= alu
->dest_negate
;
170 if (alu
->children_negate
[0])
171 code
->mul1_neg
= !code
->mul1_neg
;
172 if (alu
->children_negate
[1])
173 code
->mul1_neg
= !code
->mul1_neg
;
/* Single-operand form: children[0] paired with gpir_codegen_src_ident. */
177 code
->mul1_neg
= true;
179 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
180 code
->mul1_src1
= gpir_codegen_src_ident
;
183 case gpir_op_complex1
:
/* Here the second source is children[2], not children[1]. */
184 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
185 code
->mul1_src1
= gpir_get_alu_input(node
, alu
->children
[2]);
/* Only children[1] is read; the second source stays unused (case label not visible). */
189 code
->mul1_src0
= gpir_get_alu_input(node
, alu
->children
[1]);
190 code
->mul1_src1
= gpir_codegen_src_unused
;
/*
 * Fill in the ACC0 fields of code (acc0_src0, acc0_src1, their negate flags
 * and acc_op) from the node stored in GPIR_INSTR_SLOT_ADD0 of instr.
 *
 * NOTE(review): the switch statements and case labels that select between
 * the groups of assignments below are not visible in this listing. Comments
 * describe the visible assignments only.
 */
198 static void gpir_codegen_add0_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
200 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_ADD0
];
/* Both sources marked unused - presumably the empty-slot path (guard not visible). */
203 code
->acc0_src0
= gpir_codegen_src_unused
;
204 code
->acc0_src1
= gpir_codegen_src_unused
;
208 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: sources and per-source negate flags from children 0 and 1. */
216 code
->acc0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
217 code
->acc0_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
219 code
->acc0_src0_neg
= alu
->children_negate
[0];
220 code
->acc0_src1_neg
= alu
->children_negate
[1];
224 code
->acc_op
= gpir_codegen_acc_op_add
;
/*
 * For the add op, a p1_complex second source is moved to the first source.
 * The negate flags are swapped along with the operands so each flag stays
 * with its operand.
 */
225 if (code
->acc0_src1
== gpir_codegen_src_p1_complex
) {
226 code
->acc0_src1
= code
->acc0_src0
;
227 code
->acc0_src0
= gpir_codegen_src_p1_complex
;
229 bool tmp
= code
->acc0_src0_neg
;
230 code
->acc0_src0_neg
= code
->acc0_src1_neg
;
231 code
->acc0_src1_neg
= tmp
;
235 code
->acc_op
= gpir_codegen_acc_op_min
;
238 code
->acc_op
= gpir_codegen_acc_op_max
;
241 code
->acc_op
= gpir_codegen_acc_op_lt
;
244 code
->acc_op
= gpir_codegen_acc_op_ge
;
/* One-operand form (floor/sign ops below): only source 0 and its negate flag are set. */
254 code
->acc0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
255 code
->acc0_src0_neg
= alu
->children_negate
[0];
258 code
->acc_op
= gpir_codegen_acc_op_floor
;
261 code
->acc_op
= gpir_codegen_acc_op_sign
;
/*
 * Single operand encoded as an add: children[0] as source 0 and
 * gpir_codegen_src_ident with its negate flag set as source 1.
 * NOTE(review): the labels deciding when acc0_src0_neg is forced to true
 * are not visible.
 */
269 code
->acc0_src0_neg
= true;
271 code
->acc_op
= gpir_codegen_acc_op_add
;
272 code
->acc0_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
273 code
->acc0_src1
= gpir_codegen_src_ident
;
274 code
->acc0_src1_neg
= true;
/*
 * Fill in the ACC1 fields of code (acc1_src0, acc1_src1, their negate flags
 * and acc_op) from the node stored in GPIR_INSTR_SLOT_ADD1 of instr.
 *
 * NOTE(review): the switch statements and case labels that select between
 * the groups of assignments below are not visible in this listing. Comments
 * describe the visible assignments only.
 */
282 static void gpir_codegen_add1_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
284 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_ADD1
];
/* Both sources marked unused - presumably the empty-slot path (guard not visible). */
287 code
->acc1_src0
= gpir_codegen_src_unused
;
288 code
->acc1_src1
= gpir_codegen_src_unused
;
292 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
/* Two-operand form: sources and per-source negate flags from children 0 and 1. */
300 code
->acc1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
301 code
->acc1_src1
= gpir_get_alu_input(node
, alu
->children
[1]);
303 code
->acc1_src0_neg
= alu
->children_negate
[0];
304 code
->acc1_src1_neg
= alu
->children_negate
[1];
308 code
->acc_op
= gpir_codegen_acc_op_add
;
/*
 * For the add op, a p1_complex second source is moved to the first source.
 * The negate flags are swapped along with the operands so each flag stays
 * with its operand.
 */
309 if (code
->acc1_src1
== gpir_codegen_src_p1_complex
) {
310 code
->acc1_src1
= code
->acc1_src0
;
311 code
->acc1_src0
= gpir_codegen_src_p1_complex
;
313 bool tmp
= code
->acc1_src0_neg
;
314 code
->acc1_src0_neg
= code
->acc1_src1_neg
;
315 code
->acc1_src1_neg
= tmp
;
319 code
->acc_op
= gpir_codegen_acc_op_min
;
322 code
->acc_op
= gpir_codegen_acc_op_max
;
325 code
->acc_op
= gpir_codegen_acc_op_lt
;
328 code
->acc_op
= gpir_codegen_acc_op_ge
;
/* One-operand form (floor/sign ops below): only source 0 and its negate flag are set. */
338 code
->acc1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
339 code
->acc1_src0_neg
= alu
->children_negate
[0];
342 code
->acc_op
= gpir_codegen_acc_op_floor
;
345 code
->acc_op
= gpir_codegen_acc_op_sign
;
/*
 * Single operand encoded as an add: children[0] as source 0 and
 * gpir_codegen_src_ident with its negate flag set as source 1.
 * NOTE(review): the labels deciding when acc1_src0_neg is forced to true
 * are not visible.
 */
353 code
->acc1_src0_neg
= true;
355 code
->acc_op
= gpir_codegen_acc_op_add
;
356 code
->acc1_src0
= gpir_get_alu_input(node
, alu
->children
[0]);
357 code
->acc1_src1
= gpir_codegen_src_ident
;
358 code
->acc1_src1_neg
= true;
/*
 * Fill in complex_src and complex_op of code from the node stored in
 * GPIR_INSTR_SLOT_COMPLEX of instr.
 *
 * NOTE(review): the switch headers, the remaining case labels and the
 * break/return statements are not visible in this listing.
 */
366 static void gpir_codegen_complex_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
368 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_COMPLEX
];
/* Source marked unused - presumably the empty-slot path (guard not visible). */
371 code
->complex_src
= gpir_codegen_src_unused
;
/* These ops (and possibly others whose labels are not visible) read children[0]. */
377 case gpir_op_rcp_impl
:
378 case gpir_op_rsqrt_impl
:
379 case gpir_op_exp2_impl
:
380 case gpir_op_log2_impl
:
382 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
383 code
->complex_src
= gpir_get_alu_input(node
, alu
->children
[0]);
/* Operation selection; the label guarding the pass op is not visible. */
392 code
->complex_op
= gpir_codegen_complex_op_pass
;
394 case gpir_op_rcp_impl
:
395 code
->complex_op
= gpir_codegen_complex_op_rcp
;
397 case gpir_op_rsqrt_impl
:
398 code
->complex_op
= gpir_codegen_complex_op_rsqrt
;
400 case gpir_op_exp2_impl
:
401 code
->complex_op
= gpir_codegen_complex_op_exp2
;
403 case gpir_op_log2_impl
:
404 code
->complex_op
= gpir_codegen_complex_op_log2
;
/*
 * Fill in pass_op and pass_src of code from the node stored in
 * GPIR_INSTR_SLOT_PASS of instr.
 *
 * NOTE(review): the empty-slot guard, the switch header and some case labels
 * are not visible in this listing.
 */
411 static void gpir_codegen_pass_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
413 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_PASS
];
/* Pass op with an unused source - presumably the empty-slot path (guard not visible). */
416 code
->pass_op
= gpir_codegen_pass_op_pass
;
417 code
->pass_src
= gpir_codegen_src_unused
;
/* The source is always children[0]; only the op varies below. */
421 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
422 code
->pass_src
= gpir_get_alu_input(node
, alu
->children
[0]);
426 code
->pass_op
= gpir_codegen_pass_op_pass
;
428 case gpir_op_preexp2
:
429 code
->pass_op
= gpir_codegen_pass_op_preexp2
;
431 case gpir_op_postlog2
:
432 code
->pass_op
= gpir_codegen_pass_op_postlog2
;
/*
 * Encoder for the node stored in GPIR_INSTR_SLOT_BRANCH of instr.
 *
 * NOTE(review): only the slot lookup is visible in this listing; what, if
 * anything, is written to code cannot be determined from here.
 */
439 static void gpir_codegen_branch_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
441 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_BRANCH
];
/*
 * Copy the register-0 load description of instr into code:
 * reg0_is_attr -> register0_attribute and reg0_index -> register0_addr.
 */
449 static void gpir_codegen_reg0_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
/*
 * Nothing uses register 0 in this instruction.
 * NOTE(review): the statement guarded by this test is not visible,
 * presumably an early return.
 */
451 if (!instr
->reg0_use_count
)
454 code
->register0_attribute
= instr
->reg0_is_attr
;
455 code
->register0_addr
= instr
->reg0_index
;
/*
 * Copy the register-1 load index of instr (reg1_index) into
 * code->register1_addr.
 */
458 static void gpir_codegen_reg1_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
/*
 * Nothing uses register 1 in this instruction.
 * NOTE(review): the statement guarded by this test is not visible,
 * presumably an early return.
 */
460 if (!instr
->reg1_use_count
)
463 code
->register1_addr
= instr
->reg1_index
;
/*
 * Fill in the memory-load fields of code: load_addr from instr->mem_index
 * and load_offset, which is gpir_codegen_load_off_none on both visible paths.
 */
466 static void gpir_codegen_mem_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
/*
 * No memory load is used: only load_offset is set.
 * NOTE(review): the rest of this branch is not visible, presumably a return.
 */
468 if (!instr
->mem_use_count
) {
469 code
->load_offset
= gpir_codegen_load_off_none
;
473 code
->load_addr
= instr
->mem_index
;
474 code
->load_offset
= gpir_codegen_load_off_none
;
/*
 * Return the store source selector for a store node, chosen by the slot in
 * which the stored value's producer (store->child) was scheduled.
 */
477 static gpir_codegen_store_src
gpir_get_store_input(gpir_node
*node
)
/*
 * Slot -> store source. Only the six ALU slots map to a real source; every
 * slot from BRANCH through STORE3 maps to gpir_codegen_store_src_none. The
 * last entry uses a range designator, which is a GNU C extension.
 */
479 static int slot_to_src
[GPIR_INSTR_SLOT_NUM
] = {
480 [GPIR_INSTR_SLOT_MUL0
] = gpir_codegen_store_src_mul_0
,
481 [GPIR_INSTR_SLOT_MUL1
] = gpir_codegen_store_src_mul_1
,
482 [GPIR_INSTR_SLOT_ADD0
] = gpir_codegen_store_src_acc_0
,
483 [GPIR_INSTR_SLOT_ADD1
] = gpir_codegen_store_src_acc_1
,
484 [GPIR_INSTR_SLOT_COMPLEX
] = gpir_codegen_store_src_complex
,
485 [GPIR_INSTR_SLOT_PASS
] = gpir_codegen_store_src_pass
,
486 [GPIR_INSTR_SLOT_BRANCH
...GPIR_INSTR_SLOT_STORE3
] = gpir_codegen_store_src_none
,
489 gpir_store_node
*store
= gpir_node_to_store(node
);
490 return slot_to_src
[store
->child
->sched
.pos
];
/*
 * Fill in the store fields of code from the four store slots of instr.
 *
 * Slots STORE0..STORE3 feed store0_src_x, store0_src_y, store1_src_z and
 * store1_src_w respectively. Each field receives either
 * gpir_get_store_input(node) or gpir_codegen_store_src_none. The kind and
 * address of each store unit then come from store_content[] and
 * store_index[].
 *
 * NOTE(review): the if/else lines choosing between the two assignments per
 * component, and the else branches of the store_content tests, are not
 * visible in this listing. Presumably the node's source is used when the
 * slot is occupied - confirm.
 */
493 static void gpir_codegen_store_slot(gpir_codegen_instr
*code
, gpir_instr
*instr
)
496 gpir_node
*node
= instr
->slots
[GPIR_INSTR_SLOT_STORE0
];
498 code
->store0_src_x
= gpir_get_store_input(node
);
500 code
->store0_src_x
= gpir_codegen_store_src_none
;
502 node
= instr
->slots
[GPIR_INSTR_SLOT_STORE1
];
504 code
->store0_src_y
= gpir_get_store_input(node
);
506 code
->store0_src_y
= gpir_codegen_store_src_none
;
502 node
= instr
->slots
[GPIR_INSTR_SLOT_STORE2
];
510 code
->store1_src_z
= gpir_get_store_input(node
);
512 code
->store1_src_z
= gpir_codegen_store_src_none
;
514 node
= instr
->slots
[GPIR_INSTR_SLOT_STORE3
];
516 code
->store1_src_w
= gpir_get_store_input(node
);
518 code
->store1_src_w
= gpir_codegen_store_src_none
;
/*
 * Store unit 0: a temporary store sets store0_temporary and unknown_1.
 * NOTE(review): the meaning of the value 12 is not documented here.
 */
520 if (instr
->store_content
[0] == GPIR_INSTR_STORE_TEMP
) {
521 code
->store0_temporary
= true;
522 code
->unknown_1
= 12;
525 code
->store0_varying
= instr
->store_content
[0] == GPIR_INSTR_STORE_VARYING
;
526 code
->store0_addr
= instr
->store_index
[0];
/* Store unit 1: same handling, driven by store_content[1] and store_index[1]. */
529 if (instr
->store_content
[1] == GPIR_INSTR_STORE_TEMP
) {
530 code
->store1_temporary
= true;
531 code
->unknown_1
= 12;
534 code
->store1_varying
= instr
->store_content
[1] == GPIR_INSTR_STORE_VARYING
;
535 code
->store1_addr
= instr
->store_index
[1];
/*
 * Encode one scheduled instruction (instr) into code by running every
 * per-slot encoder in turn: multiply, add, complex, pass, branch, the two
 * register loads, the memory load and finally the stores.
 */
539 static void gpir_codegen(gpir_codegen_instr
*code
, gpir_instr
*instr
)
541 gpir_codegen_mul0_slot(code
, instr
);
542 gpir_codegen_mul1_slot(code
, instr
);
544 gpir_codegen_add0_slot(code
, instr
);
545 gpir_codegen_add1_slot(code
, instr
);
547 gpir_codegen_complex_slot(code
, instr
);
548 gpir_codegen_pass_slot(code
, instr
);
549 gpir_codegen_branch_slot(code
, instr
);
551 gpir_codegen_reg0_slot(code
, instr
);
552 gpir_codegen_reg1_slot(code
, instr
);
553 gpir_codegen_mem_slot(code
, instr
);
555 gpir_codegen_store_slot(code
, instr
);
/*
 * Print the encoded program stored in comp->prog->shader, one 32-bit word at
 * a time as 8-digit hexadecimal, walking it instruction by instruction.
 *
 * NOTE(review): any per-instruction prefix or line break printed around the
 * inner loop is not visible in this listing.
 */
558 static void gpir_codegen_print_prog(gpir_compiler
*comp
)
560 uint32_t *data
= comp
->prog
->shader
;
/* shader_size is divided by the instruction size, so it is treated as a byte count. */
561 int size
= comp
->prog
->shader_size
;
562 int num_instr
= size
/ sizeof(gpir_codegen_instr
);
563 int num_dword_per_instr
= sizeof(gpir_codegen_instr
) / sizeof(uint32_t);
565 for (int i
= 0; i
< num_instr
; i
++) {
567 for (int j
= 0; j
< num_dword_per_instr
; j
++)
568 printf("%08x ", data
[i
* num_dword_per_instr
+ j
]);
/*
 * Generate the encoded program for comp and attach it to comp->prog.
 *
 * Steps visible here: count the instructions of all blocks; report an error
 * if there are more than 512; allocate num_instr gpir_codegen_instr entries
 * with rzalloc_array (comp->prog is passed as its first argument); encode
 * every instruction with gpir_codegen(); record an instruction index in
 * comp->prog->prefetch; store the array and its byte size in
 * comp->prog->shader / shader_size; when LIMA_DEBUG_GP is set in lima_debug,
 * dump and disassemble the result.
 *
 * NOTE(review): declarations of num_instr and instr_index, the increment of
 * instr_index, the error-path return, any allocation check and the final
 * return value are not visible in this listing.
 */
573 bool gpir_codegen_prog(gpir_compiler
*comp
)
576 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
577 num_instr
+= list_length(&block
->instr_list
);
580 if (num_instr
> 512) {
581 gpir_error("shader too big (%d), GP has a 512 instruction limit.\n",
586 gpir_codegen_instr
*code
= rzalloc_array(comp
->prog
, gpir_codegen_instr
, num_instr
);
/* Encode instructions block by block into consecutive entries of code. */
591 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
592 list_for_each_entry(gpir_instr
, instr
, &block
->instr_list
, list
) {
593 gpir_codegen(code
+ instr_index
, instr
);
/*
 * As visible here, each instruction with register0_attribute set overwrites
 * prefetch, so the last such index is what remains.
 */
598 for (int i
= 0; i
< num_instr
; i
++) {
599 if (code
[i
].register0_attribute
)
600 comp
->prog
->prefetch
= i
;
603 comp
->prog
->shader
= code
;
604 comp
->prog
->shader_size
= num_instr
* sizeof(gpir_codegen_instr
);
606 if (lima_debug
& LIMA_DEBUG_GP
) {
607 gpir_codegen_print_prog(comp
);
608 gpir_disassemble_program(code
, num_instr
);
/*
 * Map a gpir_op to the gpir_codegen_acc_op it is encoded with.
 *
 * NOTE(review): the switch and its case labels are not visible in this
 * listing, so which ops map to each value (and what is returned for ops
 * without an accumulator encoding) cannot be confirmed from here.
 */
614 static gpir_codegen_acc_op
gpir_codegen_get_acc_op(gpir_op op
)
620 return gpir_codegen_acc_op_add
;
622 return gpir_codegen_acc_op_min
;
624 return gpir_codegen_acc_op_max
;
626 return gpir_codegen_acc_op_lt
;
628 return gpir_codegen_acc_op_ge
;
630 return gpir_codegen_acc_op_floor
;
632 return gpir_codegen_acc_op_sign
;
/*
 * Return true when op1 and op2 map to the same accumulator operation
 * according to gpir_codegen_get_acc_op().
 */
639 bool gpir_codegen_acc_same_op(gpir_op op1
, gpir_op op2
)
641 return gpir_codegen_get_acc_op(op1
) == gpir_codegen_get_acc_op(op2
);