2 * Copyright (c) 2017 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
31 #include "lima_context.h"
/* Pack a four-entry swizzle into consecutive 2-bit selectors.
 *
 * Each selector is (swizzle[i] + shift) reduced modulo 4 and is placed at bit
 * position (i + dest_shift) * 2 of the result.
 *
 * swizzle:    array of four component selectors.
 * shift:      value added to every selector before it is masked to 2 bits.
 * dest_shift: number of 2-bit positions the whole pattern is moved up by.
 *
 * Returns the OR of all four positioned selectors.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned ret = 0;

   for (int i = 0; i < 4; i++) {
      /* mask first so the value shifted is always a small non-negative int */
      unsigned sel = (unsigned)((swizzle[i] + shift) & 0x3);
      ret |= sel << ((i + dest_shift) * 2);
   }

   return ret;
}
41 static int get_scl_reg_index(ppir_src
*src
, int component
)
43 int ret
= ppir_target_get_src_reg_index(src
);
44 ret
+= src
->swizzle
[component
];
/* Encode a varying-slot load node into the ppir_codegen_field_varying at code.
 *
 * Two layouts of the field are written here:
 *  - f->imm: destination register (index >> 2) and mask (write_mask shifted
 *    by index & 3), alignment, offset_vector = 0xf, an index derived from
 *    load->index, and source_type / perspective values for the fragcoord,
 *    pointcoord and frontface ops.
 *  - f->reg: the same destination encoding plus a source register with its
 *    negate / absolute flags and swizzle; this part asserts that the node is
 *    ppir_op_load_coords.
 *
 * NOTE(review): the statements that choose between the two layouts, and
 * between the two f->imm.index assignments, are not visible in this listing.
 */
48 static void ppir_codegen_encode_varying(ppir_node
*node
, void *code
)
50 ppir_codegen_field_varying
*f
= code
;
51 ppir_load_node
*load
= ppir_node_to_load(node
);
52 ppir_dest
*dest
= &load
->dest
;
53 int index
= ppir_target_get_dest_reg_index(dest
);
54 int num_components
= load
->num_components
;
57 assert(node
->op
== ppir_op_load_varying
||
58 node
->op
== ppir_op_load_coords
||
59 node
->op
== ppir_op_load_fragcoord
||
60 node
->op
== ppir_op_load_pointcoord
||
61 node
->op
== ppir_op_load_frontface
);
63 f
->imm
.dest
= index
>> 2;
64 f
->imm
.mask
= dest
->write_mask
<< (index
& 0x3);
/* alignment is 3 for three components, otherwise num_components - 1 */
66 int alignment
= num_components
== 3 ? 3 : num_components
- 1;
67 f
->imm
.alignment
= alignment
;
68 f
->imm
.offset_vector
= 0xf;
71 f
->imm
.index
= load
->index
>> 2;
73 f
->imm
.index
= load
->index
>> alignment
;
76 case ppir_op_load_fragcoord
:
77 f
->imm
.source_type
= 2;
78 f
->imm
.perspective
= 3;
80 case ppir_op_load_pointcoord
:
81 f
->imm
.source_type
= 3;
83 case ppir_op_load_frontface
:
84 f
->imm
.source_type
= 3;
85 f
->imm
.perspective
= 1;
92 assert(node
->op
== ppir_op_load_coords
);
94 f
->reg
.dest
= index
>> 2;
95 f
->reg
.mask
= dest
->write_mask
<< (index
& 0x3);
97 f
->reg
.source_type
= 1;
99 ppir_src
*src
= &load
->src
;
/* index is reused here for the source register */
100 index
= ppir_target_get_src_reg_index(src
);
101 f
->reg
.source
= index
>> 2;
102 f
->reg
.negate
= src
->negate
;
103 f
->reg
.absolute
= src
->absolute
;
104 f
->reg
.swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, 0);
/* Encode a texture-load node into the ppir_codegen_field_sampler at code.
 *
 * The sampler index is taken from the texture node, the type is always
 * ppir_codegen_sampler_type_2d and unknown_2 is set to the constant 0x39001
 * (its meaning is not documented in this file).
 */
108 static void ppir_codegen_encode_texld(ppir_node
*node
, void *code
)
110 ppir_codegen_field_sampler
*f
= code
;
111 ppir_load_texture_node
*ldtex
= ppir_node_to_load_texture(node
);
113 f
->index
= ldtex
->sampler
;
115 f
->type
= ppir_codegen_sampler_type_2d
;
117 f
->unknown_2
= 0x39001;
/* Encode a uniform-slot load node into the ppir_codegen_field_uniform at code.
 *
 * f->source is ppir_codegen_uniform_src_uniform for ppir_op_load_uniform and
 * ppir_codegen_uniform_src_temporary for ppir_op_load_temp.
 * f->alignment is 2 for four components, otherwise num_components - 1, and
 * f->index is load->index shifted left by (2 - alignment).
 *
 * NOTE(review): the switch header and any default case are not visible in
 * this listing.
 */
120 static void ppir_codegen_encode_uniform(ppir_node
*node
, void *code
)
122 ppir_codegen_field_uniform
*f
= code
;
123 ppir_load_node
*load
= ppir_node_to_load(node
);
126 case ppir_op_load_uniform
:
127 f
->source
= ppir_codegen_uniform_src_uniform
;
129 case ppir_op_load_temp
:
130 f
->source
= ppir_codegen_uniform_src_temporary
;
136 int num_components
= load
->num_components
;
137 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
139 f
->alignment
= alignment
;
141 /* TODO: uniform can be also combined like varying */
142 f
->index
= load
->index
<< (2 - alignment
);
/* Convert a shift amount in the range [-3, 3] to its op-field encoding.
 *
 * Non-negative shifts encode as themselves (0..3); negative shifts encode as
 * shift + 8, i.e. -1 -> 7, -2 -> 6, -3 -> 5. Values outside [-3, 3] trip the
 * assertion.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);
   return shift < 0 ? shift + 8 : shift;
}
/* Encode an ALU node for the vec4 multiply slot into the
 * ppir_codegen_field_vec4_mul at code.
 *
 * When the destination is not a pipeline target, the destination register
 * (index >> 2) and mask (write_mask << dest_shift) are written, where
 * dest_shift is index & 3. The op is either shift_to_op(alu->shift) or one
 * of the ppir_codegen_vec4_mul_op_* values. The first source, and a second
 * one when num_src == 2, are encoded as register (index >> 2), swizzle
 * (via encode_swizzle with dest_shift), absolute and negate.
 *
 * NOTE(review): the declaration of dest_shift, the case labels selecting
 * each op, and the statement advancing src to the second operand are not
 * visible in this listing.
 */
151 static void ppir_codegen_encode_vec_mul(ppir_node
*node
, void *code
)
153 ppir_codegen_field_vec4_mul
*f
= code
;
154 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
156 ppir_dest
*dest
= &alu
->dest
;
158 if (dest
->type
!= ppir_target_pipeline
) {
159 int index
= ppir_target_get_dest_reg_index(dest
);
160 dest_shift
= index
& 0x3;
161 f
->dest
= index
>> 2;
162 f
->mask
= dest
->write_mask
<< dest_shift
;
164 f
->dest_modifier
= dest
->modifier
;
168 f
->op
= shift_to_op(alu
->shift
);
171 f
->op
= ppir_codegen_vec4_mul_op_mov
;
174 f
->op
= ppir_codegen_vec4_mul_op_max
;
177 f
->op
= ppir_codegen_vec4_mul_op_min
;
180 f
->op
= ppir_codegen_vec4_mul_op_and
;
183 f
->op
= ppir_codegen_vec4_mul_op_or
;
186 f
->op
= ppir_codegen_vec4_mul_op_xor
;
189 f
->op
= ppir_codegen_vec4_mul_op_gt
;
192 f
->op
= ppir_codegen_vec4_mul_op_ge
;
195 f
->op
= ppir_codegen_vec4_mul_op_eq
;
198 f
->op
= ppir_codegen_vec4_mul_op_ne
;
201 f
->op
= ppir_codegen_vec4_mul_op_not
;
207 ppir_src
*src
= alu
->src
;
208 int index
= ppir_target_get_src_reg_index(src
);
209 f
->arg0_source
= index
>> 2;
210 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
211 f
->arg0_absolute
= src
->absolute
;
212 f
->arg0_negate
= src
->negate
;
214 if (alu
->num_src
== 2) {
216 index
= ppir_target_get_src_reg_index(src
);
217 f
->arg1_source
= index
>> 2;
218 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
219 f
->arg1_absolute
= src
->absolute
;
220 f
->arg1_negate
= src
->negate
;
/* Encode an ALU node for the scalar multiply slot into the
 * ppir_codegen_field_float_mul at code.
 *
 * dest_component is the lowest set bit of the destination write mask
 * (ffs() - 1) and must exist. When the destination is not a pipeline target
 * the destination is the register index plus dest_component. The op is
 * either shift_to_op(alu->shift) or one of the ppir_codegen_float_mul_op_*
 * values; ppir_op_sel_cond is encoded as a mov. Sources are encoded with
 * get_scl_reg_index() for dest_component plus their absolute / negate flags.
 *
 * NOTE(review): most case labels and the statement advancing src to the
 * second operand are not visible in this listing.
 */
224 static void ppir_codegen_encode_scl_mul(ppir_node
*node
, void *code
)
226 ppir_codegen_field_float_mul
*f
= code
;
227 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
229 ppir_dest
*dest
= &alu
->dest
;
230 int dest_component
= ffs(dest
->write_mask
) - 1;
231 assert(dest_component
>= 0);
233 if (dest
->type
!= ppir_target_pipeline
) {
234 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
237 f
->dest_modifier
= dest
->modifier
;
241 f
->op
= shift_to_op(alu
->shift
);
244 f
->op
= ppir_codegen_float_mul_op_mov
;
246 case ppir_op_sel_cond
:
247 f
->op
= ppir_codegen_float_mul_op_mov
;
250 f
->op
= ppir_codegen_float_mul_op_max
;
253 f
->op
= ppir_codegen_float_mul_op_min
;
256 f
->op
= ppir_codegen_float_mul_op_and
;
259 f
->op
= ppir_codegen_float_mul_op_or
;
262 f
->op
= ppir_codegen_float_mul_op_xor
;
265 f
->op
= ppir_codegen_float_mul_op_gt
;
268 f
->op
= ppir_codegen_float_mul_op_ge
;
271 f
->op
= ppir_codegen_float_mul_op_eq
;
274 f
->op
= ppir_codegen_float_mul_op_ne
;
277 f
->op
= ppir_codegen_float_mul_op_not
;
283 ppir_src
*src
= alu
->src
;
284 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
285 f
->arg0_absolute
= src
->absolute
;
286 f
->arg0_negate
= src
->negate
;
288 if (alu
->num_src
== 2) {
290 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
291 f
->arg1_absolute
= src
->absolute
;
292 f
->arg1_negate
= src
->negate
;
/* Encode an ALU node for the vec4 add (accumulate) slot into the
 * ppir_codegen_field_vec4_acc at code.
 *
 * The destination register is index >> 2 and the mask is the write mask
 * shifted by dest_shift (index & 3). The op is one of the
 * ppir_codegen_vec4_acc_op_* values. For ppir_op_select the operand encoding
 * starts at alu->src + 1, otherwise at alu->src; a following operand is
 * encoded as arg1 when one exists before alu->src + alu->num_src.
 *
 * NOTE(review): the case labels selecting each op and the statement guarded
 * by the ppir_pipeline_reg_vmul check are not visible in this listing.
 */
296 static void ppir_codegen_encode_vec_add(ppir_node
*node
, void *code
)
298 ppir_codegen_field_vec4_acc
*f
= code
;
299 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
301 ppir_dest
*dest
= &alu
->dest
;
302 int index
= ppir_target_get_dest_reg_index(dest
);
303 int dest_shift
= index
& 0x3;
304 f
->dest
= index
>> 2;
305 f
->mask
= dest
->write_mask
<< dest_shift
;
306 f
->dest_modifier
= dest
->modifier
;
310 f
->op
= ppir_codegen_vec4_acc_op_add
;
313 f
->op
= ppir_codegen_vec4_acc_op_mov
;
316 f
->op
= ppir_codegen_vec4_acc_op_sum3
;
320 f
->op
= ppir_codegen_vec4_acc_op_sum4
;
324 f
->op
= ppir_codegen_vec4_acc_op_floor
;
327 f
->op
= ppir_codegen_vec4_acc_op_ceil
;
330 f
->op
= ppir_codegen_vec4_acc_op_fract
;
333 f
->op
= ppir_codegen_vec4_acc_op_gt
;
336 f
->op
= ppir_codegen_vec4_acc_op_ge
;
339 f
->op
= ppir_codegen_vec4_acc_op_eq
;
342 f
->op
= ppir_codegen_vec4_acc_op_ne
;
345 f
->op
= ppir_codegen_vec4_acc_op_sel
;
348 f
->op
= ppir_codegen_vec4_acc_op_max
;
351 f
->op
= ppir_codegen_vec4_acc_op_min
;
354 f
->op
= ppir_codegen_vec4_acc_op_dFdx
;
357 f
->op
= ppir_codegen_vec4_acc_op_dFdy
;
/* for select, encoding starts from the second source */
363 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1 : alu
->src
;
364 index
= ppir_target_get_src_reg_index(src
);
366 if (src
->type
== ppir_target_pipeline
&&
367 src
->pipeline
== ppir_pipeline_reg_vmul
)
370 f
->arg0_source
= index
>> 2;
372 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
373 f
->arg0_absolute
= src
->absolute
;
374 f
->arg0_negate
= src
->negate
;
376 if (++src
< alu
->src
+ alu
->num_src
) {
377 index
= ppir_target_get_src_reg_index(src
);
378 f
->arg1_source
= index
>> 2;
379 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
380 f
->arg1_absolute
= src
->absolute
;
381 f
->arg1_negate
= src
->negate
;
/* Encode an ALU node for the scalar add (accumulate) slot into the
 * ppir_codegen_field_float_acc at code.
 *
 * dest_component is the lowest set bit of the destination write mask
 * (ffs() - 1) and must exist; the destination is the register index plus
 * dest_component. The op is either shift_to_op(alu->shift) or one of the
 * ppir_codegen_float_acc_op_* values. For ppir_op_select the operand
 * encoding starts at alu->src + 1, otherwise at alu->src; a following
 * operand is encoded as arg1 when one exists before alu->src + alu->num_src.
 *
 * NOTE(review): the case labels selecting each op and the statement guarded
 * by the ppir_pipeline_reg_fmul check are not visible in this listing.
 */
385 static void ppir_codegen_encode_scl_add(ppir_node
*node
, void *code
)
387 ppir_codegen_field_float_acc
*f
= code
;
388 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
390 ppir_dest
*dest
= &alu
->dest
;
391 int dest_component
= ffs(dest
->write_mask
) - 1;
392 assert(dest_component
>= 0);
394 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
396 f
->dest_modifier
= dest
->modifier
;
400 f
->op
= shift_to_op(alu
->shift
);
403 f
->op
= ppir_codegen_float_acc_op_mov
;
406 f
->op
= ppir_codegen_float_acc_op_max
;
409 f
->op
= ppir_codegen_float_acc_op_min
;
412 f
->op
= ppir_codegen_float_acc_op_floor
;
415 f
->op
= ppir_codegen_float_acc_op_ceil
;
418 f
->op
= ppir_codegen_float_acc_op_fract
;
421 f
->op
= ppir_codegen_float_acc_op_gt
;
424 f
->op
= ppir_codegen_float_acc_op_ge
;
427 f
->op
= ppir_codegen_float_acc_op_eq
;
430 f
->op
= ppir_codegen_float_acc_op_ne
;
433 f
->op
= ppir_codegen_float_acc_op_sel
;
436 f
->op
= ppir_codegen_float_acc_op_dFdx
;
439 f
->op
= ppir_codegen_float_acc_op_dFdy
;
/* for select, encoding starts from the second source */
445 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1: alu
->src
;
446 if (src
->type
== ppir_target_pipeline
&&
447 src
->pipeline
== ppir_pipeline_reg_fmul
)
450 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
451 f
->arg0_absolute
= src
->absolute
;
452 f
->arg0_negate
= src
->negate
;
454 if (++src
< alu
->src
+ alu
->num_src
) {
455 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
456 f
->arg1_absolute
= src
->absolute
;
457 f
->arg1_negate
= src
->negate
;
/* Encode an ALU node for the combine slot into the
 * ppir_codegen_field_combine at code, using the scalar layout.
 *
 * dest_vec and arg1_en are cleared. The destination is the register index
 * plus the lowest written component (ffs(write_mask) - 1, which must exist).
 * The single source is encoded with get_scl_reg_index() for that component
 * plus its absolute / negate flags. The op is one of the
 * ppir_codegen_combine_scalar_op_* values (rsqrt, log2, exp2, rcp, sqrt,
 * sin, cos).
 *
 * NOTE(review): the switch and case labels that select the op, and any code
 * between the opening declarations and the dest_vec assignment, are not
 * visible in this listing.
 */
461 static void ppir_codegen_encode_combine(ppir_node
*node
, void *code
)
463 ppir_codegen_field_combine
*f
= code
;
464 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
475 f
->scalar
.dest_vec
= false;
476 f
->scalar
.arg1_en
= false;
478 ppir_dest
*dest
= &alu
->dest
;
479 int dest_component
= ffs(dest
->write_mask
) - 1;
480 assert(dest_component
>= 0);
481 f
->scalar
.dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
482 f
->scalar
.dest_modifier
= dest
->modifier
;
484 ppir_src
*src
= alu
->src
;
485 f
->scalar
.arg0_src
= get_scl_reg_index(src
, dest_component
);
486 f
->scalar
.arg0_absolute
= src
->absolute
;
487 f
->scalar
.arg0_negate
= src
->negate
;
491 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rsqrt
;
494 f
->scalar
.op
= ppir_codegen_combine_scalar_op_log2
;
497 f
->scalar
.op
= ppir_codegen_combine_scalar_op_exp2
;
500 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rcp
;
503 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sqrt
;
506 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sin
;
509 f
->scalar
.op
= ppir_codegen_combine_scalar_op_cos
;
520 static void ppir_codegen_encode_store_temp(ppir_node
*node
, void *code
)
522 assert(node
->op
== ppir_op_store_temp
);
524 ppir_codegen_field_temp_write
*f
= code
;
525 ppir_store_node
*snode
= ppir_node_to_store(node
);
526 int num_components
= snode
->num_components
;
528 f
->temp_write
.dest
= 0x03; // 11 - temporary
529 f
->temp_write
.source
= snode
->src
.reg
->index
;
531 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
532 f
->temp_write
.alignment
= alignment
;
533 f
->temp_write
.index
= snode
->index
<< (2 - alignment
);
535 f
->temp_write
.offset_reg
= snode
->index
>> 2;
538 static void ppir_codegen_encode_const(ppir_const
*constant
, uint16_t *code
)
540 for (int i
= 0; i
< constant
->num
; i
++)
541 code
[i
] = util_float_to_half(constant
->value
[i
].f
);
544 static void ppir_codegen_encode_discard(ppir_node
*node
, void *code
)
546 ppir_codegen_field_branch
*b
= code
;
547 assert(node
->op
== ppir_op_discard
);
549 b
->discard
.word0
= PPIR_CODEGEN_DISCARD_WORD0
;
550 b
->discard
.word1
= PPIR_CODEGEN_DISCARD_WORD1
;
551 b
->discard
.word2
= PPIR_CODEGEN_DISCARD_WORD2
;
554 static void ppir_codegen_encode_branch(ppir_node
*node
, void *code
)
556 ppir_codegen_field_branch
*b
= code
;
557 ppir_branch_node
*branch
;
558 ppir_instr
*target_instr
;
559 if (node
->op
== ppir_op_discard
) {
560 ppir_codegen_encode_discard(node
, code
);
564 assert(node
->op
== ppir_op_branch
);
565 branch
= ppir_node_to_branch(node
);
567 b
->branch
.unknown_0
= 0x0;
568 b
->branch
.arg0_source
= get_scl_reg_index(&branch
->src
[0], 0);
569 b
->branch
.arg1_source
= get_scl_reg_index(&branch
->src
[1], 0);
570 b
->branch
.cond_gt
= branch
->cond_gt
;
571 b
->branch
.cond_eq
= branch
->cond_eq
;
572 b
->branch
.cond_lt
= branch
->cond_lt
;
573 b
->branch
.unknown_1
= 0x0;
575 target_instr
= list_first_entry(&branch
->target
->instr_list
, ppir_instr
, list
);
576 b
->branch
.target
= target_instr
->offset
- node
->instr
->offset
;
577 b
->branch
.next_count
= target_instr
->encode_size
;
580 typedef void (*ppir_codegen_instr_slot_encode_func
)(ppir_node
*, void *);
582 static const ppir_codegen_instr_slot_encode_func
583 ppir_codegen_encode_slot
[PPIR_INSTR_SLOT_NUM
] = {
584 [PPIR_INSTR_SLOT_VARYING
] = ppir_codegen_encode_varying
,
585 [PPIR_INSTR_SLOT_TEXLD
] = ppir_codegen_encode_texld
,
586 [PPIR_INSTR_SLOT_UNIFORM
] = ppir_codegen_encode_uniform
,
587 [PPIR_INSTR_SLOT_ALU_VEC_MUL
] = ppir_codegen_encode_vec_mul
,
588 [PPIR_INSTR_SLOT_ALU_SCL_MUL
] = ppir_codegen_encode_scl_mul
,
589 [PPIR_INSTR_SLOT_ALU_VEC_ADD
] = ppir_codegen_encode_vec_add
,
590 [PPIR_INSTR_SLOT_ALU_SCL_ADD
] = ppir_codegen_encode_scl_add
,
591 [PPIR_INSTR_SLOT_ALU_COMBINE
] = ppir_codegen_encode_combine
,
592 [PPIR_INSTR_SLOT_STORE_TEMP
] = ppir_codegen_encode_store_temp
,
593 [PPIR_INSTR_SLOT_BRANCH
] = ppir_codegen_encode_branch
,
/* Size in bits of each slot's encoded field, indexed by slot number in the
 * same order as the slot encoder table. The largest entry is 73.
 */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
/* Return the number of 32-bit words needed to hold `size` bits, rounding up.
 * Despite the name, the result is a word count, not a rounded bit count.
 */
static inline int align_to_word(int size)
{
   return ((size + 0x1f) >> 5);
}
/* Compute the encoded size of an instruction.
 *
 * Field sizes from ppir_codegen_field_size are accumulated over the slots
 * and the two constants are inspected; the result is
 * align_to_word(size) + 1.
 * NOTE(review): presumably the extra word accounts for the control word
 * that precedes the fields (encode_instr writes fields at ctrl + 1) —
 * confirm. The declaration of size, the per-slot condition and the amount
 * added for each non-empty constant are not visible in this listing.
 */
605 static int get_instr_encode_size(ppir_instr
*instr
)
609 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
611 size
+= ppir_codegen_field_size
[i
];
614 for (int i
= 0; i
< 2; i
++) {
615 if (instr
->constant
[i
].num
)
619 return align_to_word(size
) + 1;
/* OR `src_size` bits from src into dst starting at bit `dst_offset`.
 *
 * dst and src are accessed as arrays of 32-bit words. When dst_offset is
 * word aligned the data is copied with memcpy (whole words, so up to 31 bits
 * past src_size are read and written). Otherwise every source word is split:
 * its low (32 - off1) bits are OR-ed into the upper part of the current
 * destination word and its remaining high bits into the lower part of the
 * next one. Because the unaligned path only ORs, the destination bits must
 * already be zero.
 *
 * src must be readable in whole 32-bit words covering src_size bits.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   int off1 = dst_offset & 0x1f;
   uint32_t *cpy_dst = dst, *cpy_src = src;

   cpy_dst += (dst_offset >> 5);

   if (off1) {
      /* off1 != 0 guarantees off2 < 32, so neither shift below is by the
       * full word width
       */
      int off2 = 32 - off1;
      int cpy_size = 0;

      while (1) {
         *cpy_dst |= *cpy_src << off1;
         cpy_dst++;

         cpy_size += off2;
         if (cpy_size >= src_size)
            break;

         *cpy_dst |= *cpy_src >> off2;
         cpy_src++;

         cpy_size += off1;
         if (cpy_size >= src_size)
            break;
      }
   }
   else
      memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
}
/* Encode one instruction at code.
 *
 * code starts with a ppir_codegen_ctrl; the bit-packed fields follow at
 * ctrl + 1. For every occupied slot the matching encoder from
 * ppir_codegen_encode_slot fills a zeroed 12-byte scratch buffer, bitcopy()
 * appends ppir_codegen_field_size[i] bits of it at the running bit offset
 * `size`, and bit i is set in ctrl->fields. Each non-empty constant is
 * converted with ppir_codegen_encode_const(), appended (num * 16 bits) and
 * flagged with bit ppir_codegen_field_shift_vec4_const_0 + i.
 * Afterwards size becomes align_to_word(size) + 1, and the control word at
 * last_code gets next_count = size and prefetch = true.
 *
 * NOTE(review): the declaration of size, the statements executed for the
 * texld and ddx / ddy checks, the size increment for constants, any guard
 * around the last_code update and the return statement are not visible in
 * this listing.
 */
652 static int encode_instr(ppir_instr
*instr
, void *code
, void *last_code
)
655 ppir_codegen_ctrl
*ctrl
= code
;
657 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
658 if (instr
->slots
[i
]) {
659 /* max field size (73), align to dword */
660 uint8_t output
[12] = {0};
662 ppir_codegen_encode_slot
[i
](instr
->slots
[i
], output
);
663 bitcopy(ctrl
+ 1, size
, output
, ppir_codegen_field_size
[i
]);
665 size
+= ppir_codegen_field_size
[i
];
666 ctrl
->fields
|= 1 << i
;
670 if (instr
->slots
[PPIR_INSTR_SLOT_TEXLD
])
673 if (instr
->slots
[PPIR_INSTR_SLOT_ALU_VEC_ADD
]) {
674 ppir_node
*node
= instr
->slots
[PPIR_INSTR_SLOT_ALU_VEC_ADD
];
675 if (node
->op
== ppir_op_ddx
|| node
->op
== ppir_op_ddy
)
679 if (instr
->slots
[PPIR_INSTR_SLOT_ALU_SCL_ADD
]) {
680 ppir_node
*node
= instr
->slots
[PPIR_INSTR_SLOT_ALU_SCL_ADD
];
681 if (node
->op
== ppir_op_ddx
|| node
->op
== ppir_op_ddy
)
685 for (int i
= 0; i
< 2; i
++) {
686 if (instr
->constant
[i
].num
) {
687 uint16_t output
[4] = {0};
689 ppir_codegen_encode_const(instr
->constant
+ i
, output
);
690 bitcopy(ctrl
+ 1, size
, output
, instr
->constant
[i
].num
* 16);
693 ctrl
->fields
|= 1 << (ppir_codegen_field_shift_vec4_const_0
+ i
);
697 size
= align_to_word(size
) + 1;
704 ppir_codegen_ctrl
*last_ctrl
= last_code
;
705 last_ctrl
->next_count
= size
;
706 last_ctrl
->prefetch
= true;
/* Print the encoded program of comp to stdout for debugging.
 *
 * Walks every instruction of every block and prints its index and offset,
 * followed by its encoded words in hex; the word count n is read from the
 * low 5 bits of the instruction's first word. Each instruction is also
 * passed to ppir_disassemble_instr().
 *
 * NOTE(review): the declaration of offset and the statements advancing
 * prog / offset between instructions are not visible in this listing.
 */
712 static void ppir_codegen_print_prog(ppir_compiler
*comp
)
714 uint32_t *prog
= comp
->prog
->shader
;
717 printf("========ppir codegen========\n");
718 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
719 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
720 printf("%03d (@%6d): ", instr
->index
, instr
->offset
);
721 int n
= prog
[0] & 0x1f;
722 for (int i
= 0; i
< n
; i
++) {
725 printf("%08x ", prog
[i
]);
728 ppir_disassemble_instr(prog
, offset
);
733 printf("-----------------------\n");
/* Generate the encoded program for comp.
 *
 * First pass: each instruction, in block and instruction list order, gets
 * its offset (the running size) and its encode_size from
 * get_instr_encode_size(). A buffer of `size` 32-bit words is then allocated
 * with rzalloc_size() using comp->prog as the ralloc parent.
 * Second pass: every instruction is encoded with encode_instr(), which also
 * receives last_code (initially NULL).
 * Finally the buffer and its size in bytes are stored in comp->prog->shader
 * and comp->prog->shader_size, and the program is printed when
 * LIMA_DEBUG_PP is set in lima_debug.
 *
 * NOTE(review): the declaration of size, the allocation failure handling,
 * the updates of code / last_code inside the second loop and the return
 * statements are not visible in this listing.
 */
736 bool ppir_codegen_prog(ppir_compiler
*comp
)
739 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
740 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
741 instr
->offset
= size
;
742 instr
->encode_size
= get_instr_encode_size(instr
);
743 size
+= instr
->encode_size
;
747 uint32_t *prog
= rzalloc_size(comp
->prog
, size
* sizeof(uint32_t));
751 uint32_t *code
= prog
, *last_code
= NULL
;
752 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
753 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
754 int offset
= encode_instr(instr
, code
, last_code
);
760 comp
->prog
->shader
= prog
;
761 comp
->prog
->shader_size
= size
* sizeof(uint32_t);
763 if (lima_debug
& LIMA_DEBUG_PP
)
764 ppir_codegen_print_prog(comp
);