2 * Copyright (c) 2017 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
31 #include "lima_context.h"
/* Pack a 4-component swizzle into consecutive 2-bit selector fields.
 *
 * swizzle:    four component selectors
 * shift:      added to every selector before it is wrapped to the range 0..3
 * dest_shift: selector i is stored in the 2-bit field number (i + dest_shift)
 *
 * Returns the selectors OR-ed together into one word.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   /* The accumulator must start at zero because fields are OR-ed in. */
   unsigned ret = 0;

   for (int i = 0; i < 4; i++)
      ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);

   return ret;
}
41 static int get_scl_reg_index(ppir_src
*src
, int component
)
43 int ret
= ppir_target_get_src_reg_index(src
);
44 ret
+= src
->swizzle
[component
];
/* Encode the varying-load field for a load node into `code`, which is viewed
 * as a ppir_codegen_field_varying.
 *
 * Two encodings are written below: an immediate form (f->imm.*), asserted for
 * load_varying / load_coords / load_fragcoord / load_pointcoord, and a
 * register form (f->reg.*), asserted for load_coords, whose source register,
 * negate/absolute flags and swizzle are taken from load->src.
 *
 * NOTE(review): the conditions that select between the two forms, between the
 * two f->imm.index assignments, and the switch that owns the case labels are
 * not visible in this excerpt - confirm against the complete source.
 */
48 static void ppir_codegen_encode_varying(ppir_node
*node
, void *code
)
50 ppir_codegen_field_varying
*f
= code
;
51 ppir_load_node
*load
= ppir_node_to_load(node
);
52 ppir_dest
*dest
= &load
->dest
;
53 int index
= ppir_target_get_dest_reg_index(dest
);
54 int num_components
= load
->num_components
;
57 assert(node
->op
== ppir_op_load_varying
||
58 node
->op
== ppir_op_load_coords
||
59 node
->op
== ppir_op_load_fragcoord
||
60 node
->op
== ppir_op_load_pointcoord
);
/* Immediate form: the register index above bit 1 selects the destination,
 * and its low two bits shift the write mask. */
62 f
->imm
.dest
= index
>> 2;
63 f
->imm
.mask
= dest
->write_mask
<< (index
& 0x3);
/* Three components map to alignment 3; otherwise alignment is
 * num_components - 1. */
65 int alignment
= num_components
== 3 ? 3 : num_components
- 1;
66 f
->imm
.alignment
= alignment
;
67 f
->imm
.offset_vector
= 0xf;
/* Two alternative index scalings follow: a fixed >> 2 and >> alignment.
 * NOTE(review): the selecting condition is not visible here. */
70 f
->imm
.index
= load
->index
>> 2;
72 f
->imm
.index
= load
->index
>> alignment
;
/* Per-op settings: fragcoord sets source_type 2 and perspective 3,
 * pointcoord sets source_type 3. */
75 case ppir_op_load_fragcoord
:
76 f
->imm
.source_type
= 2;
77 f
->imm
.perspective
= 3;
79 case ppir_op_load_pointcoord
:
80 f
->imm
.source_type
= 3;
/* Register form: only load_coords is accepted. */
87 assert(node
->op
== ppir_op_load_coords
);
89 f
->reg
.dest
= index
>> 2;
90 f
->reg
.mask
= dest
->write_mask
<< (index
& 0x3);
92 f
->reg
.source_type
= 1;
/* `index` is reused for the source register from here on. */
94 ppir_src
*src
= &load
->src
;
95 index
= ppir_target_get_src_reg_index(src
);
96 f
->reg
.source
= index
>> 2;
97 f
->reg
.negate
= src
->negate
;
98 f
->reg
.absolute
= src
->absolute
;
99 f
->reg
.swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, 0);
/* Encode the texture-load field for `node` into `code`, which is viewed as a
 * ppir_codegen_field_sampler.
 * NOTE(review): some lines of this function are not visible in this excerpt;
 * only the assignments shown here are described. */
103 static void ppir_codegen_encode_texld(ppir_node
*node
, void *code
)
105 ppir_codegen_field_sampler
*f
= code
;
106 ppir_load_texture_node
*ldtex
= ppir_node_to_load_texture(node
);
/* The sampler number comes straight from the texture-load node. */
108 f
->index
= ldtex
->sampler
;
/* The sampler type is always written as 2D here. */
110 f
->type
= ppir_codegen_sampler_type_2d
;
/* NOTE(review): 0x39001 is an unexplained constant written to a field named
 * unknown_2; its meaning cannot be determined from this file. */
112 f
->unknown_2
= 0x39001;
/* Encode the uniform-load field for a load node into `code`, which is viewed
 * as a ppir_codegen_field_uniform.
 * NOTE(review): the switch statement that owns the case labels below is not
 * visible in this excerpt. */
115 static void ppir_codegen_encode_uniform(ppir_node
*node
, void *code
)
117 ppir_codegen_field_uniform
*f
= code
;
118 ppir_load_node
*load
= ppir_node_to_load(node
);
/* load_uniform reads from the uniform source, load_temp from the temporary
 * source. */
121 case ppir_op_load_uniform
:
122 f
->source
= ppir_codegen_uniform_src_uniform
;
124 case ppir_op_load_temp
:
125 f
->source
= ppir_codegen_uniform_src_temporary
;
/* Four components map to alignment 2; otherwise alignment is
 * num_components - 1. */
131 int num_components
= load
->num_components
;
132 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
134 f
->alignment
= alignment
;
136 /* TODO: uniform can be also combined like varying */
/* The index is scaled up by (2 - alignment) bits, so a smaller alignment
 * yields a larger multiplier. */
137 f
->index
= load
->index
<< (2 - alignment
);
/* Map a shift amount in [-3, 3] to its op encoding: non-negative shifts
 * encode as themselves, negative shifts encode as shift + 8. */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
/* Encode the vec4 multiply field for an ALU node into `code`, which is viewed
 * as a ppir_codegen_field_vec4_mul.
 *
 * f->op receives either shift_to_op(alu->shift) or one of the
 * ppir_codegen_vec4_mul_op_* values.
 * NOTE(review): the switch/case labels that select among those assignments,
 * and the declaration of dest_shift, are not visible in this excerpt.
 */
146 static void ppir_codegen_encode_vec_mul(ppir_node
*node
, void *code
)
148 ppir_codegen_field_vec4_mul
*f
= code
;
149 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
151 ppir_dest
*dest
= &alu
->dest
;
/* dest/mask are only written when the destination is not a pipeline
 * register; the low two bits of the register index become dest_shift. */
153 if (dest
->type
!= ppir_target_pipeline
) {
154 int index
= ppir_target_get_dest_reg_index(dest
);
155 dest_shift
= index
& 0x3;
156 f
->dest
= index
>> 2;
157 f
->mask
= dest
->write_mask
<< dest_shift
;
159 f
->dest_modifier
= dest
->modifier
;
/* Opcode selection (case labels not visible). */
163 f
->op
= shift_to_op(alu
->shift
);
166 f
->op
= ppir_codegen_vec4_mul_op_mov
;
169 f
->op
= ppir_codegen_vec4_mul_op_max
;
172 f
->op
= ppir_codegen_vec4_mul_op_min
;
175 f
->op
= ppir_codegen_vec4_mul_op_and
;
178 f
->op
= ppir_codegen_vec4_mul_op_or
;
181 f
->op
= ppir_codegen_vec4_mul_op_xor
;
184 f
->op
= ppir_codegen_vec4_mul_op_gt
;
187 f
->op
= ppir_codegen_vec4_mul_op_ge
;
190 f
->op
= ppir_codegen_vec4_mul_op_eq
;
193 f
->op
= ppir_codegen_vec4_mul_op_ne
;
196 f
->op
= ppir_codegen_vec4_mul_op_not
;
/* First operand: the swizzle is shifted by the source's component offset and
 * placed according to dest_shift. */
202 ppir_src
*src
= alu
->src
;
203 int index
= ppir_target_get_src_reg_index(src
);
204 f
->arg0_source
= index
>> 2;
205 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
206 f
->arg0_absolute
= src
->absolute
;
207 f
->arg0_negate
= src
->negate
;
/* Second operand, only for two-source nodes.
 * NOTE(review): no statement advancing `src` to the second source is visible
 * before it is used below - confirm against the complete source. */
209 if (alu
->num_src
== 2) {
211 index
= ppir_target_get_src_reg_index(src
);
212 f
->arg1_source
= index
>> 2;
213 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
214 f
->arg1_absolute
= src
->absolute
;
215 f
->arg1_negate
= src
->negate
;
/* Encode the scalar multiply field for an ALU node into `code`, which is
 * viewed as a ppir_codegen_field_float_mul.
 *
 * f->op receives either shift_to_op(alu->shift) or one of the
 * ppir_codegen_float_mul_op_* values.
 * NOTE(review): the switch/case labels that select among those assignments
 * are not visible in this excerpt.
 */
219 static void ppir_codegen_encode_scl_mul(ppir_node
*node
, void *code
)
221 ppir_codegen_field_float_mul
*f
= code
;
222 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
/* The written component is the lowest set bit of the write mask; an empty
 * mask would give -1, which the assert rejects. */
224 ppir_dest
*dest
= &alu
->dest
;
225 int dest_component
= ffs(dest
->write_mask
) - 1;
226 assert(dest_component
>= 0);
/* The destination is only written when it is not a pipeline register. */
228 if (dest
->type
!= ppir_target_pipeline
) {
229 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
232 f
->dest_modifier
= dest
->modifier
;
/* Opcode selection (case labels not visible). */
236 f
->op
= shift_to_op(alu
->shift
);
239 f
->op
= ppir_codegen_float_mul_op_mov
;
242 f
->op
= ppir_codegen_float_mul_op_max
;
245 f
->op
= ppir_codegen_float_mul_op_min
;
248 f
->op
= ppir_codegen_float_mul_op_and
;
251 f
->op
= ppir_codegen_float_mul_op_or
;
254 f
->op
= ppir_codegen_float_mul_op_xor
;
257 f
->op
= ppir_codegen_float_mul_op_gt
;
260 f
->op
= ppir_codegen_float_mul_op_ge
;
263 f
->op
= ppir_codegen_float_mul_op_eq
;
266 f
->op
= ppir_codegen_float_mul_op_ne
;
269 f
->op
= ppir_codegen_float_mul_op_not
;
/* First operand: scalar register index looked up through the source swizzle
 * for the destination component. */
275 ppir_src
*src
= alu
->src
;
276 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
277 f
->arg0_absolute
= src
->absolute
;
278 f
->arg0_negate
= src
->negate
;
/* Second operand, only for two-source nodes.
 * NOTE(review): no statement advancing `src` to the second source is visible
 * before it is used below - confirm against the complete source. */
280 if (alu
->num_src
== 2) {
282 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
283 f
->arg1_absolute
= src
->absolute
;
284 f
->arg1_negate
= src
->negate
;
/* Encode the vec4 accumulate (add) field for an ALU node into `code`, which
 * is viewed as a ppir_codegen_field_vec4_acc.
 *
 * f->op receives one of the ppir_codegen_vec4_acc_op_* values.
 * NOTE(review): the switch/case labels that select among those assignments,
 * and the body of the pipeline-register check, are not visible in this
 * excerpt.
 */
288 static void ppir_codegen_encode_vec_add(ppir_node
*node
, void *code
)
290 ppir_codegen_field_vec4_acc
*f
= code
;
291 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
/* The register index above bit 1 selects the destination; its low two bits
 * shift the write mask and are reused for swizzle placement. */
293 ppir_dest
*dest
= &alu
->dest
;
294 int index
= ppir_target_get_dest_reg_index(dest
);
295 int dest_shift
= index
& 0x3;
296 f
->dest
= index
>> 2;
297 f
->mask
= dest
->write_mask
<< dest_shift
;
298 f
->dest_modifier
= dest
->modifier
;
/* Opcode selection (case labels not visible). */
302 f
->op
= ppir_codegen_vec4_acc_op_add
;
305 f
->op
= ppir_codegen_vec4_acc_op_mov
;
308 f
->op
= ppir_codegen_vec4_acc_op_sum3
;
312 f
->op
= ppir_codegen_vec4_acc_op_sum4
;
316 f
->op
= ppir_codegen_vec4_acc_op_floor
;
319 f
->op
= ppir_codegen_vec4_acc_op_ceil
;
322 f
->op
= ppir_codegen_vec4_acc_op_fract
;
325 f
->op
= ppir_codegen_vec4_acc_op_gt
;
328 f
->op
= ppir_codegen_vec4_acc_op_ge
;
331 f
->op
= ppir_codegen_vec4_acc_op_eq
;
334 f
->op
= ppir_codegen_vec4_acc_op_ne
;
337 f
->op
= ppir_codegen_vec4_acc_op_sel
;
340 f
->op
= ppir_codegen_vec4_acc_op_max
;
343 f
->op
= ppir_codegen_vec4_acc_op_min
;
/* For a select node, operand encoding starts at the second source
 * (alu->src + 1); for every other op it starts at the first. */
349 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1 : alu
->src
;
350 index
= ppir_target_get_src_reg_index(src
);
/* Special case for a source fed from the vmul pipeline register.
 * NOTE(review): the statement(s) guarded by this check are not visible. */
352 if (src
->type
== ppir_target_pipeline
&&
353 src
->pipeline
== ppir_pipeline_reg_vmul
)
356 f
->arg0_source
= index
>> 2;
358 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
359 f
->arg0_absolute
= src
->absolute
;
360 f
->arg0_negate
= src
->negate
;
/* Advance to the next source and encode it as arg1 only if it still lies
 * within alu->src[0 .. num_src). */
362 if (++src
< alu
->src
+ alu
->num_src
) {
363 index
= ppir_target_get_src_reg_index(src
);
364 f
->arg1_source
= index
>> 2;
365 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
366 f
->arg1_absolute
= src
->absolute
;
367 f
->arg1_negate
= src
->negate
;
/* Encode the scalar accumulate (add) field for an ALU node into `code`, which
 * is viewed as a ppir_codegen_field_float_acc.
 *
 * f->op receives either shift_to_op(alu->shift) or one of the
 * ppir_codegen_float_acc_op_* values.
 * NOTE(review): the switch/case labels that select among those assignments,
 * and the body of the pipeline-register check, are not visible in this
 * excerpt.
 */
371 static void ppir_codegen_encode_scl_add(ppir_node
*node
, void *code
)
373 ppir_codegen_field_float_acc
*f
= code
;
374 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
/* The written component is the lowest set bit of the write mask; an empty
 * mask would give -1, which the assert rejects. */
376 ppir_dest
*dest
= &alu
->dest
;
377 int dest_component
= ffs(dest
->write_mask
) - 1;
378 assert(dest_component
>= 0);
380 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
382 f
->dest_modifier
= dest
->modifier
;
/* Opcode selection (case labels not visible). */
386 f
->op
= shift_to_op(alu
->shift
);
389 f
->op
= ppir_codegen_float_acc_op_mov
;
392 f
->op
= ppir_codegen_float_acc_op_max
;
395 f
->op
= ppir_codegen_float_acc_op_min
;
398 f
->op
= ppir_codegen_float_acc_op_floor
;
401 f
->op
= ppir_codegen_float_acc_op_ceil
;
404 f
->op
= ppir_codegen_float_acc_op_fract
;
407 f
->op
= ppir_codegen_float_acc_op_gt
;
410 f
->op
= ppir_codegen_float_acc_op_ge
;
413 f
->op
= ppir_codegen_float_acc_op_eq
;
416 f
->op
= ppir_codegen_float_acc_op_ne
;
419 f
->op
= ppir_codegen_float_acc_op_sel
;
/* For a select node, operand encoding starts at the second source
 * (alu->src + 1); for every other op it starts at the first. */
425 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1: alu
->src
;
/* Special case for a source fed from the fmul pipeline register.
 * NOTE(review): the statement(s) guarded by this check are not visible. */
426 if (src
->type
== ppir_target_pipeline
&&
427 src
->pipeline
== ppir_pipeline_reg_fmul
)
430 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
431 f
->arg0_absolute
= src
->absolute
;
432 f
->arg0_negate
= src
->negate
;
/* Advance to the next source and encode it as arg1 only if it still lies
 * within alu->src[0 .. num_src). */
434 if (++src
< alu
->src
+ alu
->num_src
) {
435 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
436 f
->arg1_absolute
= src
->absolute
;
437 f
->arg1_negate
= src
->negate
;
/* Encode the combine field for an ALU node into `code`, which is viewed as a
 * ppir_codegen_field_combine. The visible code fills the scalar form
 * (f->scalar.*): scalar destination, a single operand (arg1 disabled) and one
 * of the ppir_codegen_combine_scalar_op_* opcodes.
 * NOTE(review): the surrounding switch/case structure, and whatever precedes
 * and follows the scalar path, are not visible in this excerpt.
 */
441 static void ppir_codegen_encode_combine(ppir_node
*node
, void *code
)
443 ppir_codegen_field_combine
*f
= code
;
444 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
/* Scalar destination, no second argument. */
455 f
->scalar
.dest_vec
= false;
456 f
->scalar
.arg1_en
= false;
/* The written component is the lowest set bit of the write mask; an empty
 * mask would give -1, which the assert rejects. */
458 ppir_dest
*dest
= &alu
->dest
;
459 int dest_component
= ffs(dest
->write_mask
) - 1;
460 assert(dest_component
>= 0);
461 f
->scalar
.dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
462 f
->scalar
.dest_modifier
= dest
->modifier
;
464 ppir_src
*src
= alu
->src
;
465 f
->scalar
.arg0_src
= get_scl_reg_index(src
, dest_component
);
466 f
->scalar
.arg0_absolute
= src
->absolute
;
467 f
->scalar
.arg0_negate
= src
->negate
;
/* Opcode selection (case labels not visible). */
471 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rsqrt
;
474 f
->scalar
.op
= ppir_codegen_combine_scalar_op_log2
;
477 f
->scalar
.op
= ppir_codegen_combine_scalar_op_exp2
;
480 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rcp
;
483 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sqrt
;
486 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sin
;
489 f
->scalar
.op
= ppir_codegen_combine_scalar_op_cos
;
/* Encode the temporary-store field for a store_temp node into `code`, which
 * is viewed as a ppir_codegen_field_temp_write. Any other op trips the
 * assert.
 * NOTE(review): some lines of this function are not visible in this excerpt;
 * only the statements shown here are described. */
500 static void ppir_codegen_encode_store_temp(ppir_node
*node
, void *code
)
502 assert(node
->op
== ppir_op_store_temp
);
504 ppir_codegen_field_temp_write
*f
= code
;
505 ppir_store_node
*snode
= ppir_node_to_store(node
);
506 int num_components
= snode
->num_components
;
508 f
->temp_write
.dest
= 0x03; // 11 - temporary
/* The value being stored comes from the source register's index. */
509 f
->temp_write
.source
= snode
->src
.reg
->index
;
/* Four components map to alignment 2; otherwise alignment is
 * num_components - 1. The index is scaled up by (2 - alignment) bits. */
511 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
512 f
->temp_write
.alignment
= alignment
;
513 f
->temp_write
.index
= snode
->index
<< (2 - alignment
);
515 f
->temp_write
.offset_reg
= snode
->index
>> 2;
518 static void ppir_codegen_encode_const(ppir_const
*constant
, uint16_t *code
)
520 for (int i
= 0; i
< constant
->num
; i
++)
521 code
[i
] = util_float_to_half(constant
->value
[i
].f
);
/* Encode a discard node into `code`, which is viewed as a
 * ppir_codegen_field_branch: the three words of the discard form are filled
 * with the fixed PPIR_CODEGEN_DISCARD_WORD0..2 constants. Any op other than
 * ppir_op_discard trips the assert. */
524 static void ppir_codegen_encode_discard(ppir_node
*node
, void *code
)
526 ppir_codegen_field_branch
*b
= code
;
527 assert(node
->op
== ppir_op_discard
);
529 b
->discard
.word0
= PPIR_CODEGEN_DISCARD_WORD0
;
530 b
->discard
.word1
= PPIR_CODEGEN_DISCARD_WORD1
;
531 b
->discard
.word2
= PPIR_CODEGEN_DISCARD_WORD2
;
/* Encode the branch field for `node` into `code`, which is viewed as a
 * ppir_codegen_field_branch. Discard nodes are delegated to
 * ppir_codegen_encode_discard(); every other node must be ppir_op_branch.
 * NOTE(review): the end of the discard branch (presumably an early return)
 * is not visible in this excerpt - confirm. */
534 static void ppir_codegen_encode_branch(ppir_node
*node
, void *code
)
536 ppir_codegen_field_branch
*b
= code
;
537 ppir_branch_node
*branch
;
538 ppir_instr
*target_instr
;
539 if (node
->op
== ppir_op_discard
) {
540 ppir_codegen_encode_discard(node
, code
);
544 assert(node
->op
== ppir_op_branch
);
545 branch
= ppir_node_to_branch(node
);
/* Both compared operands are read as scalars through component 0 of their
 * swizzle; the three cond_* flags are copied from the branch node. */
547 b
->branch
.unknown_0
= 0x0;
548 b
->branch
.arg0_source
= get_scl_reg_index(&branch
->src
[0], 0);
549 b
->branch
.arg1_source
= get_scl_reg_index(&branch
->src
[1], 0);
550 b
->branch
.cond_gt
= branch
->cond_gt
;
551 b
->branch
.cond_eq
= branch
->cond_eq
;
552 b
->branch
.cond_lt
= branch
->cond_lt
;
553 b
->branch
.unknown_1
= 0x0;
/* The target is the first instruction of the destination block, encoded
 * relative to this node's own instruction offset; next_count is that
 * instruction's encode_size. */
555 target_instr
= list_first_entry(&branch
->target
->instr_list
, ppir_instr
, list
);
556 b
->branch
.target
= target_instr
->offset
- node
->instr
->offset
;
557 b
->branch
.next_count
= target_instr
->encode_size
;
/* Signature shared by all per-slot encoders: (node, output buffer). */
560 typedef void (*ppir_codegen_instr_slot_encode_func
)(ppir_node
*, void *);
/* Dispatch table mapping each PPIR_INSTR_SLOT_* index to the function that
 * encodes the node occupying that slot. */
562 static const ppir_codegen_instr_slot_encode_func
563 ppir_codegen_encode_slot
[PPIR_INSTR_SLOT_NUM
] = {
564 [PPIR_INSTR_SLOT_VARYING
] = ppir_codegen_encode_varying
,
565 [PPIR_INSTR_SLOT_TEXLD
] = ppir_codegen_encode_texld
,
566 [PPIR_INSTR_SLOT_UNIFORM
] = ppir_codegen_encode_uniform
,
567 [PPIR_INSTR_SLOT_ALU_VEC_MUL
] = ppir_codegen_encode_vec_mul
,
568 [PPIR_INSTR_SLOT_ALU_SCL_MUL
] = ppir_codegen_encode_scl_mul
,
569 [PPIR_INSTR_SLOT_ALU_VEC_ADD
] = ppir_codegen_encode_vec_add
,
570 [PPIR_INSTR_SLOT_ALU_SCL_ADD
] = ppir_codegen_encode_scl_add
,
571 [PPIR_INSTR_SLOT_ALU_COMBINE
] = ppir_codegen_encode_combine
,
572 [PPIR_INSTR_SLOT_STORE_TEMP
] = ppir_codegen_encode_store_temp
,
573 [PPIR_INSTR_SLOT_BRANCH
] = ppir_codegen_encode_branch
,
/* Encoded size of each slot's field, indexed by slot number. The values are
 * used as bit counts: they are accumulated into the bit offset passed to
 * bitcopy() and rounded up to 32-bit words by align_to_word(). */
576 static const int ppir_codegen_field_size
[] = {
577 34, 62, 41, 43, 30, 44, 31, 30, 41, 73
/* Convert a size in bits to the number of 32-bit words needed to hold it,
 * rounding up. */
static inline int align_to_word(int size)
{
   int rounded = size + 0x1f;

   return rounded >> 5;
}
/* Compute the encoded size of `instr` in 32-bit words: field sizes are summed
 * in bits, rounded up to whole words, and one extra word is added (encode_instr
 * writes the fields starting at ctrl + 1, i.e. after a leading control word).
 * NOTE(review): the declaration of `size`, the per-slot condition guarding the
 * accumulation, and the statement executed for each non-empty constant are
 * not visible in this excerpt. */
585 static int get_instr_encode_size(ppir_instr
*instr
)
589 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
591 size
+= ppir_codegen_field_size
[i
];
/* Up to two constant groups may be attached to an instruction. */
594 for (int i
= 0; i
< 2; i
++) {
595 if (instr
->constant
[i
].num
)
599 return align_to_word(size
) + 1;
/* Copy src_size bits from `src` into `dst`, starting at bit offset dst_offset
 * within dst. Both buffers are accessed as arrays of 32-bit words.
 *
 * Destination words are OR-ed into rather than overwritten in the unaligned
 * path, so the target bits are presumably expected to be zero beforehand -
 * verify against the callers.
 * NOTE(review): the loop and the if/else that separate the unaligned path
 * from the memcpy path are not visible in this excerpt. */
602 static void bitcopy(void *dst
, int dst_offset
, void *src
, int src_size
)
/* off1 = bit position inside the first destination word. */
604 int off1
= dst_offset
& 0x1f;
605 uint32_t *cpy_dst
= dst
, *cpy_src
= src
;
/* Skip the whole destination words that precede the offset. */
607 cpy_dst
+= (dst_offset
>> 5);
/* off2 = number of bits remaining in the destination word after off1. */
610 int off2
= 32 - off1
;
/* The low part of the source word goes into the upper bits of the current
 * destination word. */
613 *cpy_dst
|= *cpy_src
<< off1
;
617 if (cpy_size
>= src_size
)
/* The bits shifted out above go into the lower bits of a destination
 * word. */
620 *cpy_dst
|= *cpy_src
>> off2
;
624 if (cpy_size
>= src_size
)
/* Whole-word copy of src_size rounded up to 32-bit words. */
629 memcpy(cpy_dst
, cpy_src
, align_to_word(src_size
) * 4);
/* Encode one instruction at `code`. The buffer starts with a
 * ppir_codegen_ctrl word; each occupied slot is encoded into a scratch buffer
 * and bit-packed after that control word, followed by any constants. The
 * previous instruction's control word (`last_code`) is updated with this
 * instruction's size and has prefetch enabled.
 * NOTE(review): the declaration of `size`, the return statement, the body of
 * the TEXLD check, the size update after each constant, and the guard around
 * the last_code update are not visible in this excerpt. */
632 static int encode_instr(ppir_instr
*instr
, void *code
, void *last_code
)
635 ppir_codegen_ctrl
*ctrl
= code
;
637 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
638 if (instr
->slots
[i
]) {
639 /* max field size (73), align to dword */
640 uint8_t output
[12] = {0};
/* Encode into the zeroed scratch buffer, then append its bits at the running
 * bit offset `size` and flag the slot as present in ctrl->fields. */
642 ppir_codegen_encode_slot
[i
](instr
->slots
[i
], output
);
643 bitcopy(ctrl
+ 1, size
, output
, ppir_codegen_field_size
[i
]);
645 size
+= ppir_codegen_field_size
[i
];
646 ctrl
->fields
|= 1 << i
;
650 if (instr
->slots
[PPIR_INSTR_SLOT_TEXLD
])
/* Constants: each value is stored as a 16-bit half float, hence num * 16
 * bits per constant group. */
653 for (int i
= 0; i
< 2; i
++) {
654 if (instr
->constant
[i
].num
) {
655 uint16_t output
[4] = {0};
657 ppir_codegen_encode_const(instr
->constant
+ i
, output
);
658 bitcopy(ctrl
+ 1, size
, output
, instr
->constant
[i
].num
* 16);
661 ctrl
->fields
|= 1 << (ppir_codegen_field_shift_vec4_const_0
+ i
);
/* Convert the accumulated bit count to words and add the control word. */
665 size
= align_to_word(size
) + 1;
/* Tell the previous instruction how large this one is. */
672 ppir_codegen_ctrl
*last_ctrl
= last_code
;
673 last_ctrl
->next_count
= size
;
674 last_ctrl
->prefetch
= true;
/* Debug dump of the encoded program in comp->prog->shader: for every
 * instruction of every block, print its index and offset, a run of program
 * words in hex, and then call ppir_disassemble_instr().
 * NOTE(review): the declaration and update of `offset`, the advance of `prog`
 * between instructions, and part of the inner loop body are not visible in
 * this excerpt. */
680 static void ppir_codegen_print_prog(ppir_compiler
*comp
)
682 uint32_t *prog
= comp
->prog
->shader
;
685 printf("========ppir codegen========\n");
686 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
687 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
688 printf("%03d (@%6d): ", instr
->index
, instr
->offset
);
/* The low five bits of the first word give the number of words printed -
 * presumably the instruction's word count; confirm against the control-word
 * layout. */
689 int n
= prog
[0] & 0x1f;
690 for (int i
= 0; i
< n
; i
++) {
693 printf("%08x ", prog
[i
]);
696 ppir_disassemble_instr(prog
, offset
);
701 printf("-----------------------\n");
/* Generate the binary program for `comp`.
 *
 * Pass 1 walks every instruction of every block, records its offset and
 * encode_size, and accumulates the total size in 32-bit words. A buffer of
 * that many words is then allocated with rzalloc_size() using comp->prog as
 * the ralloc parent. Pass 2 encodes each instruction into the buffer. The
 * result is published in comp->prog->shader with shader_size in bytes, and is
 * printed when LIMA_DEBUG_PP is set in lima_debug.
 * NOTE(review): the declaration of `size`, the allocation-failure check, the
 * advance of code/last_code inside the second loop, and the return statements
 * are not visible in this excerpt. */
704 bool ppir_codegen_prog(ppir_compiler
*comp
)
/* Pass 1: lay out the instructions. */
707 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
708 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
709 instr
->offset
= size
;
710 instr
->encode_size
= get_instr_encode_size(instr
);
711 size
+= instr
->encode_size
;
715 uint32_t *prog
= rzalloc_size(comp
->prog
, size
* sizeof(uint32_t));
/* Pass 2: encode. last_code starts as NULL because the first instruction has
 * no predecessor. */
719 uint32_t *code
= prog
, *last_code
= NULL
;
720 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
721 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
722 int offset
= encode_instr(instr
, code
, last_code
);
728 comp
->prog
->shader
= prog
;
729 comp
->prog
->shader_size
= size
* sizeof(uint32_t);
731 if (lima_debug
& LIMA_DEBUG_PP
)
732 ppir_codegen_print_prog(comp
);