/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
31 #include "lima_context.h"
/*
 * Pack a four-component swizzle into a bit field, two bits per component.
 *
 * swizzle:    four source component selectors.
 * shift:      added to every selector before it is masked to two bits.
 * dest_shift: component slot at which the first selector is placed; slot k
 *             occupies bits [2k, 2k + 1] of the result.
 *
 * Returns the packed value. It is not truncated here, so with a non-zero
 * dest_shift bits above bit 7 may be set.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned ret = 0;

   for (int i = 0; i < 4; i++)
      ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);

   return ret;
}
41 static int get_scl_reg_index(ppir_src
*src
, int component
)
43 int ret
= ppir_target_get_src_reg_index(src
);
44 ret
+= src
->swizzle
[component
];
48 static void ppir_codegen_encode_varying(ppir_node
*node
, void *code
)
50 ppir_codegen_field_varying
*f
= code
;
51 ppir_load_node
*load
= ppir_node_to_load(node
);
52 ppir_dest
*dest
= &load
->dest
;
53 int index
= ppir_target_get_dest_reg_index(dest
);
54 int num_components
= load
->num_components
;
57 assert(node
->op
== ppir_op_load_varying
||
58 node
->op
== ppir_op_load_coords
||
59 node
->op
== ppir_op_load_fragcoord
);
61 f
->imm
.dest
= index
>> 2;
62 f
->imm
.mask
= dest
->write_mask
<< (index
& 0x3);
64 int alignment
= num_components
== 3 ? 3 : num_components
- 1;
65 f
->imm
.alignment
= alignment
;
66 f
->imm
.offset_vector
= 0xf;
69 f
->imm
.index
= load
->index
>> 2;
71 f
->imm
.index
= load
->index
>> alignment
;
73 if (node
->op
== ppir_op_load_fragcoord
) {
74 f
->imm
.source_type
= 2;
75 f
->imm
.perspective
= 3;
79 assert(node
->op
== ppir_op_load_coords
);
81 f
->reg
.dest
= index
>> 2;
82 f
->reg
.mask
= dest
->write_mask
<< (index
& 0x3);
84 f
->reg
.source_type
= 1;
86 ppir_src
*src
= &load
->src
;
87 index
= ppir_target_get_src_reg_index(src
);
88 f
->reg
.source
= index
>> 2;
89 f
->reg
.negate
= src
->negate
;
90 f
->reg
.absolute
= src
->absolute
;
91 f
->reg
.swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, 0);
/*
 * Encode a texture-load node into the sampler field
 * (ppir_codegen_field_sampler): the field's index is taken from the node's
 * sampler, the type is set to ppir_codegen_sampler_type_2d and unknown_2 to
 * the constant 0x39001.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips (96, 99, 101, 103, 105), so
 * braces and possibly further assignments are not visible here.
 */
95 static void ppir_codegen_encode_texld(ppir_node
*node
, void *code
)
97 ppir_codegen_field_sampler
*f
= code
;
98 ppir_load_texture_node
*ldtex
= ppir_node_to_load_texture(node
);
100 f
->index
= ldtex
->sampler
;
102 f
->type
= ppir_codegen_sampler_type_2d
;
104 f
->unknown_2
= 0x39001;
107 static void ppir_codegen_encode_uniform(ppir_node
*node
, void *code
)
109 ppir_codegen_field_uniform
*f
= code
;
110 ppir_load_node
*load
= ppir_node_to_load(node
);
113 case ppir_op_load_uniform
:
114 f
->source
= ppir_codegen_uniform_src_uniform
;
116 case ppir_op_load_temp
:
117 f
->source
= ppir_codegen_uniform_src_temporary
;
123 int num_components
= load
->num_components
;
124 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
126 f
->alignment
= alignment
;
128 /* TODO: uniform can be also combined like varying */
129 f
->index
= load
->index
<< (2 - alignment
);
/*
 * Map a shift amount in [-3, 3] to its op encoding: non-negative shifts
 * encode as themselves, negative shifts as shift + 8 (the low three bits of
 * the two's-complement value).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);
   return shift < 0 ? shift + 8 : shift;
}
/*
 * Encode an ALU node into the vec4 multiply field
 * (ppir_codegen_field_vec4_mul).
 *
 * Visible behaviour: unless the destination type is ppir_target_pipeline,
 * the destination register index is split into f->dest (index >> 2) and a
 * component shift (index & 3) that is applied to the write mask; the
 * destination modifier is copied; f->op is set either from
 * shift_to_op(alu->shift) or to one of the ppir_codegen_vec4_mul_op_*
 * values; the first source fills arg0_source/swizzle/absolute/negate, and
 * when alu->num_src == 2 the arg1_* fields are filled the same way.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips. The dest_shift declaration,
 * the construct selecting each f->op value, and the statement that advances
 * src before the arg1_* assignments are not visible here.
 */
138 static void ppir_codegen_encode_vec_mul(ppir_node
*node
, void *code
)
140 ppir_codegen_field_vec4_mul
*f
= code
;
141 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
143 ppir_dest
*dest
= &alu
->dest
;
145 if (dest
->type
!= ppir_target_pipeline
) {
146 int index
= ppir_target_get_dest_reg_index(dest
);
147 dest_shift
= index
& 0x3;
148 f
->dest
= index
>> 2;
149 f
->mask
= dest
->write_mask
<< dest_shift
;
151 f
->dest_modifier
= dest
->modifier
;
155 f
->op
= shift_to_op(alu
->shift
);
158 f
->op
= ppir_codegen_vec4_mul_op_mov
;
161 f
->op
= ppir_codegen_vec4_mul_op_max
;
164 f
->op
= ppir_codegen_vec4_mul_op_min
;
167 f
->op
= ppir_codegen_vec4_mul_op_and
;
170 f
->op
= ppir_codegen_vec4_mul_op_or
;
173 f
->op
= ppir_codegen_vec4_mul_op_xor
;
176 f
->op
= ppir_codegen_vec4_mul_op_gt
;
179 f
->op
= ppir_codegen_vec4_mul_op_ge
;
182 f
->op
= ppir_codegen_vec4_mul_op_eq
;
185 f
->op
= ppir_codegen_vec4_mul_op_ne
;
188 f
->op
= ppir_codegen_vec4_mul_op_not
;
194 ppir_src
*src
= alu
->src
;
195 int index
= ppir_target_get_src_reg_index(src
);
196 f
->arg0_source
= index
>> 2;
197 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
198 f
->arg0_absolute
= src
->absolute
;
199 f
->arg0_negate
= src
->negate
;
201 if (alu
->num_src
== 2) {
203 index
= ppir_target_get_src_reg_index(src
);
204 f
->arg1_source
= index
>> 2;
205 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
206 f
->arg1_absolute
= src
->absolute
;
207 f
->arg1_negate
= src
->negate
;
/*
 * Encode an ALU node into the scalar multiply field
 * (ppir_codegen_field_float_mul).
 *
 * Visible behaviour: the destination component is the lowest set bit of the
 * write mask (ffs - 1, asserted to exist); unless the destination type is
 * ppir_target_pipeline, f->dest is the destination register index plus that
 * component; the destination modifier is copied; f->op is set either from
 * shift_to_op(alu->shift) or to one of the ppir_codegen_float_mul_op_*
 * values; arg0_* comes from the first source via get_scl_reg_index(), and
 * when alu->num_src == 2 the arg1_* fields are filled the same way.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips. The construct selecting
 * each f->op value and the statement that advances src before the arg1_*
 * assignments are not visible here.
 */
211 static void ppir_codegen_encode_scl_mul(ppir_node
*node
, void *code
)
213 ppir_codegen_field_float_mul
*f
= code
;
214 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
216 ppir_dest
*dest
= &alu
->dest
;
217 int dest_component
= ffs(dest
->write_mask
) - 1;
218 assert(dest_component
>= 0);
220 if (dest
->type
!= ppir_target_pipeline
) {
221 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
224 f
->dest_modifier
= dest
->modifier
;
228 f
->op
= shift_to_op(alu
->shift
);
231 f
->op
= ppir_codegen_float_mul_op_mov
;
234 f
->op
= ppir_codegen_float_mul_op_max
;
237 f
->op
= ppir_codegen_float_mul_op_min
;
240 f
->op
= ppir_codegen_float_mul_op_and
;
243 f
->op
= ppir_codegen_float_mul_op_or
;
246 f
->op
= ppir_codegen_float_mul_op_xor
;
249 f
->op
= ppir_codegen_float_mul_op_gt
;
252 f
->op
= ppir_codegen_float_mul_op_ge
;
255 f
->op
= ppir_codegen_float_mul_op_eq
;
258 f
->op
= ppir_codegen_float_mul_op_ne
;
261 f
->op
= ppir_codegen_float_mul_op_not
;
267 ppir_src
*src
= alu
->src
;
268 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
269 f
->arg0_absolute
= src
->absolute
;
270 f
->arg0_negate
= src
->negate
;
272 if (alu
->num_src
== 2) {
274 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
275 f
->arg1_absolute
= src
->absolute
;
276 f
->arg1_negate
= src
->negate
;
/*
 * Encode an ALU node into the vec4 accumulate field
 * (ppir_codegen_field_vec4_acc).
 *
 * Visible behaviour: the destination register index is split into f->dest
 * (index >> 2) and a component shift (index & 3) applied to the write mask;
 * the destination modifier is copied; f->op is set to one of the
 * ppir_codegen_vec4_acc_op_* values. Source encoding starts at alu->src + 1
 * for ppir_op_select and at alu->src otherwise; a source that is the
 * ppir_pipeline_reg_vmul pipeline register is tested for separately from
 * the plain arg0_source assignment; the following source, if it is still
 * inside alu->src[0 .. num_src), fills the arg1_* fields.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips. The construct selecting
 * each f->op value (including extra lines around the sum3/sum4 cases) and
 * the statement executed for the vmul pipeline source are not visible here.
 */
280 static void ppir_codegen_encode_vec_add(ppir_node
*node
, void *code
)
282 ppir_codegen_field_vec4_acc
*f
= code
;
283 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
285 ppir_dest
*dest
= &alu
->dest
;
286 int index
= ppir_target_get_dest_reg_index(dest
);
287 int dest_shift
= index
& 0x3;
288 f
->dest
= index
>> 2;
289 f
->mask
= dest
->write_mask
<< dest_shift
;
290 f
->dest_modifier
= dest
->modifier
;
294 f
->op
= ppir_codegen_vec4_acc_op_add
;
297 f
->op
= ppir_codegen_vec4_acc_op_mov
;
300 f
->op
= ppir_codegen_vec4_acc_op_sum3
;
304 f
->op
= ppir_codegen_vec4_acc_op_sum4
;
308 f
->op
= ppir_codegen_vec4_acc_op_floor
;
311 f
->op
= ppir_codegen_vec4_acc_op_ceil
;
314 f
->op
= ppir_codegen_vec4_acc_op_fract
;
317 f
->op
= ppir_codegen_vec4_acc_op_gt
;
320 f
->op
= ppir_codegen_vec4_acc_op_ge
;
323 f
->op
= ppir_codegen_vec4_acc_op_eq
;
326 f
->op
= ppir_codegen_vec4_acc_op_ne
;
329 f
->op
= ppir_codegen_vec4_acc_op_sel
;
335 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1 : alu
->src
;
336 index
= ppir_target_get_src_reg_index(src
);
338 if (src
->type
== ppir_target_pipeline
&&
339 src
->pipeline
== ppir_pipeline_reg_vmul
)
342 f
->arg0_source
= index
>> 2;
344 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
345 f
->arg0_absolute
= src
->absolute
;
346 f
->arg0_negate
= src
->negate
;
348 if (++src
< alu
->src
+ alu
->num_src
) {
349 index
= ppir_target_get_src_reg_index(src
);
350 f
->arg1_source
= index
>> 2;
351 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
352 f
->arg1_absolute
= src
->absolute
;
353 f
->arg1_negate
= src
->negate
;
/*
 * Encode an ALU node into the scalar accumulate field
 * (ppir_codegen_field_float_acc).
 *
 * Visible behaviour: the destination component is the lowest set bit of the
 * write mask (ffs - 1, asserted to exist); f->dest is the destination
 * register index plus that component; the destination modifier is copied;
 * f->op is set either from shift_to_op(alu->shift) or to one of the
 * ppir_codegen_float_acc_op_* values. Source encoding starts at
 * alu->src + 1 for ppir_op_select and at alu->src otherwise; a source that
 * is the ppir_pipeline_reg_fmul pipeline register is tested for separately
 * from the plain arg0_source assignment; the following source, if it is
 * still inside alu->src[0 .. num_src), fills the arg1_* fields.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips. The construct selecting
 * each f->op value and the statement executed for the fmul pipeline source
 * are not visible here.
 */
357 static void ppir_codegen_encode_scl_add(ppir_node
*node
, void *code
)
359 ppir_codegen_field_float_acc
*f
= code
;
360 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
362 ppir_dest
*dest
= &alu
->dest
;
363 int dest_component
= ffs(dest
->write_mask
) - 1;
364 assert(dest_component
>= 0);
366 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
368 f
->dest_modifier
= dest
->modifier
;
372 f
->op
= shift_to_op(alu
->shift
);
375 f
->op
= ppir_codegen_float_acc_op_mov
;
378 f
->op
= ppir_codegen_float_acc_op_max
;
381 f
->op
= ppir_codegen_float_acc_op_min
;
384 f
->op
= ppir_codegen_float_acc_op_floor
;
387 f
->op
= ppir_codegen_float_acc_op_ceil
;
390 f
->op
= ppir_codegen_float_acc_op_fract
;
393 f
->op
= ppir_codegen_float_acc_op_gt
;
396 f
->op
= ppir_codegen_float_acc_op_ge
;
399 f
->op
= ppir_codegen_float_acc_op_eq
;
402 f
->op
= ppir_codegen_float_acc_op_ne
;
405 f
->op
= ppir_codegen_float_acc_op_sel
;
411 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1: alu
->src
;
412 if (src
->type
== ppir_target_pipeline
&&
413 src
->pipeline
== ppir_pipeline_reg_fmul
)
416 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
417 f
->arg0_absolute
= src
->absolute
;
418 f
->arg0_negate
= src
->negate
;
420 if (++src
< alu
->src
+ alu
->num_src
) {
421 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
422 f
->arg1_absolute
= src
->absolute
;
423 f
->arg1_negate
= src
->negate
;
/*
 * Encode an ALU node into the combine field (ppir_codegen_field_combine),
 * using its scalar form.
 *
 * Visible behaviour: scalar.dest_vec and scalar.arg1_en are cleared; the
 * destination component is the lowest set bit of the write mask (ffs - 1,
 * asserted to exist); scalar.dest is the destination register index plus
 * that component; the destination modifier is copied; arg0 comes from the
 * first source via get_scl_reg_index() with its absolute/negate flags;
 * scalar.op is set to one of the ppir_codegen_combine_scalar_op_* values
 * (rsqrt, log2, exp2, rcp, sqrt, sin, cos).
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips (431-440 and 476-485 are
 * absent entirely). The construct selecting scalar.op and any handling of
 * other ops are not visible here.
 */
427 static void ppir_codegen_encode_combine(ppir_node
*node
, void *code
)
429 ppir_codegen_field_combine
*f
= code
;
430 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
441 f
->scalar
.dest_vec
= false;
442 f
->scalar
.arg1_en
= false;
444 ppir_dest
*dest
= &alu
->dest
;
445 int dest_component
= ffs(dest
->write_mask
) - 1;
446 assert(dest_component
>= 0);
447 f
->scalar
.dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
448 f
->scalar
.dest_modifier
= dest
->modifier
;
450 ppir_src
*src
= alu
->src
;
451 f
->scalar
.arg0_src
= get_scl_reg_index(src
, dest_component
);
452 f
->scalar
.arg0_absolute
= src
->absolute
;
453 f
->scalar
.arg0_negate
= src
->negate
;
457 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rsqrt
;
460 f
->scalar
.op
= ppir_codegen_combine_scalar_op_log2
;
463 f
->scalar
.op
= ppir_codegen_combine_scalar_op_exp2
;
466 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rcp
;
469 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sqrt
;
472 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sin
;
475 f
->scalar
.op
= ppir_codegen_combine_scalar_op_cos
;
486 static void ppir_codegen_encode_store_temp(ppir_node
*node
, void *code
)
488 assert(node
->op
== ppir_op_store_temp
);
490 ppir_codegen_field_temp_write
*f
= code
;
491 ppir_store_node
*snode
= ppir_node_to_store(node
);
492 int num_components
= snode
->num_components
;
494 f
->temp_write
.dest
= 0x03; // 11 - temporary
495 f
->temp_write
.source
= snode
->src
.reg
->index
;
497 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
498 f
->temp_write
.alignment
= alignment
;
499 f
->temp_write
.index
= snode
->index
<< (2 - alignment
);
501 f
->temp_write
.offset_reg
= snode
->index
>> 2;
504 static void ppir_codegen_encode_const(ppir_const
*constant
, uint16_t *code
)
506 for (int i
= 0; i
< constant
->num
; i
++)
507 code
[i
] = util_float_to_half(constant
->value
[i
].f
);
510 static void ppir_codegen_encode_discard(ppir_node
*node
, void *code
)
512 ppir_codegen_field_branch
*b
= code
;
513 assert(node
->op
= ppir_op_discard
);
515 b
->discard
.word0
= PPIR_CODEGEN_DISCARD_WORD0
;
516 b
->discard
.word1
= PPIR_CODEGEN_DISCARD_WORD1
;
517 b
->discard
.word2
= PPIR_CODEGEN_DISCARD_WORD2
;
520 static void ppir_codegen_encode_branch(ppir_node
*node
, void *code
)
522 ppir_codegen_field_branch
*b
= code
;
523 ppir_branch_node
*branch
;
524 ppir_instr
*target_instr
;
525 if (node
->op
== ppir_op_discard
) {
526 ppir_codegen_encode_discard(node
, code
);
530 assert(node
->op
= ppir_op_branch
);
531 branch
= ppir_node_to_branch(node
);
533 b
->branch
.unknown_0
= 0x0;
534 b
->branch
.arg0_source
= ppir_target_get_src_reg_index(&branch
->src
[0]);
535 b
->branch
.arg1_source
= ppir_target_get_src_reg_index(&branch
->src
[1]);
536 b
->branch
.cond_gt
= branch
->cond_gt
;
537 b
->branch
.cond_eq
= branch
->cond_eq
;
538 b
->branch
.cond_lt
= branch
->cond_lt
;
539 b
->branch
.unknown_1
= 0x0;
540 b
->branch
.unknown_2
= 0x3;
542 target_instr
= list_first_entry(&branch
->target
->instr_list
, ppir_instr
, list
);
543 b
->branch
.target
= target_instr
->offset
- node
->instr
->offset
;
546 typedef void (*ppir_codegen_instr_slot_encode_func
)(ppir_node
*, void *);
548 static const ppir_codegen_instr_slot_encode_func
549 ppir_codegen_encode_slot
[PPIR_INSTR_SLOT_NUM
] = {
550 [PPIR_INSTR_SLOT_VARYING
] = ppir_codegen_encode_varying
,
551 [PPIR_INSTR_SLOT_TEXLD
] = ppir_codegen_encode_texld
,
552 [PPIR_INSTR_SLOT_UNIFORM
] = ppir_codegen_encode_uniform
,
553 [PPIR_INSTR_SLOT_ALU_VEC_MUL
] = ppir_codegen_encode_vec_mul
,
554 [PPIR_INSTR_SLOT_ALU_SCL_MUL
] = ppir_codegen_encode_scl_mul
,
555 [PPIR_INSTR_SLOT_ALU_VEC_ADD
] = ppir_codegen_encode_vec_add
,
556 [PPIR_INSTR_SLOT_ALU_SCL_ADD
] = ppir_codegen_encode_scl_add
,
557 [PPIR_INSTR_SLOT_ALU_COMBINE
] = ppir_codegen_encode_combine
,
558 [PPIR_INSTR_SLOT_STORE_TEMP
] = ppir_codegen_encode_store_temp
,
559 [PPIR_INSTR_SLOT_BRANCH
] = ppir_codegen_encode_branch
,
/* Size in bits of each slot's encoded field, indexed by slot number. */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
/* Number of 32-bit words needed to hold `size` bits (rounded up). */
static inline int align_to_word(int size)
{
   return ((size + 0x1f) >> 5);
}
571 static int get_instr_encode_size(ppir_instr
*instr
)
575 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
577 size
+= ppir_codegen_field_size
[i
];
580 for (int i
= 0; i
< 2; i
++) {
581 if (instr
->constant
[i
].num
)
585 return align_to_word(size
) + 1;
/*
 * OR/copy src_size bits from src into dst starting at bit dst_offset.
 *
 * dst:        destination words; on the unaligned path bits are OR-ed in,
 *             so the destination must already be zero there.
 * dst_offset: bit offset into dst.
 * src:        source words, read 32 bits at a time.
 * src_size:   number of bits to copy.
 *
 * When dst_offset is word aligned the source is copied with memcpy(),
 * rounded up to whole words. Otherwise each source word is split across two
 * neighbouring destination words.
 *
 * NOTE(review): the loop scaffolding (cpy_size counter, pointer increments,
 * loop exits) was not visible in the damaged listing this block was
 * restored from -- confirm against the upstream file.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   int off1 = dst_offset & 0x1f;
   uint32_t *cpy_dst = dst, *cpy_src = src;

   cpy_dst += (dst_offset >> 5);

   if (off1) {
      int off2 = 32 - off1;
      int cpy_size = 0;
      while (1) {
         /* low off2 bits of the source word finish the current dst word */
         *cpy_dst |= *cpy_src << off1;
         cpy_dst++;

         cpy_size += off2;
         if (cpy_size >= src_size)
            break;

         /* remaining high off1 bits start the next dst word */
         *cpy_dst |= *cpy_src >> off2;
         cpy_src++;

         cpy_size += off1;
         if (cpy_size >= src_size)
            break;
      }
   }
   else
      memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4);
}
/*
 * Encode one instruction at `code`, which starts with a ppir_codegen_ctrl
 * control word.
 *
 * Visible behaviour: for every occupied slot i the matching entry of
 * ppir_codegen_encode_slot[] encodes the node into a zeroed 12-byte scratch
 * buffer, ppir_codegen_field_size[i] bits of it are bit-copied behind the
 * control word (ctrl + 1) at the running bit offset `size`, `size` grows by
 * that field size and bit i is set in ctrl->fields. Each of the two
 * constants with a non-zero num is converted by ppir_codegen_encode_const()
 * and num * 16 bits are appended the same way, setting bit
 * ppir_codegen_field_shift_vec4_const_0 + i in ctrl->fields. Finally `size`
 * becomes align_to_word(size) + 1, and the control word at last_code gets
 * next_count = size and prefetch = true.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips. The declaration of `size`,
 * the statement guarded by the TEXLD-slot check, how `size` advances after
 * a constant, the statements between the final size computation and the
 * last_code update, and the return statement are not visible here.
 */
618 static int encode_instr(ppir_instr
*instr
, void *code
, void *last_code
)
621 ppir_codegen_ctrl
*ctrl
= code
;
623 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
624 if (instr
->slots
[i
]) {
625 /* max field size (73), align to dword */
626 uint8_t output
[12] = {0};
628 ppir_codegen_encode_slot
[i
](instr
->slots
[i
], output
);
629 bitcopy(ctrl
+ 1, size
, output
, ppir_codegen_field_size
[i
]);
631 size
+= ppir_codegen_field_size
[i
];
632 ctrl
->fields
|= 1 << i
;
636 if (instr
->slots
[PPIR_INSTR_SLOT_TEXLD
])
639 for (int i
= 0; i
< 2; i
++) {
640 if (instr
->constant
[i
].num
) {
641 uint16_t output
[4] = {0};
643 ppir_codegen_encode_const(instr
->constant
+ i
, output
);
644 bitcopy(ctrl
+ 1, size
, output
, instr
->constant
[i
].num
* 16);
647 ctrl
->fields
|= 1 << (ppir_codegen_field_shift_vec4_const_0
+ i
);
651 size
= align_to_word(size
) + 1;
658 ppir_codegen_ctrl
*last_ctrl
= last_code
;
659 last_ctrl
->next_count
= size
;
660 last_ctrl
->prefetch
= true;
/*
 * Debug dump of the encoded program in comp->prog->shader.
 *
 * Visible behaviour: prints a banner, then for every instruction of every
 * block prints its index and offset, reads the word count n from the low
 * five bits of the instruction's first word, prints those n words in hex
 * and calls ppir_disassemble_instr() on them; a separator line ends the
 * dump.
 *
 * NOTE(review): this listing is damaged -- each line starts with its
 * original line number and the numbering skips. The declaration of
 * `offset`, any line-break printing inside the word loop, and the
 * statements that advance `prog`/`offset` to the next instruction are not
 * visible here.
 */
666 static void ppir_codegen_print_prog(ppir_compiler
*comp
)
668 uint32_t *prog
= comp
->prog
->shader
;
671 printf("========ppir codegen========\n");
672 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
673 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
674 printf("%03d (@%6ld): ", instr
->index
, instr
->offset
);
675 int n
= prog
[0] & 0x1f;
676 for (int i
= 0; i
< n
; i
++) {
679 printf("%08x ", prog
[i
]);
682 ppir_disassemble_instr(prog
, offset
);
687 printf("-----------------------\n");
690 bool ppir_codegen_prog(ppir_compiler
*comp
)
693 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
694 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
695 instr
->offset
= size
;
696 size
+= get_instr_encode_size(instr
);
700 uint32_t *prog
= rzalloc_size(comp
->prog
, size
* sizeof(uint32_t));
704 uint32_t *code
= prog
, *last_code
= NULL
;
705 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
706 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
707 int offset
= encode_instr(instr
, code
, last_code
);
713 comp
->prog
->shader
= prog
;
714 comp
->prog
->shader_size
= size
* sizeof(uint32_t);
716 if (lima_debug
& LIMA_DEBUG_PP
)
717 ppir_codegen_print_prog(comp
);