/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
25 #include "util/ralloc.h"
26 #include "util/u_half.h"
27 #include "util/bitscan.h"
31 #include "lima_context.h"
/*
 * Pack a 4-component swizzle into 2-bit selectors.
 *
 * Each of the four entries of `swizzle` is offset by `shift`, wrapped to two
 * bits, and stored at bit position (i + dest_shift) * 2 of the result.
 *
 * Restored to valid C: the accumulator declaration, the return statement and
 * the braces were missing from this block, and its lines carried stray
 * listing-number prefixes.
 *
 * swizzle:    array of at least four component selectors
 * shift:      value added to every selector before wrapping to 0..3
 * dest_shift: number of 2-bit lanes to skip before the first selector
 * returns:    the packed selector bits
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned ret = 0;
   for (int i = 0; i < 4; i++)
      ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2);
   return ret;
}
41 static int get_scl_reg_index(ppir_src
*src
, int component
)
43 int ret
= ppir_target_get_src_reg_index(src
);
44 ret
+= src
->swizzle
[component
];
/*
 * Encode the varying-slot field for a load node into `code`.
 *
 * Two layouts of ppir_codegen_field_varying are written below:
 *  - `imm`: dest, mask, alignment, offset_vector and index, plus
 *    source_type/perspective when the op is ppir_op_load_fragcoord;
 *  - `reg`: dest, mask, source_type and a register source with its
 *    negate/absolute/swizzle modifiers, guarded by an assert that the op is
 *    ppir_op_load_coords.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines (braces and the conditions that choose between the two layouts and
 * between the two index computations) -- confirm against the complete file.
 */
48 static void ppir_codegen_encode_varying(ppir_node
*node
, void *code
)
50 ppir_codegen_field_varying
*f
= code
;
51 ppir_load_node
*load
= ppir_node_to_load(node
);
52 ppir_dest
*dest
= &load
->dest
;
53 int index
= ppir_target_get_dest_reg_index(dest
);
54 int num_components
= load
->num_components
;
57 assert(node
->op
== ppir_op_load_varying
||
58 node
->op
== ppir_op_load_coords
||
59 node
->op
== ppir_op_load_fragcoord
);
/* The destination index is split: upper bits go to .dest, the low two bits
 * shift the write mask (presumably register number / first component). */
61 f
->imm
.dest
= index
>> 2;
62 f
->imm
.mask
= dest
->write_mask
<< (index
& 0x3);
/* Alignment code: 3 for three components, otherwise num_components - 1. */
64 int alignment
= num_components
== 3 ? 3 : num_components
- 1;
65 f
->imm
.alignment
= alignment
;
66 f
->imm
.offset_vector
= 0xf;
/* NOTE(review): two alternative index computations follow; the condition
 * selecting between them is on a line not present in this listing. */
69 f
->imm
.index
= load
->index
>> 2;
71 f
->imm
.index
= load
->index
>> alignment
;
73 if (node
->op
== ppir_op_load_fragcoord
) {
74 f
->imm
.source_type
= 2;
75 f
->imm
.perspective
= 3;
/* Register-source layout, used for load_coords. */
79 assert(node
->op
== ppir_op_load_coords
);
81 f
->reg
.dest
= index
>> 2;
82 f
->reg
.mask
= dest
->write_mask
<< (index
& 0x3);
84 f
->reg
.source_type
= 1;
/* `index` is reused here for the source register index. */
86 ppir_src
*src
= &load
->src
;
87 index
= ppir_target_get_src_reg_index(src
);
88 f
->reg
.source
= index
>> 2;
89 f
->reg
.negate
= src
->negate
;
90 f
->reg
.absolute
= src
->absolute
;
91 f
->reg
.swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, 0);
/*
 * Encode the sampler field for a texture-load node into `code`: the sampler
 * index comes from the load-texture node, the type is fixed to
 * ppir_codegen_sampler_type_2d, and unknown_2 is set to the constant 0x39001
 * (its meaning is not documented in this file).
 *
 * NOTE(review): this listing carries listing-number prefixes and skips
 * several original lines; confirm no assignments are missing.
 */
95 static void ppir_codegen_encode_texld(ppir_node
*node
, void *code
)
97 ppir_codegen_field_sampler
*f
= code
;
98 ppir_load_texture_node
*ldtex
= ppir_node_to_load_texture(node
);
100 f
->index
= ldtex
->sampler
;
102 f
->type
= ppir_codegen_sampler_type_2d
;
104 f
->unknown_2
= 0x39001;
/*
 * Encode the uniform-slot field for a load node into `code`.
 *
 * The source selector is ppir_codegen_uniform_src_uniform for
 * ppir_op_load_uniform and ppir_codegen_uniform_src_temporary for
 * ppir_op_load_temp. The alignment code is 2 for four components and
 * num_components - 1 otherwise; the load index is scaled by
 * 1 << (2 - alignment) before being stored.
 *
 * NOTE(review): the switch header, break/default lines and braces are not
 * present in this listing -- confirm against the complete file.
 */
107 static void ppir_codegen_encode_uniform(ppir_node
*node
, void *code
)
109 ppir_codegen_field_uniform
*f
= code
;
110 ppir_load_node
*load
= ppir_node_to_load(node
);
113 case ppir_op_load_uniform
:
114 f
->source
= ppir_codegen_uniform_src_uniform
;
116 case ppir_op_load_temp
:
117 f
->source
= ppir_codegen_uniform_src_temporary
;
123 int num_components
= load
->num_components
;
124 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
126 f
->alignment
= alignment
;
128 /* TODO: uniform can be also combined like varying */
129 f
->index
= load
->index
<< (2 - alignment
);
/*
 * Map a shift amount in [-3, 3] to an op code: non-negative shifts map to
 * themselves, negative shifts map to shift + 8 (so -1 -> 7, -3 -> 5).
 * The range is enforced with an assert.
 *
 * Restored to valid C: the braces were missing from this block and its
 * lines carried stray listing-number prefixes.
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);
   return shift < 0 ? shift + 8 : shift;
}
/*
 * Encode the vec4 multiply-slot field for an ALU node into `code`.
 *
 * Destination register/mask are written only when the destination is not a
 * pipeline target; the destination modifier is always copied. One of the
 * ppir_codegen_vec4_mul_op_* codes (or shift_to_op(alu->shift)) is stored in
 * f->op, then the first source -- and a second one when num_src == 2 -- is
 * encoded with its swizzle rotated by dest_shift.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the declaration of `dest_shift`, the switch/case labels that choose
 * f->op, the statement advancing `src` before the second source, and all
 * braces. Confirm against the complete file.
 */
138 static void ppir_codegen_encode_vec_mul(ppir_node
*node
, void *code
)
140 ppir_codegen_field_vec4_mul
*f
= code
;
141 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
143 ppir_dest
*dest
= &alu
->dest
;
145 if (dest
->type
!= ppir_target_pipeline
) {
146 int index
= ppir_target_get_dest_reg_index(dest
);
147 dest_shift
= index
& 0x3;
148 f
->dest
= index
>> 2;
149 f
->mask
= dest
->write_mask
<< dest_shift
;
151 f
->dest_modifier
= dest
->modifier
;
/* Op selection: each assignment below belongs to a case label that is not
 * present in this listing. */
155 f
->op
= shift_to_op(alu
->shift
);
158 f
->op
= ppir_codegen_vec4_mul_op_mov
;
161 f
->op
= ppir_codegen_vec4_mul_op_max
;
164 f
->op
= ppir_codegen_vec4_mul_op_min
;
167 f
->op
= ppir_codegen_vec4_mul_op_and
;
170 f
->op
= ppir_codegen_vec4_mul_op_or
;
173 f
->op
= ppir_codegen_vec4_mul_op_xor
;
176 f
->op
= ppir_codegen_vec4_mul_op_gt
;
179 f
->op
= ppir_codegen_vec4_mul_op_ge
;
182 f
->op
= ppir_codegen_vec4_mul_op_eq
;
185 f
->op
= ppir_codegen_vec4_mul_op_ne
;
188 f
->op
= ppir_codegen_vec4_mul_op_not
;
/* First source operand. */
194 ppir_src
*src
= alu
->src
;
195 int index
= ppir_target_get_src_reg_index(src
);
196 f
->arg0_source
= index
>> 2;
197 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
198 f
->arg0_absolute
= src
->absolute
;
199 f
->arg0_negate
= src
->negate
;
/* Second source operand, only for two-source ops. */
201 if (alu
->num_src
== 2) {
203 index
= ppir_target_get_src_reg_index(src
);
204 f
->arg1_source
= index
>> 2;
205 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
206 f
->arg1_absolute
= src
->absolute
;
207 f
->arg1_negate
= src
->negate
;
/*
 * Encode the scalar multiply-slot field for an ALU node into `code`.
 *
 * The destination component is the lowest set bit of the write mask
 * (ffs() - 1, asserted non-negative). When the destination is not a
 * pipeline target, f->dest is the destination register index plus that
 * component. Sources are encoded as scalar register indices through
 * get_scl_reg_index() for the same component.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the switch/case labels that choose f->op, the statement advancing
 * `src` before the second source, and all braces. Confirm against the
 * complete file.
 */
211 static void ppir_codegen_encode_scl_mul(ppir_node
*node
, void *code
)
213 ppir_codegen_field_float_mul
*f
= code
;
214 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
216 ppir_dest
*dest
= &alu
->dest
;
217 int dest_component
= ffs(dest
->write_mask
) - 1;
218 assert(dest_component
>= 0);
220 if (dest
->type
!= ppir_target_pipeline
) {
221 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
224 f
->dest_modifier
= dest
->modifier
;
/* Op selection: each assignment below belongs to a case label that is not
 * present in this listing. */
228 f
->op
= shift_to_op(alu
->shift
);
231 f
->op
= ppir_codegen_float_mul_op_mov
;
234 f
->op
= ppir_codegen_float_mul_op_max
;
237 f
->op
= ppir_codegen_float_mul_op_min
;
240 f
->op
= ppir_codegen_float_mul_op_and
;
243 f
->op
= ppir_codegen_float_mul_op_or
;
246 f
->op
= ppir_codegen_float_mul_op_xor
;
249 f
->op
= ppir_codegen_float_mul_op_gt
;
252 f
->op
= ppir_codegen_float_mul_op_ge
;
255 f
->op
= ppir_codegen_float_mul_op_eq
;
258 f
->op
= ppir_codegen_float_mul_op_ne
;
261 f
->op
= ppir_codegen_float_mul_op_not
;
/* First source operand. */
267 ppir_src
*src
= alu
->src
;
268 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
269 f
->arg0_absolute
= src
->absolute
;
270 f
->arg0_negate
= src
->negate
;
/* Second source operand, only for two-source ops. */
272 if (alu
->num_src
== 2) {
274 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
275 f
->arg1_absolute
= src
->absolute
;
276 f
->arg1_negate
= src
->negate
;
/*
 * Encode the vec4 add/accumulate-slot field for an ALU node into `code`.
 *
 * Destination register, mask and modifier are written from the node's
 * destination. One of the ppir_codegen_vec4_acc_op_* codes is stored in
 * f->op. The first encoded source is alu->src + 1 for ppir_op_select and
 * alu->src otherwise; a second source is encoded when the next entry is
 * still inside alu->src[0 .. num_src).
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the switch/case labels that choose f->op, the body of the
 * pipeline/vmul test on the first source, and all braces. Confirm against
 * the complete file.
 */
280 static void ppir_codegen_encode_vec_add(ppir_node
*node
, void *code
)
282 ppir_codegen_field_vec4_acc
*f
= code
;
283 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
285 ppir_dest
*dest
= &alu
->dest
;
286 int index
= ppir_target_get_dest_reg_index(dest
);
287 int dest_shift
= index
& 0x3;
288 f
->dest
= index
>> 2;
289 f
->mask
= dest
->write_mask
<< dest_shift
;
290 f
->dest_modifier
= dest
->modifier
;
/* Op selection: each assignment below belongs to a case label that is not
 * present in this listing. */
294 f
->op
= ppir_codegen_vec4_acc_op_add
;
297 f
->op
= ppir_codegen_vec4_acc_op_mov
;
300 f
->op
= ppir_codegen_vec4_acc_op_sum3
;
304 f
->op
= ppir_codegen_vec4_acc_op_sum4
;
308 f
->op
= ppir_codegen_vec4_acc_op_floor
;
311 f
->op
= ppir_codegen_vec4_acc_op_ceil
;
314 f
->op
= ppir_codegen_vec4_acc_op_fract
;
317 f
->op
= ppir_codegen_vec4_acc_op_gt
;
320 f
->op
= ppir_codegen_vec4_acc_op_ge
;
323 f
->op
= ppir_codegen_vec4_acc_op_eq
;
326 f
->op
= ppir_codegen_vec4_acc_op_ne
;
329 f
->op
= ppir_codegen_vec4_acc_op_sel
;
332 f
->op
= ppir_codegen_vec4_acc_op_max
;
335 f
->op
= ppir_codegen_vec4_acc_op_min
;
/* For select, encoding starts at the second source entry. */
341 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1 : alu
->src
;
342 index
= ppir_target_get_src_reg_index(src
);
/* NOTE(review): the statement(s) controlled by this test are not present
 * in this listing. */
344 if (src
->type
== ppir_target_pipeline
&&
345 src
->pipeline
== ppir_pipeline_reg_vmul
)
348 f
->arg0_source
= index
>> 2;
350 f
->arg0_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
351 f
->arg0_absolute
= src
->absolute
;
352 f
->arg0_negate
= src
->negate
;
/* Advance to the next source and encode it if it exists. */
354 if (++src
< alu
->src
+ alu
->num_src
) {
355 index
= ppir_target_get_src_reg_index(src
);
356 f
->arg1_source
= index
>> 2;
357 f
->arg1_swizzle
= encode_swizzle(src
->swizzle
, index
& 0x3, dest_shift
);
358 f
->arg1_absolute
= src
->absolute
;
359 f
->arg1_negate
= src
->negate
;
/*
 * Encode the scalar add/accumulate-slot field for an ALU node into `code`.
 *
 * The destination component is the lowest set bit of the write mask
 * (ffs() - 1, asserted non-negative); f->dest is the destination register
 * index plus that component. One of the ppir_codegen_float_acc_op_* codes
 * (or shift_to_op(alu->shift)) is stored in f->op. The first encoded source
 * is alu->src + 1 for ppir_op_select and alu->src otherwise; a second
 * source is encoded when the next entry is still inside
 * alu->src[0 .. num_src).
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the switch/case labels that choose f->op, the body of the
 * pipeline/fmul test on the first source, and all braces. Confirm against
 * the complete file.
 */
363 static void ppir_codegen_encode_scl_add(ppir_node
*node
, void *code
)
365 ppir_codegen_field_float_acc
*f
= code
;
366 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
368 ppir_dest
*dest
= &alu
->dest
;
369 int dest_component
= ffs(dest
->write_mask
) - 1;
370 assert(dest_component
>= 0);
372 f
->dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
374 f
->dest_modifier
= dest
->modifier
;
/* Op selection: each assignment below belongs to a case label that is not
 * present in this listing. */
378 f
->op
= shift_to_op(alu
->shift
);
381 f
->op
= ppir_codegen_float_acc_op_mov
;
384 f
->op
= ppir_codegen_float_acc_op_max
;
387 f
->op
= ppir_codegen_float_acc_op_min
;
390 f
->op
= ppir_codegen_float_acc_op_floor
;
393 f
->op
= ppir_codegen_float_acc_op_ceil
;
396 f
->op
= ppir_codegen_float_acc_op_fract
;
399 f
->op
= ppir_codegen_float_acc_op_gt
;
402 f
->op
= ppir_codegen_float_acc_op_ge
;
405 f
->op
= ppir_codegen_float_acc_op_eq
;
408 f
->op
= ppir_codegen_float_acc_op_ne
;
411 f
->op
= ppir_codegen_float_acc_op_sel
;
/* For select, encoding starts at the second source entry. */
417 ppir_src
*src
= node
->op
== ppir_op_select
? alu
->src
+ 1: alu
->src
;
/* NOTE(review): the statement(s) controlled by this test are not present
 * in this listing. */
418 if (src
->type
== ppir_target_pipeline
&&
419 src
->pipeline
== ppir_pipeline_reg_fmul
)
422 f
->arg0_source
= get_scl_reg_index(src
, dest_component
);
423 f
->arg0_absolute
= src
->absolute
;
424 f
->arg0_negate
= src
->negate
;
/* Advance to the next source and encode it if it exists. */
426 if (++src
< alu
->src
+ alu
->num_src
) {
427 f
->arg1_source
= get_scl_reg_index(src
, dest_component
);
428 f
->arg1_absolute
= src
->absolute
;
429 f
->arg1_negate
= src
->negate
;
/*
 * Encode the combine-slot field for an ALU node into `code`, using the
 * `scalar` layout: dest_vec and arg1_en are cleared, the destination is the
 * destination register index plus the lowest set write-mask component, the
 * single source is encoded through get_scl_reg_index(), and one of the
 * ppir_codegen_combine_scalar_op_* codes (rsqrt, log2, exp2, rcp, sqrt,
 * sin, cos) is stored in f->scalar.op.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines before the first assignment and around the op selection (the
 * switch/case labels), plus all braces. Confirm against the complete file.
 */
433 static void ppir_codegen_encode_combine(ppir_node
*node
, void *code
)
435 ppir_codegen_field_combine
*f
= code
;
436 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
447 f
->scalar
.dest_vec
= false;
448 f
->scalar
.arg1_en
= false;
450 ppir_dest
*dest
= &alu
->dest
;
451 int dest_component
= ffs(dest
->write_mask
) - 1;
452 assert(dest_component
>= 0);
453 f
->scalar
.dest
= ppir_target_get_dest_reg_index(dest
) + dest_component
;
454 f
->scalar
.dest_modifier
= dest
->modifier
;
456 ppir_src
*src
= alu
->src
;
457 f
->scalar
.arg0_src
= get_scl_reg_index(src
, dest_component
);
458 f
->scalar
.arg0_absolute
= src
->absolute
;
459 f
->scalar
.arg0_negate
= src
->negate
;
/* Op selection: each assignment below belongs to a case label that is not
 * present in this listing. */
463 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rsqrt
;
466 f
->scalar
.op
= ppir_codegen_combine_scalar_op_log2
;
469 f
->scalar
.op
= ppir_codegen_combine_scalar_op_exp2
;
472 f
->scalar
.op
= ppir_codegen_combine_scalar_op_rcp
;
475 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sqrt
;
478 f
->scalar
.op
= ppir_codegen_combine_scalar_op_sin
;
481 f
->scalar
.op
= ppir_codegen_combine_scalar_op_cos
;
/*
 * Encode the temp-write field for a ppir_op_store_temp node into `code`.
 *
 * The destination selector is the constant 0x03, the source is the index of
 * the store's source register, the alignment code is 2 for four components
 * and num_components - 1 otherwise, the index is the store index scaled by
 * 1 << (2 - alignment), and offset_reg receives the store index >> 2.
 *
 * NOTE(review): this listing carries listing-number prefixes and skips
 * several original lines (at least the braces); confirm no statements are
 * missing.
 */
492 static void ppir_codegen_encode_store_temp(ppir_node
*node
, void *code
)
494 assert(node
->op
== ppir_op_store_temp
);
496 ppir_codegen_field_temp_write
*f
= code
;
497 ppir_store_node
*snode
= ppir_node_to_store(node
);
498 int num_components
= snode
->num_components
;
500 f
->temp_write
.dest
= 0x03; // 11 - temporary
501 f
->temp_write
.source
= snode
->src
.reg
->index
;
503 int alignment
= num_components
== 4 ? 2 : num_components
- 1;
504 f
->temp_write
.alignment
= alignment
;
505 f
->temp_write
.index
= snode
->index
<< (2 - alignment
);
507 f
->temp_write
.offset_reg
= snode
->index
>> 2;
510 static void ppir_codegen_encode_const(ppir_const
*constant
, uint16_t *code
)
512 for (int i
= 0; i
< constant
->num
; i
++)
513 code
[i
] = util_float_to_half(constant
->value
[i
].f
);
516 static void ppir_codegen_encode_discard(ppir_node
*node
, void *code
)
518 ppir_codegen_field_branch
*b
= code
;
519 assert(node
->op
== ppir_op_discard
);
521 b
->discard
.word0
= PPIR_CODEGEN_DISCARD_WORD0
;
522 b
->discard
.word1
= PPIR_CODEGEN_DISCARD_WORD1
;
523 b
->discard
.word2
= PPIR_CODEGEN_DISCARD_WORD2
;
526 static void ppir_codegen_encode_branch(ppir_node
*node
, void *code
)
528 ppir_codegen_field_branch
*b
= code
;
529 ppir_branch_node
*branch
;
530 ppir_instr
*target_instr
;
531 if (node
->op
== ppir_op_discard
) {
532 ppir_codegen_encode_discard(node
, code
);
536 assert(node
->op
== ppir_op_branch
);
537 branch
= ppir_node_to_branch(node
);
539 b
->branch
.unknown_0
= 0x0;
540 b
->branch
.arg0_source
= ppir_target_get_src_reg_index(&branch
->src
[0]);
541 b
->branch
.arg1_source
= ppir_target_get_src_reg_index(&branch
->src
[1]);
542 b
->branch
.cond_gt
= branch
->cond_gt
;
543 b
->branch
.cond_eq
= branch
->cond_eq
;
544 b
->branch
.cond_lt
= branch
->cond_lt
;
545 b
->branch
.unknown_1
= 0x0;
547 target_instr
= list_first_entry(&branch
->target
->instr_list
, ppir_instr
, list
);
548 b
->branch
.target
= target_instr
->offset
- node
->instr
->offset
;
549 b
->branch
.next_count
= target_instr
->encode_size
;
552 typedef void (*ppir_codegen_instr_slot_encode_func
)(ppir_node
*, void *);
554 static const ppir_codegen_instr_slot_encode_func
555 ppir_codegen_encode_slot
[PPIR_INSTR_SLOT_NUM
] = {
556 [PPIR_INSTR_SLOT_VARYING
] = ppir_codegen_encode_varying
,
557 [PPIR_INSTR_SLOT_TEXLD
] = ppir_codegen_encode_texld
,
558 [PPIR_INSTR_SLOT_UNIFORM
] = ppir_codegen_encode_uniform
,
559 [PPIR_INSTR_SLOT_ALU_VEC_MUL
] = ppir_codegen_encode_vec_mul
,
560 [PPIR_INSTR_SLOT_ALU_SCL_MUL
] = ppir_codegen_encode_scl_mul
,
561 [PPIR_INSTR_SLOT_ALU_VEC_ADD
] = ppir_codegen_encode_vec_add
,
562 [PPIR_INSTR_SLOT_ALU_SCL_ADD
] = ppir_codegen_encode_scl_add
,
563 [PPIR_INSTR_SLOT_ALU_COMBINE
] = ppir_codegen_encode_combine
,
564 [PPIR_INSTR_SLOT_STORE_TEMP
] = ppir_codegen_encode_store_temp
,
565 [PPIR_INSTR_SLOT_BRANCH
] = ppir_codegen_encode_branch
,
/*
 * Size in bits of each encoded instruction field, indexed by slot number
 * (the same index used for ppir_codegen_encode_slot). The largest entry,
 * 73 bits, bounds the scratch buffer used when encoding a slot.
 *
 * Restored to valid C: the closing "};" was missing from this table and its
 * lines carried stray listing-number prefixes.
 */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
/*
 * Convert a size in bits to the number of 32-bit words needed to hold it,
 * rounding up (0 -> 0, 1..32 -> 1, 33..64 -> 2, ...). Despite the name, the
 * result is a word count, not an aligned bit count.
 *
 * Restored to valid C: the braces were missing from this block and its
 * lines carried stray listing-number prefixes.
 */
static inline int align_to_word(int size)
{
   return ((size + 0x1f) >> 5);
}
/*
 * Compute the encoded size of `instr`: field sizes from
 * ppir_codegen_field_size are summed over the slots, the two constant
 * entries are examined, and the total in bits is converted to 32-bit words
 * with align_to_word() plus one extra word (presumably the control word
 * that precedes the fields -- confirm against encode_instr).
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the declaration of `size`, the condition guarding the per-slot
 * addition, the statement run for a non-empty constant, and all braces.
 */
577 static int get_instr_encode_size(ppir_instr
*instr
)
581 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
583 size
+= ppir_codegen_field_size
[i
];
586 for (int i
= 0; i
< 2; i
++) {
587 if (instr
->constant
[i
].num
)
591 return align_to_word(size
) + 1;
/*
 * Copy `src_size` bits from `src` into `dst` starting at bit offset
 * `dst_offset`.
 *
 * The destination pointer is advanced by dst_offset / 32 words and `off1`
 * is the remaining bit offset inside that word. Source words are OR-ed in
 * shifted left by off1, with the bits that spill past the word boundary
 * OR-ed into the following position shifted right by off2 = 32 - off1.
 * A memcpy of whole words is also present (presumably the path taken when
 * the offset is word-aligned -- confirm). Because bits are OR-ed in, the
 * destination is expected to be zero where new bits land.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the test that chooses between the shifting path and memcpy, the
 * loop, the declaration/updates of `cpy_size`, the pointer advances, and
 * all braces. Confirm against the complete file.
 */
594 static void bitcopy(void *dst
, int dst_offset
, void *src
, int src_size
)
596 int off1
= dst_offset
& 0x1f;
597 uint32_t *cpy_dst
= dst
, *cpy_src
= src
;
599 cpy_dst
+= (dst_offset
>> 5);
602 int off2
= 32 - off1
;
605 *cpy_dst
|= *cpy_src
<< off1
;
609 if (cpy_size
>= src_size
)
612 *cpy_dst
|= *cpy_src
>> off2
;
616 if (cpy_size
>= src_size
)
621 memcpy(cpy_dst
, cpy_src
, align_to_word(src_size
) * 4);
/*
 * Encode one instruction at `code`.
 *
 * `code` starts with a ppir_codegen_ctrl word; fields are packed as a bit
 * stream starting right after it (ctrl + 1). For every occupied slot the
 * slot encoder fills a zeroed scratch buffer, which is bit-copied at the
 * running bit offset `size`, and bit i is set in ctrl->fields. Each
 * non-empty constant (up to two) is converted by
 * ppir_codegen_encode_const() and bit-copied as num * 16 bits, setting the
 * matching ppir_codegen_field_shift_vec4_const_0 + i bit. Finally `size`
 * becomes the word count (align_to_word(size) + 1), and the control word at
 * `last_code` gets next_count = size and prefetch = true.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the declaration of `size`, the statement controlled by the TEXLD
 * test, the size update after each constant, the guard around the
 * last_code update, the return statement, and all braces.
 */
624 static int encode_instr(ppir_instr
*instr
, void *code
, void *last_code
)
627 ppir_codegen_ctrl
*ctrl
= code
;
629 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
630 if (instr
->slots
[i
]) {
631 /* max field size (73), align to dword */
632 uint8_t output
[12] = {0};
634 ppir_codegen_encode_slot
[i
](instr
->slots
[i
], output
);
635 bitcopy(ctrl
+ 1, size
, output
, ppir_codegen_field_size
[i
]);
637 size
+= ppir_codegen_field_size
[i
];
638 ctrl
->fields
|= 1 << i
;
/* NOTE(review): the statement controlled by this test is not present in
 * this listing. */
642 if (instr
->slots
[PPIR_INSTR_SLOT_TEXLD
])
645 for (int i
= 0; i
< 2; i
++) {
646 if (instr
->constant
[i
].num
) {
647 uint16_t output
[4] = {0};
649 ppir_codegen_encode_const(instr
->constant
+ i
, output
);
650 bitcopy(ctrl
+ 1, size
, output
, instr
->constant
[i
].num
* 16);
653 ctrl
->fields
|= 1 << (ppir_codegen_field_shift_vec4_const_0
+ i
);
/* From here on `size` is a count of 32-bit words, not bits. */
657 size
= align_to_word(size
) + 1;
664 ppir_codegen_ctrl
*last_ctrl
= last_code
;
665 last_ctrl
->next_count
= size
;
666 last_ctrl
->prefetch
= true;
/*
 * Debug dump of the encoded program in comp->prog->shader.
 *
 * Prints a header, then for every instruction of every block its index and
 * offset, the encoded words in hex (the word count is taken from the low
 * five bits of the instruction's first word), and the output of
 * ppir_disassemble_instr(); a separator line is printed as well.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the declaration/update of `offset`, the advance of `prog` between
 * instructions, part of the word-printing loop, and all closing braces.
 */
672 static void ppir_codegen_print_prog(ppir_compiler
*comp
)
674 uint32_t *prog
= comp
->prog
->shader
;
677 printf("========ppir codegen========\n");
678 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
679 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
680 printf("%03d (@%6d): ", instr
->index
, instr
->offset
);
681 int n
= prog
[0] & 0x1f;
682 for (int i
= 0; i
< n
; i
++) {
685 printf("%08x ", prog
[i
]);
688 ppir_disassemble_instr(prog
, offset
);
693 printf("-----------------------\n");
/*
 * Generate the binary program for `comp`.
 *
 * Pass 1 walks every instruction of every block, recording its offset and
 * encoded size and accumulating the total `size` (in 32-bit words, as the
 * allocation below multiplies it by sizeof(uint32_t)). A zeroed buffer is
 * then allocated with rzalloc_size() against comp->prog. Pass 2 encodes
 * each instruction with encode_instr(). The buffer and its byte size are
 * stored in comp->prog->shader / shader_size, and the program is printed
 * when LIMA_DEBUG_PP is set in lima_debug.
 *
 * NOTE(review): this listing carries listing-number prefixes and is missing
 * lines: the declaration of `size`, the allocation-failure check, the
 * updates of `code`/`last_code` after each instruction, the return
 * statement, and all closing braces. Confirm against the complete file.
 */
696 bool ppir_codegen_prog(ppir_compiler
*comp
)
/* Pass 1: lay out instructions and compute the total size. */
699 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
700 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
701 instr
->offset
= size
;
702 instr
->encode_size
= get_instr_encode_size(instr
);
703 size
+= instr
->encode_size
;
707 uint32_t *prog
= rzalloc_size(comp
->prog
, size
* sizeof(uint32_t));
/* Pass 2: encode every instruction into the buffer. */
711 uint32_t *code
= prog
, *last_code
= NULL
;
712 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
713 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
714 int offset
= encode_instr(instr
, code
, last_code
);
720 comp
->prog
->shader
= prog
;
721 comp
->prog
->shader_size
= size
* sizeof(uint32_t);
723 if (lima_debug
& LIMA_DEBUG_PP
)
724 ppir_codegen_print_prog(comp
);