2 * Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #include "midgard_ops.h"
27 void mir_rewrite_index_src_single(midgard_instruction
*ins
, unsigned old
, unsigned new)
29 for (unsigned i
= 0; i
< ARRAY_SIZE(ins
->src
); ++i
) {
30 if (ins
->src
[i
] == old
)
35 void mir_rewrite_index_dst_single(midgard_instruction
*ins
, unsigned old
, unsigned new)
41 static midgard_vector_alu_src
42 mir_get_alu_src(midgard_instruction
*ins
, unsigned idx
)
44 unsigned b
= (idx
== 0) ? ins
->alu
.src1
: ins
->alu
.src2
;
45 return vector_alu_from_unsigned(b
);
49 mir_rewrite_index_src_single_swizzle(midgard_instruction
*ins
, unsigned old
, unsigned new, unsigned *swizzle
)
51 for (unsigned i
= 0; i
< ARRAY_SIZE(ins
->src
); ++i
) {
52 if (ins
->src
[i
] != old
) continue;
55 mir_compose_swizzle(ins
->swizzle
[i
], swizzle
, ins
->swizzle
[i
]);
60 mir_rewrite_index_src(compiler_context
*ctx
, unsigned old
, unsigned new)
62 mir_foreach_instr_global(ctx
, ins
) {
63 mir_rewrite_index_src_single(ins
, old
, new);
68 mir_rewrite_index_src_swizzle(compiler_context
*ctx
, unsigned old
, unsigned new, unsigned *swizzle
)
70 mir_foreach_instr_global(ctx
, ins
) {
71 mir_rewrite_index_src_single_swizzle(ins
, old
, new, swizzle
);
76 mir_rewrite_index_dst(compiler_context
*ctx
, unsigned old
, unsigned new)
78 mir_foreach_instr_global(ctx
, ins
) {
79 mir_rewrite_index_dst_single(ins
, old
, new);
84 mir_rewrite_index(compiler_context
*ctx
, unsigned old
, unsigned new)
86 mir_rewrite_index_src(ctx
, old
, new);
87 mir_rewrite_index_dst(ctx
, old
, new);
91 mir_use_count(compiler_context
*ctx
, unsigned value
)
93 unsigned used_count
= 0;
95 mir_foreach_instr_global(ctx
, ins
) {
96 if (mir_has_arg(ins
, value
))
103 /* Checks if a value is used only once (or totally dead), which is an important
104 * heuristic to figure out if certain optimizations are Worth It (TM) */
107 mir_single_use(compiler_context
*ctx
, unsigned value
)
109 /* We can replicate constants in places so who cares */
110 if (value
== SSA_FIXED_REGISTER(REGISTER_CONSTANT
))
113 return mir_use_count(ctx
, value
) <= 1;
117 mir_nontrivial_raw_mod(midgard_vector_alu_src src
, bool is_int
)
120 return src
.mod
== midgard_int_shift
;
126 mir_nontrivial_mod(midgard_vector_alu_src src
, bool is_int
, unsigned mask
, unsigned *swizzle
)
128 if (mir_nontrivial_raw_mod(src
, is_int
)) return true;
130 /* size-conversion */
131 if (src
.half
) return true;
133 for (unsigned c
= 0; c
< 16; ++c
) {
134 if (!(mask
& (1 << c
))) continue;
135 if (swizzle
[c
] != c
) return true;
142 mir_nontrivial_source2_mod(midgard_instruction
*ins
)
144 bool is_int
= midgard_is_integer_op(ins
->alu
.op
);
146 midgard_vector_alu_src src2
=
147 vector_alu_from_unsigned(ins
->alu
.src2
);
149 return mir_nontrivial_mod(src2
, is_int
, ins
->mask
, ins
->swizzle
[1]);
153 mir_nontrivial_source2_mod_simple(midgard_instruction
*ins
)
155 bool is_int
= midgard_is_integer_op(ins
->alu
.op
);
157 midgard_vector_alu_src src2
=
158 vector_alu_from_unsigned(ins
->alu
.src2
);
160 return mir_nontrivial_raw_mod(src2
, is_int
) || src2
.half
;
164 mir_nontrivial_outmod(midgard_instruction
*ins
)
166 bool is_int
= midgard_is_integer_op(ins
->alu
.op
);
167 unsigned mod
= ins
->alu
.outmod
;
173 /* Type conversion is a sort of outmod */
174 if (ins
->alu
.dest_override
!= midgard_dest_override_none
)
178 return mod
!= midgard_outmod_int_wrap
;
180 return mod
!= midgard_outmod_none
;
183 /* Checks if an index will be used as a special register -- basically, if we're
184 * used as the input to a non-ALU op */
187 mir_special_index(compiler_context
*ctx
, unsigned idx
)
189 mir_foreach_instr_global(ctx
, ins
) {
190 bool is_ldst
= ins
->type
== TAG_LOAD_STORE_4
;
191 bool is_tex
= ins
->type
== TAG_TEXTURE_4
;
192 bool is_writeout
= ins
->compact_branch
&& ins
->writeout
;
194 if (!(is_ldst
|| is_tex
|| is_writeout
))
197 if (mir_has_arg(ins
, idx
))
204 /* Is a node written before a given instruction? */
207 mir_is_written_before(compiler_context
*ctx
, midgard_instruction
*ins
, unsigned node
)
209 if (node
>= SSA_FIXED_MINIMUM
)
212 mir_foreach_instr_global(ctx
, q
) {
223 /* Grabs the type size. */
226 mir_typesize(midgard_instruction
*ins
)
228 if (ins
->compact_branch
)
229 return midgard_reg_mode_32
;
231 /* TODO: Type sizes for texture */
232 if (ins
->type
== TAG_TEXTURE_4
)
233 return midgard_reg_mode_32
;
235 if (ins
->type
== TAG_LOAD_STORE_4
)
236 return GET_LDST_SIZE(load_store_opcode_props
[ins
->load_store
.op
].props
);
238 if (ins
->type
== TAG_ALU_4
) {
239 midgard_reg_mode mode
= ins
->alu
.reg_mode
;
241 /* If we have an override, step down by half */
242 if (ins
->alu
.dest_override
!= midgard_dest_override_none
) {
243 assert(mode
> midgard_reg_mode_8
);
250 unreachable("Invalid instruction type");
253 /* Grabs the size of a source */
256 mir_srcsize(midgard_instruction
*ins
, unsigned i
)
258 /* TODO: 16-bit textures/ldst */
259 if (ins
->type
== TAG_TEXTURE_4
|| ins
->type
== TAG_LOAD_STORE_4
)
260 return midgard_reg_mode_32
;
262 /* TODO: 16-bit branches */
263 if (ins
->compact_branch
)
264 return midgard_reg_mode_32
;
267 /* TODO: 16-bit conditions, ffma */
269 return midgard_reg_mode_32
;
272 /* Default to type of the instruction */
274 midgard_reg_mode mode
= ins
->alu
.reg_mode
;
276 /* If we have a half modifier, step down by half */
278 if ((mir_get_alu_src(ins
, i
)).half
) {
279 assert(mode
> midgard_reg_mode_8
);
286 /* Converts per-component mask to a byte mask */
289 mir_to_bytemask(midgard_reg_mode mode
, unsigned mask
)
292 case midgard_reg_mode_8
:
295 case midgard_reg_mode_16
: {
297 ((mask
& 0x1) << (0 - 0)) |
298 ((mask
& 0x2) << (2 - 1)) |
299 ((mask
& 0x4) << (4 - 2)) |
300 ((mask
& 0x8) << (6 - 3)) |
301 ((mask
& 0x10) << (8 - 4)) |
302 ((mask
& 0x20) << (10 - 5)) |
303 ((mask
& 0x40) << (12 - 6)) |
304 ((mask
& 0x80) << (14 - 7));
306 return space
| (space
<< 1);
309 case midgard_reg_mode_32
: {
311 ((mask
& 0x1) << (0 - 0)) |
312 ((mask
& 0x2) << (4 - 1)) |
313 ((mask
& 0x4) << (8 - 2)) |
314 ((mask
& 0x8) << (12 - 3));
316 return space
| (space
<< 1) | (space
<< 2) | (space
<< 3);
319 case midgard_reg_mode_64
: {
320 unsigned A
= (mask
& 0x1) ? 0xFF : 0x00;
321 unsigned B
= (mask
& 0x2) ? 0xFF : 0x00;
326 unreachable("Invalid register mode");
330 /* ...and the inverse */
333 mir_bytes_for_mode(midgard_reg_mode mode
)
336 case midgard_reg_mode_8
:
338 case midgard_reg_mode_16
:
340 case midgard_reg_mode_32
:
342 case midgard_reg_mode_64
:
345 unreachable("Invalid register mode");
350 mir_from_bytemask(uint16_t bytemask
, midgard_reg_mode mode
)
353 unsigned count
= mir_bytes_for_mode(mode
);
355 for (unsigned c
= 0, d
= 0; c
< 16; c
+= count
, ++d
) {
356 bool a
= (bytemask
& (1 << c
)) != 0;
358 for (unsigned q
= c
; q
< count
; ++q
)
359 assert(((bytemask
& (1 << q
)) != 0) == a
);
367 /* Rounds down a bytemask to fit a given component count. Iterate each
368 * component, and check if all bytes in the component are masked on */
371 mir_round_bytemask_down(uint16_t mask
, midgard_reg_mode mode
)
373 unsigned bytes
= mir_bytes_for_mode(mode
);
374 unsigned maxmask
= mask_of(bytes
);
375 unsigned channels
= 16 / bytes
;
377 for (unsigned c
= 0; c
< channels
; ++c
) {
378 /* Get bytes in component */
379 unsigned submask
= (mask
>> c
* channels
) & maxmask
;
381 if (submask
!= maxmask
)
382 mask
&= ~(maxmask
<< (c
* channels
));
388 /* Grabs the per-byte mask of an instruction (as opposed to per-component) */
391 mir_bytemask(midgard_instruction
*ins
)
393 return mir_to_bytemask(mir_typesize(ins
), ins
->mask
);
397 mir_set_bytemask(midgard_instruction
*ins
, uint16_t bytemask
)
399 ins
->mask
= mir_from_bytemask(bytemask
, mir_typesize(ins
));
/* Creates a mask of the components of a node read by an instruction, by
 * analyzing the swizzle with respect to the instruction's mask. E.g.:
 *
 *  fadd r0.xz, r1.yyyy, r2.zwyx
 *
 * will return a mask of Z/Y for r2
 */
411 mir_bytemask_of_read_components_single(unsigned *swizzle
, unsigned inmask
, midgard_reg_mode mode
)
415 for (unsigned c
= 0; c
< MIR_VEC_COMPONENTS
; ++c
) {
416 if (!(inmask
& (1 << c
))) continue;
417 cmask
|= (1 << swizzle
[c
]);
420 return mir_to_bytemask(mode
, cmask
);
424 mir_bytemask_of_read_components(midgard_instruction
*ins
, unsigned node
)
431 mir_foreach_src(ins
, i
) {
432 if (ins
->src
[i
] != node
) continue;
434 /* Branch writeout uses all components */
435 if (ins
->compact_branch
&& ins
->writeout
&& (i
== 0))
438 /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
439 if (ins
->compact_branch
&& !ins
->prepacked_branch
&& ins
->branch
.conditional
&& (i
== 0))
442 /* ALU ops act componentwise so we need to pay attention to
443 * their mask. Texture/ldst does not so we don't clamp source
444 * readmasks based on the writemask */
445 unsigned qmask
= (ins
->type
== TAG_ALU_4
) ? ins
->mask
: ~0;
447 /* Handle dot products and things */
448 if (ins
->type
== TAG_ALU_4
&& !ins
->compact_branch
) {
449 unsigned props
= alu_opcode_props
[ins
->alu
.op
].props
;
451 unsigned channel_override
= GET_CHANNEL_COUNT(props
);
453 if (channel_override
)
454 qmask
= mask_of(channel_override
);
457 mask
|= mir_bytemask_of_read_components_single(ins
->swizzle
[i
], qmask
, mir_srcsize(ins
, i
));
464 mir_ubo_shift(midgard_load_store_op op
)
467 case midgard_op_ld_ubo_char
:
469 case midgard_op_ld_ubo_char2
:
471 case midgard_op_ld_ubo_char4
:
473 case midgard_op_ld_ubo_short4
:
475 case midgard_op_ld_ubo_int4
:
478 unreachable("Invalid op");
/* Register allocation occurs after instruction scheduling, which is fine until
 * we start needing to spill registers and therefore insert instructions into
 * an already-scheduled program. We don't have to be terribly efficient about
 * this, since spilling is already slow. So, semantically, we just need to
 * insert the instruction into a new bundle before/after the bundle of the
 * instruction in question. */
489 static midgard_bundle
490 mir_bundle_for_op(compiler_context
*ctx
, midgard_instruction ins
)
492 midgard_instruction
*u
= mir_upload_ins(ctx
, ins
);
494 midgard_bundle bundle
= {
496 .instruction_count
= 1,
497 .instructions
= { u
},
500 if (bundle
.tag
== TAG_ALU_4
) {
501 assert(OP_IS_MOVE(u
->alu
.op
));
504 size_t bytes_emitted
= sizeof(uint32_t) + sizeof(midgard_reg_info
) + sizeof(midgard_vector_alu
);
505 bundle
.padding
= ~(bytes_emitted
- 1) & 0xF;
506 bundle
.control
= ins
.type
| u
->unit
;
513 mir_bundle_idx_for_ins(midgard_instruction
*tag
, midgard_block
*block
)
515 midgard_bundle
*bundles
=
516 (midgard_bundle
*) block
->bundles
.data
;
518 size_t count
= (block
->bundles
.size
/ sizeof(midgard_bundle
));
520 for (unsigned i
= 0; i
< count
; ++i
) {
521 for (unsigned j
= 0; j
< bundles
[i
].instruction_count
; ++j
) {
522 if (bundles
[i
].instructions
[j
] == tag
)
527 mir_print_instruction(tag
);
528 unreachable("Instruction not scheduled in block");
532 mir_insert_instruction_before_scheduled(
533 compiler_context
*ctx
,
534 midgard_block
*block
,
535 midgard_instruction
*tag
,
536 midgard_instruction ins
)
538 unsigned before
= mir_bundle_idx_for_ins(tag
, block
);
539 size_t count
= util_dynarray_num_elements(&block
->bundles
, midgard_bundle
);
540 UNUSED
void *unused
= util_dynarray_grow(&block
->bundles
, midgard_bundle
, 1);
542 midgard_bundle
*bundles
= (midgard_bundle
*) block
->bundles
.data
;
543 memmove(bundles
+ before
+ 1, bundles
+ before
, (count
- before
) * sizeof(midgard_bundle
));
544 midgard_bundle
*before_bundle
= bundles
+ before
+ 1;
546 midgard_bundle
new = mir_bundle_for_op(ctx
, ins
);
547 memcpy(bundles
+ before
, &new, sizeof(new));
549 list_addtail(&new.instructions
[0]->link
, &before_bundle
->instructions
[0]->link
);
553 mir_insert_instruction_after_scheduled(
554 compiler_context
*ctx
,
555 midgard_block
*block
,
556 midgard_instruction
*tag
,
557 midgard_instruction ins
)
559 /* We need to grow the bundles array to add our new bundle */
560 size_t count
= util_dynarray_num_elements(&block
->bundles
, midgard_bundle
);
561 UNUSED
void *unused
= util_dynarray_grow(&block
->bundles
, midgard_bundle
, 1);
563 /* Find the bundle that we want to insert after */
564 unsigned after
= mir_bundle_idx_for_ins(tag
, block
);
566 /* All the bundles after that one, we move ahead by one */
567 midgard_bundle
*bundles
= (midgard_bundle
*) block
->bundles
.data
;
568 memmove(bundles
+ after
+ 2, bundles
+ after
+ 1, (count
- after
- 1) * sizeof(midgard_bundle
));
569 midgard_bundle
*after_bundle
= bundles
+ after
;
571 midgard_bundle
new = mir_bundle_for_op(ctx
, ins
);
572 memcpy(bundles
+ after
+ 1, &new, sizeof(new));
573 list_add(&new.instructions
[0]->link
, &after_bundle
->instructions
[after_bundle
->instruction_count
- 1]->link
);
576 /* Flip the first-two arguments of a (binary) op. Currently ALU
577 * only, no known uses for ldst/tex */
580 mir_flip(midgard_instruction
*ins
)
582 unsigned temp
= ins
->src
[0];
583 ins
->src
[0] = ins
->src
[1];
586 assert(ins
->type
== TAG_ALU_4
);
588 temp
= ins
->alu
.src1
;
589 ins
->alu
.src1
= ins
->alu
.src2
;
590 ins
->alu
.src2
= temp
;
592 unsigned temp_swizzle
[16];
593 memcpy(temp_swizzle
, ins
->swizzle
[0], sizeof(ins
->swizzle
[0]));
594 memcpy(ins
->swizzle
[0], ins
->swizzle
[1], sizeof(ins
->swizzle
[0]));
595 memcpy(ins
->swizzle
[1], temp_swizzle
, sizeof(ins
->swizzle
[0]));
598 /* Before squashing, calculate ctx->temp_count just by observing the MIR */
601 mir_compute_temp_count(compiler_context
*ctx
)
606 unsigned max_dest
= 0;
608 mir_foreach_instr_global(ctx
, ins
) {
609 if (ins
->dest
< SSA_FIXED_MINIMUM
)
610 max_dest
= MAX2(max_dest
, ins
->dest
+ 1);
613 ctx
->temp_count
= max_dest
;